Skip to content

Commit 21f6782

Browse files
authored
feat: Add stats to ApifyRequestQueueClient (#574)
### Description
- Add a specialized `ApifyRequestQueueMetadata` that contains the Apify-specific `RequestQueueStats`.
- This enhances the metadata of `ApifyRequestQueueClient`.

### Issues
- Closes: apify/crawlee-python#1344
1 parent 7bd51c7 commit 21f6782

File tree

3 files changed

+60
-5
lines changed

3 files changed

+60
-5
lines changed

src/apify/storage_clients/_apify/_models.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from pydantic import BaseModel, ConfigDict, Field
77

8-
from crawlee.storage_clients.models import KeyValueStoreMetadata
8+
from crawlee.storage_clients.models import KeyValueStoreMetadata, RequestQueueMetadata
99

1010
from apify import Request
1111
from apify._utils import docs_group
@@ -105,3 +105,27 @@ class CachedRequest(BaseModel):
105105

106106
lock_expires_at: datetime | None = None
107107
"""The expiration time of the lock on the request."""
108+
109+
110+
class RequestQueueStats(BaseModel):
    """Usage statistics of a request queue.

    Every field defaults to 0 and may be populated either through its camelCase alias or through
    its snake_case field name.
    """

    # Accept both the camelCase aliases and the Python field names on input.
    model_config = ConfigDict(populate_by_name=True)

    delete_count: Annotated[int, Field(alias='deleteCount', default=0)]
    """The number of request queue deletes."""

    head_item_read_count: Annotated[int, Field(alias='headItemReadCount', default=0)]
    """The number of request queue head reads."""

    read_count: Annotated[int, Field(alias='readCount', default=0)]
    """The number of request queue reads."""

    storage_bytes: Annotated[int, Field(alias='storageBytes', default=0)]
    """Storage size in bytes."""

    write_count: Annotated[int, Field(alias='writeCount', default=0)]
    """The number of request queue writes."""
127+
128+
129+
class ApifyRequestQueueMetadata(RequestQueueMetadata):
    stats: Annotated[
        RequestQueueStats,
        Field(
            alias='stats',
            default_factory=RequestQueueStats,
        ),
    ]
    """Request queue statistics; a default `RequestQueueStats` instance is created when none is given."""

src/apify/storage_clients/_apify/_request_queue_client.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,13 @@
1818
from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest, RequestQueueMetadata
1919
from crawlee.storages import RequestQueue
2020

21-
from ._models import CachedRequest, ProlongRequestLockResponse, RequestQueueHead
21+
from ._models import (
22+
ApifyRequestQueueMetadata,
23+
CachedRequest,
24+
ProlongRequestLockResponse,
25+
RequestQueueHead,
26+
RequestQueueStats,
27+
)
2228
from ._utils import AliasResolver
2329
from apify import Request
2430

@@ -108,7 +114,7 @@ async def _get_metadata_estimate(self) -> RequestQueueMetadata:
108114
return self._metadata
109115

110116
@override
111-
async def get_metadata(self) -> RequestQueueMetadata:
117+
async def get_metadata(self) -> ApifyRequestQueueMetadata:
112118
"""Get metadata about the request queue.
113119
114120
Returns:
@@ -119,7 +125,7 @@ async def get_metadata(self) -> RequestQueueMetadata:
119125
if response is None:
120126
raise ValueError('Failed to fetch request queue metadata from the API.')
121127
# Enhance the API response with local estimates (the API can lag by a few seconds, while the local estimates do not).
122-
return RequestQueueMetadata(
128+
return ApifyRequestQueueMetadata(
123129
id=response['id'],
124130
name=response['name'],
125131
total_request_count=max(response['totalRequestCount'], self._metadata.total_request_count),
@@ -129,6 +135,7 @@ async def get_metadata(self) -> RequestQueueMetadata:
129135
modified_at=max(response['modifiedAt'], self._metadata.modified_at),
130136
accessed_at=max(response['accessedAt'], self._metadata.accessed_at),
131137
had_multiple_clients=response['hadMultipleClients'] or self._metadata.had_multiple_clients,
138+
stats=RequestQueueStats.model_validate(response['stats'], by_alias=True),
132139
)
133140

134141
@classmethod

tests/integration/test_request_queue.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from __future__ import annotations
22

33
import asyncio
4-
from typing import TYPE_CHECKING
4+
from typing import TYPE_CHECKING, cast
55

66
import pytest
77

@@ -14,6 +14,7 @@
1414
from crawlee.storages import RequestQueue
1515

1616
from .conftest import MakeActorFunction, RunActorFunction
17+
from apify.storage_clients._apify._models import ApifyRequestQueueMetadata
1718

1819

1920
async def test_add_and_fetch_requests(
@@ -1278,3 +1279,26 @@ async def test_request_queue_not_had_multiple_clients(
12781279
api_response = await api_client.get()
12791280
assert api_response
12801281
assert api_response['hadMultipleClients'] is False
1282+
1283+
1284+
async def test_request_queue_has_stats(request_queue_force_cloud: RequestQueue) -> None:
    """Check that the metadata of an Apify-based request queue carries read/write stats."""
    expected_writes = 3
    expected_reads = 2

    # Add the requests in a single call.
    requests_to_add = [Request.from_url(f'http://example.com/{i}') for i in range(expected_writes)]
    await request_queue_force_cloud.add_requests(requests_to_add)

    # Read one of the added requests repeatedly by its unique key.
    unique_key = Request.from_url('http://example.com/1').unique_key
    for _ in range(expected_reads):
        await request_queue_force_cloud.get_request(unique_key)

    # Give the stats time to settle before fetching the metadata.
    await asyncio.sleep(10)

    metadata = await request_queue_force_cloud.get_metadata()
    assert hasattr(metadata, 'stats')

    # The string cast only narrows the type for static checkers; it does nothing at runtime.
    stats = cast('ApifyRequestQueueMetadata', metadata).stats
    assert stats.read_count == expected_reads
    assert stats.write_count == expected_writes

0 commit comments

Comments
 (0)