    AdaptiveContextError,
)
from crawlee.statistics import Statistics
+from crawlee.storages import KeyValueStore

if TYPE_CHECKING:
-    from collections.abc import Iterator
+    from collections.abc import AsyncGenerator, Iterator

    import respx

    from crawlee.browsers._browser_plugin import BrowserPlugin
    from crawlee.browsers._types import CrawleePage
    from crawlee.proxy_configuration import ProxyInfo

+
_H1_TEXT = 'Static'
_H2_TEXT = 'Only in browser'
_H3_CHANGED_TEXT = 'Changed by JS'
@@ -74,6 +76,13 @@ def test_urls(respx_mock: respx.MockRouter) -> list[str]:
    return urls


+@pytest.fixture
+async def key_value_store() -> AsyncGenerator[KeyValueStore, None]:
+    kvs = await KeyValueStore.open()
+    yield kvs
+    await kvs.drop()
+
+
class _StaticRedirectBrowserPool(BrowserPool):
    """BrowserPool for redirecting browser requests to static content."""

@@ -384,7 +393,9 @@ async def request_handler(context: AdaptivePlaywrightCrawlingContext) -> None:
    mocked_store_result.assert_called_once_with(requests[0], expected_result_rendering_type)


-async def test_adaptive_crawling_result_use_state_isolation(test_urls: list[str]) -> None:
+async def test_adaptive_crawling_result_use_state_isolation(
+    key_value_store: KeyValueStore, test_urls: list[str]
+) -> None:
    """Tests that global state accessed through `use_state` is changed only by one sub crawler.

    Enforced rendering type detection to run both sub crawlers."""
@@ -393,8 +404,7 @@ async def test_adaptive_crawling_result_use_state_isolation(test_urls: list[str]
        rendering_type_predictor=static_only_predictor_enforce_detection,
        playwright_crawler_specific_kwargs={'browser_pool': _StaticRedirectBrowserPool.with_default_plugin()},
    )
-    store = await crawler.get_key_value_store()
-    await store.set_value(BasicCrawler._CRAWLEE_STATE_KEY, {'counter': 0})
+    await key_value_store.set_value(BasicCrawler._CRAWLEE_STATE_KEY, {'counter': 0})
    request_handler_calls = 0

    @crawler.router.default_handler
@@ -406,12 +416,12 @@ async def request_handler(context: AdaptivePlaywrightCrawlingContext) -> None:

    await crawler.run(test_urls[:1])

-    await store.persist_autosaved_values()
+    await key_value_store.persist_autosaved_values()

    # Request handler was called twice
    assert request_handler_calls == 2
    # Increment of global state happened only once
-    assert (await store.get_value(BasicCrawler._CRAWLEE_STATE_KEY))['counter'] == 1
+    assert (await key_value_store.get_value(BasicCrawler._CRAWLEE_STATE_KEY))['counter'] == 1


async def test_adaptive_crawling_statistics(test_urls: list[str]) -> None:
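
Aside (not part of the changeset): the `key_value_store` fixture added above uses pytest's setup/teardown-via-yield pattern, which is what lets the `use_state` isolation test drop the shared store after it runs. Below is a minimal self-contained sketch of that pattern with a hypothetical consuming test; it assumes an async-capable pytest setup such as pytest-asyncio, and uses only `KeyValueStore` calls that appear in the diff itself.

# Sketch only, not part of the diff. Assumes pytest-asyncio (or a similar
# plugin) so that async fixtures and async tests are supported.
from collections.abc import AsyncGenerator

import pytest

from crawlee.storages import KeyValueStore


@pytest.fixture
async def key_value_store() -> AsyncGenerator[KeyValueStore, None]:
    kvs = await KeyValueStore.open()  # setup: runs before the test body
    yield kvs                         # the test receives the opened store
    await kvs.drop()                  # teardown: runs after the test finishes


async def test_counter_starts_fresh(key_value_store: KeyValueStore) -> None:
    # Hypothetical test: because the fixture drops the store in teardown,
    # every test starts from an empty store rather than inheriting state.
    await key_value_store.set_value('counter', 0)
    assert await key_value_store.get_value('counter') == 0

Dropping the store in teardown is what keeps the isolation test above deterministic: without it, a `counter` value left behind by an earlier test could leak into the shared default store and skew the final assertion.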