22
22
from crawlee .fingerprint_suite ._browserforge_adapter import get_available_header_values
23
23
from crawlee .fingerprint_suite ._consts import BROWSER_TYPE_HEADER_KEYWORD
24
24
from crawlee .proxy_configuration import ProxyConfiguration
25
- from crawlee .sessions import SessionPool
25
+ from crawlee .sessions import Session , SessionPool
26
26
from crawlee .statistics import Statistics
27
27
from crawlee .statistics ._error_snapshotter import ErrorSnapshotter
28
28
from tests .unit .server_endpoints import GENERIC_RESPONSE , HELLO_WORLD
@@ -304,6 +304,7 @@ async def some_hook(context: PlaywrightPreNavCrawlingContext) -> None:
304
304
)
305
305
async def test_isolation_cookies (* , use_incognito_pages : bool , server_url : URL ) -> None :
306
306
sessions_ids : list [str ] = []
307
+ sessions : dict [str , Session ] = {}
307
308
sessions_cookies : dict [str , dict [str , str ]] = {}
308
309
response_cookies : dict [str , dict [str , str ]] = {}
309
310
@@ -319,13 +320,11 @@ async def handler(context: PlaywrightCrawlingContext) -> None:
319
320
return
320
321
321
322
sessions_ids .append (context .session .id )
323
+ sessions [context .session .id ] = context .session
322
324
323
325
if context .request .unique_key not in {'1' , '2' }:
324
326
return
325
327
326
- sessions_cookies [context .session .id ] = {
327
- cookie ['name' ]: cookie ['value' ] for cookie in context .session .cookies .get_cookies_as_dicts ()
328
- }
329
328
response_data = json .loads (await context .response .text ())
330
329
response_cookies [context .session .id ] = response_data .get ('cookies' )
331
330
@@ -343,11 +342,20 @@ async def handler(context: PlaywrightCrawlingContext) -> None:
343
342
]
344
343
)
345
344
346
- assert len (sessions_cookies ) == 2
347
345
assert len (response_cookies ) == 2
346
+ assert len (sessions ) == 2
348
347
349
348
assert sessions_ids [0 ] == sessions_ids [1 ]
350
349
350
+ sessions_cookies = {
351
+ sessions_id : {
352
+ cookie ['name' ]: cookie ['value' ] for cookie in sessions [sessions_id ].cookies .get_cookies_as_dicts ()
353
+ }
354
+ for sessions_id in sessions_ids
355
+ }
356
+
357
+ assert len (sessions_cookies ) == 2
358
+
351
359
cookie_session_id = sessions_ids [0 ]
352
360
clean_session_id = sessions_ids [2 ]
353
361
@@ -372,6 +380,33 @@ async def handler(context: PlaywrightCrawlingContext) -> None:
372
380
assert sessions_cookies [clean_session_id ] == response_cookies [clean_session_id ] == {'a' : '1' }
373
381
374
382
383
async def test_save_cookies_after_handler_processing(server_url: URL) -> None:
    """Verify that cookies set in the browser during handling end up stored on the session.

    A single-session pool is used so the session the handler saw is the same
    one retrieved from the pool afterwards.
    """
    async with SessionPool(max_pool_size=1) as session_pool:
        crawler = PlaywrightCrawler(session_pool=session_pool)

        handled_session_ids: list[str] = []

        @crawler.router.default_handler
        async def request_handler(context: PlaywrightCrawlingContext) -> None:
            # Simulate cookies installed from an external source in the browser.
            external_cookie = {'name': 'check', 'value': 'test', 'url': str(server_url)}
            await context.page.context.add_cookies([external_cookie])

            if context.session:
                handled_session_ids.append(context.session.id)

        await crawler.run([str(server_url)])

        # Exactly one request was handled, with a session attached.
        assert len(handled_session_ids) == 1

        restored_session = await session_pool.get_session()

        # The pool holds one session, so it must be the one used by the handler.
        assert restored_session.id == handled_session_ids[0]

        stored_cookies = {
            cookie['name']: cookie['value'] for cookie in restored_session.cookies.get_cookies_as_dicts()
        }

        # The externally-added browser cookie was persisted onto the session.
        assert stored_cookies == {'check': 'test'}
409
+
375
410
async def test_custom_fingerprint_uses_generator_options (server_url : URL ) -> None :
376
411
min_width = 300
377
412
max_width = 600
0 commit comments