2
2
3
3
import asyncio
4
4
import logging
5
+ import warnings
5
6
from functools import partial
6
7
from typing import TYPE_CHECKING , Any , Callable , Generic , Literal , Union
7
8
34
35
from collections .abc import AsyncGenerator , Awaitable , Mapping , Sequence
35
36
from pathlib import Path
36
37
37
- from playwright .async_api import Page
38
+ from playwright .async_api import Page , Route
39
+ from playwright .async_api import Request as PlaywrightRequest
38
40
from typing_extensions import Unpack
39
41
40
42
from crawlee import RequestTransformAction
41
- from crawlee ._types import BasicCrawlingContext , EnqueueLinksFunction , EnqueueLinksKwargs , ExtractLinksFunction
43
+ from crawlee ._types import (
44
+ BasicCrawlingContext ,
45
+ EnqueueLinksFunction ,
46
+ EnqueueLinksKwargs ,
47
+ ExtractLinksFunction ,
48
+ HttpHeaders ,
49
+ HttpMethod ,
50
+ HttpPayload ,
51
+ )
42
52
from crawlee .browsers ._types import BrowserType
43
53
44
54
@@ -210,6 +220,27 @@ async def _open_page(
210
220
await hook (pre_navigation_context )
211
221
yield pre_navigation_context
212
222
223
def _prepare_request_interceptor(
    self,
    method: HttpMethod = 'GET',
    headers: HttpHeaders | dict[str, str] | None = None,
    payload: HttpPayload | None = None,
) -> Callable[[Route, PlaywrightRequest], Awaitable[None]]:
    """Create a request interceptor for Playwright to support non-GET methods with custom parameters.

    The interceptor modifies requests by adding custom headers and payload before they are sent.

    Args:
        method: HTTP method to use for the request.
        headers: Custom HTTP headers to send with the request.
        payload: Request body data for POST/PUT requests.

    Returns:
        An async route handler that continues the intercepted route with the given method,
        headers and payload. It takes the route and the intercepted request; the request
        argument is ignored.
    """
    # Convert the headers to a plain dict once, instead of on every intercepted request.
    # Empty or missing headers are passed as `None`, i.e. no header override is supplied.
    header_overrides = dict(headers) if headers else None

    async def route_handler(route: Route, _: PlaywrightRequest) -> None:
        await route.continue_(method=method, headers=header_overrides, post_data=payload)

    return route_handler
243
+
213
244
async def _navigate (
214
245
self ,
215
246
context : PlaywrightPreNavCrawlingContext ,
@@ -235,6 +266,24 @@ async def _navigate(
235
266
if context .request .headers :
236
267
await context .page .set_extra_http_headers (context .request .headers .model_dump ())
237
268
# Navigate to the URL and get response.
269
+ if context .request .method != 'GET' :
270
+ # Call the notification only once
271
+ warnings .warn (
272
+ 'Using other request methods than GET or adding payloads has a high impact on performance'
273
+ ' in recent versions of Playwright. Use only when necessary.' ,
274
+ category = UserWarning ,
275
+ stacklevel = 2 ,
276
+ )
277
+
278
+ route_handler = self ._prepare_request_interceptor (
279
+ method = context .request .method ,
280
+ headers = context .request .headers ,
281
+ payload = context .request .payload ,
282
+ )
283
+
284
+ # Set route_handler only for current request
285
+ await context .page .route (context .request .url , route_handler )
286
+
238
287
response = await context .page .goto (context .request .url )
239
288
240
289
if response is None :
0 commit comments