Skip to content

Commit bd3bdd4

Browse files
authored
fix: fix page_options for PlaywrightBrowserPlugin (#796)
### Description - Fix `page_options` for `PlaywrightBrowserPlugin`. ### Issues - Closes: #755, #751 ### Testing - Add a test that checks that `page_options` works in `PlaywrightBrowserPlugin`. ### Checklist - [ ] CI passed
1 parent bb848d3 commit bd3bdd4

File tree

4 files changed

+24
-17
lines changed

4 files changed

+24
-17
lines changed

src/crawlee/browsers/_browser_pool.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,9 +112,9 @@ def with_default_plugin(
112112
browser_options: Keyword arguments to pass to the browser launch method. These options are provided
113113
directly to Playwright's `browser_type.launch` method. For more details, refer to the Playwright
114114
documentation: https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch.
115-
page_options: Keyword arguments to pass to the new page method. These options are provided directly to
116-
Playwright's `browser_context.new_page` method. For more details, refer to the Playwright documentation:
117-
https://playwright.dev/python/docs/api/class-browsercontext#browser-context-new-page.
115+
page_options: Keyword arguments for the page object, which are set at the Playwright context level.
116+
These options are provided directly to Playwright's `browser.new_context` method. For more details,
117+
refer to the Playwright documentation: https://playwright.dev/python/docs/api/class-browser#browser-new-context.
118118
headless: Whether to run the browser in headless mode.
119119
kwargs: Additional arguments for default constructor.
120120
"""

src/crawlee/browsers/_playwright_browser_controller.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -98,13 +98,12 @@ async def new_page(
9898
proxy_info: ProxyInfo | None = None,
9999
) -> Page:
100100
if not self._browser_context:
101-
self._browser_context = await self._create_browser_context(proxy_info)
101+
self._browser_context = await self._create_browser_context(page_options, proxy_info)
102102

103103
if not self.has_free_capacity:
104104
raise ValueError('Cannot open more pages in this browser.')
105105

106-
page_options = dict(page_options) if page_options else {}
107-
page = await self._browser_context.new_page(**page_options)
106+
page = await self._browser_context.new_page()
108107

109108
# Handle page close event
110109
page.on(event='close', f=self._on_page_close)
@@ -130,17 +129,20 @@ def _on_page_close(self, page: Page) -> None:
130129
"""Handle actions after a page is closed."""
131130
self._pages.remove(page)
132131

133-
async def _create_browser_context(self, proxy_info: ProxyInfo | None = None) -> BrowserContext:
132+
async def _create_browser_context(
133+
self, page_options: Mapping[str, Any] | None = None, proxy_info: ProxyInfo | None = None
134+
) -> BrowserContext:
134135
"""Create a new browser context with the specified proxy settings."""
135136
if self._header_generator:
136137
common_headers = self._header_generator.get_common_headers()
137138
sec_ch_ua_headers = self._header_generator.get_sec_ch_ua_headers(browser_type=self.browser_type)
138139
user_agent_header = self._header_generator.get_user_agent_header(browser_type=self.browser_type)
139140
extra_http_headers = dict(common_headers | sec_ch_ua_headers | user_agent_header)
140-
user_agent = user_agent_header.get('User-Agent')
141141
else:
142142
extra_http_headers = None
143-
user_agent = None
143+
144+
page_options = dict(page_options) if page_options else {}
145+
page_options['extra_http_headers'] = page_options.get('extra_http_headers', extra_http_headers)
144146

145147
proxy = (
146148
ProxySettings(
@@ -152,8 +154,4 @@ async def _create_browser_context(self, proxy_info: ProxyInfo | None = None) ->
152154
else None
153155
)
154156

155-
return await self._browser.new_context(
156-
user_agent=user_agent,
157-
extra_http_headers=extra_http_headers,
158-
proxy=proxy,
159-
)
157+
return await self._browser.new_context(proxy=proxy, **page_options)

src/crawlee/browsers/_playwright_browser_plugin.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,9 @@ def __init__(
4646
browser_options: Keyword arguments to pass to the browser launch method. These options are provided
4747
directly to Playwright's `browser_type.launch` method. For more details, refer to the Playwright
4848
documentation: https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch.
49-
page_options: Keyword arguments to pass to the new page method. These options are provided directly to
50-
Playwright's `browser_context.new_page` method. For more details, refer to the Playwright documentation:
51-
https://playwright.dev/python/docs/api/class-browsercontext#browser-context-new-page.
49+
page_options: Keyword arguments for the page object, which are set at the Playwright context level.
50+
These options are provided directly to Playwright's `browser.new_context` method. For more details,
51+
refer to the Playwright documentation: https://playwright.dev/python/docs/api/class-browser#browser-new-context.
5252
max_open_pages_per_browser: The maximum number of pages that can be opened in a single browser instance.
5353
Once reached, a new browser instance will be launched to handle the excess.
5454
"""

tests/unit/browsers/test_browser_pool.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,3 +151,12 @@ async def test_methods_raise_error_when_not_active() -> None:
151151

152152
async with browser_pool:
153153
assert browser_pool.active is True
154+
155+
156+
async def test_with_plugin_contains_page_options(httpbin: URL) -> None:
157+
plugin = PlaywrightBrowserPlugin(page_options={'user_agent': 'My Best User-Agent'})
158+
async with BrowserPool(plugins=[plugin]) as browser_pool:
159+
test_page = await browser_pool.new_page()
160+
await test_page.page.goto(str(httpbin / 'user-agent'))
161+
assert 'My Best User-Agent' in await test_page.page.content()
162+
await test_page.page.close()

0 commit comments

Comments
 (0)