Skip to content

Commit 84b61a3

Browse files
authored
refactor!: Rename PwPreNavContext to PwPreNavCrawlingContext (#827)
- Rename `PlaywrightPreNavigationContext` to `PlaywrightPreNavigationCrawlingContext`.
- Of course, this is really long. Do we want to make it shorter, e.g. `PlaywrightPreNavCrawlingContext`?
- Or shorten all the crawling contexts, e.g. use just `CrawlContext` (`PlaywrightPreNavCrawlContext`)?
- Opinions?
1 parent 8525dc8 commit 84b61a3

File tree

6 files changed

+26
-15
lines changed

6 files changed

+26
-15
lines changed

docs/examples/code/playwright_crawler.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
import asyncio
22

3-
from crawlee.playwright_crawler import PlaywrightCrawler, PlaywrightCrawlingContext, PlaywrightPreNavigationContext
3+
from crawlee.playwright_crawler import (
4+
PlaywrightCrawler,
5+
PlaywrightCrawlingContext,
6+
PlaywrightPreNavCrawlingContext,
7+
)
48

59

610
async def main() -> None:
@@ -52,7 +56,7 @@ async def request_handler(context: PlaywrightCrawlingContext) -> None:
5256
# browser page among other things. In this example, we log the URL being
5357
# navigated to.
5458
@crawler.pre_navigation_hook
55-
async def log_navigation_url(context: PlaywrightPreNavigationContext) -> None:
59+
async def log_navigation_url(context: PlaywrightPreNavCrawlingContext) -> None:
5660
context.log.info(f'Navigating to {context.request.url} ...')
5761

5862
# Run the crawler with the initial list of URLs.

docs/upgrading/upgrading_to_v0x.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ This section summarizes the breaking changes between v0.4.x and v0.5.0.
2626

2727
- Removed properties `json_` and `order_no`.
2828

29+
### PlaywrightCrawler
30+
31+
- The `PlaywrightPreNavigationContext` was renamed to `PlaywrightPreNavCrawlingContext`.
32+
2933
## Upgrading to v0.4
3034

3135
This section summarizes the breaking changes between v0.3.x and v0.4.0.
src/crawlee/playwright_crawler/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
try:
22
from ._playwright_crawler import PlaywrightCrawler
33
from ._playwright_crawling_context import PlaywrightCrawlingContext
4-
from ._playwright_pre_navigation_context import PlaywrightPreNavigationContext
4+
from ._playwright_pre_nav_crawling_context import PlaywrightPreNavCrawlingContext
55
except ImportError as exc:
66
raise ImportError(
77
"To import anything from this subpackage, you need to install the 'playwright' extra."
88
"For example, if you use pip, run `pip install 'crawlee[playwright]'`.",
99
) from exc
1010

11-
__all__ = ['PlaywrightCrawler', 'PlaywrightCrawlingContext', 'PlaywrightPreNavigationContext']
11+
__all__ = ['PlaywrightCrawler', 'PlaywrightCrawlingContext', 'PlaywrightPreNavCrawlingContext']

src/crawlee/playwright_crawler/_playwright_crawler.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from crawlee.browsers import BrowserPool
1515
from crawlee.errors import SessionError
1616
from crawlee.playwright_crawler._playwright_crawling_context import PlaywrightCrawlingContext
17-
from crawlee.playwright_crawler._playwright_pre_navigation_context import PlaywrightPreNavigationContext
17+
from crawlee.playwright_crawler._playwright_pre_nav_crawling_context import PlaywrightPreNavCrawlingContext
1818
from crawlee.playwright_crawler._utils import infinite_scroll
1919

2020
if TYPE_CHECKING:
@@ -119,18 +119,21 @@ def __init__(
119119
)
120120
kwargs['_additional_context_managers'] = [self._browser_pool]
121121
kwargs.setdefault('_logger', logging.getLogger(__name__))
122-
self._pre_navigation_hooks: list[Callable[[PlaywrightPreNavigationContext], Awaitable[None]]] = []
122+
self._pre_navigation_hooks: list[Callable[[PlaywrightPreNavCrawlingContext], Awaitable[None]]] = []
123123

124124
super().__init__(**kwargs)
125125

126-
async def _open_page(self, context: BasicCrawlingContext) -> AsyncGenerator[PlaywrightPreNavigationContext, None]:
126+
async def _open_page(
127+
self,
128+
context: BasicCrawlingContext,
129+
) -> AsyncGenerator[PlaywrightPreNavCrawlingContext, None]:
127130
if self._browser_pool is None:
128131
raise ValueError('Browser pool is not initialized.')
129132

130133
# Create a new browser page
131134
crawlee_page = await self._browser_pool.new_page(proxy_info=context.proxy_info)
132135

133-
pre_navigation_context = PlaywrightPreNavigationContext(
136+
pre_navigation_context = PlaywrightPreNavCrawlingContext(
134137
request=context.request,
135138
session=context.session,
136139
add_requests=context.add_requests,
@@ -150,7 +153,7 @@ async def _open_page(self, context: BasicCrawlingContext) -> AsyncGenerator[Play
150153

151154
async def _navigate(
152155
self,
153-
context: PlaywrightPreNavigationContext,
156+
context: PlaywrightPreNavCrawlingContext,
154157
) -> AsyncGenerator[PlaywrightCrawlingContext, None]:
155158
"""Executes an HTTP request utilizing the `BrowserPool` and the `Playwright` library.
156159
@@ -271,7 +274,7 @@ async def _handle_blocked_request(
271274

272275
yield context
273276

274-
def pre_navigation_hook(self, hook: Callable[[PlaywrightPreNavigationContext], Awaitable[None]]) -> None:
277+
def pre_navigation_hook(self, hook: Callable[[PlaywrightPreNavCrawlingContext], Awaitable[None]]) -> None:
275278
"""Register a hook to be called before each navigation.
276279
277280
Args:

src/crawlee/playwright_crawler/_playwright_crawling_context.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from typing import TYPE_CHECKING, Callable
55

66
from crawlee._utils.docs import docs_group
7-
from crawlee.playwright_crawler._playwright_pre_navigation_context import PlaywrightPreNavigationContext
7+
from crawlee.playwright_crawler._playwright_pre_nav_crawling_context import PlaywrightPreNavCrawlingContext
88

99
if TYPE_CHECKING:
1010
from collections.abc import Awaitable
@@ -16,7 +16,7 @@
1616

1717
@dataclass(frozen=True)
1818
@docs_group('Data structures')
19-
class PlaywrightCrawlingContext(PlaywrightPreNavigationContext):
19+
class PlaywrightCrawlingContext(PlaywrightPreNavCrawlingContext):
2020
"""The crawling context used by the `PlaywrightCrawler`.
2121
2222
It provides access to key objects as well as utility functions for handling crawling tasks.

src/crawlee/playwright_crawler/_playwright_pre_navigation_context.py renamed to src/crawlee/playwright_crawler/_playwright_pre_nav_crawling_context.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@
1212

1313
@dataclass(frozen=True)
1414
@docs_group('Data structures')
15-
class PlaywrightPreNavigationContext(BasicCrawlingContext):
16-
"""Context used by PlaywrightCrawler.
15+
class PlaywrightPreNavCrawlingContext(BasicCrawlingContext):
16+
"""The pre navigation crawling context used by the `PlaywrightCrawler`.
1717
18-
It Provides access to the `Page` object for the current browser page.
18+
It provides access to the `Page` object before the navigation to the URL is performed.
1919
"""
2020

2121
page: Page

0 commit comments

Comments
 (0)