Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions docs/examples/code_examples/using_browser_profiles_chrome.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import asyncio
import shutil
from pathlib import Path
from tempfile import TemporaryDirectory

from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext

# Name of the Chrome profile folder to load.
# Single-profile installations normally use 'Default'.
PROFILE_NAME = 'Default'

# Folder that contains the Chrome profiles (Windows layout shown here).
# Open `chrome://version/` in Chrome to look up the location on your machine.
PROFILE_PATH = Path.home() / 'AppData' / 'Local' / 'Google' / 'Chrome' / 'User Data'


async def main() -> None:
    """Run a `PlaywrightCrawler` against a temporary copy of a Chrome profile.

    The folder `PROFILE_PATH / PROFILE_NAME` is copied into a temporary
    directory, and that directory is passed to the crawler as its user data
    directory. The temporary directory is removed when the `with` block exits.
    """
    with TemporaryDirectory(prefix='crawlee-') as scratch_dir:
        user_data_root = Path(scratch_dir)

        # The crawler works on this copy rather than on the original profile.
        original_profile = PROFILE_PATH / PROFILE_NAME
        copied_profile = user_data_root / PROFILE_NAME
        shutil.copytree(original_profile, copied_profile, dirs_exist_ok=True)

        launch_options = {
            # Launch the Chrome browser installed on the system
            'channel': 'chrome',
            # Slow actions down to mimic human behavior
            'slow_mo': 200,
            # Select the copied profile inside the user data directory
            'args': [f'--profile-directory={PROFILE_NAME}'],
        }

        crawler = PlaywrightCrawler(
            # Chromium browser type for Chrome compatibility
            browser_type='chromium',
            headless=False,
            # No generated fingerprints, so the profile identity is preserved
            fingerprint_generator=None,
            # Point the browser at the temporary copy
            user_data_dir=user_data_root,
            browser_launch_options=launch_options,
        )

        @crawler.router.default_handler
        async def default_handler(context: PlaywrightCrawlingContext) -> None:
            # Log each visited URL.
            context.log.info(f'Visiting {context.request.url}')

        start_urls = ['https://crawlee.dev/']
        await crawler.run(start_urls)


if __name__ == '__main__':
    # Start the event loop only when the file is executed as a script.
    asyncio.run(main())
42 changes: 42 additions & 0 deletions docs/examples/code_examples/using_browser_profiles_firefox.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import asyncio
from pathlib import Path

from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext

# Name of the Firefox profile folder to load. Replace the placeholder with
# the real name shown on the `about:profiles` page in Firefox.
PROFILE_NAME = 'your-profile-name-here'

# Full path to that profile (Windows layout shown here).
# The `about:profiles` page lists the exact location on your machine.
PROFILE_PATH = Path.home().joinpath(
    'AppData', 'Roaming', 'Mozilla', 'Firefox', 'Profiles', PROFILE_NAME
)


async def main() -> None:
    """Run a `PlaywrightCrawler` with the Firefox profile at `PROFILE_PATH`.

    The profile folder is passed to the crawler directly as its user data
    directory; no copy is made.
    """
    launch_options = {
        'args': [
            # Required to avoid version conflicts
            '--allow-downgrade',
        ],
    }

    crawler = PlaywrightCrawler(
        headless=False,
        # Firefox browser type
        browser_type='firefox',
        # No generated fingerprints, so the profile is used as is
        fingerprint_generator=None,
        # Location of the Firefox profile to load
        user_data_dir=PROFILE_PATH,
        browser_launch_options=launch_options,
    )

    @crawler.router.default_handler
    async def default_handler(context: PlaywrightCrawlingContext) -> None:
        # Log each visited URL.
        context.log.info(f'Visiting {context.request.url}')

    start_urls = ['https://crawlee.dev/']
    await crawler.run(start_urls)


if __name__ == '__main__':
    # Start the event loop only when the file is executed as a script.
    asyncio.run(main())
41 changes: 41 additions & 0 deletions docs/examples/using_browser_profile.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
---
id: using_browser_profile
title: Using browser profile
---

import ApiLink from '@site/src/components/ApiLink';

import CodeBlock from '@theme/CodeBlock';

import ChromeProfileExample from '!!raw-loader!./code_examples/using_browser_profiles_chrome.py';
import FirefoxProfileExample from '!!raw-loader!./code_examples/using_browser_profiles_firefox.py';

This example demonstrates how to run <ApiLink to="class/PlaywrightCrawler">`PlaywrightCrawler`</ApiLink> using your local browser profile from [Chrome](https://www.google.com/intl/us/chrome/) or [Firefox](https://www.firefox.com/).

Using browser profiles allows you to leverage existing login sessions, saved passwords, bookmarks, and other personalized browser data during crawling. This can be particularly useful for testing scenarios or when you need to access content that requires authentication.

## Chrome browser

To run <ApiLink to="class/PlaywrightCrawler">`PlaywrightCrawler`</ApiLink> with your Chrome profile, you need to know the path to your profile files. You can find this information by entering `chrome://version/` as a URL in your Chrome browser. If you have multiple profiles, note the name of the profile you want to use; if you only have one profile, its name is always `Default`.

You also need to use the [`channel`](https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-option-channel) parameter in `browser_launch_options` to use the Chrome browser installed on your system instead of Playwright's Chromium.

:::warning Profile access limitation
Due to [Chrome's security policies](https://developer.chrome.com/blog/remote-debugging-port), automation cannot use your main browsing profile directly. The example copies your profile to a temporary location as a workaround.
:::

Make sure you don't have any running Chrome browser processes before running this code:

<CodeBlock className="language-python" language="python">
{ChromeProfileExample}
</CodeBlock>

## Firefox browser

To find the path to your Firefox profile, enter `about:profiles` as a URL in your Firefox browser. Unlike Chrome, you can use your standard profile path directly without copying it first.

Make sure you don't have any running Firefox browser processes before running this code:

<CodeBlock className="language-python" language="python">
{FirefoxProfileExample}
</CodeBlock>
Loading