
Commit 4172652

janbuchar and vdusek authored
feat!: Implement RequestManagerTandem, remove add_request from RequestList, accept any iterable in RequestList constructor (#777)
> Tandem, or in tandem, is an arrangement in which two or more animals, machines, or people are lined up one behind another, all facing in the same direction.[[1]](https://en.wikipedia.org/wiki/Tandem#cite_note-OED-1) Tandem can also be used more generally to refer to any group of persons or objects working together, not necessarily in line.[[1]](https://en.wikipedia.org/wiki/Tandem#cite_note-OED-1) (https://en.wikipedia.org/wiki/Tandem)

- Inspired by https://github.com/apify/crawlee/blob/4c95847d5cedd6514620ccab31d5b242ba76de80/packages/basic-crawler/src/internals/basic-crawler.ts#L1154-L1177 and related code in the same class
- In my opinion, it implements the feature more cleanly and without polluting `BasicCrawler` (...any further)
- The motivation for the feature is twofold:
    1. Apify Actor development - it is common that an Actor receives a `requestListSources` input from the user, which may be pretty complex (regexp-based extraction from remote URL lists) and which is usually parsed using `apify.RequestList.open`. At the same time, the Actor wants to use the built-in `RequestQueue`.
    2. Sitemap parsing (#248) - similar to 1, but not coupled to the Apify platform: we want to read URLs from a sitemap in the background, but the URLs should go through the standard request queue.

## Breaking changes

- `RequestList` no longer supports `.drop()`, `.reclaim_request()`, `.add_request()` and `.add_requests_batched()`; `RequestManagerTandem` with a `RequestQueue` should be used for this use case, and `await list.to_tandem()` can be used as a shortcut
- The `RequestProvider` interface has been renamed to `RequestManager` and moved to the `crawlee.request_loaders` package
- `RequestList` has been moved to the `crawlee.request_loaders` package
- The `BasicCrawler.get_request_provider` method has been renamed to `BasicCrawler.get_request_manager` and no longer accepts the `id` and `name` arguments
- The `request_provider` parameter of `BasicCrawler.__init__` has been renamed to `request_manager`

## TODO

- [x] new tests
- [x] fix existing tests

---------

Co-authored-by: Vlada Dusek <[email protected]>
1 parent 3dc1c7d · commit 4172652

24 files changed: +773 −496
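Taken together, the headline changes look like this in practice. The following is a minimal editorial sketch based on the diffs below, with placeholder URLs: the `RequestList` constructor now accepts any iterable (here a generator), and a list is attached to a crawler by joining it with the default request queue via `to_tandem()`.

```python
import asyncio

from crawlee.http_crawler import HttpCrawler
from crawlee.request_loaders import RequestList


async def main() -> None:
    # The RequestList constructor now accepts any iterable of requests,
    # e.g. a generator, not just a list (the URLs are placeholders).
    request_list = RequestList(
        f'https://crawlee.dev/python/page/{n}' for n in range(10)
    )

    # RequestList is now a read-only loader; to feed it to a crawler,
    # join it with the default request queue via the to_tandem() shortcut.
    crawler = HttpCrawler(request_manager=await request_list.to_tandem())

    await crawler.run()


asyncio.run(main())
```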
Lines changed: 1 addition & 12 deletions
```diff
@@ -1,6 +1,6 @@
 import asyncio
 
-from crawlee.storages import RequestList
+from crawlee.request_loaders import RequestList
 
 
 async def main() -> None:
@@ -11,24 +11,13 @@ async def main() -> None:
         requests=['https://apify.com/', 'https://crawlee.dev/', 'https://crawlee.dev/python/'],
     )
 
-    # You can interact with the request list in the same way as with the request queue.
-    await request_list.add_requests_batched(
-        [
-            'https://crawlee.dev/python/docs/quick-start',
-            'https://crawlee.dev/python/api',
-        ]
-    )
-
     # Fetch and process requests from the queue.
     while request := await request_list.fetch_next_request():
         # Do something with it...
 
         # And mark it as handled.
         await request_list.mark_request_as_handled(request)
 
-    # Remove the request queue.
-    await request_list.drop()
-
 
 if __name__ == '__main__':
     asyncio.run(main())
```
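Since `RequestList` no longer supports `add_requests_batched()` or `.drop()`, requests added at runtime belong in a `RequestQueue`; the two sources can then feed one consumer through a tandem. A sketch of that pattern, with placeholder URLs and assuming `RequestManagerTandem` exposes the same `fetch_next_request`/`mark_request_as_handled` interface as the storages it wraps:

```python
import asyncio

from crawlee.request_loaders import RequestList, RequestManagerTandem
from crawlee.storages import RequestQueue


async def main() -> None:
    # The static part of the workload stays in the read-only list.
    request_list = RequestList(['https://apify.com/', 'https://crawlee.dev/'])

    # Anything added (or dropped) at runtime goes through the queue.
    request_queue = await RequestQueue.open()
    await request_queue.add_request('https://crawlee.dev/python/docs/quick-start')

    # The tandem serves both sources through one RequestManager interface
    # (assumption: the tandem forwards fetch/mark calls like its members do).
    request_manager = RequestManagerTandem(request_list, request_queue)

    while request := await request_manager.fetch_next_request():
        # Do something with the request...
        await request_manager.mark_request_as_handled(request)


asyncio.run(main())
```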

docs/guides/code/request_storage/rl_with_crawler_example.py

Lines changed: 6 additions & 4 deletions
```diff
@@ -1,7 +1,7 @@
 import asyncio
 
 from crawlee.http_crawler import HttpCrawler, HttpCrawlingContext
-from crawlee.storages import RequestList
+from crawlee.request_loaders import RequestList
 
 
 async def main() -> None:
@@ -12,9 +12,11 @@ async def main() -> None:
         requests=['https://apify.com/', 'https://crawlee.dev/'],
     )
 
-    # Create a new crawler (it can be any subclass of BasicCrawler) and pass the request
-    # list as request provider to it. It will be managed by the crawler.
-    crawler = HttpCrawler(request_provider=request_list)
+    # Join the request list into a tandem with the default request queue
+    request_manager = await request_list.to_tandem()
+
+    # Create a new crawler (it can be any subclass of BasicCrawler) and pass the request manager tandem
+    crawler = HttpCrawler(request_manager=request_manager)
 
     # Define the default request handler, which will be called for every request.
     @crawler.router.default_handler
```
docs/guides/code/request_storage/rq_with_crawler_explicit_example.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -14,7 +14,7 @@ async def main() -> None:
 
     # Create a new crawler (it can be any subclass of BasicCrawler) and pass the request
     # list as request provider to it. It will be managed by the crawler.
-    crawler = HttpCrawler(request_provider=request_queue)
+    crawler = HttpCrawler(request_manager=request_queue)
 
     # Define the default request handler, which will be called for every request.
     @crawler.router.default_handler
```
docs/guides/code/request_storage/tandem_example.py

Lines changed: 23 additions & 0 deletions

```diff
@@ -0,0 +1,23 @@
+import asyncio
+
+from crawlee.parsel_crawler import ParselCrawler, ParselCrawlingContext
+from crawlee.request_loaders import RequestList
+
+
+async def main() -> None:
+    # Create a static request list
+    request_list = RequestList(['https://crawlee.dev', 'https://apify.com'])
+
+    crawler = ParselCrawler(
+        # Requests from the list will be processed first, but only after they are enqueued in the default request queue
+        request_manager=await request_list.to_tandem(),
+    )
+
+    @crawler.router.default_handler
+    async def handler(context: ParselCrawlingContext) -> None:
+        await context.enqueue_links()  # New links will be enqueued directly to the queue
+
+    await crawler.run()
+
+
+asyncio.run(main())
```
docs/guides/code/request_storage/tandem_example_explicit.py

Lines changed: 27 additions & 0 deletions

```diff
@@ -0,0 +1,27 @@
+import asyncio
+
+from crawlee.parsel_crawler import ParselCrawler, ParselCrawlingContext
+from crawlee.request_loaders import RequestList, RequestManagerTandem
+from crawlee.storages import RequestQueue
+
+
+async def main() -> None:
+    # Create a static request list
+    request_list = RequestList(['https://crawlee.dev', 'https://apify.com'])
+
+    # Open the default request queue
+    request_queue = await RequestQueue.open()
+
+    crawler = ParselCrawler(
+        # Requests from the list will be processed first, but only after they are enqueued in the default request queue
+        request_manager=RequestManagerTandem(request_list, request_queue),
+    )
+
+    @crawler.router.default_handler
+    async def handler(context: ParselCrawlingContext) -> None:
+        await context.enqueue_links()  # New links will be enqueued directly to the queue
+
+    await crawler.run()
+
+
+asyncio.run(main())
```

docs/guides/request_storage.mdx

Lines changed: 25 additions & 1 deletion
````diff
@@ -18,12 +18,14 @@ import RsHelperAddRequestsExample from '!!raw-loader!./code/request_storage/help
 import RsHelperEnqueueLinksExample from '!!raw-loader!./code/request_storage/helper_enqueue_links_example.py';
 import RsDoNotPurgeExample from '!!raw-loader!./code/request_storage/do_not_purge_example.py';
 import RsPurgeExplicitlyExample from '!!raw-loader!./code/request_storage/purge_explicitly_example.py';
+import TandemExample from '!!raw-loader!./code/request_storage/tandem_example.py';
+import ExplicitTandemExample from '!!raw-loader!./code/request_storage/tandem_example_explicit.py';
 
 This guide explains the different types of request storage available in Crawlee, how to store the requests that your crawler will process, and which storage type to choose based on your needs.
 
 ## Introduction
 
-All request storage types in Crawlee implement the same interface - <ApiLink to="class/RequestProvider">`RequestProvider`</ApiLink>. This unified interface allows them to be used in a consistent manner, regardless of the storage backend. The request providers are managed by storage clients - subclasses of <ApiLink to="class/BaseStorageClient">`BaseStorageClient`</ApiLink>. For instance, <ApiLink to="class/MemoryStorageClient">`MemoryStorageClient`</ApiLink> stores data in memory while it can also offload them to the local directory. Data are stored in the following directory structure:
+All request storage types in Crawlee implement the same interface - <ApiLink to="class/RequestManager">`RequestManager`</ApiLink>. This unified interface allows them to be used in a consistent manner, regardless of the storage backend. The request providers are managed by storage clients - subclasses of <ApiLink to="class/BaseStorageClient">`BaseStorageClient`</ApiLink>. For instance, <ApiLink to="class/MemoryStorageClient">`MemoryStorageClient`</ApiLink> stores data in memory while it can also offload them to the local directory. Data are stored in the following directory structure:
 
 ```text
 {CRAWLEE_STORAGE_DIR}/{request_provider}/{QUEUE_ID}/
@@ -95,6 +97,28 @@ TODO: write this section, once https://github.com/apify/crawlee-python/issues/99
 
 */}
 
+## Processing requests from multiple sources
+
+In some cases, you might need to combine requests from multiple sources, most frequently from a static list of URLs (such as <ApiLink to="class/RequestList">`RequestList`</ApiLink>) and a <ApiLink to="class/RequestQueue">`RequestQueue`</ApiLink>, where the queue takes care of persistence and retrying failed requests.
+
+This use case is supported via the <ApiLink to="class/RequestManagerTandem">`RequestManagerTandem`</ApiLink> class. You may also use the <ApiLink to="class/RequestLoader#to_tandem">`RequestLoader.to_tandem`</ApiLink> method as a shortcut.
+
+<CodeBlock className="language-python">
+    {TandemExample}
+</CodeBlock>
+<Tabs groupId="request_manager_tandem">
+    <TabItem value="request_manager_tandem_helper" label="Using to_tandem helper" default>
+        <CodeBlock className="language-python">
+            {TandemExample}
+        </CodeBlock>
+    </TabItem>
+    <TabItem value="request_manager_tandem_explicit" label="Explicitly using RequestManagerTandem">
+        <CodeBlock className="language-python">
+            {ExplicitTandemExample}
+        </CodeBlock>
+    </TabItem>
+</Tabs>
+
 ## Request-related helpers
 
 We offer several helper functions to simplify interactions with request storages:
````
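For intuition, here is a rough behavioral sketch of what `RequestManagerTandem` automates - not the actual implementation: the read-only loader is drained into the queue, and everything is then consumed from the queue, which provides persistence and retries. URLs are placeholders.

```python
import asyncio

from crawlee.request_loaders import RequestList
from crawlee.storages import RequestQueue


async def main() -> None:
    request_list = RequestList(['https://crawlee.dev', 'https://apify.com'])
    request_queue = await RequestQueue.open()

    # Drain the loader into the queue (RequestManagerTandem automates this).
    while request := await request_list.fetch_next_request():
        await request_queue.add_request(request)
        await request_list.mark_request_as_handled(request)

    # From here on, all requests - including newly enqueued links - flow
    # through the queue alone.


asyncio.run(main())
```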

docs/introduction/code/02_bs.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -12,7 +12,7 @@ async def main() -> None:
     # And then you add one or more requests to it.
     await rq.add_request('https://crawlee.dev')
 
-    crawler = BeautifulSoupCrawler(request_provider=rq)
+    crawler = BeautifulSoupCrawler(request_manager=rq)
 
     # Define a request handler and attach it to the crawler using the decorator.
     @crawler.router.default_handler
```

docs/upgrading/upgrading_to_v0x.md

Lines changed: 11 additions & 0 deletions
```diff
@@ -26,6 +26,17 @@ This section summarizes the breaking changes between v0.4.x and v0.5.0.
 
 - Removed properties `json_` and `order_no`.
 
+### Request storages and loaders
+
+- The `request_provider` parameter of `BasicCrawler.__init__` has been renamed to `request_manager`
+- The `BasicCrawler.get_request_provider` method has been renamed to `BasicCrawler.get_request_manager` and it does not accept the `id` and `name` arguments anymore
+    - If using a specific request queue is desired, pass it as the `request_manager` on `BasicCrawler` creation
+- The `RequestProvider` interface has been renamed to `RequestManager` and moved to the `crawlee.request_loaders` package
+- `RequestList` has been moved to the `crawlee.request_loaders` package
+- `RequestList` does not support `.drop()`, `.reclaim_request()`, `.add_request()` and `add_requests_batched()` anymore
+    - It implements the new `RequestLoader` interface instead of `RequestManager`
+- `RequestManagerTandem` with a `RequestQueue` should be used to enable passing a `RequestList` (or any other `RequestLoader` implementation) as a `request_manager`, `await list.to_tandem()` can be used as a shortcut
+
 ### PlaywrightCrawler
 
 - The `PlaywrightPreNavigationContext` was renamed to `PlaywrightPreNavCrawlingContext`.
```
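To make the `get_request_provider` change concrete, here is a hypothetical migration of a call site - assuming the renamed `get_request_manager` keeps an async signature; the queue name is a placeholder:

```python
import asyncio

from crawlee.http_crawler import HttpCrawler
from crawlee.storages import RequestQueue


async def main() -> None:
    # v0.4.x: a specific queue could be obtained via
    #   await crawler.get_request_provider(id=..., name=...)
    # v0.5.0: pass the desired queue at creation instead.
    queue = await RequestQueue.open(name='my-queue')  # placeholder name
    crawler = HttpCrawler(request_manager=queue)

    # The renamed accessor returns whatever manager the crawler was given.
    manager = await crawler.get_request_manager()
    await manager.add_request('https://crawlee.dev/')

    await crawler.run()


asyncio.run(main())
```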
