From 08348f279a72782b4716c17cf953122ed5bdc4f5 Mon Sep 17 00:00:00 2001 From: Honza Javorek Date: Thu, 28 Aug 2025 10:49:48 +0200 Subject: [PATCH] feat: scrape prices as cents, avoid Decimal This makes the code simpler and the lessons are closer to their JavaScript counterparts. Also the real world practice uses cents, e.g. the Stripe API and others. --- .../07_extracting_data.md | 27 +++++++--- .../scraping_basics_python/08_saving_data.md | 48 ++++------------- .../09_getting_links.md | 51 +++++++------------ .../scraping_basics_python/10_crawling.md | 19 +++---- .../11_scraping_variants.md | 41 +++++++-------- .../scraping_basics_python/12_framework.md | 21 ++++---- .../scraping_basics_python/13_platform.md | 8 +-- 7 files changed, 90 insertions(+), 125 deletions(-) diff --git a/sources/academy/webscraping/scraping_basics_python/07_extracting_data.md b/sources/academy/webscraping/scraping_basics_python/07_extracting_data.md index 32290850ec..47023acd0c 100644 --- a/sources/academy/webscraping/scraping_basics_python/07_extracting_data.md +++ b/sources/academy/webscraping/scraping_basics_python/07_extracting_data.md @@ -159,12 +159,26 @@ Great! Only if we didn't overlook an important pitfall called [floating-point er 0.30000000000000004 ``` -These errors are small and usually don't matter, but sometimes they can add up and cause unpleasant discrepancies. That's why it's typically best to avoid floating point numbers when working with money. Let's instead use Python's built-in [`Decimal()`](https://docs.python.org/3/library/decimal.html) type: +These errors are small and usually don't matter, but sometimes they can add up and cause unpleasant discrepancies. That's why it's typically best to avoid floating point numbers when working with money. 
We won't store dollars, but cents: + +```py +price_text = ( + product + .select_one(".price") + .contents[-1] + .strip() + .replace("$", "") +# highlight-next-line + .replace(".", "") + .replace(",", "") +) +``` + +In this case, removing the dot from the price text is the same as if we multiplied all the numbers by 100, effectively converting dollars to cents. For converting the text to a number we'll use `int()` instead of `float()`. This is how the whole program looks now: ```py import httpx from bs4 import BeautifulSoup -from decimal import Decimal url = "https://warehouse-theme-metal.myshopify.com/collections/sales" response = httpx.get(url) @@ -182,13 +196,14 @@ for product in soup.select(".product-item"): .contents[-1] .strip() .replace("$", "") + .replace(".", "") .replace(",", "") ) if price_text.startswith("From "): - min_price = Decimal(price_text.removeprefix("From ")) + min_price = int(price_text.removeprefix("From ")) price = None else: - min_price = Decimal(price_text) + min_price = int(price_text) price = min_price print(title, min_price, price, sep=" | ") @@ -198,8 +213,8 @@ If we run the code above, we have nice, clean data about all the products! ```text $ python main.py -JBL Flip 4 Waterproof Portable Bluetooth Speaker | 74.95 | 74.95 -Sony XBR-950G BRAVIA 4K HDR Ultra HD TV | 1398.00 | None +JBL Flip 4 Waterproof Portable Bluetooth Speaker | 7495 | 7495 +Sony XBR-950G BRAVIA 4K HDR Ultra HD TV | 139800 | None ... 
``` diff --git a/sources/academy/webscraping/scraping_basics_python/08_saving_data.md b/sources/academy/webscraping/scraping_basics_python/08_saving_data.md index 6567e24efa..f75464fc37 100644 --- a/sources/academy/webscraping/scraping_basics_python/08_saving_data.md +++ b/sources/academy/webscraping/scraping_basics_python/08_saving_data.md @@ -29,7 +29,6 @@ Producing results line by line is an efficient approach to handling large datase ```py import httpx from bs4 import BeautifulSoup -from decimal import Decimal url = "https://warehouse-theme-metal.myshopify.com/collections/sales" response = httpx.get(url) @@ -49,13 +48,14 @@ for product in soup.select(".product-item"): .contents[-1] .strip() .replace("$", "") + .replace(".", "") .replace(",", "") ) if price_text.startswith("From "): - min_price = Decimal(price_text.removeprefix("From ")) + min_price = int(price_text.removeprefix("From ")) price = None else: - min_price = Decimal(price_text) + min_price = int(price_text) price = min_price # highlight-next-line @@ -69,7 +69,7 @@ Before looping over the products, we prepare an empty list. Then, instead of pri ```text $ python main.py -[{'title': 'JBL Flip 4 Waterproof Portable Bluetooth Speaker', 'min_price': Decimal('74.95'), 'price': Decimal('74.95')}, {'title': 'Sony XBR-950G BRAVIA 4K HDR Ultra HD TV', 'min_price': Decimal('1398.00'), 'price': None}, ...] +[{'title': 'JBL Flip 4 Waterproof Portable Bluetooth Speaker', 'min_price': 7495, 'price': 7495}, {'title': 'Sony XBR-950G BRAVIA 4K HDR Ultra HD TV', 'min_price': 139800, 'price': None}, ...] ``` :::tip Pretty print @@ -87,7 +87,6 @@ In Python, we can read and write JSON using the [`json`](https://docs.python.org ```py import httpx from bs4 import BeautifulSoup -from decimal import Decimal # highlight-next-line import json ``` @@ -99,39 +98,17 @@ with open("products.json", "w") as file: json.dump(data, file) ``` -That's it! 
If we run the program now, it should also create a `products.json` file in the current working directory: - -```text -$ python main.py -Traceback (most recent call last): - ... - raise TypeError(f'Object of type {o.__class__.__name__} ' -TypeError: Object of type Decimal is not JSON serializable -``` - -Ouch! JSON supports integers and floating-point numbers, but there's no guidance on how to handle `Decimal`. To maintain precision, it's common to store monetary values as strings in JSON files. But this is a convention, not a standard, so we need to handle it manually. We'll pass a custom function to `json.dump()` to serialize objects that it can't handle directly: - -```py -def serialize(obj): - if isinstance(obj, Decimal): - return str(obj) - raise TypeError("Object not JSON serializable") - -with open("products.json", "w") as file: - json.dump(data, file, default=serialize) -``` - -If we run our scraper now, it won't display any output, but it will create a `products.json` file in the current working directory, which contains all the data about the listed products: +That's it! If we run our scraper now, it won't display any output, but it will create a `products.json` file in the current working directory, which contains all the data about the listed products: ```json title=products.json -[{"title": "JBL Flip 4 Waterproof Portable Bluetooth Speaker", "min_price": "74.95", "price": "74.95"}, {"title": "Sony XBR-950G BRAVIA 4K HDR Ultra HD TV", "min_price": "1398.00", "price": null}, ...] +[{"title": "JBL Flip 4 Waterproof Portable Bluetooth Speaker", "min_price": 7495, "price": 7495}, {"title": "Sony XBR-950G BRAVIA 4K HDR Ultra HD TV", "min_price": 139800, "price": null}, ...] 
``` If you skim through the data, you'll notice that the `json.dump()` function handled some potential issues, such as escaping double quotes found in one of the titles by adding a backslash: ```json -{"title": "Sony SACS9 10\" Active Subwoofer", "min_price": "158.00", "price": "158.00"} +{"title": "Sony SACS9 10\" Active Subwoofer", "min_price": 15800, "price": 15800} ``` :::tip Pretty JSON @@ -177,7 +154,6 @@ Now that's nice, but we didn't want Alice, Bob, kickbox, or TypeScript. What we ```py import httpx from bs4 import BeautifulSoup -from decimal import Decimal import json # highlight-next-line import csv @@ -186,13 +162,8 @@ import csv Next, let's add one more data export to end of the source code of our scraper: ```py -def serialize(obj): - if isinstance(obj, Decimal): - return str(obj) - raise TypeError("Object not JSON serializable") - with open("products.json", "w") as file: - json.dump(data, file, default=serialize) + json.dump(data, file) with open("products.csv", "w") as file: writer = csv.DictWriter(file, fieldnames=["title", "min_price", "price"]) @@ -223,13 +194,12 @@ Write a new Python program that reads `products.json`, finds all products with a ```py import json from pprint import pp - from decimal import Decimal with open("products.json", "r") as file: products = json.load(file) for product in products: - if Decimal(product["min_price"]) > 500: + if product["min_price"] > 50000: pp(product) ``` diff --git a/sources/academy/webscraping/scraping_basics_python/09_getting_links.md b/sources/academy/webscraping/scraping_basics_python/09_getting_links.md index 483958c221..79a39738e0 100644 --- a/sources/academy/webscraping/scraping_basics_python/09_getting_links.md +++ b/sources/academy/webscraping/scraping_basics_python/09_getting_links.md @@ -33,7 +33,6 @@ Over the course of the previous lessons, the code of our program grew to almost ```py import httpx from bs4 import BeautifulSoup -from decimal import Decimal import json import csv @@ 
-54,24 +53,20 @@ for product in soup.select(".product-item"): .contents[-1] .strip() .replace("$", "") + .replace(".", "") .replace(",", "") ) if price_text.startswith("From "): - min_price = Decimal(price_text.removeprefix("From ")) + min_price = int(price_text.removeprefix("From ")) price = None else: - min_price = Decimal(price_text) + min_price = int(price_text) price = min_price data.append({"title": title, "min_price": min_price, "price": price}) -def serialize(obj): - if isinstance(obj, Decimal): - return str(obj) - raise TypeError("Object not JSON serializable") - with open("products.json", "w") as file: - json.dump(data, file, default=serialize) + json.dump(data, file) with open("products.csv", "w") as file: writer = csv.DictWriter(file, fieldnames=["title", "min_price", "price"]) @@ -103,13 +98,14 @@ def parse_product(product): .contents[-1] .strip() .replace("$", "") + .replace(".", "") .replace(",", "") ) if price_text.startswith("From "): - min_price = Decimal(price_text.removeprefix("From ")) + min_price = int(price_text.removeprefix("From ")) price = None else: - min_price = Decimal(price_text) + min_price = int(price_text) price = min_price return {"title": title, "min_price": min_price, "price": price} @@ -119,13 +115,8 @@ Now the JSON export. For better readability of it, let's make a small change her ```py def export_json(file, data): - def serialize(obj): - if isinstance(obj, Decimal): - return str(obj) - raise TypeError("Object not JSON serializable") - # highlight-next-line - json.dump(data, file, default=serialize, indent=2) + json.dump(data, file, indent=2) ``` The last function we'll add will take care of the CSV export. We'll make a small change here as well. Having to specify the field names is not ideal. What if we add more field names in the parsing function? We'd always have to remember to go and edit the export function as well. If we could figure out the field names in place, we'd remove this dependency. 
One way would be to infer the field names from the dictionary keys of the first row: @@ -151,7 +142,6 @@ Now let's put it all together: ```py import httpx from bs4 import BeautifulSoup -from decimal import Decimal import json import csv @@ -171,24 +161,20 @@ def parse_product(product): .contents[-1] .strip() .replace("$", "") + .replace(".", "") .replace(",", "") ) if price_text.startswith("From "): - min_price = Decimal(price_text.removeprefix("From ")) + min_price = int(price_text.removeprefix("From ")) price = None else: - min_price = Decimal(price_text) + min_price = int(price_text) price = min_price return {"title": title, "min_price": min_price, "price": price} def export_json(file, data): - def serialize(obj): - if isinstance(obj, Decimal): - return str(obj) - raise TypeError("Object not JSON serializable") - - json.dump(data, file, default=serialize, indent=2) + json.dump(data, file, indent=2) def export_csv(file, data): fieldnames = list(data[0].keys()) @@ -254,13 +240,13 @@ In the previous code example, we've also added the URL to the dictionary returne [ { "title": "JBL Flip 4 Waterproof Portable Bluetooth Speaker", - "min_price": "74.95", - "price": "74.95", + "min_price": "7495", + "price": "7495", "url": "/products/jbl-flip-4-waterproof-portable-bluetooth-speaker" }, { "title": "Sony XBR-950G BRAVIA 4K HDR Ultra HD TV", - "min_price": "1398.00", + "min_price": "139800", "price": null, "url": "/products/sony-xbr-65x950g-65-class-64-5-diag-bravia-4k-hdr-ultra-hd-tv" }, @@ -277,7 +263,6 @@ Browsers reading the HTML know the base address and automatically resolve such l ```py import httpx from bs4 import BeautifulSoup -from decimal import Decimal import json import csv # highlight-next-line @@ -319,13 +304,13 @@ When we run the scraper now, we should see full URLs in our exports: [ { "title": "JBL Flip 4 Waterproof Portable Bluetooth Speaker", - "min_price": "74.95", - "price": "74.95", + "min_price": "7495", + "price": "7495", "url": 
"https://warehouse-theme-metal.myshopify.com/products/jbl-flip-4-waterproof-portable-bluetooth-speaker" }, { "title": "Sony XBR-950G BRAVIA 4K HDR Ultra HD TV", - "min_price": "1398.00", + "min_price": "139800", "price": null, "url": "https://warehouse-theme-metal.myshopify.com/products/sony-xbr-65x950g-65-class-64-5-diag-bravia-4k-hdr-ultra-hd-tv" }, diff --git a/sources/academy/webscraping/scraping_basics_python/10_crawling.md b/sources/academy/webscraping/scraping_basics_python/10_crawling.md index dc4d8cee26..5d7d0f993a 100644 --- a/sources/academy/webscraping/scraping_basics_python/10_crawling.md +++ b/sources/academy/webscraping/scraping_basics_python/10_crawling.md @@ -18,7 +18,6 @@ Thanks to the refactoring, we have functions ready for each of the tasks, so we ```py import httpx from bs4 import BeautifulSoup -from decimal import Decimal import json import csv from urllib.parse import urljoin @@ -41,24 +40,20 @@ def parse_product(product, base_url): .contents[-1] .strip() .replace("$", "") + .replace(".", "") .replace(",", "") ) if price_text.startswith("From "): - min_price = Decimal(price_text.removeprefix("From ")) + min_price = int(price_text.removeprefix("From ")) price = None else: - min_price = Decimal(price_text) + min_price = int(price_text) price = min_price return {"title": title, "min_price": min_price, "price": price, "url": url} def export_json(file, data): - def serialize(obj): - if isinstance(obj, Decimal): - return str(obj) - raise TypeError("Object not JSON serializable") - - json.dump(data, file, default=serialize, indent=2) + json.dump(data, file, indent=2) def export_csv(file, data): fieldnames = list(data[0].keys()) @@ -159,14 +154,14 @@ If we run the program now, it'll take longer to finish since it's making 24 more [ { "title": "JBL Flip 4 Waterproof Portable Bluetooth Speaker", - "min_price": "74.95", - "price": "74.95", + "min_price": "7495", + "price": "7495", "url": 
"https://warehouse-theme-metal.myshopify.com/products/jbl-flip-4-waterproof-portable-bluetooth-speaker", "vendor": "JBL" }, { "title": "Sony XBR-950G BRAVIA 4K HDR Ultra HD TV", - "min_price": "1398.00", + "min_price": "139800", "price": null, "url": "https://warehouse-theme-metal.myshopify.com/products/sony-xbr-65x950g-65-class-64-5-diag-bravia-4k-hdr-ultra-hd-tv", "vendor": "Sony" diff --git a/sources/academy/webscraping/scraping_basics_python/11_scraping_variants.md b/sources/academy/webscraping/scraping_basics_python/11_scraping_variants.md index 2d8b9e8226..2b38004673 100644 --- a/sources/academy/webscraping/scraping_basics_python/11_scraping_variants.md +++ b/sources/academy/webscraping/scraping_basics_python/11_scraping_variants.md @@ -113,8 +113,8 @@ If we run the program now, we'll see 34 items in total. Some items don't have va { "variant_name": null, "title": "Klipsch R-120SW Powerful Detailed Home Speaker - Unit", - "min_price": "324.00", - "price": "324.00", + "min_price": "32400", + "price": "32400", "url": "https://warehouse-theme-metal.myshopify.com/products/klipsch-r-120sw-powerful-detailed-home-speaker-set-of-1", "vendor": "Klipsch" }, @@ -131,7 +131,7 @@ Some products will break into several items, each with a different variant name. { "variant_name": "Red - $178.00", "title": "Sony XB-950B1 Extra Bass Wireless Headphones with App Control", - "min_price": "128.00", + "min_price": "12800", "price": null, "url": "https://warehouse-theme-metal.myshopify.com/products/sony-xb950-extra-bass-wireless-headphones-with-app-control", "vendor": "Sony" @@ -139,7 +139,7 @@ Some products will break into several items, each with a different variant name. 
{ "variant_name": "Black - $178.00", "title": "Sony XB-950B1 Extra Bass Wireless Headphones with App Control", - "min_price": "128.00", + "min_price": 12800, "price": null, "url": "https://warehouse-theme-metal.myshopify.com/products/sony-xb950-extra-bass-wireless-headphones-with-app-control", "vendor": "Sony" }, @@ -157,8 +157,8 @@ Perhaps surprisingly, some products with variants will have the price field set. { "variant_name": "Red - $74.95", "title": "JBL Flip 4 Waterproof Portable Bluetooth Speaker", - "min_price": "74.95", - "price": "74.95", + "min_price": 7495, + "price": 7495, "url": "https://warehouse-theme-metal.myshopify.com/products/jbl-flip-4-waterproof-portable-bluetooth-speaker", "vendor": "JBL" }, @@ -174,15 +174,16 @@ The items now contain the variant as text, which is good for a start, but we wan ```py def parse_variant(variant): text = variant.text.strip() name, price_text = text.split(" - ") - price = Decimal( + price = int( price_text .replace("$", "") + .replace(".", "") .replace(",", "") ) return {"variant_name": name, "price": price} ``` -First, we split the text into two parts, then we parse the price as a decimal number. This part is similar to what we already do for parsing product listing prices. The function returns a dictionary we can merge with `item`. +First, we split the text into two parts, then we parse the price as a number. This part is similar to what we already do for parsing product listing prices. The function returns a dictionary we can merge with `item`. 
## Saving price @@ -191,7 +192,6 @@ Now, if we use our new function, we should finally get a program that can scrape ```py import httpx from bs4 import BeautifulSoup -from decimal import Decimal import json import csv from urllib.parse import urljoin @@ -214,13 +214,14 @@ def parse_product(product, base_url): .contents[-1] .strip() .replace("$", "") + .replace(".", "") .replace(",", "") ) if price_text.startswith("From "): - min_price = Decimal(price_text.removeprefix("From ")) + min_price = int(price_text.removeprefix("From ")) price = None else: - min_price = Decimal(price_text) + min_price = int(price_text) price = min_price return {"title": title, "min_price": min_price, "price": price, "url": url} @@ -228,20 +229,16 @@ def parse_product(product, base_url): def parse_variant(variant): text = variant.text.strip() name, price_text = text.split(" - ") - price = Decimal( + price = int( price_text .replace("$", "") + .replace(".", "") .replace(",", "") ) return {"variant_name": name, "price": price} def export_json(file, data): - def serialize(obj): - if isinstance(obj, Decimal): - return str(obj) - raise TypeError("Object not JSON serializable") - - json.dump(data, file, default=serialize, indent=2) + json.dump(data, file, indent=2) def export_csv(file, data): fieldnames = list(data[0].keys()) @@ -283,16 +280,16 @@ Let's run the scraper and see if all the items in the data contain prices: { "variant_name": "Red", "title": "Sony XB-950B1 Extra Bass Wireless Headphones with App Control", - "min_price": "128.00", - "price": "178.00", + "min_price": "12800", + "price": "17800", "url": "https://warehouse-theme-metal.myshopify.com/products/sony-xb950-extra-bass-wireless-headphones-with-app-control", "vendor": "Sony" }, { "variant_name": "Black", "title": "Sony XB-950B1 Extra Bass Wireless Headphones with App Control", - "min_price": "128.00", - "price": "178.00", + "min_price": "12800", + "price": "17800", "url": 
"https://warehouse-theme-metal.myshopify.com/products/sony-xb950-extra-bass-wireless-headphones-with-app-control", "vendor": "Sony" }, diff --git a/sources/academy/webscraping/scraping_basics_python/12_framework.md b/sources/academy/webscraping/scraping_basics_python/12_framework.md index c8b5f64685..dccd5c5d1b 100644 --- a/sources/academy/webscraping/scraping_basics_python/12_framework.md +++ b/sources/academy/webscraping/scraping_basics_python/12_framework.md @@ -207,9 +207,9 @@ The code above assumes the `.select_one()` call doesn't return `None`. If your e ::: -Now for the price. We're not doing anything new here—just import `Decimal` and copy-paste the code from our old scraper. +Now for the price. We're not doing anything new here—just copy-paste the code from our old scraper. The only change will be in the selector. -The only change will be in the selector. In `main.py`, we looked for `.price` within a `product_soup` object representing a product card. Now, we're looking for `.price` within the entire product detail page. It's better to be more specific so we don't accidentally match another price on the same page: +In `main.py`, we looked for `.price` within a `product_soup` object representing a product card. Now, we're looking for `.price` within the entire product detail page. It's better to be more specific so we don't accidentally match another price on the same page: ```py async def main(): @@ -224,13 +224,14 @@ async def main(): .contents[-1] .strip() .replace("$", "") + .replace(".", "") .replace(",", "") ) item = { "url": context.request.url, "title": context.soup.select_one(".product-meta__title").text.strip(), "vendor": context.soup.select_one(".product-meta__vendor").text.strip(), - "price": Decimal(price_text), + "price": int(price_text), } print(item) ``` @@ -239,7 +240,6 @@ Finally, the variants. 
We can reuse the `parse_variant()` function as-is, and in ```py import asyncio -from decimal import Decimal from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext async def main(): @@ -257,13 +257,14 @@ async def main(): .contents[-1] .strip() .replace("$", "") + .replace(".", "") .replace(",", "") ) item = { "url": context.request.url, "title": context.soup.select_one(".product-meta__title").text.strip(), "vendor": context.soup.select_one(".product-meta__vendor").text.strip(), - "price": Decimal(price_text), + "price": int(price_text), "variant_name": None, } if variants := context.soup.select(".product-form__option.no-js option"): @@ -277,9 +278,10 @@ async def main(): def parse_variant(variant): text = variant.text.strip() name, price_text = text.split(" - ") - price = Decimal( + price = int( price_text .replace("$", "") + .replace(".", "") .replace(",", "") ) return {"variant_name": name, "price": price} @@ -342,7 +344,6 @@ Crawlee gives us stats about HTTP requests and concurrency, but we don't get muc ```py import asyncio -from decimal import Decimal from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext async def main(): @@ -364,13 +365,14 @@ async def main(): .contents[-1] .strip() .replace("$", "") + .replace(".", "") .replace(",", "") ) item = { "url": context.request.url, "title": context.soup.select_one(".product-meta__title").text.strip(), "vendor": context.soup.select_one(".product-meta__vendor").text.strip(), - "price": Decimal(price_text), + "price": int(price_text), "variant_name": None, } if variants := context.soup.select(".product-form__option.no-js option"): @@ -393,9 +395,10 @@ async def main(): def parse_variant(variant): text = variant.text.strip() name, price_text = text.split(" - ") - price = Decimal( + price = int( price_text .replace("$", "") + .replace(".", "") .replace(",", "") ) return {"variant_name": name, "price": price} diff --git 
a/sources/academy/webscraping/scraping_basics_python/13_platform.md b/sources/academy/webscraping/scraping_basics_python/13_platform.md index d039540a4d..44fedf4cb3 100644 --- a/sources/academy/webscraping/scraping_basics_python/13_platform.md +++ b/sources/academy/webscraping/scraping_basics_python/13_platform.md @@ -90,7 +90,6 @@ We'll now adjust the template so that it runs our program for watching prices. A ```py title=warehouse-watchdog/src/crawler.py import asyncio -from decimal import Decimal from crawlee.crawlers import BeautifulSoupCrawler async def main(): @@ -110,13 +109,14 @@ async def main(): .contents[-1] .strip() .replace("$", "") + .replace(".", "") .replace(",", "") ) item = { "url": context.request.url, "title": context.soup.select_one(".product-meta__title").text.strip(), "vendor": context.soup.select_one(".product-meta__vendor").text.strip(), - "price": Decimal(price_text), + "price": int(price_text), "variant_name": None, } if variants := context.soup.select(".product-form__option.no-js option"): @@ -136,9 +136,10 @@ async def main(): def parse_variant(variant): text = variant.text.strip() name, price_text = text.split(" - ") - price = Decimal( + price = int( price_text .replace("$", "") + .replace(".", "") .replace(",", "") ) return {"variant_name": name, "price": price} @@ -300,7 +301,6 @@ Next, we'll add `proxy_config` as an optional parameter in `warehouse-watchdog/s ```py title=warehouse-watchdog/src/crawler.py import asyncio -from decimal import Decimal from crawlee.crawlers import BeautifulSoupCrawler # highlight-next-line