From 208dc16dc3b88b8357efa86ce6cfe171ba53e49a Mon Sep 17 00:00:00 2001 From: Honza Javorek Date: Mon, 8 Sep 2025 14:56:14 +0200 Subject: [PATCH 1/9] refactor: put old course to legacy folder, put new course to the JS folder --- .../01_devtools_inspecting.md | 0 .../02_devtools_locating_elements.md | 0 .../03_devtools_extracting_data.md | 0 .../04_downloading_html.md | 0 .../05_parsing_html.md | 0 .../06_locating_elements.md | 0 .../07_extracting_data.md | 0 .../08_saving_data.md | 0 .../09_getting_links.md | 0 .../10_crawling.md | 0 .../11_scraping_variants.md | 0 .../12_framework.md | 0 .../13_platform.md | 0 .../_exercises.mdx | 0 .../images | 0 .../scraping_basics_javascript/index.md | 83 ++++++++---------- .../scraping_basics_javascript2/index.md | 69 --------------- .../best_practices.md | 0 .../challenge/images/crawlee-create.png | Bin .../challenge/images/offers-page.jpg | Bin .../challenge/images/view-offers-button.jpg | Bin .../challenge/index.md | 0 .../challenge/initializing_and_setting_up.md | 0 .../challenge/modularity.md | 0 .../challenge/scraping_amazon.md | 0 .../crawling/exporting_data.md | 0 .../crawling/filtering_links.md | 0 .../crawling/finding_links.js | 0 .../crawling/finding_links.md | 0 .../crawling/first_crawl.md | 0 .../crawling/headless_browser.md | 0 .../images/filtering-product-detail-link.png | Bin .../images/filtering-product-urls.png | Bin .../crawling/images/filtering-regex-urls.png | Bin .../crawling/images/headless-dynamic-data.png | Bin .../crawling/images/scraping-title.png | Bin .../crawling/images/warehouse-links.png | Bin .../crawling/images/warehouse-store.png | Bin .../crawling/index.md | 0 .../crawling/pro_scraping.md | 0 .../crawling/recap_extraction_basics.md | 0 .../crawling/relative_urls.md | 0 .../crawling/scraping_the_data.md | 0 .../data_extraction/browser_devtools.md | 0 .../data_extraction/computer_preparation.md | 0 .../data_extraction/devtools_continued.md | 0 .../browser-devtools-console-commands.png | Bin 
.../images/browser-devtools-console.png | Bin .../browser-devtools-element-selection.png | Bin .../images/browser-devtools-elements-tab.png | Bin .../images/browser-devtools-hover.png | Bin .../images/browser-devtools-wikipedia.png | Bin .../images/csv-data-in-sheets.png | Bin .../images/devtools-clean-price.png | Bin .../images/devtools-cleaning-noise.png | Bin .../images/devtools-collection-class.png | Bin .../devtools-collection-product-hover.png | Bin .../devtools-collection-product-name.png | Bin .../images/devtools-collection-query-all.png | Bin .../devtools-collection-query-hover.png | Bin .../images/devtools-collection-query.png | Bin .../images/devtools-collection-warehouse.png | Bin .../images/devtools-count-products.png | Bin .../images/devtools-extract-product-price.png | Bin .../images/devtools-extract-product-title.png | Bin .../images/devtools-find-child-elements.png | Bin .../images/devtools-print-all-products.png | Bin .../images/devtools-print-parent-text.png | Bin .../images/devtools-product-titles.png | Bin .../images/devtools-split-price.png | Bin .../images/node-scraper-title.png | Bin .../images/vscode-create-file.png | Bin .../images/vscode-hello-world.png | Bin .../images/vscode-npm-init.png | Bin .../images/vscode-open-folder.png | Bin .../images/vscode-open-terminal.png | Bin .../images/vscode-test-setup.png | Bin .../images/vscode-type-module.png | Bin .../data_extraction/index.md | 0 .../data_extraction/node_continued.md | 0 .../data_extraction/node_js_scraper.md | 0 .../data_extraction/project_setup.md | 0 .../data_extraction/save_to_csv.md | 0 .../data_extraction/using_devtools.md | 0 .../images/beginners-data-collection.png | Bin .../images/beginners-data-extraction.png | Bin .../index.md | 74 ++++++++++++++++ .../introduction.md | 0 88 files changed, 113 insertions(+), 113 deletions(-) rename sources/academy/webscraping/{scraping_basics_javascript2 => scraping_basics_javascript}/01_devtools_inspecting.md (100%) rename 
sources/academy/webscraping/{scraping_basics_javascript2 => scraping_basics_javascript}/02_devtools_locating_elements.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript2 => scraping_basics_javascript}/03_devtools_extracting_data.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript2 => scraping_basics_javascript}/04_downloading_html.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript2 => scraping_basics_javascript}/05_parsing_html.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript2 => scraping_basics_javascript}/06_locating_elements.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript2 => scraping_basics_javascript}/07_extracting_data.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript2 => scraping_basics_javascript}/08_saving_data.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript2 => scraping_basics_javascript}/09_getting_links.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript2 => scraping_basics_javascript}/10_crawling.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript2 => scraping_basics_javascript}/11_scraping_variants.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript2 => scraping_basics_javascript}/12_framework.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript2 => scraping_basics_javascript}/13_platform.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript2 => scraping_basics_javascript}/_exercises.mdx (100%) rename sources/academy/webscraping/{scraping_basics_javascript2 => scraping_basics_javascript}/images (100%) delete mode 100644 sources/academy/webscraping/scraping_basics_javascript2/index.md rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/best_practices.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => 
scraping_basics_legacy_javascript}/challenge/images/crawlee-create.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/challenge/images/offers-page.jpg (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/challenge/images/view-offers-button.jpg (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/challenge/index.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/challenge/initializing_and_setting_up.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/challenge/modularity.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/challenge/scraping_amazon.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/crawling/exporting_data.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/crawling/filtering_links.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/crawling/finding_links.js (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/crawling/finding_links.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/crawling/first_crawl.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/crawling/headless_browser.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/crawling/images/filtering-product-detail-link.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => 
scraping_basics_legacy_javascript}/crawling/images/filtering-product-urls.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/crawling/images/filtering-regex-urls.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/crawling/images/headless-dynamic-data.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/crawling/images/scraping-title.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/crawling/images/warehouse-links.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/crawling/images/warehouse-store.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/crawling/index.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/crawling/pro_scraping.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/crawling/recap_extraction_basics.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/crawling/relative_urls.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/crawling/scraping_the_data.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/browser_devtools.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/computer_preparation.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/devtools_continued.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => 
scraping_basics_legacy_javascript}/data_extraction/images/browser-devtools-console-commands.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/browser-devtools-console.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/browser-devtools-element-selection.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/browser-devtools-elements-tab.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/browser-devtools-hover.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/browser-devtools-wikipedia.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/csv-data-in-sheets.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/devtools-clean-price.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/devtools-cleaning-noise.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/devtools-collection-class.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/devtools-collection-product-hover.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/devtools-collection-product-name.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => 
scraping_basics_legacy_javascript}/data_extraction/images/devtools-collection-query-all.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/devtools-collection-query-hover.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/devtools-collection-query.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/devtools-collection-warehouse.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/devtools-count-products.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/devtools-extract-product-price.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/devtools-extract-product-title.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/devtools-find-child-elements.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/devtools-print-all-products.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/devtools-print-parent-text.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/devtools-product-titles.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/devtools-split-price.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => 
scraping_basics_legacy_javascript}/data_extraction/images/node-scraper-title.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/vscode-create-file.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/vscode-hello-world.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/vscode-npm-init.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/vscode-open-folder.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/vscode-open-terminal.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/vscode-test-setup.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/images/vscode-type-module.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/index.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/node_continued.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/node_js_scraper.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/project_setup.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/save_to_csv.md (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/data_extraction/using_devtools.md 
(100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/images/beginners-data-collection.png (100%) rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/images/beginners-data-extraction.png (100%) create mode 100644 sources/academy/webscraping/scraping_basics_legacy_javascript/index.md rename sources/academy/webscraping/{scraping_basics_javascript => scraping_basics_legacy_javascript}/introduction.md (100%) diff --git a/sources/academy/webscraping/scraping_basics_javascript2/01_devtools_inspecting.md b/sources/academy/webscraping/scraping_basics_javascript/01_devtools_inspecting.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript2/01_devtools_inspecting.md rename to sources/academy/webscraping/scraping_basics_javascript/01_devtools_inspecting.md diff --git a/sources/academy/webscraping/scraping_basics_javascript2/02_devtools_locating_elements.md b/sources/academy/webscraping/scraping_basics_javascript/02_devtools_locating_elements.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript2/02_devtools_locating_elements.md rename to sources/academy/webscraping/scraping_basics_javascript/02_devtools_locating_elements.md diff --git a/sources/academy/webscraping/scraping_basics_javascript2/03_devtools_extracting_data.md b/sources/academy/webscraping/scraping_basics_javascript/03_devtools_extracting_data.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript2/03_devtools_extracting_data.md rename to sources/academy/webscraping/scraping_basics_javascript/03_devtools_extracting_data.md diff --git a/sources/academy/webscraping/scraping_basics_javascript2/04_downloading_html.md b/sources/academy/webscraping/scraping_basics_javascript/04_downloading_html.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript2/04_downloading_html.md 
rename to sources/academy/webscraping/scraping_basics_javascript/04_downloading_html.md diff --git a/sources/academy/webscraping/scraping_basics_javascript2/05_parsing_html.md b/sources/academy/webscraping/scraping_basics_javascript/05_parsing_html.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript2/05_parsing_html.md rename to sources/academy/webscraping/scraping_basics_javascript/05_parsing_html.md diff --git a/sources/academy/webscraping/scraping_basics_javascript2/06_locating_elements.md b/sources/academy/webscraping/scraping_basics_javascript/06_locating_elements.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript2/06_locating_elements.md rename to sources/academy/webscraping/scraping_basics_javascript/06_locating_elements.md diff --git a/sources/academy/webscraping/scraping_basics_javascript2/07_extracting_data.md b/sources/academy/webscraping/scraping_basics_javascript/07_extracting_data.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript2/07_extracting_data.md rename to sources/academy/webscraping/scraping_basics_javascript/07_extracting_data.md diff --git a/sources/academy/webscraping/scraping_basics_javascript2/08_saving_data.md b/sources/academy/webscraping/scraping_basics_javascript/08_saving_data.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript2/08_saving_data.md rename to sources/academy/webscraping/scraping_basics_javascript/08_saving_data.md diff --git a/sources/academy/webscraping/scraping_basics_javascript2/09_getting_links.md b/sources/academy/webscraping/scraping_basics_javascript/09_getting_links.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript2/09_getting_links.md rename to sources/academy/webscraping/scraping_basics_javascript/09_getting_links.md diff --git a/sources/academy/webscraping/scraping_basics_javascript2/10_crawling.md 
b/sources/academy/webscraping/scraping_basics_javascript/10_crawling.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript2/10_crawling.md rename to sources/academy/webscraping/scraping_basics_javascript/10_crawling.md diff --git a/sources/academy/webscraping/scraping_basics_javascript2/11_scraping_variants.md b/sources/academy/webscraping/scraping_basics_javascript/11_scraping_variants.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript2/11_scraping_variants.md rename to sources/academy/webscraping/scraping_basics_javascript/11_scraping_variants.md diff --git a/sources/academy/webscraping/scraping_basics_javascript2/12_framework.md b/sources/academy/webscraping/scraping_basics_javascript/12_framework.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript2/12_framework.md rename to sources/academy/webscraping/scraping_basics_javascript/12_framework.md diff --git a/sources/academy/webscraping/scraping_basics_javascript2/13_platform.md b/sources/academy/webscraping/scraping_basics_javascript/13_platform.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript2/13_platform.md rename to sources/academy/webscraping/scraping_basics_javascript/13_platform.md diff --git a/sources/academy/webscraping/scraping_basics_javascript2/_exercises.mdx b/sources/academy/webscraping/scraping_basics_javascript/_exercises.mdx similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript2/_exercises.mdx rename to sources/academy/webscraping/scraping_basics_javascript/_exercises.mdx diff --git a/sources/academy/webscraping/scraping_basics_javascript2/images b/sources/academy/webscraping/scraping_basics_javascript/images similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript2/images rename to sources/academy/webscraping/scraping_basics_javascript/images diff --git 
a/sources/academy/webscraping/scraping_basics_javascript/index.md b/sources/academy/webscraping/scraping_basics_javascript/index.md index 064723fc3a..3751f05efb 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/index.md +++ b/sources/academy/webscraping/scraping_basics_javascript/index.md @@ -1,74 +1,69 @@ --- title: Web scraping basics for JavaScript devs -description: Learn how to develop web scrapers with this comprehensive and practical course. Go from beginner to expert, all in one place. -sidebar_position: 1 +description: Learn how to use JavaScript to extract information from websites in this practical course, starting from the absolute basics. +sidebar_position: 1.5 category: web scraping & automation -slug: /web-scraping-for-beginners +slug: /scraping-basics-javascript2 +unlisted: true --- -# Web scraping basics for JavaScript devs {#welcome} +import DocCardList from '@theme/DocCardList'; -**Learn how to develop web scrapers with this comprehensive and practical course. Go from beginner to expert, all in one place.** +**Learn how to use JavaScript to extract information from websites in this practical course, starting from the absolute basics.** --- -Welcome to **Web scraping basics for JavaScript devs**, a comprehensive, practical and long form web scraping course that will take you from an absolute beginner to a successful web scraper developer. If you're looking for a quick start, we recommend trying [this tutorial](https://blog.apify.com/web-scraping-javascript-nodejs/) instead. +In this course we'll use JavaScript to create an application for watching prices. It'll be able to scrape all product pages of an e-commerce website and record prices. Data from several runs of such a program would be useful for seeing trends in price changes, detecting discounts, etc. -This course is made by [Apify](https://apify.com), the web scraping and automation platform, but we will use only open-source technologies throughout all academy lessons. 
This means that the skills you learn will be applicable to any scraping project, and you'll be able to run your scrapers on any computer. No Apify account needed. +![E-commerce listing on the left, JSON with data on the right](./images/scraping.webp) -If you would like to learn about the Apify platform and how it can help you build, run and scale your web scraping and automation projects, see the [Apify platform course](../../platform/apify_platform.md), where we'll teach you all about Apify serverless infrastructure, proxies, API, scheduling, webhooks and much more. +## What we'll do -## Why learn scraper development? {#why-learn} +- Inspect pages using browser DevTools. +- Download web pages using the Fetch API. +- Extract data from web pages using the Cheerio library. +- Save extracted data in various formats (e.g. CSV which MS Excel or Google Sheets can open) using the json2csv library. +- Follow links programmatically (crawling). +- Save time and effort with frameworks, such as Crawlee, and scraping platforms, such as Apify. -With so many point-and-click tools and no-code software that can help you extract data from websites, what is the point of learning web scraper development? Contrary to what their marketing departments say, a point-and-click or no-code tool will never be as flexible, as powerful, or as optimized as a custom-built scraper. +## Who this course is for -Any software can do only what it was programmed to do. If you build your own scraper, it can do anything you want. And you can always quickly change it to do more, less, or the same, but faster or cheaper. The possibilities are endless once you know how scraping really works. +Anyone with basic knowledge of developing programs in JavaScript who wants to start with web scraping can take this course. The course does not expect you to have any prior knowledge of other web technologies or scraping. 
-Scraper development is a fun and challenging way to learn web development, web technologies, and understand the internet. You will reverse-engineer websites and understand how they work internally, what technologies they use and how they communicate with their servers. You will also master your chosen programming language and core programming concepts. When you truly understand web scraping, learning other technologies like React or Next.js will be a piece of cake. +## Requirements -## Course Summary {#summary} +- A macOS, Linux, or Windows machine with a web browser and Node.js installed. +- Familiarity with JavaScript basics: variables, conditions, loops, functions, strings, arrays, objects, files, classes, promises, imports, and exceptions. +- Comfort with building a Node.js package and installing dependencies with `npm`. +- Familiarity with running commands in Terminal (macOS/Linux) or Command Prompt (Windows). -When we set out to create the Academy, we wanted to build a complete guide to web scraping - a course that a beginner could use to create their first scraper, as well as a resource that professionals will continuously use to learn about advanced and niche web scraping techniques and technologies. All lessons include code examples and code-along exercises that you can use to immediately put your scraping skills into action. +## You may want to know -This is what you'll learn in the **Web scraping basics for JavaScript devs** course: +Let's explore the key reasons to take this course. What is web scraping good for, and what career opportunities does it enable for you? -* [Web scraping basics for JavaScript devs](./index.md) - * [Basics of data extraction](./data_extraction/index.md) - * [Basics of crawling](./crawling/index.md) - * [Best practices](./best_practices.md) +### Why learn scraping -## Requirements {#requirements} +The internet is full of useful data, but most of it isn't offered in a structured way that's easy to process programmatically. 
That's why you need scraping, a set of approaches to download websites and extract data from them. -You don't need to be a developer or a software engineer to complete this course, but basic programming knowledge is recommended. Don't be afraid, though. We explain everything in great detail in the course and provide external references that can help you level up your web scraping and web development skills. If you're new to programming, pay very close attention to the instructions and examples. A seemingly insignificant thing like using `[]` instead of `()` can make a lot of difference. +Scraper development is also a fun and challenging way to learn web development, web technologies, and understand the internet. You'll reverse-engineer websites, understand how they work internally, discover what technologies they use, and learn how they communicate with servers. You'll also master your chosen programming language and core programming concepts. Understanding web scraping gives you a head start in learning web technologies such as HTML, CSS, JavaScript, frontend frameworks (like React or Next.js), HTTP, REST APIs, GraphQL APIs, and more. -> If you don't already have basic programming knowledge and would like to be well-prepared for this course, we recommend learning about [JavaScript basics](https://developer.mozilla.org/en-US/curriculum/core/javascript-fundamentals/) and [CSS Selectors](https://developer.mozilla.org/en-US/docs/Learn/CSS/Building_blocks/Selectors). +### Why build your own scrapers -As you progress to the more advanced courses, the coding will get more challenging, but will still be manageable to a person with an intermediate level of programming skills. +Scrapers are programs specifically designed to mine data from the internet. Point-and-click or no-code scraping solutions do exist, but they only take you so far. While simple to use, they lack the flexibility and optimization needed to handle advanced cases. 
Only custom-built scrapers can tackle more difficult challenges. And unlike ready-made solutions, they can be fine-tuned to perform tasks more efficiently, at a lower cost, or with greater precision. -Ideally, you should have at least a moderate understanding of the following concepts: +### Why become a scraper dev -### JavaScript + Node.js {#javascript-and-node} +As a scraper developer, you are not limited by whether certain data is available programmatically through an official API—the entire web becomes your API! Here are some things you can do if you understand scraping: -It is recommended to understand at least the fundamentals of JavaScript and be proficient with Node.js prior to starting this course. If you are not yet comfortable with asynchronous programming (with promises and `async...await`), loops (and the different types of loops in JavaScript), modularity, or working with external packages, we would recommend studying the following resources before coming back and continuing this section: +- Improve your productivity by building personal tools, such as your own real estate or rare sneakers watchdog. +- Companies can hire you to build custom scrapers mining data important for their business. +- Become an invaluable asset to data journalism, data science, or nonprofit teams working to make the world a better place. +- You can publish your scrapers on platforms like the [Apify Store](https://apify.com/store) and earn money by renting them out to others. -* [`async...await` (YouTube)](https://www.youtube.com/watch?v=vn3tm0quoqE&ab_channel=Fireship) -* [JavaScript loops (MDN)](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Loops_and_iteration) -* [Modularity in Node.js](https://javascript.plainenglish.io/how-to-use-modular-patterns-in-nodejs-982f0e5c8f6e) +### Why learn with Apify -### General web development {#general-web-development} +We are [Apify](https://apify.com), a web scraping and automation platform. 
We do our best to build this course on top of open-source technologies. That means what you learn applies to any scraping project, and you'll be able to run your scrapers on any computer. We will show you how a scraping platform can simplify your life, but that lesson is optional and designed to fit within our [free tier](https://apify.com/pricing). -Throughout the next lessons, we will sometimes use certain technologies and terms related to the web without explaining them. This is because their knowledge will be **assumed** (unless we're showing something out of the ordinary). +## Course content -* [HTML](https://developer.mozilla.org/en-US/docs/Web/HTML) -* [HTTP protocol](https://developer.mozilla.org/en-US/docs/Web/HTTP) -* [DevTools](./data_extraction/browser_devtools.md) - -### jQuery or Cheerio {#jquery-or-cheerio} - -We'll be using the [**Cheerio**](https://www.npmjs.com/package/cheerio) package a lot to parse data from HTML. This package provides an API using jQuery syntax to help traverse downloaded HTML within Node.js. - -## Next up {#next} - -The course begins with a small bit of theory and moves into some realistic and practical examples of extracting data from the most popular websites on the internet using your browser console. [Let's get to it!](./introduction.md) - -> If you already have experience with HTML, CSS, and browser DevTools, feel free to skip to the [Basics of crawling](./crawling/index.md) section. + diff --git a/sources/academy/webscraping/scraping_basics_javascript2/index.md b/sources/academy/webscraping/scraping_basics_javascript2/index.md deleted file mode 100644 index 3751f05efb..0000000000 --- a/sources/academy/webscraping/scraping_basics_javascript2/index.md +++ /dev/null @@ -1,69 +0,0 @@ ---- -title: Web scraping basics for JavaScript devs -description: Learn how to use JavaScript to extract information from websites in this practical course, starting from the absolute basics. 
-sidebar_position: 1.5 -category: web scraping & automation -slug: /scraping-basics-javascript2 -unlisted: true ---- - -import DocCardList from '@theme/DocCardList'; - -**Learn how to use JavaScript to extract information from websites in this practical course, starting from the absolute basics.** - ---- - -In this course we'll use JavaScript to create an application for watching prices. It'll be able to scrape all product pages of an e-commerce website and record prices. Data from several runs of such program would be useful for seeing trends in price changes, detecting discounts, etc. - -![E-commerce listing on the left, JSON with data on the right](./images/scraping.webp) - -## What we'll do - -- Inspect pages using browser DevTools. -- Download web pages using the Fetch API. -- Extract data from web pages using the Cheerio library. -- Save extracted data in various formats (e.g. CSV which MS Excel or Google Sheets can open) using the json2csv library. -- Follow links programmatically (crawling). -- Save time and effort with frameworks, such as Crawlee, and scraping platforms, such as Apify. - -## Who this course is for - -Anyone with basic knowledge of developing programs in JavaScript who wants to start with web scraping can take this course. The course does not expect you to have any prior knowledge of other web technologies or scraping. - -## Requirements - -- A macOS, Linux, or Windows machine with a web browser and Node.js installed. -- Familiarity with JavaScript basics: variables, conditions, loops, functions, strings, arrays, objects, files, classes, promises, imports, and exceptions. -- Comfort with building a Node.js package and installing dependencies with `npm`. -- Familiarity with running commands in Terminal (macOS/Linux) or Command Prompt (Windows). - -## You may want to know - -Let's explore the key reasons to take this course. What is web scraping good for, and what career opportunities does it enable for you? 
- -### Why learn scraping - -The internet is full of useful data, but most of it isn't offered in a structured way that's easy to process programmatically. That's why you need scraping, a set of approaches to download websites and extract data from them. - -Scraper development is also a fun and challenging way to learn web development, web technologies, and understand the internet. You'll reverse-engineer websites, understand how they work internally, discover what technologies they use, and learn how they communicate with servers. You'll also master your chosen programming language and core programming concepts. Understanding web scraping gives you a head start in learning web technologies such as HTML, CSS, JavaScript, frontend frameworks (like React or Next.js), HTTP, REST APIs, GraphQL APIs, and more. - -### Why build your own scrapers - -Scrapers are programs specifically designed to mine data from the internet. Point-and-click or no-code scraping solutions do exist, but they only take you so far. While simple to use, they lack the flexibility and optimization needed to handle advanced cases. Only custom-built scrapers can tackle more difficult challenges. And unlike ready-made solutions, they can be fine-tuned to perform tasks more efficiently, at a lower cost, or with greater precision. - -### Why become a scraper dev - -As a scraper developer, you are not limited by whether certain data is available programmatically through an official API—the entire web becomes your API! Here are some things you can do if you understand scraping: - -- Improve your productivity by building personal tools, such as your own real estate or rare sneakers watchdog. -- Companies can hire you to build custom scrapers mining data important for their business. -- Become an invaluable asset to data journalism, data science, or nonprofit teams working to make the world a better place. 
-- You can publish your scrapers on platforms like the [Apify Store](https://apify.com/store) and earn money by renting them out to others. - -### Why learn with Apify - -We are [Apify](https://apify.com), a web scraping and automation platform. We do our best to build this course on top of open source technologies. That means what you learn applies to any scraping project, and you'll be able to run your scrapers on any computer. We will show you how a scraping platform can simplify your life, but that lesson is optional and designed to fit within our [free tier](https://apify.com/pricing). - -## Course content - - diff --git a/sources/academy/webscraping/scraping_basics_javascript/best_practices.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/best_practices.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/best_practices.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/best_practices.md diff --git a/sources/academy/webscraping/scraping_basics_javascript/challenge/images/crawlee-create.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/images/crawlee-create.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/challenge/images/crawlee-create.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/images/crawlee-create.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/challenge/images/offers-page.jpg b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/images/offers-page.jpg similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/challenge/images/offers-page.jpg rename to sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/images/offers-page.jpg diff --git a/sources/academy/webscraping/scraping_basics_javascript/challenge/images/view-offers-button.jpg 
b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/images/view-offers-button.jpg similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/challenge/images/view-offers-button.jpg rename to sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/images/view-offers-button.jpg diff --git a/sources/academy/webscraping/scraping_basics_javascript/challenge/index.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/index.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/challenge/index.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/index.md diff --git a/sources/academy/webscraping/scraping_basics_javascript/challenge/initializing_and_setting_up.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/initializing_and_setting_up.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/challenge/initializing_and_setting_up.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/initializing_and_setting_up.md diff --git a/sources/academy/webscraping/scraping_basics_javascript/challenge/modularity.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/modularity.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/challenge/modularity.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/modularity.md diff --git a/sources/academy/webscraping/scraping_basics_javascript/challenge/scraping_amazon.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/scraping_amazon.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/challenge/scraping_amazon.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/scraping_amazon.md diff --git 
a/sources/academy/webscraping/scraping_basics_javascript/crawling/exporting_data.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/exporting_data.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/exporting_data.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/exporting_data.md diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/filtering_links.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/filtering_links.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/filtering_links.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/filtering_links.md diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/finding_links.js b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/finding_links.js similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/finding_links.js rename to sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/finding_links.js diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/finding_links.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/finding_links.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/finding_links.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/finding_links.md diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/first_crawl.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/first_crawl.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/first_crawl.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/first_crawl.md diff --git 
a/sources/academy/webscraping/scraping_basics_javascript/crawling/headless_browser.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/headless_browser.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/headless_browser.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/headless_browser.md diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/images/filtering-product-detail-link.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/images/filtering-product-detail-link.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/images/filtering-product-detail-link.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/images/filtering-product-detail-link.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/images/filtering-product-urls.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/images/filtering-product-urls.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/images/filtering-product-urls.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/images/filtering-product-urls.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/images/filtering-regex-urls.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/images/filtering-regex-urls.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/images/filtering-regex-urls.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/images/filtering-regex-urls.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/images/headless-dynamic-data.png 
b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/images/headless-dynamic-data.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/images/headless-dynamic-data.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/images/headless-dynamic-data.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/images/scraping-title.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/images/scraping-title.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/images/scraping-title.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/images/scraping-title.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/images/warehouse-links.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/images/warehouse-links.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/images/warehouse-links.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/images/warehouse-links.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/images/warehouse-store.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/images/warehouse-store.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/images/warehouse-store.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/images/warehouse-store.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/index.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/index.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/index.md rename to 
sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/index.md diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/pro_scraping.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/pro_scraping.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/pro_scraping.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/pro_scraping.md diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/recap_extraction_basics.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/recap_extraction_basics.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/recap_extraction_basics.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/recap_extraction_basics.md diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/relative_urls.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/relative_urls.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/relative_urls.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/relative_urls.md diff --git a/sources/academy/webscraping/scraping_basics_javascript/crawling/scraping_the_data.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/scraping_the_data.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/crawling/scraping_the_data.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/scraping_the_data.md diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/browser_devtools.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/browser_devtools.md similarity index 100% rename from 
sources/academy/webscraping/scraping_basics_javascript/data_extraction/browser_devtools.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/browser_devtools.md diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/computer_preparation.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/computer_preparation.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/computer_preparation.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/computer_preparation.md diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/devtools_continued.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/devtools_continued.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/devtools_continued.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/devtools_continued.md diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-console-commands.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/browser-devtools-console-commands.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-console-commands.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/browser-devtools-console-commands.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-console.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/browser-devtools-console.png similarity index 100% rename from 
sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-console.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/browser-devtools-console.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-element-selection.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/browser-devtools-element-selection.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-element-selection.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/browser-devtools-element-selection.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-elements-tab.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/browser-devtools-elements-tab.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-elements-tab.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/browser-devtools-elements-tab.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-hover.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/browser-devtools-hover.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-hover.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/browser-devtools-hover.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-wikipedia.png 
b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/browser-devtools-wikipedia.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/browser-devtools-wikipedia.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/browser-devtools-wikipedia.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/csv-data-in-sheets.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/csv-data-in-sheets.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/csv-data-in-sheets.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/csv-data-in-sheets.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-clean-price.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-clean-price.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-clean-price.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-clean-price.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-cleaning-noise.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-cleaning-noise.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-cleaning-noise.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-cleaning-noise.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-class.png 
b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-collection-class.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-class.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-collection-class.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-product-hover.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-collection-product-hover.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-product-hover.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-collection-product-hover.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-product-name.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-collection-product-name.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-product-name.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-collection-product-name.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-query-all.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-collection-query-all.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-query-all.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-collection-query-all.png diff --git 
a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-query-hover.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-collection-query-hover.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-query-hover.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-collection-query-hover.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-query.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-collection-query.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-query.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-collection-query.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-warehouse.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-collection-warehouse.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-collection-warehouse.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-collection-warehouse.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-count-products.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-count-products.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-count-products.png rename to 
sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-count-products.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-extract-product-price.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-extract-product-price.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-extract-product-price.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-extract-product-price.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-extract-product-title.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-extract-product-title.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-extract-product-title.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-extract-product-title.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-find-child-elements.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-find-child-elements.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-find-child-elements.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-find-child-elements.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-print-all-products.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-print-all-products.png similarity index 100% rename from 
sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-print-all-products.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-print-all-products.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-print-parent-text.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-print-parent-text.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-print-parent-text.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-print-parent-text.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-product-titles.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-product-titles.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-product-titles.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-product-titles.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-split-price.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-split-price.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/devtools-split-price.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/devtools-split-price.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/node-scraper-title.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/node-scraper-title.png similarity index 100% rename from 
sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/node-scraper-title.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/node-scraper-title.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-create-file.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/vscode-create-file.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-create-file.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/vscode-create-file.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-hello-world.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/vscode-hello-world.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-hello-world.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/vscode-hello-world.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-npm-init.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/vscode-npm-init.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-npm-init.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/vscode-npm-init.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-open-folder.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/vscode-open-folder.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-open-folder.png 
rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/vscode-open-folder.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-open-terminal.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/vscode-open-terminal.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-open-terminal.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/vscode-open-terminal.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-test-setup.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/vscode-test-setup.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-test-setup.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/vscode-test-setup.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-type-module.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/vscode-type-module.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/images/vscode-type-module.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/images/vscode-type-module.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/index.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/index.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/index.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/index.md diff --git 
a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/node_continued.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/node_continued.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/node_continued.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/node_continued.md diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/node_js_scraper.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/node_js_scraper.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/node_js_scraper.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/node_js_scraper.md diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/project_setup.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/project_setup.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/project_setup.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/project_setup.md diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/save_to_csv.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/save_to_csv.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/data_extraction/save_to_csv.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/save_to_csv.md diff --git a/sources/academy/webscraping/scraping_basics_javascript/data_extraction/using_devtools.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/using_devtools.md similarity index 100% rename from 
sources/academy/webscraping/scraping_basics_javascript/data_extraction/using_devtools.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/using_devtools.md diff --git a/sources/academy/webscraping/scraping_basics_javascript/images/beginners-data-collection.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/images/beginners-data-collection.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/images/beginners-data-collection.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/images/beginners-data-collection.png diff --git a/sources/academy/webscraping/scraping_basics_javascript/images/beginners-data-extraction.png b/sources/academy/webscraping/scraping_basics_legacy_javascript/images/beginners-data-extraction.png similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/images/beginners-data-extraction.png rename to sources/academy/webscraping/scraping_basics_legacy_javascript/images/beginners-data-extraction.png diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/index.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/index.md new file mode 100644 index 0000000000..064723fc3a --- /dev/null +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/index.md @@ -0,0 +1,74 @@ +--- +title: Web scraping basics for JavaScript devs +description: Learn how to develop web scrapers with this comprehensive and practical course. Go from beginner to expert, all in one place. +sidebar_position: 1 +category: web scraping & automation +slug: /web-scraping-for-beginners +--- + +# Web scraping basics for JavaScript devs {#welcome} + +**Learn how to develop web scrapers with this comprehensive and practical course. 
Go from beginner to expert, all in one place.** + +--- + +Welcome to **Web scraping basics for JavaScript devs**, a comprehensive, practical and long form web scraping course that will take you from an absolute beginner to a successful web scraper developer. If you're looking for a quick start, we recommend trying [this tutorial](https://blog.apify.com/web-scraping-javascript-nodejs/) instead. + +This course is made by [Apify](https://apify.com), the web scraping and automation platform, but we will use only open-source technologies throughout all academy lessons. This means that the skills you learn will be applicable to any scraping project, and you'll be able to run your scrapers on any computer. No Apify account needed. + +If you would like to learn about the Apify platform and how it can help you build, run and scale your web scraping and automation projects, see the [Apify platform course](../../platform/apify_platform.md), where we'll teach you all about Apify serverless infrastructure, proxies, API, scheduling, webhooks and much more. + +## Why learn scraper development? {#why-learn} + +With so many point-and-click tools and no-code software that can help you extract data from websites, what is the point of learning web scraper development? Contrary to what their marketing departments say, a point-and-click or no-code tool will never be as flexible, as powerful, or as optimized as a custom-built scraper. + +Any software can do only what it was programmed to do. If you build your own scraper, it can do anything you want. And you can always quickly change it to do more, less, or the same, but faster or cheaper. The possibilities are endless once you know how scraping really works. + +Scraper development is a fun and challenging way to learn web development, web technologies, and understand the internet. You will reverse-engineer websites and understand how they work internally, what technologies they use and how they communicate with their servers. 
You will also master your chosen programming language and core programming concepts. When you truly understand web scraping, learning other technologies like React or Next.js will be a piece of cake. + +## Course Summary {#summary} + +When we set out to create the Academy, we wanted to build a complete guide to web scraping - a course that a beginner could use to create their first scraper, as well as a resource that professionals will continuously use to learn about advanced and niche web scraping techniques and technologies. All lessons include code examples and code-along exercises that you can use to immediately put your scraping skills into action. + +This is what you'll learn in the **Web scraping basics for JavaScript devs** course: + +* [Web scraping basics for JavaScript devs](./index.md) + * [Basics of data extraction](./data_extraction/index.md) + * [Basics of crawling](./crawling/index.md) + * [Best practices](./best_practices.md) + +## Requirements {#requirements} + +You don't need to be a developer or a software engineer to complete this course, but basic programming knowledge is recommended. Don't be afraid, though. We explain everything in great detail in the course and provide external references that can help you level up your web scraping and web development skills. If you're new to programming, pay very close attention to the instructions and examples. A seemingly insignificant thing like using `[]` instead of `()` can make a lot of difference. + +> If you don't already have basic programming knowledge and would like to be well-prepared for this course, we recommend learning about [JavaScript basics](https://developer.mozilla.org/en-US/curriculum/core/javascript-fundamentals/) and [CSS Selectors](https://developer.mozilla.org/en-US/docs/Learn/CSS/Building_blocks/Selectors). 
+ +As you progress to the more advanced courses, the coding will get more challenging, but will still be manageable to a person with an intermediate level of programming skills. + +Ideally, you should have at least a moderate understanding of the following concepts: + +### JavaScript + Node.js {#javascript-and-node} + +It is recommended to understand at least the fundamentals of JavaScript and be proficient with Node.js prior to starting this course. If you are not yet comfortable with asynchronous programming (with promises and `async...await`), loops (and the different types of loops in JavaScript), modularity, or working with external packages, we would recommend studying the following resources before coming back and continuing this section: + +* [`async...await` (YouTube)](https://www.youtube.com/watch?v=vn3tm0quoqE&ab_channel=Fireship) +* [JavaScript loops (MDN)](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Loops_and_iteration) +* [Modularity in Node.js](https://javascript.plainenglish.io/how-to-use-modular-patterns-in-nodejs-982f0e5c8f6e) + +### General web development {#general-web-development} + +Throughout the next lessons, we will sometimes use certain technologies and terms related to the web without explaining them. This is because their knowledge will be **assumed** (unless we're showing something out of the ordinary). + +* [HTML](https://developer.mozilla.org/en-US/docs/Web/HTML) +* [HTTP protocol](https://developer.mozilla.org/en-US/docs/Web/HTTP) +* [DevTools](./data_extraction/browser_devtools.md) + +### jQuery or Cheerio {#jquery-or-cheerio} + +We'll be using the [**Cheerio**](https://www.npmjs.com/package/cheerio) package a lot to parse data from HTML. This package provides an API using jQuery syntax to help traverse downloaded HTML within Node.js. 
+ ## Next up {#next} + +The course begins with a small bit of theory and moves into some realistic and practical examples of extracting data from the most popular websites on the internet using your browser console. [Let's get to it!](./introduction.md) + +> If you already have experience with HTML, CSS, and browser DevTools, feel free to skip to the [Basics of crawling](./crawling/index.md) section. diff --git a/sources/academy/webscraping/scraping_basics_javascript/introduction.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/introduction.md similarity index 100% rename from sources/academy/webscraping/scraping_basics_javascript/introduction.md rename to sources/academy/webscraping/scraping_basics_legacy_javascript/introduction.md From 9e5c3058dfe61b6b6d39cdc7b347ce0b84ea6ab1 Mon Sep 17 00:00:00 2001 From: Honza Javorek Date: Mon, 8 Sep 2025 15:02:58 +0200 Subject: [PATCH 2/9] chore: prepare redirects --- nginx.conf | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/nginx.conf b/nginx.conf index be0aba51d2..679dc6b709 100644 --- a/nginx.conf +++ b/nginx.conf @@ -312,6 +312,33 @@ server { rewrite ^academy/advanced-web-scraping/scraping-paginated-sites$ /academy/advanced-web-scraping/crawling/crawling-with-search permanent; rewrite ^academy/php$ /academy/php/use-apify-from-php redirect; # not permanent in case we want to reuse /php in the future + # Academy: replacing the 'Web Scraping for Beginners' course + # rewrite ^academy/web-scraping-for-beginners/best-practices$ + # rewrite ^academy/web-scraping-for-beginners/introduction$ + # rewrite ^academy/web-scraping-for-beginners/challenge$ + # rewrite ^academy/web-scraping-for-beginners/challenge/initializing-and-setting-up$ + # rewrite ^academy/web-scraping-for-beginners/challenge/modularity$ + # rewrite ^academy/web-scraping-for-beginners/challenge/scraping-amazon$ + # rewrite ^academy/web-scraping-for-beginners/crawling/exporting-data$ + # rewrite
^academy/web-scraping-for-beginners/crawling/filtering-links$ + # rewrite ^academy/web-scraping-for-beginners/crawling/finding-links$ + # rewrite ^academy/web-scraping-for-beginners/crawling/first-crawl$ + # rewrite ^academy/web-scraping-for-beginners/crawling/headless-browser$ + # rewrite ^academy/web-scraping-for-beginners/crawling$ + # rewrite ^academy/web-scraping-for-beginners/crawling/pro-scraping$ + # rewrite ^academy/web-scraping-for-beginners/crawling/recap-extraction-basics$ + # rewrite ^academy/web-scraping-for-beginners/crawling/relative-urls$ + # rewrite ^academy/web-scraping-for-beginners/crawling/scraping-the-data$ + # rewrite ^academy/web-scraping-for-beginners/data-extraction/browser-devtools$ + # rewrite ^academy/web-scraping-for-beginners/data-extraction/computer-preparation$ + # rewrite ^academy/web-scraping-for-beginners/data-extraction/devtools-continued$ + # rewrite ^academy/web-scraping-for-beginners/data-extraction$ + # rewrite ^academy/web-scraping-for-beginners/data-extraction/node-continued$ + # rewrite ^academy/web-scraping-for-beginners/data-extraction/node-js-scraper$ + # rewrite ^academy/web-scraping-for-beginners/data-extraction/project-setup$ + # rewrite ^academy/web-scraping-for-beginners/data-extraction/save-to-csv$ + # rewrite ^academy/web-scraping-for-beginners/data-extraction/using-devtools$ + # Removed pages # GPT plugins were discontinued April 9th, 2024 - https://help.openai.com/en/articles/8988022-winding-down-the-chatgpt-plugins-beta rewrite ^/platform/integrations/chatgpt-plugin$ https://blog.apify.com/add-custom-actions-to-your-gpts/ redirect; From f50f6bc4f734d758fa1aeea68730782fa26de9b8 Mon Sep 17 00:00:00 2001 From: Honza Javorek Date: Mon, 8 Sep 2025 15:04:00 +0200 Subject: [PATCH 3/9] feat: put the new course to the /scraping-basics-javascript/ URL --- .../scraping_basics_javascript/01_devtools_inspecting.md | 2 +- .../scraping_basics_javascript/02_devtools_locating_elements.md | 2 +-
.../scraping_basics_javascript/03_devtools_extracting_data.md | 2 +- .../scraping_basics_javascript/04_downloading_html.md | 2 +- .../webscraping/scraping_basics_javascript/05_parsing_html.md | 2 +- .../scraping_basics_javascript/06_locating_elements.md | 2 +- .../scraping_basics_javascript/07_extracting_data.md | 2 +- .../webscraping/scraping_basics_javascript/08_saving_data.md | 2 +- .../webscraping/scraping_basics_javascript/09_getting_links.md | 2 +- .../webscraping/scraping_basics_javascript/10_crawling.md | 2 +- .../scraping_basics_javascript/11_scraping_variants.md | 2 +- .../webscraping/scraping_basics_javascript/12_framework.md | 2 +- .../webscraping/scraping_basics_javascript/13_platform.md | 2 +- 13 files changed, 13 insertions(+), 13 deletions(-) diff --git a/sources/academy/webscraping/scraping_basics_javascript/01_devtools_inspecting.md b/sources/academy/webscraping/scraping_basics_javascript/01_devtools_inspecting.md index 2540bfd21b..40b609e53f 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/01_devtools_inspecting.md +++ b/sources/academy/webscraping/scraping_basics_javascript/01_devtools_inspecting.md @@ -2,7 +2,7 @@ title: Inspecting web pages with browser DevTools sidebar_label: "DevTools: Inspecting" description: Lesson about using the browser tools for developers to inspect and manipulate the structure of a website. 
-slug: /scraping-basics-javascript2/devtools-inspecting +slug: /scraping-basics-javascript/devtools-inspecting unlisted: true --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/02_devtools_locating_elements.md b/sources/academy/webscraping/scraping_basics_javascript/02_devtools_locating_elements.md index 0796418c9e..1459b6ade8 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/02_devtools_locating_elements.md +++ b/sources/academy/webscraping/scraping_basics_javascript/02_devtools_locating_elements.md @@ -2,7 +2,7 @@ title: Locating HTML elements on a web page with browser DevTools sidebar_label: "DevTools: Locating HTML elements" description: Lesson about using the browser tools for developers to manually find products on an e-commerce website. -slug: /scraping-basics-javascript2/devtools-locating-elements +slug: /scraping-basics-javascript/devtools-locating-elements unlisted: true --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/03_devtools_extracting_data.md b/sources/academy/webscraping/scraping_basics_javascript/03_devtools_extracting_data.md index aeb6fc7ed6..0d0b7b876f 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/03_devtools_extracting_data.md +++ b/sources/academy/webscraping/scraping_basics_javascript/03_devtools_extracting_data.md @@ -2,7 +2,7 @@ title: Extracting data from a web page with browser DevTools sidebar_label: "DevTools: Extracting data" description: Lesson about using the browser tools for developers to manually extract product data from an e-commerce website. 
-slug: /scraping-basics-javascript2/devtools-extracting-data +slug: /scraping-basics-javascript/devtools-extracting-data unlisted: true --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/04_downloading_html.md b/sources/academy/webscraping/scraping_basics_javascript/04_downloading_html.md index f5ff62a6c8..0199d94cf6 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/04_downloading_html.md +++ b/sources/academy/webscraping/scraping_basics_javascript/04_downloading_html.md @@ -2,7 +2,7 @@ title: Downloading HTML with Node.js sidebar_label: Downloading HTML description: Lesson about building a Node.js application for watching prices. Using the Fetch API to download HTML code of a product listing page. -slug: /scraping-basics-javascript2/downloading-html +slug: /scraping-basics-javascript/downloading-html unlisted: true --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/05_parsing_html.md b/sources/academy/webscraping/scraping_basics_javascript/05_parsing_html.md index f641e263df..2a31d88b3a 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/05_parsing_html.md +++ b/sources/academy/webscraping/scraping_basics_javascript/05_parsing_html.md @@ -2,7 +2,7 @@ title: Parsing HTML with Node.js sidebar_label: Parsing HTML description: Lesson about building a Node.js application for watching prices. Using the Cheerio library to parse HTML code of a product listing page. 
-slug: /scraping-basics-javascript2/parsing-html +slug: /scraping-basics-javascript/parsing-html unlisted: true --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/06_locating_elements.md b/sources/academy/webscraping/scraping_basics_javascript/06_locating_elements.md index 09101ee358..f5ac7b13c5 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/06_locating_elements.md +++ b/sources/academy/webscraping/scraping_basics_javascript/06_locating_elements.md @@ -2,7 +2,7 @@ title: Locating HTML elements with Node.js sidebar_label: Locating HTML elements description: Lesson about building a Node.js application for watching prices. Using the Cheerio library to locate products on the product listing page. -slug: /scraping-basics-javascript2/locating-elements +slug: /scraping-basics-javascript/locating-elements unlisted: true --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/07_extracting_data.md b/sources/academy/webscraping/scraping_basics_javascript/07_extracting_data.md index e7b81e9450..17c9873ae7 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/07_extracting_data.md +++ b/sources/academy/webscraping/scraping_basics_javascript/07_extracting_data.md @@ -2,7 +2,7 @@ title: Extracting data from HTML with Node.js sidebar_label: Extracting data from HTML description: Lesson about building a Node.js application for watching prices. Using string manipulation to extract and clean data scraped from the product listing page. 
-slug: /scraping-basics-javascript2/extracting-data +slug: /scraping-basics-javascript/extracting-data unlisted: true --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/08_saving_data.md b/sources/academy/webscraping/scraping_basics_javascript/08_saving_data.md index f3801457d8..5dc5516057 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/08_saving_data.md +++ b/sources/academy/webscraping/scraping_basics_javascript/08_saving_data.md @@ -2,7 +2,7 @@ title: Saving data with Node.js sidebar_label: Saving data description: Lesson about building a Node.js application for watching prices. Using the json2csv library to save data scraped from product listing pages in both JSON and CSV. -slug: /scraping-basics-javascript2/saving-data +slug: /scraping-basics-javascript/saving-data unlisted: true --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/09_getting_links.md b/sources/academy/webscraping/scraping_basics_javascript/09_getting_links.md index 6e3be25049..f83c675050 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/09_getting_links.md +++ b/sources/academy/webscraping/scraping_basics_javascript/09_getting_links.md @@ -2,7 +2,7 @@ title: Getting links from HTML with Node.js sidebar_label: Getting links from HTML description: Lesson about building a Node.js application for watching prices. Using the Cheerio library to locate links to individual product pages. 
-slug: /scraping-basics-javascript2/getting-links +slug: /scraping-basics-javascript/getting-links unlisted: true --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/10_crawling.md b/sources/academy/webscraping/scraping_basics_javascript/10_crawling.md index 85ad4acad2..29c60482e6 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/10_crawling.md +++ b/sources/academy/webscraping/scraping_basics_javascript/10_crawling.md @@ -2,7 +2,7 @@ title: Crawling websites with Node.js sidebar_label: Crawling websites description: Lesson about building a Node.js application for watching prices. Using the Fetch API to follow links to individual product pages. -slug: /scraping-basics-javascript2/crawling +slug: /scraping-basics-javascript/crawling unlisted: true --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/11_scraping_variants.md b/sources/academy/webscraping/scraping_basics_javascript/11_scraping_variants.md index 04d340119a..f2a7c131bd 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/11_scraping_variants.md +++ b/sources/academy/webscraping/scraping_basics_javascript/11_scraping_variants.md @@ -2,7 +2,7 @@ title: Scraping product variants with Node.js sidebar_label: Scraping product variants description: Lesson about building a Node.js application for watching prices. Using browser DevTools to figure out how to extract product variants and exporting them as separate items. 
-slug: /scraping-basics-javascript2/scraping-variants +slug: /scraping-basics-javascript/scraping-variants unlisted: true --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/12_framework.md b/sources/academy/webscraping/scraping_basics_javascript/12_framework.md index bc43ea0508..ff87758e99 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/12_framework.md +++ b/sources/academy/webscraping/scraping_basics_javascript/12_framework.md @@ -2,7 +2,7 @@ title: Using a scraping framework with Node.js sidebar_label: Using a framework description: Lesson about building a Node.js application for watching prices. Using the Crawlee framework to simplify creating a scraper. -slug: /scraping-basics-javascript2/framework +slug: /scraping-basics-javascript/framework unlisted: true --- diff --git a/sources/academy/webscraping/scraping_basics_javascript/13_platform.md b/sources/academy/webscraping/scraping_basics_javascript/13_platform.md index cc1fc3b7d1..5e0fb10082 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/13_platform.md +++ b/sources/academy/webscraping/scraping_basics_javascript/13_platform.md @@ -2,7 +2,7 @@ title: Using a scraping platform with Node.js sidebar_label: Using a platform description: Lesson about building a Node.js application for watching prices. Using the Apify platform to deploy a scraper. 
-slug: /scraping-basics-javascript2/platform +slug: /scraping-basics-javascript/platform unlisted: true --- From 43e97454adc08f44197aba32acbc31ba7c0187dd Mon Sep 17 00:00:00 2001 From: Honza Javorek Date: Mon, 8 Sep 2025 15:24:54 +0200 Subject: [PATCH 4/9] fix: edit links to the JS course --- sources/academy/homepage_content.json | 2 +- .../platform/expert_scraping_with_apify/actors_webhooks.md | 2 +- .../advanced_web_scraping/crawling/sitemaps-vs-search.md | 2 +- sources/academy/webscraping/advanced_web_scraping/index.md | 2 +- src/pages/index.tsx | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sources/academy/homepage_content.json b/sources/academy/homepage_content.json index 0799a0e182..9af8a74751 100644 --- a/sources/academy/homepage_content.json +++ b/sources/academy/homepage_content.json @@ -2,7 +2,7 @@ "Beginner courses": [ { "title": "Web scraping basics for JavaScript devs", - "link": "/academy/web-scraping-for-beginners", + "link": "/academy/scraping-basics-javascript", "description": "Learn how to use JavaScript to extract information from websites in this practical course, starting from the absolute basics.", "imageUrl": "/img/academy/intro.svg" }, diff --git a/sources/academy/platform/expert_scraping_with_apify/actors_webhooks.md b/sources/academy/platform/expert_scraping_with_apify/actors_webhooks.md index 53814c0033..0e46bfa195 100644 --- a/sources/academy/platform/expert_scraping_with_apify/actors_webhooks.md +++ b/sources/academy/platform/expert_scraping_with_apify/actors_webhooks.md @@ -41,7 +41,7 @@ Prior to moving forward, please read over these resources: ## Our task {#our-task} -In this task, we'll be building on top of what we already created in the [Web scraping basics for JavaScript devs](/academy/web-scraping-for-beginners/challenge) course's final challenge, so keep those files safe! 
+In this task, we'll be building on top of what we already created in the [Web scraping basics for JavaScript devs](/academy/scraping-basics-javascript/legacy/challenge) course's final challenge, so keep those files safe! Once our Amazon Actor has completed its run, we will, rather than sending an email to ourselves, call an Actor through a webhook. The Actor called will be a new Actor that we will create together, which will take the dataset ID as input, then subsequently filter through all of the results and return only the cheapest one for each product. All of the results of the Actor will be pushed to its default dataset. diff --git a/sources/academy/webscraping/advanced_web_scraping/crawling/sitemaps-vs-search.md b/sources/academy/webscraping/advanced_web_scraping/crawling/sitemaps-vs-search.md index f34b24d261..ae577c3455 100644 --- a/sources/academy/webscraping/advanced_web_scraping/crawling/sitemaps-vs-search.md +++ b/sources/academy/webscraping/advanced_web_scraping/crawling/sitemaps-vs-search.md @@ -5,7 +5,7 @@ sidebar_position: 1 slug: /advanced-web-scraping/crawling/sitemaps-vs-search --- -The core crawling problem comes to down to ensuring that we reliably find all detail pages on the target website or inside its categories. This is trivial for small sites. We just open the home page or category pages and paginate to the end as we did in the [Web scraping basics for JavaScript devs](/academy/web-scraping-for-beginners) course. +The core crawling problem comes down to ensuring that we reliably find all detail pages on the target website or inside its categories. This is trivial for small sites. We just open the home page or category pages and paginate to the end. Unfortunately, _most modern websites restrict pagination_ only to somewhere between 1 and 10,000 products. Solving this problem might seem relatively straightforward at first but there are multiple hurdles that we will explore in this lesson.
diff --git a/sources/academy/webscraping/advanced_web_scraping/index.md b/sources/academy/webscraping/advanced_web_scraping/index.md index fe58884117..33ffc603b0 100644 --- a/sources/academy/webscraping/advanced_web_scraping/index.md +++ b/sources/academy/webscraping/advanced_web_scraping/index.md @@ -6,7 +6,7 @@ category: web scraping & automation slug: /advanced-web-scraping --- -In the [Web scraping basics for JavaScript devs](/academy/web-scraping-for-beginners) course, we have learned the necessary basics required to create a scraper. In the following courses, we learned more about specific practices and techniques that will help us to solve most of the problems we will face. +In the [Web scraping basics for JavaScript devs](/academy/scraping-basics-javascript) course, we have learned the necessary basics required to create a scraper. In the following courses, we learned more about specific practices and techniques that will help us to solve most of the problems we will face. In this course, we will take all of that knowledge, add a few more advanced concepts, and apply them to learn how to build a production-ready web scraper. diff --git a/src/pages/index.tsx b/src/pages/index.tsx index 0d3b18947d..7769d30aa4 100644 --- a/src/pages/index.tsx +++ b/src/pages/index.tsx @@ -267,7 +267,7 @@ export default function Home() { icon={} title="Web scraping for beginners" description="Learn the basics of web scraping and how to develop your own scraper." 
- to="/academy/web-scraping-for-beginners" + to="/academy/scraping-basics-javascript" /> } From faa0b0a40a1f7ba5fcb8b19e3d2d6da911d6ac3a Mon Sep 17 00:00:00 2001 From: Honza Javorek Date: Mon, 8 Sep 2025 15:25:36 +0200 Subject: [PATCH 5/9] feat: change URLs of the legacy JS course --- .../scraping_basics_legacy_javascript/best_practices.md | 2 +- .../scraping_basics_legacy_javascript/challenge/index.md | 2 +- .../challenge/initializing_and_setting_up.md | 2 +- .../scraping_basics_legacy_javascript/challenge/modularity.md | 2 +- .../challenge/scraping_amazon.md | 2 +- .../crawling/exporting_data.md | 2 +- .../crawling/filtering_links.md | 2 +- .../scraping_basics_legacy_javascript/crawling/finding_links.md | 2 +- .../scraping_basics_legacy_javascript/crawling/first_crawl.md | 2 +- .../crawling/headless_browser.md | 2 +- .../scraping_basics_legacy_javascript/crawling/index.md | 2 +- .../scraping_basics_legacy_javascript/crawling/pro_scraping.md | 2 +- .../crawling/recap_extraction_basics.md | 2 +- .../scraping_basics_legacy_javascript/crawling/relative_urls.md | 2 +- .../crawling/scraping_the_data.md | 2 +- .../data_extraction/browser_devtools.md | 2 +- .../data_extraction/computer_preparation.md | 2 +- .../data_extraction/devtools_continued.md | 2 +- .../scraping_basics_legacy_javascript/data_extraction/index.md | 2 +- .../data_extraction/node_continued.md | 2 +- .../data_extraction/node_js_scraper.md | 2 +- .../data_extraction/project_setup.md | 2 +- .../data_extraction/save_to_csv.md | 2 +- .../data_extraction/using_devtools.md | 2 +- .../webscraping/scraping_basics_legacy_javascript/index.md | 2 +- .../scraping_basics_legacy_javascript/introduction.md | 2 +- 26 files changed, 26 insertions(+), 26 deletions(-) diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/best_practices.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/best_practices.md index b3e1540cc4..3f59d03be1 100644 --- 
a/sources/academy/webscraping/scraping_basics_legacy_javascript/best_practices.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/best_practices.md @@ -2,7 +2,7 @@ title: Best practices description: Understand the standards and best practices that we here at Apify abide by to write readable, scalable, and maintainable code. sidebar_position: 1.5 -slug: /web-scraping-for-beginners/best-practices +slug: /scraping-basics-javascript/legacy/best-practices --- # Best practices when writing scrapers {#best-practices} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/index.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/index.md index 3ab9ca4ee1..3dbc755e8c 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/index.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/index.md @@ -2,7 +2,7 @@ title: Challenge description: Test your knowledge acquired in the previous sections of this course by building an Amazon scraper using Crawlee's CheerioCrawler! sidebar_position: 1.4 -slug: /web-scraping-for-beginners/challenge +slug: /scraping-basics-javascript/legacy/challenge --- # Challenge diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/initializing_and_setting_up.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/initializing_and_setting_up.md index c0cf40bc11..4d57f91959 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/initializing_and_setting_up.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/initializing_and_setting_up.md @@ -2,7 +2,7 @@ title: Initializing & setting up description: When you extract links from a web page, you often end up with a lot of irrelevant URLs. Learn how to filter the links to only keep the ones you need. 
sidebar_position: 1 -slug: /web-scraping-for-beginners/challenge/initializing-and-setting-up +slug: /scraping-basics-javascript/legacy/challenge/initializing-and-setting-up --- # Initialization & setting up diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/modularity.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/modularity.md index e6d62c7b32..107ea2473e 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/modularity.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/modularity.md @@ -2,7 +2,7 @@ title: Modularity description: Before you build your first web scraper with Crawlee, it is important to understand the concept of modularity in programming. sidebar_position: 2 -slug: /web-scraping-for-beginners/challenge/modularity +slug: /scraping-basics-javascript/legacy/challenge/modularity --- # Modularity diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/scraping_amazon.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/scraping_amazon.md index fa82915930..cce8ba52ad 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/scraping_amazon.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/scraping_amazon.md @@ -2,7 +2,7 @@ title: Scraping Amazon description: Before you build your first web scraper with Crawlee, it is important to understand the concept of modularity in programming. 
sidebar_position: 4 -slug: /web-scraping-for-beginners/challenge/scraping-amazon +slug: /scraping-basics-javascript/legacy/challenge/scraping-amazon --- # Scraping Amazon diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/exporting_data.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/exporting_data.md index d0d4baad8d..21f3c16bbf 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/exporting_data.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/exporting_data.md @@ -2,7 +2,7 @@ title: Exporting data description: Learn how to export the data you scraped using Crawlee to CSV or JSON. sidebar_position: 9 -slug: /web-scraping-for-beginners/crawling/exporting-data +slug: /scraping-basics-javascript/legacy/crawling/exporting-data --- # Exporting data {#exporting-data} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/filtering_links.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/filtering_links.md index 34d4961aaa..c1862a7f14 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/filtering_links.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/filtering_links.md @@ -2,7 +2,7 @@ title: Filtering links description: When you extract links from a web page, you often end up with a lot of irrelevant URLs. Learn how to filter the links to only keep the ones you need. 
sidebar_position: 3 -slug: /web-scraping-for-beginners/crawling/filtering-links +slug: /scraping-basics-javascript/legacy/crawling/filtering-links --- import Tabs from '@theme/Tabs'; diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/finding_links.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/finding_links.md index 785d9396aa..a119df4c27 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/finding_links.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/finding_links.md @@ -2,7 +2,7 @@ title: Finding links description: Learn what a link looks like in HTML and how to find and extract their URLs when web scraping. Using both DevTools and Node.js. sidebar_position: 2 -slug: /web-scraping-for-beginners/crawling/finding-links +slug: /scraping-basics-javascript/legacy/crawling/finding-links --- import Example from '!!raw-loader!roa-loader!./finding_links.js'; diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/first_crawl.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/first_crawl.md index 432d06f646..29ecf422e6 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/first_crawl.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/first_crawl.md @@ -2,7 +2,7 @@ title: Your first crawl description: Learn how to crawl the web using Node.js, Cheerio and an HTTP client. Extract URLs from pages and use them to visit more websites. 
sidebar_position: 5 -slug: /web-scraping-for-beginners/crawling/first-crawl +slug: /scraping-basics-javascript/legacy/crawling/first-crawl --- # Your first crawl {#your-first-crawl} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/headless_browser.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/headless_browser.md index b57a810645..fa2c1da153 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/headless_browser.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/headless_browser.md @@ -2,7 +2,7 @@ title: Headless browsers description: Learn how to scrape the web with a headless browser using only a few lines of code. Chrome, Firefox, Safari, Edge - all are supported. sidebar_position: 8 -slug: /web-scraping-for-beginners/crawling/headless-browser +slug: /scraping-basics-javascript/legacy/crawling/headless-browser --- import Tabs from '@theme/Tabs'; diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/index.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/index.md index 14ba327616..3da6f289dc 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/index.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/index.md @@ -3,7 +3,7 @@ title: Basics of crawling description: Learn how to crawl the web with your scraper. How to extract links and URLs from web pages and how to manage the collected links to visit new pages. 
sidebar_position: 1.3 category: courses -slug: /web-scraping-for-beginners/crawling +slug: /scraping-basics-javascript/legacy/crawling --- # Basics of crawling {#basics} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/pro_scraping.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/pro_scraping.md index b4b1616417..d432d1daea 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/pro_scraping.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/pro_scraping.md @@ -2,7 +2,7 @@ title: Professional scraping description: Learn how to build scrapers quicker and get better and more robust results by using Crawlee, an open-source library for scraping in Node.js. sidebar_position: 7 -slug: /web-scraping-for-beginners/crawling/pro-scraping +slug: /scraping-basics-javascript/legacy/crawling/pro-scraping --- # Professional scraping 👷 {#pro-scraping} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/recap_extraction_basics.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/recap_extraction_basics.md index cdeea8cd58..de966c6560 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/recap_extraction_basics.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/recap_extraction_basics.md @@ -2,7 +2,7 @@ title: Recap - Data extraction description: Review our e-commerce website scraper and refresh our memory about its code and the programming techniques we used to extract and save the data. 
sidebar_position: 1 -slug: /web-scraping-for-beginners/crawling/recap-extraction-basics +slug: /scraping-basics-javascript/legacy/crawling/recap-extraction-basics --- # Recap of data extraction basics {#quick-recap} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/relative_urls.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/relative_urls.md index f9487c80a8..f2b8e7e296 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/relative_urls.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/relative_urls.md @@ -2,7 +2,7 @@ title: Relative URLs description: Learn about absolute and relative URLs used on web pages and how to work with them when parsing HTML with Cheerio in your scraper. sidebar_position: 4 -slug: /web-scraping-for-beginners/crawling/relative-urls +slug: /scraping-basics-javascript/legacy/crawling/relative-urls --- # Relative URLs {#filtering-links} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/scraping_the_data.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/scraping_the_data.md index 734c637d67..aa62ac7a62 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/scraping_the_data.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/scraping_the_data.md @@ -2,7 +2,7 @@ title: Scraping data description: Learn how to add data extraction logic to your crawler, which will allow you to extract data from all the websites you crawled. 
sidebar_position: 6 -slug: /web-scraping-for-beginners/crawling/scraping-the-data +slug: /scraping-basics-javascript/legacy/crawling/scraping-the-data --- # Scraping data {#scraping-data} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/browser_devtools.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/browser_devtools.md index e4d24df9b3..854499c881 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/browser_devtools.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/browser_devtools.md @@ -2,7 +2,7 @@ title: Starting with browser DevTools description: Learn about browser DevTools, a valuable tool in the world of web scraping, and how you can use them to extract data from a website. sidebar_position: 1 -slug: /web-scraping-for-beginners/data-extraction/browser-devtools +slug: /scraping-basics-javascript/legacy/data-extraction/browser-devtools --- **Learn about browser DevTools, a valuable tool in the world of web scraping, and how you can use them to extract data from a website.** diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/computer_preparation.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/computer_preparation.md index c4b9baf78b..a904e51ac3 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/computer_preparation.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/computer_preparation.md @@ -2,7 +2,7 @@ title: Computer preparation description: Set up your computer to be able to code scrapers with Node.js and JavaScript. Download Node.js and npm and run a Hello World script. 
sidebar_position: 4 -slug: /web-scraping-for-beginners/data-extraction/computer-preparation +slug: /scraping-basics-javascript/legacy/data-extraction/computer-preparation --- # Prepare your computer for programming {#prepare-computer} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/devtools_continued.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/devtools_continued.md index 79278386a1..135af5f784 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/devtools_continued.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/devtools_continued.md @@ -2,7 +2,7 @@ title: Extracting data with DevTools description: Continue learning how to extract data from a website using browser DevTools, CSS selectors, and JavaScript via the DevTools console. sidebar_position: 3 -slug: /web-scraping-for-beginners/data-extraction/devtools-continued +slug: /scraping-basics-javascript/legacy/data-extraction/devtools-continued --- **Continue learning how to extract data from a website using browser DevTools, CSS selectors, and JavaScript via the DevTools console.** diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/index.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/index.md index 0482b5eb38..1d9062ddf2 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/index.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/index.md @@ -3,7 +3,7 @@ title: Basics of data extraction description: Learn about HTML, CSS, and JavaScript, the basic building blocks of a website, and how to use them in web scraping and data extraction. 
sidebar_position: 1.2 category: courses -slug: /web-scraping-for-beginners/data-extraction +slug: /scraping-basics-javascript/legacy/data-extraction --- # Basics of data extraction {#basics} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/node_continued.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/node_continued.md index 1fdb51e7e7..5092da9273 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/node_continued.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/node_continued.md @@ -2,7 +2,7 @@ title: Extracting data with Node.js description: Continue learning how to create a web scraper with Node.js and Cheerio. Learn how to parse HTML and print the results of the data your scraper has collected. sidebar_position: 7 -slug: /web-scraping-for-beginners/data-extraction/node-continued +slug: /scraping-basics-javascript/legacy/data-extraction/node-continued --- **Continue learning how to create a web scraper with Node.js and Cheerio. Learn how to parse HTML and print the results of the data your scraper has collected.** diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/node_js_scraper.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/node_js_scraper.md index 746cd71036..9cf6aece0c 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/node_js_scraper.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/node_js_scraper.md @@ -2,7 +2,7 @@ title: Scraping with Node.js description: Learn how to use JavaScript and Node.js to create a web scraper, plus take advantage of the Cheerio and Got-scraping libraries to make your job easier. 
sidebar_position: 6 -slug: /web-scraping-for-beginners/data-extraction/node-js-scraper +slug: /scraping-basics-javascript/legacy/data-extraction/node-js-scraper --- **Learn how to use JavaScript and Node.js to create a web scraper, plus take advantage of the Cheerio and Got-scraping libraries to make your job easier.** diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/project_setup.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/project_setup.md index 72b146a408..567df949ab 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/project_setup.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/project_setup.md @@ -2,7 +2,7 @@ title: Project setup description: Create a new project with npm and Node.js. Install necessary libraries, and test that everything works before starting the next lesson. sidebar_position: 5 -slug: /web-scraping-for-beginners/data-extraction/project-setup +slug: /scraping-basics-javascript/legacy/data-extraction/project-setup --- # Setting up your project {#setting-up} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/save_to_csv.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/save_to_csv.md index b6ec1b7df4..b1ee862999 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/save_to_csv.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/save_to_csv.md @@ -2,7 +2,7 @@ title: Saving results to CSV description: Learn how to save the results of your scraper's collected data to a CSV file that can be opened in Excel, Google Sheets, or any other spreadsheets program. 
sidebar_position: 8 -slug: /web-scraping-for-beginners/data-extraction/save-to-csv +slug: /scraping-basics-javascript/legacy/data-extraction/save-to-csv --- # Saving results to CSV {#saving-to-csv} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/using_devtools.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/using_devtools.md index d486638243..e700e8e375 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/using_devtools.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/using_devtools.md @@ -2,7 +2,7 @@ title: Finding elements with DevTools description: Learn how to use browser DevTools, CSS selectors, and JavaScript via the DevTools console to extract data from a website. sidebar_position: 2 -slug: /web-scraping-for-beginners/data-extraction/using-devtools +slug: /scraping-basics-javascript/legacy/data-extraction/using-devtools --- **Learn how to use browser DevTools, CSS selectors, and JavaScript via the DevTools console to extract data from a website.** diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/index.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/index.md index 064723fc3a..ca3306684b 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/index.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/index.md @@ -3,7 +3,7 @@ title: Web scraping basics for JavaScript devs description: Learn how to develop web scrapers with this comprehensive and practical course. Go from beginner to expert, all in one place. 
sidebar_position: 1 category: web scraping & automation -slug: /web-scraping-for-beginners +slug: /scraping-basics-javascript/legacy --- # Web scraping basics for JavaScript devs {#welcome} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/introduction.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/introduction.md index aff6571d1f..579e979af1 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/introduction.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/introduction.md @@ -3,7 +3,7 @@ title: Introduction description: Start learning about web scraping, web crawling, data extraction, and popular tools to start developing your own scraper. sidebar_position: 1.1 category: courses -slug: /web-scraping-for-beginners/introduction +slug: /scraping-basics-javascript/legacy/introduction --- # Introduction {#introduction} From ff71f80c1dc913aa2273470d050c277c9b3ee162 Mon Sep 17 00:00:00 2001 From: Honza Javorek Date: Mon, 8 Sep 2025 15:50:23 +0200 Subject: [PATCH 6/9] fix: leftover, correct URL for the course root --- sources/academy/webscraping/scraping_basics_javascript/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sources/academy/webscraping/scraping_basics_javascript/index.md b/sources/academy/webscraping/scraping_basics_javascript/index.md index 3751f05efb..995208e31a 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/index.md +++ b/sources/academy/webscraping/scraping_basics_javascript/index.md @@ -3,7 +3,7 @@ title: Web scraping basics for JavaScript devs description: Learn how to use JavaScript to extract information from websites in this practical course, starting from the absolute basics. 
sidebar_position: 1.5 category: web scraping & automation -slug: /scraping-basics-javascript2 +slug: /scraping-basics-javascript unlisted: true --- From 4a6d4a35054ddd36e995811e513d71fe99430f77 Mon Sep 17 00:00:00 2001 From: Honza Javorek Date: Mon, 8 Sep 2025 15:51:00 +0200 Subject: [PATCH 7/9] feat: redirects from original URLs to the new JS course's lessons, with the original URL in query --- nginx.conf | 61 +++++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 30 deletions(-) diff --git a/nginx.conf b/nginx.conf index 679dc6b709..8c90d6cd2e 100644 --- a/nginx.conf +++ b/nginx.conf @@ -306,38 +306,39 @@ server { # Rename output schema to dataset schema rewrite ^/platform/actors/development/actor-definition/output-schema$ /platform/actors/development/actor-definition/dataset-schema permanent; - rewrite ^academy/deploying-your-code/output-schema$ /academy/deploying-your-code/dataset-schema permanent; + rewrite ^/academy/deploying-your-code/output-schema$ /academy/deploying-your-code/dataset-schema permanent; # Academy restructuring - rewrite ^academy/advanced-web-scraping/scraping-paginated-sites$ /academy/advanced-web-scraping/crawling/crawling-with-search permanent; - rewrite ^academy/php$ /academy/php/use-apify-from-php redirect; # not permanent in case we want to reuse /php in the future - - ; # Academy: replacing the 'Web Scraping for Beginners' course - ; rewrite ^academy/web-scraping-for-beginners/best-practices$ - ; rewrite ^academy/web-scraping-for-beginners/introduction$ - ; rewrite ^academy/web-scraping-for-beginners/challenge$ - ; rewrite ^academy/web-scraping-for-beginners/challenge/initializing-and-setting-up$ - ; rewrite ^academy/web-scraping-for-beginners/challenge/modularity$ - ; rewrite ^academy/web-scraping-for-beginners/challenge/scraping-amazon$ - ; rewrite ^academy/web-scraping-for-beginners/crawling/exporting-data$ - ; rewrite ^academy/web-scraping-for-beginners/crawling/filtering-links$ - ; rewrite 
^academy/web-scraping-for-beginners/crawling/finding-links$ - ; rewrite ^academy/web-scraping-for-beginners/crawling/first-crawl$ - ; rewrite ^academy/web-scraping-for-beginners/crawling/headless-browser$ - ; rewrite ^academy/web-scraping-for-beginners/crawling$ - ; rewrite ^academy/web-scraping-for-beginners/crawling/pro-scraping$ - ; rewrite ^academy/web-scraping-for-beginners/crawling/recap-extraction-basics$ - ; rewrite ^academy/web-scraping-for-beginners/crawling/relative-urls$ - ; rewrite ^academy/web-scraping-for-beginners/crawling/scraping-the-data$ - ; rewrite ^academy/web-scraping-for-beginners/data-extraction/browser-devtools$ - ; rewrite ^academy/web-scraping-for-beginners/data-extraction/computer-preparation$ - ; rewrite ^academy/web-scraping-for-beginners/data-extraction/devtools-continued$ - ; rewrite ^academy/web-scraping-for-beginners/data-extraction$ - ; rewrite ^academy/web-scraping-for-beginners/data-extraction/node-continued$ - ; rewrite ^academy/web-scraping-for-beginners/data-extraction/node-js-scraper$ - ; rewrite ^academy/web-scraping-for-beginners/data-extraction/project-setup$ - ; rewrite ^academy/web-scraping-for-beginners/data-extraction/save-to-csv$ - ; rewrite ^academy/web-scraping-for-beginners/data-extraction/using-devtools$ + rewrite ^/academy/advanced-web-scraping/scraping-paginated-sites$ /academy/advanced-web-scraping/crawling/crawling-with-search permanent; + rewrite ^/academy/php$ /academy/php/use-apify-from-php redirect; # not permanent in case we want to reuse /php in the future + + # Academy: replacing the 'Web Scraping for Beginners' course + rewrite ^/academy/web-scraping-for-beginners/best-practices$ /academy/scraping-basics-javascript?legacy-js-course=/legacy/best-practices permanent; + rewrite ^/academy/web-scraping-for-beginners/introduction$ /academy/scraping-basics-javascript?legacy-js-course=/legacy/introduction permanent; + rewrite ^/academy/web-scraping-for-beginners/challenge/initializing-and-setting-up$ 
/academy/scraping-basics-javascript?legacy-js-course=/legacy/challenge/initializing-and-setting-up permanent; + rewrite ^/academy/web-scraping-for-beginners/challenge/modularity$ /academy/scraping-basics-javascript?legacy-js-course=/legacy/challenge/modularity permanent; + rewrite ^/academy/web-scraping-for-beginners/challenge/scraping-amazon$ /academy/scraping-basics-javascript?legacy-js-course=/legacy/challenge/scraping-amazon permanent; + rewrite ^/academy/web-scraping-for-beginners/challenge$ /academy/scraping-basics-javascript?legacy-js-course=/legacy/challenge permanent; + rewrite ^/academy/web-scraping-for-beginners/crawling/exporting-data$ /academy/scraping-basics-javascript/framework?legacy-js-course=/legacy/crawling/exporting-data permanent; + rewrite ^/academy/web-scraping-for-beginners/crawling/filtering-links$ /academy/scraping-basics-javascript/getting-links?legacy-js-course=/legacy/crawling/filtering-links permanent; + rewrite ^/academy/web-scraping-for-beginners/crawling/finding-links$ /academy/scraping-basics-javascript/getting-links?legacy-js-course=/legacy/crawling/finding-links permanent; + rewrite ^/academy/web-scraping-for-beginners/crawling/first-crawl$ /academy/scraping-basics-javascript/crawling?legacy-js-course=/legacy/crawling/first-crawl permanent; + rewrite ^/academy/web-scraping-for-beginners/crawling/headless-browser$ /academy/scraping-basics-javascript?legacy-js-course=/legacy/crawling/headless-browser permanent; + rewrite ^/academy/web-scraping-for-beginners/crawling/pro-scraping$ /academy/scraping-basics-javascript/framework?legacy-js-course=/legacy/crawling/pro-scraping permanent; + rewrite ^/academy/web-scraping-for-beginners/crawling/recap-extraction-basics$ /academy/scraping-basics-javascript/extracting-data?legacy-js-course=/legacy/crawling/recap-extraction-basics permanent; + rewrite ^/academy/web-scraping-for-beginners/crawling/relative-urls$ 
/academy/scraping-basics-javascript/getting-links?legacy-js-course=/legacy/crawling/relative-urls permanent; + rewrite ^/academy/web-scraping-for-beginners/crawling/scraping-the-data$ /academy/scraping-basics-javascript/scraping-variants?legacy-js-course=/legacy/crawling/scraping-the-data permanent; + rewrite ^/academy/web-scraping-for-beginners/crawling$ /academy/scraping-basics-javascript/crawling?legacy-js-course=/legacy/crawling permanent; + rewrite ^/academy/web-scraping-for-beginners/data-extraction/browser-devtools$ /academy/scraping-basics-javascript/devtools-inspecting?legacy-js-course=/legacy/data-extraction/browser-devtools permanent; + rewrite ^/academy/web-scraping-for-beginners/data-extraction/computer-preparation$ /academy/scraping-basics-javascript/downloading-html?legacy-js-course=/legacy/data-extraction/computer-preparation permanent; + rewrite ^/academy/web-scraping-for-beginners/data-extraction/devtools-continued$ /academy/scraping-basics-javascript/devtools-extracting-data?legacy-js-course=/legacy/data-extraction/devtools-continued permanent; + rewrite ^/academy/web-scraping-for-beginners/data-extraction/node-continued$ /academy/scraping-basics-javascript/extracting-data?legacy-js-course=/legacy/data-extraction/node-continued permanent; + rewrite ^/academy/web-scraping-for-beginners/data-extraction/node-js-scraper$ /academy/scraping-basics-javascript/downloading-html?legacy-js-course=/legacy/data-extraction/node-js-scraper permanent; + rewrite ^/academy/web-scraping-for-beginners/data-extraction/project-setup$ /academy/scraping-basics-javascript/downloading-html?legacy-js-course=/legacy/data-extraction/project-setup permanent; + rewrite ^/academy/web-scraping-for-beginners/data-extraction/save-to-csv$ /academy/scraping-basics-javascript/saving-data?legacy-js-course=/legacy/data-extraction/save-to-csv permanent; + rewrite ^/academy/web-scraping-for-beginners/data-extraction/using-devtools$ 
/academy/scraping-basics-javascript/devtools-locating-elements?legacy-js-course=/legacy/data-extraction/using-devtools permanent; + rewrite ^/academy/web-scraping-for-beginners/data-extraction$ /academy/scraping-basics-javascript/devtools-inspecting?legacy-js-course=/legacy/data-extraction permanent; + rewrite ^/academy/web-scraping-for-beginners$ /academy/scraping-basics-javascript?legacy-js-course=/legacy permanent; # Removed pages # GPT plugins were discontinued April 9th, 2024 - https://help.openai.com/en/articles/8988022-winding-down-the-chatgpt-plugins-beta From d468b8efabc2a8b874e81d5c44a7290ba7de4aa0 Mon Sep 17 00:00:00 2001 From: Honza Javorek Date: Mon, 8 Sep 2025 15:57:51 +0200 Subject: [PATCH 8/9] feat: set new sidebar position --- sources/academy/webscraping/scraping_basics_javascript/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sources/academy/webscraping/scraping_basics_javascript/index.md b/sources/academy/webscraping/scraping_basics_javascript/index.md index 995208e31a..55d50342ee 100644 --- a/sources/academy/webscraping/scraping_basics_javascript/index.md +++ b/sources/academy/webscraping/scraping_basics_javascript/index.md @@ -1,7 +1,7 @@ --- title: Web scraping basics for JavaScript devs description: Learn how to use JavaScript to extract information from websites in this practical course, starting from the absolute basics. 
-sidebar_position: 1.5 +sidebar_position: 1 category: web scraping & automation slug: /scraping-basics-javascript unlisted: true From ce75785b722b1e6e6c8b4ba9b39e25a542cf5fcf Mon Sep 17 00:00:00 2001 From: Honza Javorek Date: Mon, 8 Sep 2025 15:58:09 +0200 Subject: [PATCH 9/9] feat: unlist the legacy JS course --- .../scraping_basics_legacy_javascript/best_practices.md | 1 + .../scraping_basics_legacy_javascript/challenge/index.md | 1 + .../challenge/initializing_and_setting_up.md | 1 + .../scraping_basics_legacy_javascript/challenge/modularity.md | 1 + .../challenge/scraping_amazon.md | 1 + .../crawling/exporting_data.md | 1 + .../crawling/filtering_links.md | 1 + .../crawling/finding_links.md | 1 + .../scraping_basics_legacy_javascript/crawling/first_crawl.md | 1 + .../crawling/headless_browser.md | 1 + .../scraping_basics_legacy_javascript/crawling/index.md | 1 + .../scraping_basics_legacy_javascript/crawling/pro_scraping.md | 1 + .../crawling/recap_extraction_basics.md | 1 + .../crawling/relative_urls.md | 1 + .../crawling/scraping_the_data.md | 1 + .../data_extraction/browser_devtools.md | 1 + .../data_extraction/computer_preparation.md | 1 + .../data_extraction/devtools_continued.md | 1 + .../scraping_basics_legacy_javascript/data_extraction/index.md | 1 + .../data_extraction/node_continued.md | 1 + .../data_extraction/node_js_scraper.md | 1 + .../data_extraction/project_setup.md | 1 + .../data_extraction/save_to_csv.md | 1 + .../data_extraction/using_devtools.md | 1 + .../webscraping/scraping_basics_legacy_javascript/index.md | 3 ++- .../scraping_basics_legacy_javascript/introduction.md | 1 + 26 files changed, 27 insertions(+), 1 deletion(-) diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/best_practices.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/best_practices.md index 3f59d03be1..61a810e819 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/best_practices.md +++ 
b/sources/academy/webscraping/scraping_basics_legacy_javascript/best_practices.md @@ -3,6 +3,7 @@ title: Best practices description: Understand the standards and best practices that we here at Apify abide by to write readable, scalable, and maintainable code. sidebar_position: 1.5 slug: /scraping-basics-javascript/legacy/best-practices +unlisted: true --- # Best practices when writing scrapers {#best-practices} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/index.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/index.md index 3dbc755e8c..8750cc2905 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/index.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/index.md @@ -3,6 +3,7 @@ title: Challenge description: Test your knowledge acquired in the previous sections of this course by building an Amazon scraper using Crawlee's CheerioCrawler! sidebar_position: 1.4 slug: /scraping-basics-javascript/legacy/challenge +unlisted: true --- # Challenge diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/initializing_and_setting_up.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/initializing_and_setting_up.md index 4d57f91959..ecf70f6a0b 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/initializing_and_setting_up.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/initializing_and_setting_up.md @@ -3,6 +3,7 @@ title: Initializing & setting up description: When you extract links from a web page, you often end up with a lot of irrelevant URLs. Learn how to filter the links to only keep the ones you need. 
sidebar_position: 1 slug: /scraping-basics-javascript/legacy/challenge/initializing-and-setting-up +unlisted: true --- # Initialization & setting up diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/modularity.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/modularity.md index 107ea2473e..4522f46c4b 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/modularity.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/modularity.md @@ -3,6 +3,7 @@ title: Modularity description: Before you build your first web scraper with Crawlee, it is important to understand the concept of modularity in programming. sidebar_position: 2 slug: /scraping-basics-javascript/legacy/challenge/modularity +unlisted: true --- # Modularity diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/scraping_amazon.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/scraping_amazon.md index cce8ba52ad..24e47b6d89 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/scraping_amazon.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/challenge/scraping_amazon.md @@ -3,6 +3,7 @@ title: Scraping Amazon description: Before you build your first web scraper with Crawlee, it is important to understand the concept of modularity in programming. 
sidebar_position: 4 slug: /scraping-basics-javascript/legacy/challenge/scraping-amazon +unlisted: true --- # Scraping Amazon diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/exporting_data.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/exporting_data.md index 21f3c16bbf..8c15050857 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/exporting_data.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/exporting_data.md @@ -3,6 +3,7 @@ title: Exporting data description: Learn how to export the data you scraped using Crawlee to CSV or JSON. sidebar_position: 9 slug: /scraping-basics-javascript/legacy/crawling/exporting-data +unlisted: true --- # Exporting data {#exporting-data} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/filtering_links.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/filtering_links.md index c1862a7f14..675b82db20 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/filtering_links.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/filtering_links.md @@ -3,6 +3,7 @@ title: Filtering links description: When you extract links from a web page, you often end up with a lot of irrelevant URLs. Learn how to filter the links to only keep the ones you need. 
sidebar_position: 3 slug: /scraping-basics-javascript/legacy/crawling/filtering-links +unlisted: true --- import Tabs from '@theme/Tabs'; diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/finding_links.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/finding_links.md index a119df4c27..92c5505869 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/finding_links.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/finding_links.md @@ -3,6 +3,7 @@ title: Finding links description: Learn what a link looks like in HTML and how to find and extract their URLs when web scraping. Using both DevTools and Node.js. sidebar_position: 2 slug: /scraping-basics-javascript/legacy/crawling/finding-links +unlisted: true --- import Example from '!!raw-loader!roa-loader!./finding_links.js'; diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/first_crawl.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/first_crawl.md index 29ecf422e6..f070e76d6d 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/first_crawl.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/first_crawl.md @@ -3,6 +3,7 @@ title: Your first crawl description: Learn how to crawl the web using Node.js, Cheerio and an HTTP client. Extract URLs from pages and use them to visit more websites. 
sidebar_position: 5 slug: /scraping-basics-javascript/legacy/crawling/first-crawl +unlisted: true --- # Your first crawl {#your-first-crawl} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/headless_browser.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/headless_browser.md index fa2c1da153..8e3fcafcfd 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/headless_browser.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/headless_browser.md @@ -3,6 +3,7 @@ title: Headless browsers description: Learn how to scrape the web with a headless browser using only a few lines of code. Chrome, Firefox, Safari, Edge - all are supported. sidebar_position: 8 slug: /scraping-basics-javascript/legacy/crawling/headless-browser +unlisted: true --- import Tabs from '@theme/Tabs'; diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/index.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/index.md index 3da6f289dc..afa99d7789 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/index.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/index.md @@ -4,6 +4,7 @@ description: Learn how to crawl the web with your scraper. 
How to extract links sidebar_position: 1.3 category: courses slug: /scraping-basics-javascript/legacy/crawling +unlisted: true --- # Basics of crawling {#basics} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/pro_scraping.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/pro_scraping.md index d432d1daea..eed2fdb47a 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/pro_scraping.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/pro_scraping.md @@ -3,6 +3,7 @@ title: Professional scraping description: Learn how to build scrapers quicker and get better and more robust results by using Crawlee, an open-source library for scraping in Node.js. sidebar_position: 7 slug: /scraping-basics-javascript/legacy/crawling/pro-scraping +unlisted: true --- # Professional scraping 👷 {#pro-scraping} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/recap_extraction_basics.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/recap_extraction_basics.md index de966c6560..c708360827 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/recap_extraction_basics.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/recap_extraction_basics.md @@ -3,6 +3,7 @@ title: Recap - Data extraction description: Review our e-commerce website scraper and refresh our memory about its code and the programming techniques we used to extract and save the data. 
sidebar_position: 1 slug: /scraping-basics-javascript/legacy/crawling/recap-extraction-basics +unlisted: true --- # Recap of data extraction basics {#quick-recap} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/relative_urls.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/relative_urls.md index f2b8e7e296..9eeb62ef51 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/relative_urls.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/relative_urls.md @@ -3,6 +3,7 @@ title: Relative URLs description: Learn about absolute and relative URLs used on web pages and how to work with them when parsing HTML with Cheerio in your scraper. sidebar_position: 4 slug: /scraping-basics-javascript/legacy/crawling/relative-urls +unlisted: true --- # Relative URLs {#filtering-links} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/scraping_the_data.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/scraping_the_data.md index aa62ac7a62..20ef95cd25 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/scraping_the_data.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/crawling/scraping_the_data.md @@ -3,6 +3,7 @@ title: Scraping data description: Learn how to add data extraction logic to your crawler, which will allow you to extract data from all the websites you crawled. 
sidebar_position: 6 slug: /scraping-basics-javascript/legacy/crawling/scraping-the-data +unlisted: true --- # Scraping data {#scraping-data} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/browser_devtools.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/browser_devtools.md index 854499c881..f390b4d715 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/browser_devtools.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/browser_devtools.md @@ -3,6 +3,7 @@ title: Starting with browser DevTools description: Learn about browser DevTools, a valuable tool in the world of web scraping, and how you can use them to extract data from a website. sidebar_position: 1 slug: /scraping-basics-javascript/legacy/data-extraction/browser-devtools +unlisted: true --- **Learn about browser DevTools, a valuable tool in the world of web scraping, and how you can use them to extract data from a website.** diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/computer_preparation.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/computer_preparation.md index a904e51ac3..52be851ab2 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/computer_preparation.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/computer_preparation.md @@ -3,6 +3,7 @@ title: Computer preparation description: Set up your computer to be able to code scrapers with Node.js and JavaScript. Download Node.js and npm and run a Hello World script. 
sidebar_position: 4 slug: /scraping-basics-javascript/legacy/data-extraction/computer-preparation +unlisted: true --- # Prepare your computer for programming {#prepare-computer} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/devtools_continued.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/devtools_continued.md index 135af5f784..a25515bed3 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/devtools_continued.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/devtools_continued.md @@ -3,6 +3,7 @@ title: Extracting data with DevTools description: Continue learning how to extract data from a website using browser DevTools, CSS selectors, and JavaScript via the DevTools console. sidebar_position: 3 slug: /scraping-basics-javascript/legacy/data-extraction/devtools-continued +unlisted: true --- **Continue learning how to extract data from a website using browser DevTools, CSS selectors, and JavaScript via the DevTools console.** diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/index.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/index.md index 1d9062ddf2..ec7593668f 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/index.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/index.md @@ -4,6 +4,7 @@ description: Learn about HTML, CSS, and JavaScript, the basic building blocks of sidebar_position: 1.2 category: courses slug: /scraping-basics-javascript/legacy/data-extraction +unlisted: true --- # Basics of data extraction {#basics} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/node_continued.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/node_continued.md index 5092da9273..cfeadeff46 100644 
--- a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/node_continued.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/node_continued.md @@ -3,6 +3,7 @@ title: Extracting data with Node.js description: Continue learning how to create a web scraper with Node.js and Cheerio. Learn how to parse HTML and print the results of the data your scraper has collected. sidebar_position: 7 slug: /scraping-basics-javascript/legacy/data-extraction/node-continued +unlisted: true --- **Continue learning how to create a web scraper with Node.js and Cheerio. Learn how to parse HTML and print the results of the data your scraper has collected.** diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/node_js_scraper.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/node_js_scraper.md index 9cf6aece0c..4a74cfb7e3 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/node_js_scraper.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/node_js_scraper.md @@ -3,6 +3,7 @@ title: Scraping with Node.js description: Learn how to use JavaScript and Node.js to create a web scraper, plus take advantage of the Cheerio and Got-scraping libraries to make your job easier. 
sidebar_position: 6 slug: /scraping-basics-javascript/legacy/data-extraction/node-js-scraper +unlisted: true --- **Learn how to use JavaScript and Node.js to create a web scraper, plus take advantage of the Cheerio and Got-scraping libraries to make your job easier.** diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/project_setup.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/project_setup.md index 567df949ab..9f8ee553ec 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/project_setup.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/project_setup.md @@ -3,6 +3,7 @@ title: Project setup description: Create a new project with npm and Node.js. Install necessary libraries, and test that everything works before starting the next lesson. sidebar_position: 5 slug: /scraping-basics-javascript/legacy/data-extraction/project-setup +unlisted: true --- # Setting up your project {#setting-up} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/save_to_csv.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/save_to_csv.md index b1ee862999..d3d7d5f672 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/save_to_csv.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/save_to_csv.md @@ -3,6 +3,7 @@ title: Saving results to CSV description: Learn how to save the results of your scraper's collected data to a CSV file that can be opened in Excel, Google Sheets, or any other spreadsheets program. 
sidebar_position: 8 slug: /scraping-basics-javascript/legacy/data-extraction/save-to-csv +unlisted: true --- # Saving results to CSV {#saving-to-csv} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/using_devtools.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/using_devtools.md index e700e8e375..2bcb3d9460 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/using_devtools.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/data_extraction/using_devtools.md @@ -3,6 +3,7 @@ title: Finding elements with DevTools description: Learn how to use browser DevTools, CSS selectors, and JavaScript via the DevTools console to extract data from a website. sidebar_position: 2 slug: /scraping-basics-javascript/legacy/data-extraction/using-devtools +unlisted: true --- **Learn how to use browser DevTools, CSS selectors, and JavaScript via the DevTools console to extract data from a website.** diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/index.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/index.md index ca3306684b..afd3c1c2c1 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/index.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/index.md @@ -1,9 +1,10 @@ --- title: Web scraping basics for JavaScript devs description: Learn how to develop web scrapers with this comprehensive and practical course. Go from beginner to expert, all in one place. 
-sidebar_position: 1 +sidebar_position: 3 category: web scraping & automation slug: /scraping-basics-javascript/legacy +unlisted: true --- # Web scraping basics for JavaScript devs {#welcome} diff --git a/sources/academy/webscraping/scraping_basics_legacy_javascript/introduction.md b/sources/academy/webscraping/scraping_basics_legacy_javascript/introduction.md index 579e979af1..772b0720c1 100644 --- a/sources/academy/webscraping/scraping_basics_legacy_javascript/introduction.md +++ b/sources/academy/webscraping/scraping_basics_legacy_javascript/introduction.md @@ -4,6 +4,7 @@ description: Start learning about web scraping, web crawling, data extraction, a sidebar_position: 1.1 category: courses slug: /scraping-basics-javascript/legacy/introduction +unlisted: true --- # Introduction {#introduction}