Skip to content

Commit adfd9bf

Browse files
committed
feat: package caches
1 parent c411bce commit adfd9bf

28 files changed

+873
-493
lines changed

.editorconfig

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,23 @@
11
root = true
22

33
[*]
4-
indent_style = space
4+
indent_style = tab
55
indent_size = 4
66
charset = utf-8
77
trim_trailing_whitespace = true
88
insert_final_newline = true
99
end_of_line = lf
1010
# editorconfig-tools is unable to ignore long strings or URLs
1111
max_line_length = null
12+
quote_type = single
1213

13-
[{*.yaml, *.yml}]
14+
[*.md]
1415
indent_size = 2
16+
17+
[*.yml]
18+
indent_size = 2
19+
indent_style = space
20+
21+
[*.yaml]
22+
indent_size = 2
23+
indent_style = space
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
# Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore
2+
3+
# Logs
4+
5+
logs
6+
*.log
7+
npm-debug.log*
8+
yarn-debug.log*
9+
yarn-error.log*
10+
lerna-debug.log*
11+
.pnpm-debug.log*
12+
13+
# Caches
14+
15+
.cache
16+
17+
# Diagnostic reports (https://nodejs.org/api/report.html)
18+
19+
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
20+
21+
# Runtime data
22+
23+
pids
24+
*.pid
25+
*.seed
26+
*.pid.lock
27+
28+
# Directory for instrumented libs generated by jscoverage/JSCover
29+
30+
lib-cov
31+
32+
# Coverage directory used by tools like istanbul
33+
34+
coverage
35+
*.lcov
36+
37+
# nyc test coverage
38+
39+
.nyc_output
40+
41+
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
42+
43+
.grunt
44+
45+
# Bower dependency directory (https://bower.io/)
46+
47+
bower_components
48+
49+
# node-waf configuration
50+
51+
.lock-wscript
52+
53+
# Compiled binary addons (https://nodejs.org/api/addons.html)
54+
55+
build/Release
56+
57+
# Dependency directories
58+
59+
node_modules/
60+
jspm_packages/
61+
62+
# Snowpack dependency directory (https://snowpack.dev/)
63+
64+
web_modules/
65+
66+
# TypeScript cache
67+
68+
*.tsbuildinfo
69+
70+
# Optional npm cache directory
71+
72+
.npm
73+
74+
# Optional eslint cache
75+
76+
.eslintcache
77+
78+
# Optional stylelint cache
79+
80+
.stylelintcache
81+
82+
# Microbundle cache
83+
84+
.rpt2_cache/
85+
.rts2_cache_cjs/
86+
.rts2_cache_es/
87+
.rts2_cache_umd/
88+
89+
# Optional REPL history
90+
91+
.node_repl_history
92+
93+
# Output of 'npm pack'
94+
95+
*.tgz
96+
97+
# Yarn Integrity file
98+
99+
.yarn-integrity
100+
101+
# dotenv environment variable files
102+
103+
.env
104+
.env.development.local
105+
.env.test.local
106+
.env.production.local
107+
.env.local
108+
109+
# parcel-bundler cache (https://parceljs.org/)
110+
111+
.parcel-cache
112+
113+
# Next.js build output
114+
115+
.next
116+
out
117+
118+
# Nuxt.js build / generate output
119+
120+
.nuxt
121+
dist
122+
123+
# Gatsby files
124+
125+
# Uncomment the public line below if your project uses Gatsby and not Next.js
126+
127+
# https://nextjs.org/blog/next-9-1#public-directory-support
128+
129+
# public
130+
131+
# vuepress build output
132+
133+
.vuepress/dist
134+
135+
# vuepress v2.x temp and cache directory
136+
137+
.temp
138+
139+
# Docusaurus cache and generated files
140+
141+
.docusaurus
142+
143+
# Serverless directories
144+
145+
.serverless/
146+
147+
# FuseBox cache
148+
149+
.fusebox/
150+
151+
# DynamoDB Local files
152+
153+
.dynamodb/
154+
155+
# TernJS port file
156+
157+
.tern-port
158+
159+
# Stores VSCode versions used for testing VSCode extensions
160+
161+
.vscode-test
162+
163+
# yarn v2
164+
165+
.yarn/cache
166+
.yarn/unplugged
167+
.yarn/build-state.yml
168+
.yarn/install-state.gz
169+
.pnp.*
170+
171+
# IntelliJ based IDEs
172+
.idea
173+
174+
# Finder (MacOS) folder config
175+
.DS_Store
176+
177+
# caches
178+
data/npm
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# Version Matrix Actions Script
2+
3+
This folder contains scripts that are used to create Actions matrices for building specific Docker images with the right version combinations of Apify SDK, Playwright/Puppeteer, and Crawlee.
4+
5+
These scripts are run using the [bun](https://bun.sh) runtime (for no reason other than ease of use).
6+
7+
## Adding a new Node version to the matrix
8+
9+
When a new version of Node is released, just update the `supportedNodeVersions` array in the `src/shares/constants.ts` file.
10+
11+
Then, run `SKIP_CACHE_SET=true bun node:normal` locally to preview the new matrix. (you can append `| jq -r '.include[] | "node-version=\(.["node-version"]) apify-version=\(.["apify-version"]) is-latest=\(.["is-latest"])"'` to get a nicer output from the big JSON blob)
12+
13+
## Adding a new Python version to the matrix
14+
15+
When a new version of Python is released, just update the `supportedPythonVersions` array in the `src/shares/constants.ts` file.
16+
17+
Then, run `SKIP_CACHE_SET=true bun python:normal` locally to preview the new matrix. (you can append `| jq -r '.include[] | "python-version=\(.["python-version"]) playwright-version=\(.["playwright-version"]) apify-version=\(.["apify-version"]) is-latest=\(.["is-latest"])"'` to get a nicer output from the big JSON blob)
18+
19+
## Adding a new Python version range for specific Playwright version ranges
20+
21+
Sometimes, newer Python is not compatible with Playwright versions that were released before a specific one (at the time of writing, this is the case for Playwright 1.48.0 and Python 3.13 -> Python 3.13.x can only run Playwright 1.48.0 and newer).
22+
23+
To add a new Python version range for a specific Playwright version, add a new entry to the `playwrightPythonVersionConstraints` array in the `python.ts` file.
24+
25+
The key represents the Python version range where this starts taking effect. The value is the Playwright version range that is required for the Python version.
26+
27+
## Updating the runtime version that will be used for images that are referenced with just the build tag
28+
29+
When we build images, we also include a specific runtime version in the tag (as an example, we have `apify/actor-node:20`). We also provide images tagged with `latest` or `beta`. These images will default to the "latest" runtime version that is specified in the `src/shares/constants.ts` file under `latestPythonVersion` or `latestNodeVersion`.
30+
31+
When the time comes to bump these, just make a PR, edit those values, and merge it. Next time images get built, the `latest` or `beta` tags will use those new versions for the tag.
32+
33+
## Creating new matrices
34+
35+
The structure for a GitHub Actions matrix is as follows:
36+
37+
```ts
38+
interface Matrix {
39+
include: MatrixEntry[];
40+
}
41+
42+
type MatrixEntry = Record<string, string>;
43+
```
44+
45+
When trying to integrate a new matrix into a flow, you need to follow the following steps:
46+
47+
- have a step that outputs the matrix as a JSON blob
48+
49+
```yaml
50+
matrix:
51+
outputs:
52+
matrix: ${{ steps.set-matrix.outputs.matrix }}
53+
54+
steps:
55+
- name: Generate matrix
56+
id: set-matrix
57+
run: echo "matrix=$(bun python:normal)" >> $GITHUB_OUTPUT
58+
working-directory: ./.github/actions/version-matrix
59+
```
60+
61+
(Optionally, you can also add a print step to verify that the matrix is correct. Feel free to copy one from any workflow that already uses a matrix.)
62+
63+
- ensure the actual build step needs the matrix and uses it like this (the `if` check is optional if the matrix will always have at least one entry):
64+
65+
```yaml
66+
needs: [matrix]
67+
if: ${{ toJson(fromJson(needs.matrix.outputs.matrix).include) != '[]' }}
68+
strategy:
69+
matrix: ${{ fromJson(needs.matrix.outputs.matrix) }}
70+
```
71+
72+
- reference matrix values based on the keys in the objects in the `include` array. For example, to get the Python version, you can use `${{ matrix.python-version }}`.

.github/actions/cache-builder/data/.gitkeep

Whitespace-only changes.
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
{
2+
"crawlee": [
3+
"3.13.1",
4+
"3.13.2",
5+
"3.13.3",
6+
"3.13.4",
7+
"3.13.5"
8+
],
9+
"apify": [
10+
"3.3.1",
11+
"3.3.2",
12+
"3.4.0",
13+
"3.4.1",
14+
"3.4.2"
15+
],
16+
"playwright": [
17+
"1.50.0",
18+
"1.50.1",
19+
"1.51.0",
20+
"1.51.1",
21+
"1.52.0"
22+
],
23+
"puppeteer": [
24+
"24.7.2",
25+
"24.8.0",
26+
"24.8.1",
27+
"24.8.2",
28+
"24.9.0"
29+
],
30+
"typescript": [
31+
"5.6.3",
32+
"5.7.2",
33+
"5.7.3",
34+
"5.8.2",
35+
"5.8.3"
36+
]
37+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
{
2+
"name": "cache-builder",
3+
"type": "module",
4+
"private": true,
5+
"scripts": {
6+
"node:npm": "node src/caches/npm.ts",
7+
"fmt": "biome format --write .",
8+
"typecheck": "tsc --noEmit"
9+
},
10+
"devDependencies": {
11+
"@biomejs/biome": "^1.9.4",
12+
"@types/node": "^22.15.18",
13+
"@types/semver": "^7.7.0",
14+
"typescript": "^5.8.3"
15+
},
16+
"dependencies": {
17+
"nano-spawn": "^1.0.2",
18+
"semver": "^7.7.2"
19+
},
20+
"volta": {
21+
"node": "24.0.2",
22+
"yarn": "4.9.1"
23+
},
24+
"packageManager": "yarn@4.9.1"
25+
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import spawn from 'nano-spawn';
2+
import { fetchPackageVersions, getCachePathData } from '../shared/npm.ts';
3+
import { writeFile } from 'node:fs/promises';
4+
import { packagesToPrecache } from '../shared/constants.ts';
5+
6+
// Pre-populates the npm cache with recent releases of the packages listed in
// `packagesToPrecache`, then writes the versions that were actually cached to
// `data/npm_state.json`.

// `getCachePathData()` supplies the cache directory (`path`) and the name of the
// environment variable (`environmentVariable`) used to hand that directory to npm.
const cachePath = getCachePathData();

console.log(`Cache location: ${cachePath.path}`);

/** Versions successfully added to the cache, keyed by package name. */
const cacheState: Record<string, string[]> = {};

for (const packageName of packagesToPrecache) {
	// Keep the last five entries returned by the registry lookup.
	// NOTE(review): this presumes `fetchPackageVersions` returns versions oldest-first — confirm.
	const lastFiveVersions = (await fetchPackageVersions(packageName)).slice(-5);
	const cachedVersions: string[] = [];

	for (const version of lastFiveVersions) {
		const spec = `${packageName}@${version}`;
		console.log(`Fetching ${spec}`);

		try {
			await spawn('npm', ['cache', 'add', spec], {
				env: {
					[cachePath.environmentVariable]: cachePath.path,
				},
			});

			// Only reached when `npm cache add` succeeded, so the log line and the
			// recorded state both reflect what is really in the cache.
			cachedVersions.push(version);
			console.log(`Done fetching ${spec}`);
		} catch (error) {
			// Best effort: one failed version must not abort the remaining ones.
			console.error(`Failed to fetch ${spec}:`, error);
		}
	}

	cacheState[packageName] = cachedVersions;
}

await writeFile(new URL('../../data/npm_state.json', import.meta.url), JSON.stringify(cacheState, null, '\t'));
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
/** Names of the npm packages that the cache builder pre-fetches. */
export const packagesToPrecache = ['crawlee', 'apify', 'playwright', 'puppeteer', 'typescript'];

0 commit comments

Comments
 (0)