
Commit 8a92e31

feat: move CUDA prebuilt binaries to dependency modules (#250)
* feat: move CUDA prebuilt binaries to dependency modules to improve `npm install` times
* fix: improve GGUF metadata reading performance
* fix: adapt to breaking `llama.cpp` changes

1 parent 9cab784 · commit 8a92e31

39 files changed: +955 −204 lines
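The core change: instead of bundling the large CUDA builds into the main npm package, each CUDA binary ships in its own platform-scoped package (e.g. `@node-llama-cpp/linux-x64-cuda`) listed under `optionalDependencies`, so `npm install` only downloads the variant that matches the host machine. As a rough illustration of the mechanism (not the project's actual loader code), resolving such a module at runtime could look like this:

```ts
// A minimal sketch (not the project's actual loader) of resolving a CUDA
// prebuilt binary from a platform-scoped optional dependency. The package
// names come from this commit's package.json; the helper itself is hypothetical.
import {createRequire} from "node:module";

const require = createRequire(import.meta.url);

function resolvePrebuiltCudaModule(): string | null {
    const platformPrefix = process.platform === "win32" ? "win" : "linux";
    const packageName = `@node-llama-cpp/${platformPrefix}-x64-cuda`;

    try {
        // succeeds only if npm installed the matching optional dependency
        return require.resolve(packageName);
    } catch {
        // skipped or unsupported platform; fall back to building from source
        return null;
    }
}

console.log(resolvePrebuiltCudaModule() ?? "no prebuilt CUDA module installed");
```

If the optional dependency was skipped (unsupported platform, `--no-optional`, or a failed install), resolution fails and the caller can fall back to building from source.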

.github/workflows/build.yml

Lines changed: 32 additions & 23 deletions
@@ -145,7 +145,7 @@ jobs:
       - name: Setup & Build
         id: build
         shell: bash
-        timeout-minutes: 40
+        timeout-minutes: 80
         env:
           ARTIFACT_NAME: ${{ matrix.config.artifact }}
         run: |
@@ -187,6 +187,7 @@ jobs:
                  throw new Error("Could not find node versions");
          }
 
+          $.verbose = true;
          await $`mkdir -p bins`;
 
          async function buildBinary(arch, flags = [], nodeTarget = nodeVersion) {
@@ -253,28 +254,28 @@ jobs:
       #     unzip -d ./upx "./upxInstallations/upx-${UPX_VERSION}-win64.zip"
       #     mv "./upx/upx-${UPX_VERSION}-win64" ./upx/upx
       #
-      #     ./upx/upx/upx.exe --best ./bins/win-x64-cuda/llama-addon.node
+      #     ./upx/upx/upx.exe --best ./bins/win-x64-cuda/Release/ggml.dll
 
-      - name: Compress CUDA binary on Ubuntu
-        if: matrix.config.name == 'Ubuntu'
-        env:
-          UPX_VERSION: 4.2.4
-        run: |
-          mkdir -p upxInstallations
-
-          if [ ! -f "./upxInstallations/upx-${UPX_VERSION}-amd64_linux.tar.xz" ]; then
-            pushd upxInstallations
-            curl -OL "https://github.com/upx/upx/releases/download/v${UPX_VERSION}/upx-${UPX_VERSION}-amd64_linux.tar.xz"
-            popd
-          fi
-
-          mkdir -p upx
-          tar -xvf "./upxInstallations/upx-${UPX_VERSION}-amd64_linux.tar.xz" -C ./upx
-          mv "./upx/upx-${UPX_VERSION}-amd64_linux" ./upx/upx
-
-          chmod +x ./bins/linux-x64-cuda/llama-addon.node
-          ./upx/upx/upx --best ./bins/linux-x64-cuda/llama-addon.node
-          chmod -x ./bins/linux-x64-cuda/llama-addon.node
+      # - name: Compress CUDA binary on Ubuntu
+      #   if: matrix.config.name == 'Ubuntu'
+      #   env:
+      #     UPX_VERSION: 4.2.4
+      #   run: |
+      #     mkdir -p upxInstallations
+      #
+      #     if [ ! -f "./upxInstallations/upx-${UPX_VERSION}-amd64_linux.tar.xz" ]; then
+      #       pushd upxInstallations
+      #       curl -OL "https://github.com/upx/upx/releases/download/v${UPX_VERSION}/upx-${UPX_VERSION}-amd64_linux.tar.xz"
+      #       popd
+      #     fi
+      #
+      #     mkdir -p upx
+      #     tar -xvf "./upxInstallations/upx-${UPX_VERSION}-amd64_linux.tar.xz" -C ./upx
+      #     mv "./upx/upx-${UPX_VERSION}-amd64_linux" ./upx/upx
+      #
+      #     chmod +x ./bins/linux-x64-cuda/llama-addon.node
+      #     ./upx/upx/upx --best ./bins/linux-x64-cuda/libggml.so
+      #     chmod -x ./bins/linux-x64-cuda/llama-addon.node
 
       - name: Publish artifact
         uses: actions/upload-artifact@v4
@@ -423,13 +424,21 @@ jobs:
 
           echo "Built binaries:"
           ls bins
+      - name: Move binaries to standalone prebuilt binary modules
+        run: npx --no vite-node ./scripts/movePrebuiltBinariesToStandaloneModules.ts
+      - name: Prepare standalone prebuilt binary modules
+        run: npx --no vite-node ./scripts/prepareStandalonePrebuiltBinaryModules.ts
       - name: Add "postinstall" script to package.json
         run: npm run addPostinstallScript
       - name: Release
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
-        run: npx semantic-release
+          GH_RELEASE_REF: ${{ github.ref }}
+        run: |
+          echo "//registry.npmjs.org/:_authToken=\${NPM_TOKEN}" > ~/.npmrc
+
+          npx semantic-release
       - name: Set npm package url to GITHUB_OUTPUT
         id: set-npm-url
         run: |

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -2,6 +2,7 @@
 /.vscode
 node_modules
 .DS_Store
+*.cpuprofile
 
 /dist
 /docs-site

.releaserc.ts

Lines changed: 3 additions & 0 deletions
@@ -39,6 +39,9 @@ export default {
            "footerPartial": newFooterTemplate
        }
    }],
+   ["@semantic-release/exec", {
+       "publishCmd": "npx --no vite-node ./scripts/publishStandalonePrebuiltBinaryModules.ts --packageVersion \"${nextRelease.version}\""
+   }],
    "@semantic-release/npm",
    ["@semantic-release/github", {
        "discussionCategoryName": "Releases"

llama/CMakeLists.txt

Lines changed: 14 additions & 14 deletions
@@ -1,9 +1,9 @@
-cmake_minimum_required(VERSION 3.13)
+cmake_minimum_required(VERSION 3.14)
 
 project("llama-addon" C CXX)
 
 if (MSVC)
-    if (LLAMA_STATIC)
+    if (GGML_STATIC)
         add_link_options(-static)
         if (MINGW)
             add_link_options(-static-libgcc -static-libstdc++)
@@ -29,7 +29,7 @@ include_directories("gpuInfo")
 include_directories("llama.cpp")
 include_directories("./llama.cpp/common")
 
-if (LLAMA_CUDA)
+if (GGML_CUDA)
     cmake_minimum_required(VERSION 3.17)
 
     find_package(CUDAToolkit)
@@ -43,33 +43,33 @@ if (LLAMA_CUDA)
 
         add_compile_definitions(GPU_INFO_USE_CUDA)
 
-        if (LLAMA_STATIC)
-            set(LLAMA_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cudart_static)
+        if (GGML_STATIC)
+            set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cudart_static)
         else()
-            set(LLAMA_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cudart)
+            set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cudart)
         endif()
 
         set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cuda_driver)
 
         if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
             # copied from llama.cpp/CMakLists.txt under "if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)"
-            if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
-                set(CMAKE_CUDA_ARCHITECTURES "60;61;70")
+            if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
+                set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75")
             else()
-                set(CMAKE_CUDA_ARCHITECTURES "52;61;70")
+                set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75")
             endif()
         endif()
     else()
         message(FATAL_ERROR "CUDA was not found")
     endif()
 endif()
 
-if (LLAMA_VULKAN OR LLAMA_KOMPUTE)
+if (GGML_VULKAN OR GGML_KOMPUTE)
     find_package(Vulkan)
     if (Vulkan_FOUND)
-        if (LLAMA_VULKAN)
+        if (GGML_VULKAN)
             message(STATUS "Using Vulkan for GPU info")
-        elseif (LLAMA_KOMPUTE)
+        elseif (GGML_KOMPUTE)
             message(STATUS "Using Vulkan for GPU info because Kompute is enabled")
         endif()
 
@@ -84,7 +84,7 @@ if (LLAMA_VULKAN OR LLAMA_KOMPUTE)
     endif()
 endif()
 
-if (LLAMA_HIPBLAS)
+if (GGML_HIPBLAS)
     list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
 
     if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
@@ -111,7 +111,7 @@ if (LLAMA_HIPBLAS)
     endif()
 endif()
 
-if (LLAMA_METAL)
+if (GGML_METAL)
     find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
     find_library(METAL_FRAMEWORK Metal REQUIRED)
     find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
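These renames track upstream: `llama.cpp` moved its backend build options from the `LLAMA_*` prefix to `GGML_*` when `ggml` was split into its own build target, so stale `-DLLAMA_CUDA=ON` style flags would no longer take effect. A hedged sketch of configuring with the renamed options (directory layout and flag values are illustrative, not the project's actual build script):

```ts
// Illustrative only: configure the addon build with the renamed GGML_* CMake
// options. The directory layout and chosen flags here are assumptions, not
// the project's actual build invocation.
import {execFileSync} from "node:child_process";

const cmakeOptions: Record<string, string> = {
    GGML_CUDA: "ON",   // was LLAMA_CUDA before this commit
    GGML_STATIC: "OFF" // was LLAMA_STATIC
};

execFileSync("cmake", [
    "-S", "llama", // the directory containing this CMakeLists.txt
    "-B", "build",
    ...Object.entries(cmakeOptions).map(([name, value]) => `-D${name}=${value}`)
], {stdio: "inherit"});
```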

llama/addon.cpp

Lines changed: 2 additions & 2 deletions
@@ -235,7 +235,7 @@ static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_mod
     if (token < 0) {
         return Napi::Number::From(info.Env(), -1);
     }
-    
+
     auto tokenAttributes = llama_token_get_attr(model, token);
 
     if (!(tokenAttributes & LLAMA_TOKEN_ATTR_CONTROL) && !(tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED)) {
@@ -1810,7 +1810,7 @@ static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, voi
             fputs(text, stdout);
             fflush(stdout);
         }
-    } 
+    }
 }
 
 Napi::Value setLogger(const Napi::CallbackInfo& info) {

package.json

Lines changed: 6 additions & 0 deletions
@@ -43,6 +43,7 @@
     "scripts": {
         "prepare": "[ \"$CI\" = true ] || [ -d '.husky/_' ] || husky",
         "postinstall": "cd templates && npm install",
+        "postversion": "vite-node scripts/postVersion.ts",
         "prebuild": "rimraf ./dist ./tsconfig.tsbuildinfo",
         "build": "tsc --build tsconfig.json --force && npm run build:packTemplates",
         "build:packTemplates": "vite-node scripts/packTemplates.ts",
@@ -105,6 +106,7 @@
     ],
     "author": "Gilad S.",
     "license": "MIT",
+    "preferUnplugged": true,
     "bugs": {
         "url": "https://github.com/withcatai/node-llama-cpp/issues"
     },
@@ -189,5 +191,9 @@
         "typescript": {
             "optional": true
         }
+    },
+    "optionalDependencies": {
+        "@node-llama-cpp/win-x64-cuda": "0.1.0",
+        "@node-llama-cpp/linux-x64-cuda": "0.1.0"
     }
 }
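`"preferUnplugged": true` hints to Yarn Plug'n'Play that the package should always be extracted to disk, which loading native addons requires. The new `postversion` script itself is not shown in this diff, but a plausible job for it, given the new `optionalDependencies` block, is keeping those prebuilt-binary packages pinned to the freshly bumped version. A hypothetical sketch:

```ts
// Hypothetical sketch of a postversion hook: pin the prebuilt-binary
// optionalDependencies to the version npm just bumped. The actual
// scripts/postVersion.ts is not included in this diff.
import fs from "node:fs";

const packageJsonPath = "./package.json";
const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, "utf8"));

for (const dependencyName of Object.keys(packageJson.optionalDependencies ?? {})) {
    // only touch the project's own platform-scoped binary modules
    if (dependencyName.startsWith("@node-llama-cpp/"))
        packageJson.optionalDependencies[dependencyName] = packageJson.version;
}

fs.writeFileSync(packageJsonPath, JSON.stringify(packageJson, null, 2) + "\n");
```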
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+/dist
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Gilad S.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+# [`node-llama-cpp`](https://github.com/withcatai/node-llama-cpp)
+This is a prebuilt binary package for [`node-llama-cpp`](https://github.com/withcatai/node-llama-cpp) for Linux x64 with CUDA support.
+
+Do not install this package directly.

packages/@node-llama-cpp/linux-x64-cuda/package-lock.json

Lines changed: 38 additions & 0 deletions
Generated file; diff not rendered by default.
