
Commit 8a92e31

feat: move CUDA prebuilt binaries to dependency modules (#250)
* feat: move CUDA prebuilt binaries to dependency modules to improve `npm install` times
* fix: improve GGUF metadata reading performance
* fix: adapt to breaking `llama.cpp` changes

1 parent 9cab784 · commit 8a92e31

39 files changed: +955 −204 lines
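The core change: instead of bundling the large CUDA builds into the main npm package, each CUDA binary ships in its own platform-scoped package (e.g. `@node-llama-cpp/linux-x64-cuda`) listed under `optionalDependencies`, so `npm install` only downloads the variant that matches the host machine. As a rough illustration of the mechanism (not the project's actual loader code), resolving such a module at runtime could look like this:

```ts
// A minimal sketch (not the project's actual loader) of resolving a CUDA
// prebuilt binary from a platform-scoped optional dependency. The package
// names come from this commit's package.json; the helper itself is hypothetical.
import {createRequire} from "node:module";

const require = createRequire(import.meta.url);

function resolvePrebuiltCudaModule(): string | null {
    const platformPrefix = process.platform === "win32" ? "win" : "linux";
    const packageName = `@node-llama-cpp/${platformPrefix}-x64-cuda`;

    try {
        // succeeds only if npm installed the matching optional dependency
        return require.resolve(packageName);
    } catch {
        // skipped or unsupported platform; fall back to building from source
        return null;
    }
}

console.log(resolvePrebuiltCudaModule() ?? "no prebuilt CUDA module installed");
```

If the optional dependency was skipped (unsupported platform, `--no-optional`, or a failed install), resolution fails and the caller can fall back to building from source.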

.github/workflows/build.yml

Lines changed: 32 additions & 23 deletions
@@ -145,7 +145,7 @@ jobs:
       - name: Setup & Build
         id: build
         shell: bash
-        timeout-minutes: 40
+        timeout-minutes: 80
         env:
           ARTIFACT_NAME: ${{ matrix.config.artifact }}
         run: |
@@ -187,6 +187,7 @@ jobs:
                  throw new Error("Could not find node versions");
          }
 
+          $.verbose = true;
          await $`mkdir -p bins`;
 
          async function buildBinary(arch, flags = [], nodeTarget = nodeVersion) {
@@ -253,28 +254,28 @@ jobs:
       #     unzip -d ./upx "./upxInstallations/upx-${UPX_VERSION}-win64.zip"
       #     mv "./upx/upx-${UPX_VERSION}-win64" ./upx/upx
       #
-      #     ./upx/upx/upx.exe --best ./bins/win-x64-cuda/llama-addon.node
+      #     ./upx/upx/upx.exe --best ./bins/win-x64-cuda/Release/ggml.dll
 
-      - name: Compress CUDA binary on Ubuntu
-        if: matrix.config.name == 'Ubuntu'
-        env:
-          UPX_VERSION: 4.2.4
-        run: |
-          mkdir -p upxInstallations
-
-          if [ ! -f "./upxInstallations/upx-${UPX_VERSION}-amd64_linux.tar.xz" ]; then
-            pushd upxInstallations
-            curl -OL "https://github.com/upx/upx/releases/download/v${UPX_VERSION}/upx-${UPX_VERSION}-amd64_linux.tar.xz"
-            popd
-          fi
-
-          mkdir -p upx
-          tar -xvf "./upxInstallations/upx-${UPX_VERSION}-amd64_linux.tar.xz" -C ./upx
-          mv "./upx/upx-${UPX_VERSION}-amd64_linux" ./upx/upx
-
-          chmod +x ./bins/linux-x64-cuda/llama-addon.node
-          ./upx/upx/upx --best ./bins/linux-x64-cuda/llama-addon.node
-          chmod -x ./bins/linux-x64-cuda/llama-addon.node
+      # - name: Compress CUDA binary on Ubuntu
+      #   if: matrix.config.name == 'Ubuntu'
+      #   env:
+      #     UPX_VERSION: 4.2.4
+      #   run: |
+      #     mkdir -p upxInstallations
+      #
+      #     if [ ! -f "./upxInstallations/upx-${UPX_VERSION}-amd64_linux.tar.xz" ]; then
+      #       pushd upxInstallations
+      #       curl -OL "https://github.com/upx/upx/releases/download/v${UPX_VERSION}/upx-${UPX_VERSION}-amd64_linux.tar.xz"
+      #       popd
+      #     fi
+      #
+      #     mkdir -p upx
+      #     tar -xvf "./upxInstallations/upx-${UPX_VERSION}-amd64_linux.tar.xz" -C ./upx
+      #     mv "./upx/upx-${UPX_VERSION}-amd64_linux" ./upx/upx
+      #
+      #     chmod +x ./bins/linux-x64-cuda/llama-addon.node
+      #     ./upx/upx/upx --best ./bins/linux-x64-cuda/libggml.so
+      #     chmod -x ./bins/linux-x64-cuda/llama-addon.node
 
       - name: Publish artifact
         uses: actions/upload-artifact@v4
@@ -423,13 +424,21 @@ jobs:
 
           echo "Built binaries:"
           ls bins
+      - name: Move binaries to standalone prebuilt binary modules
+        run: npx --no vite-node ./scripts/movePrebuiltBinariesToStandaloneModules.ts
+      - name: Prepare standalone prebuilt binary modules
+        run: npx --no vite-node ./scripts/prepareStandalonePrebuiltBinaryModules.ts
       - name: Add "postinstall" script to package.json
         run: npm run addPostinstallScript
       - name: Release
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
-        run: npx semantic-release
+          GH_RELEASE_REF: ${{ github.ref }}
+        run: |
+          echo "//registry.npmjs.org/:_authToken=\${NPM_TOKEN}" > ~/.npmrc
+
+          npx semantic-release
       - name: Set npm package url to GITHUB_OUTPUT
         id: set-npm-url
         run: |

.gitignore

Lines changed: 1 addition & 0 deletions
@@ -2,6 +2,7 @@
 /.vscode
 node_modules
 .DS_Store
+*.cpuprofile
 
 /dist
 /docs-site

.releaserc.ts

Lines changed: 3 additions & 0 deletions
@@ -39,6 +39,9 @@ export default {
            "footerPartial": newFooterTemplate
        }
    }],
+   ["@semantic-release/exec", {
+       "publishCmd": "npx --no vite-node ./scripts/publishStandalonePrebuiltBinaryModules.ts --packageVersion \"${nextRelease.version}\""
+   }],
    "@semantic-release/npm",
    ["@semantic-release/github", {
        "discussionCategoryName": "Releases"

llama/CMakeLists.txt

Lines changed: 14 additions & 14 deletions
@@ -1,9 +1,9 @@
-cmake_minimum_required(VERSION 3.13)
+cmake_minimum_required(VERSION 3.14)
 
 project("llama-addon" C CXX)
 
 if (MSVC)
-    if (LLAMA_STATIC)
+    if (GGML_STATIC)
         add_link_options(-static)
         if (MINGW)
             add_link_options(-static-libgcc -static-libstdc++)
@@ -29,7 +29,7 @@ include_directories("gpuInfo")
 include_directories("llama.cpp")
 include_directories("./llama.cpp/common")
 
-if (LLAMA_CUDA)
+if (GGML_CUDA)
     cmake_minimum_required(VERSION 3.17)
 
     find_package(CUDAToolkit)
@@ -43,33 +43,33 @@ if (LLAMA_CUDA)
 
         add_compile_definitions(GPU_INFO_USE_CUDA)
 
-        if (LLAMA_STATIC)
-            set(LLAMA_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cudart_static)
+        if (GGML_STATIC)
+            set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cudart_static)
         else()
-            set(LLAMA_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cudart)
+            set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cudart)
         endif()
 
         set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cuda_driver)
 
         if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
             # copied from llama.cpp/CMakLists.txt under "if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)"
-            if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
-                set(CMAKE_CUDA_ARCHITECTURES "60;61;70")
+            if (GGML_CUDA_F16 OR GGML_CUDA_DMMV_F16)
+                set(CMAKE_CUDA_ARCHITECTURES "60;61;70;75")
             else()
-                set(CMAKE_CUDA_ARCHITECTURES "52;61;70")
+                set(CMAKE_CUDA_ARCHITECTURES "52;61;70;75")
             endif()
         endif()
     else()
         message(FATAL_ERROR "CUDA was not found")
     endif()
 endif()
 
-if (LLAMA_VULKAN OR LLAMA_KOMPUTE)
+if (GGML_VULKAN OR GGML_KOMPUTE)
     find_package(Vulkan)
     if (Vulkan_FOUND)
-        if (LLAMA_VULKAN)
+        if (GGML_VULKAN)
             message(STATUS "Using Vulkan for GPU info")
-        elseif (LLAMA_KOMPUTE)
+        elseif (GGML_KOMPUTE)
             message(STATUS "Using Vulkan for GPU info because Kompute is enabled")
         endif()
 
@@ -84,7 +84,7 @@ if (LLAMA_VULKAN OR LLAMA_KOMPUTE)
     endif()
 endif()
 
-if (LLAMA_HIPBLAS)
+if (GGML_HIPBLAS)
     list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
 
     if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
@@ -111,7 +111,7 @@ if (LLAMA_HIPBLAS)
     endif()
 endif()
 
-if (LLAMA_METAL)
+if (GGML_METAL)
     find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
     find_library(METAL_FRAMEWORK Metal REQUIRED)
     find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
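These renames track upstream: `llama.cpp` moved its backend build options from the `LLAMA_*` prefix to `GGML_*` when `ggml` was split into its own build target, so stale `-DLLAMA_CUDA=ON` style flags would no longer take effect. A hedged sketch of configuring with the renamed options (directory layout and flag values are illustrative, not the project's actual build script):

```ts
// Illustrative only: configure the addon build with the renamed GGML_* CMake
// options. The directory layout and chosen flags here are assumptions, not
// the project's actual build invocation.
import {execFileSync} from "node:child_process";

const cmakeOptions: Record<string, string> = {
    GGML_CUDA: "ON",   // was LLAMA_CUDA before this commit
    GGML_STATIC: "OFF" // was LLAMA_STATIC
};

execFileSync("cmake", [
    "-S", "llama", // the directory containing this CMakeLists.txt
    "-B", "build",
    ...Object.entries(cmakeOptions).map(([name, value]) => `-D${name}=${value}`)
], {stdio: "inherit"});
```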

llama/addon.cpp

Lines changed: 2 additions & 2 deletions
@@ -235,7 +235,7 @@ static Napi::Value getNapiControlToken(const Napi::CallbackInfo& info, llama_mod
     if (token < 0) {
         return Napi::Number::From(info.Env(), -1);
     }
-    
+
     auto tokenAttributes = llama_token_get_attr(model, token);
 
     if (!(tokenAttributes & LLAMA_TOKEN_ATTR_CONTROL) && !(tokenAttributes & LLAMA_TOKEN_ATTR_UNDEFINED)) {
@@ -1810,7 +1810,7 @@ static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, voi
             fputs(text, stdout);
             fflush(stdout);
         }
-    } 
+    }
 }
 
 Napi::Value setLogger(const Napi::CallbackInfo& info) {

package.json

Lines changed: 6 additions & 0 deletions
@@ -43,6 +43,7 @@
     "scripts": {
         "prepare": "[ \"$CI\" = true ] || [ -d '.husky/_' ] || husky",
         "postinstall": "cd templates && npm install",
+        "postversion": "vite-node scripts/postVersion.ts",
         "prebuild": "rimraf ./dist ./tsconfig.tsbuildinfo",
         "build": "tsc --build tsconfig.json --force && npm run build:packTemplates",
         "build:packTemplates": "vite-node scripts/packTemplates.ts",
@@ -105,6 +106,7 @@
     ],
     "author": "Gilad S.",
     "license": "MIT",
+    "preferUnplugged": true,
     "bugs": {
         "url": "https://github.com/withcatai/node-llama-cpp/issues"
     },
@@ -189,5 +191,9 @@
         "typescript": {
             "optional": true
         }
+    },
+    "optionalDependencies": {
+        "@node-llama-cpp/win-x64-cuda": "0.1.0",
+        "@node-llama-cpp/linux-x64-cuda": "0.1.0"
     }
 }
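`"preferUnplugged": true` hints to Yarn Plug'n'Play that the package should always be extracted to disk, which loading native addons requires. The new `postversion` script itself is not shown in this diff, but a plausible job for it, given the new `optionalDependencies` block, is keeping those prebuilt-binary packages pinned to the freshly bumped version. A hypothetical sketch:

```ts
// Hypothetical sketch of a postversion hook: pin the prebuilt-binary
// optionalDependencies to the version npm just bumped. The actual
// scripts/postVersion.ts is not included in this diff.
import fs from "node:fs";

const packageJsonPath = "./package.json";
const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, "utf8"));

for (const dependencyName of Object.keys(packageJson.optionalDependencies ?? {})) {
    // only touch the project's own platform-scoped binary modules
    if (dependencyName.startsWith("@node-llama-cpp/"))
        packageJson.optionalDependencies[dependencyName] = packageJson.version;
}

fs.writeFileSync(packageJsonPath, JSON.stringify(packageJson, null, 2) + "\n");
```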
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+/dist
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2024 Gilad S.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
+# [`node-llama-cpp`](https://github.com/withcatai/node-llama-cpp)
+This is a prebuilt binary package for [`node-llama-cpp`](https://github.com/withcatai/node-llama-cpp) for Linux x64 with CUDA support.
+
+Do not install this package directly.

packages/@node-llama-cpp/linux-x64-cuda/package-lock.json

Lines changed: 38 additions & 0 deletions
Generated file; diff not rendered by default.
