
Commit d161bcd

feat: Vulkan support (#171)

* feat: Vulkan support
* fix: free llama backend when garbage collected

1 parent e8687de · commit d161bcd

31 files changed (+361 −79 lines)

.github/ISSUE_TEMPLATE/bug-report.yml

Lines changed: 4 additions & 0 deletions
@@ -76,8 +76,12 @@ body:
          required: false
        - label: CUDA support
          required: false
+       - label: Vulkan support
+         required: false
        - label: Grammar
          required: false
+       - label: Function calling
+         required: false
   - type: dropdown
     id: pr
     attributes:

.github/workflows/build.yml

Lines changed: 26 additions & 3 deletions
@@ -1,7 +1,10 @@
 name: Build
 on:
   push:
-
+    branches:
+      - master
+      - beta
+  pull_request:
   workflow_dispatch:
 
 jobs:
@@ -116,6 +119,24 @@ jobs:
           cuda: '12.2.0'
           method: 'network'
 
+      - name: Install Vulkan SDK on Windows
+        if: startsWith(matrix.config.os, 'windows')
+        env:
+          VULKAN_VERSION: 1.3.261.1
+        run: |
+          curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
+          & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
+          Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
+          Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
+
+      - name: Install Vulkan SDK on Ubuntu
+        if: startsWith(matrix.config.name, 'Ubuntu GCC')
+        run: |
+          wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
+          sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
+          sudo apt update
+          sudo apt install vulkan-sdk
+
       - name: Install dependencies on macOS
         if: startsWith(matrix.config.os, 'macos')
         run: |
@@ -179,10 +200,12 @@ jobs:
             if (process.env.ARTIFACT_NAME === "win") {
                 await buildBinary("x64");
                 await buildBinary("x64", ["--cuda"]);
+                await buildBinary("x64", ["--vulkan"]);
                 // await buildBinary("arm64", [], windowsOnArmNodeVersion); // disabled arm64 for now as compilation doesn't work
             } else if (process.env.ARTIFACT_NAME === "linux") {
                 await buildBinary("x64");
                 await buildBinary("x64", ["--cuda"]);
+                await buildBinary("x64", ["--vulkan"]);
                 await buildBinary("arm64");
                 await buildBinary("armv7l");
             } else if (process.env.ARTIFACT_NAME === "mac") {
@@ -299,7 +322,7 @@ jobs:
 
   release:
     name: Release
-    if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/beta'
+    if: github.event_name == 'push' && (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/beta')
     runs-on: ubuntu-latest
     concurrency: release-${{ github.ref }}
     environment:
@@ -367,7 +390,7 @@ jobs:
           npm run docs:build
       - name: Upload docs to GitHub Pages
         if: steps.set-npm-url.outputs.npm-url != '' && github.ref == 'refs/heads/master'
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-pages-artifact@v3
         with:
           name: pages-docs
           path: docs-site

.github/workflows/prLint.yml

Lines changed: 1 addition & 0 deletions
@@ -3,6 +3,7 @@ on:
   pull_request:
   pull_request_target:
     types: [opened, reopened, edited, synchronize]
+
 jobs:
   lint:
     name: Lint

.github/workflows/test.yml

Lines changed: 8 additions & 1 deletion
@@ -1,5 +1,12 @@
 name: Test
-on: [push]
+on:
+  push:
+    branches:
+      - master
+      - beta
+  pull_request:
+  workflow_dispatch:
+
 jobs:
   test:
     name: Test

README.md

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@
 
 ## Features
 * Run a text generation model locally on your machine
-* Metal and CUDA support
+* Metal, CUDA and Vulkan support
 * Pre-built binaries are provided, with a fallback to building from source without `node-gyp` or Python
 * Chat with a model using a chat wrapper
 * Use the CLI to chat with a model without writing any code

docs/guide/chat-prompt-wrapper.md

Lines changed: 2 additions & 2 deletions
@@ -7,8 +7,8 @@ and parse its response to know whether it finished answering, or should we tell
 For example, to prompt a model with "Where do llamas come from?" we can give the model a text like this to predict the completion of:
 ```txt
 You are a helpful, respectful and honest assistant. Always answer as helpfully as possible.
-If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct.
-If you don't know the answer to a question, please don't share false information.
+If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly.
+If you don't know the answer to a question, don't share false information.
 
 ### Human
 Where do llamas come from?

docs/index.md

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ hero:
 
 features:
   - icon: 🚀
-    title: Metal and CUDA support
+    title: Metal, CUDA and Vulkan support
     details: Utilize the power of your GPU to run AI models faster
     link: /guide/#cuda-and-metal-support
     linkText: Learn more

llama/CMakeLists.txt

Lines changed: 20 additions & 0 deletions
@@ -58,6 +58,26 @@ if (LLAMA_CUBLAS)
     endif()
 endif()
 
+if (LLAMA_VULKAN OR LLAMA_KOMPUTE)
+    find_package(Vulkan)
+    if (Vulkan_FOUND)
+        if (LLAMA_VULKAN)
+            message(STATUS "Using Vulkan for GPU info")
+        elseif (LLAMA_KOMPUTE)
+            message(STATUS "Using Vulkan for GPU info because Kompute is enabled")
+        endif()
+
+        set(GPU_INFO_HEADERS ${GPU_INFO_HEADERS} gpuInfo/vulkan-gpu-info.h)
+        set(GPU_INFO_SOURCES ${GPU_INFO_SOURCES} gpuInfo/vulkan-gpu-info.cpp)
+
+        add_compile_definitions(GPU_INFO_USE_VULKAN)
+
+        set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} Vulkan::Vulkan)
+    else()
+        message(WARNING "Vulkan not found. Not using it for GPU info")
+    endif()
+endif()
+
 if (LLAMA_HIPBLAS)
     list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
 
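
The new `gpuInfo/vulkan-gpu-info.h` and `gpuInfo/vulkan-gpu-info.cpp` files added to `GPU_INFO_HEADERS`/`GPU_INFO_SOURCES` are not shown in this view. Judging from the call site in `llama/addon.cpp` below, the header presumably declares an interface along these lines (a sketch inferred from usage, not the actual file):

```cpp
// Hypothetical reconstruction of gpuInfo/vulkan-gpu-info.h, inferred from
// the gpuInfoGetTotalVulkanDevicesInfo() call in addon.cpp.
#pragma once

#include <cstdint>

typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);

// Sums VRAM capacity and usage across all Vulkan devices.
// Returns false when the numbers could not be obtained (e.g. when the
// VK_EXT_memory_budget extension is unavailable), in which case the
// reported values should be ignored.
bool gpuInfoGetTotalVulkanDevicesInfo(uint64_t* total, uint64_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback);
```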

llama/addon.cpp

Lines changed: 34 additions & 3 deletions
@@ -12,6 +12,9 @@
 #ifdef GPU_INFO_USE_CUBLAS
 #    include "gpuInfo/cuda-gpu-info.h"
 #endif
+#ifdef GPU_INFO_USE_VULKAN
+#    include "gpuInfo/vulkan-gpu-info.h"
+#endif
 #ifdef GPU_INFO_USE_METAL
 #    include "gpuInfo/metal-gpu-info.h"
 #endif
@@ -35,6 +38,7 @@ using AddonThreadSafeLogCallbackFunction =
 AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
 bool addonJsLoggerCallbackSet = false;
 int addonLoggerLogLevel = 5;
+bool backendInitialized = false;
 
 std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token) {
     std::vector<char> result(8, 0);
@@ -51,10 +55,15 @@ std::string addon_model_token_to_piece(const struct llama_model* model, llama_to
 }
 
 #ifdef GPU_INFO_USE_CUBLAS
-void lodCudaError(const char* message) {
+void logCudaError(const char* message) {
     addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
 }
 #endif
+#ifdef GPU_INFO_USE_VULKAN
+void logVulkanWarning(const char* message) {
+    addonLlamaCppLogCallback(GGML_LOG_LEVEL_WARN, (std::string("Vulkan warning: ") + std::string(message)).c_str(), nullptr);
+}
+#endif
 
 Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
     uint64_t total = 0;
@@ -63,14 +72,25 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
 #ifdef GPU_INFO_USE_CUBLAS
     size_t cudaDeviceTotal = 0;
     size_t cudaDeviceUsed = 0;
-    bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, lodCudaError);
+    bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError);
 
     if (cudeGetInfoSuccess) {
         total += cudaDeviceTotal;
         used += cudaDeviceUsed;
     }
 #endif
 
+#ifdef GPU_INFO_USE_VULKAN
+    uint64_t vulkanDeviceTotal = 0;
+    uint64_t vulkanDeviceUsed = 0;
+    const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, logVulkanWarning);
+
+    if (vulkanDeviceSupportsMemoryBudgetExtension) {
+        total += vulkanDeviceTotal;
+        used += vulkanDeviceUsed;
+    }
+#endif
+
 #ifdef GPU_INFO_USE_METAL
     uint64_t metalDeviceTotal = 0;
     uint64_t metalDeviceUsed = 0;
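
The flag name `vulkanDeviceSupportsMemoryBudgetExtension` suggests the new `vulkan-gpu-info.cpp` reads per-heap usage through the `VK_EXT_memory_budget` extension. Under that assumption, the core of such a query typically looks like this (an illustrative sketch, not the commit's actual implementation):

```cpp
// Sketch: accumulating device-local heap budget/usage for one physical
// device via VK_EXT_memory_budget (assumes the extension was verified
// with vkEnumerateDeviceExtensionProperties beforehand).
#include <vulkan/vulkan.h>
#include <cstdint>

static void accumulateVramUsage(VkPhysicalDevice device, uint64_t* total, uint64_t* used) {
    VkPhysicalDeviceMemoryBudgetPropertiesEXT budget = {};
    budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;

    VkPhysicalDeviceMemoryProperties2 props = {};
    props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2;
    props.pNext = &budget;  // chain the budget query onto the base query

    vkGetPhysicalDeviceMemoryProperties2(device, &props);

    for (uint32_t i = 0; i < props.memoryProperties.memoryHeapCount; i++) {
        // Only device-local heaps correspond to dedicated VRAM.
        if (props.memoryProperties.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) {
            *total += budget.heapBudget[i];
            *used += budget.heapUsage[i];
        }
    }
}
```
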
@@ -950,7 +970,7 @@ void addonCallJsLogCallback(
             called = false;
         }
     }
-    
+
     if (!called && data != nullptr) {
         if (data->logLevelNumber == 2) {
             fputs(data->stringStream->str().c_str(), stderr);
@@ -1046,8 +1066,17 @@ Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
     return info.Env().Undefined();
 }
 
+static void addonFreeLlamaBackend(Napi::Env env, int* data) {
+    if (backendInitialized) {
+        llama_backend_free();
+        backendInitialized = false;
+    }
+}
+
 Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
     llama_backend_init();
+    backendInitialized = true;
+
     exports.DefineProperties({
         Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
         Napi::PropertyDescriptor::Function("setLogger", setLogger),
@@ -1061,6 +1090,8 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
 
     llama_log_set(addonLlamaCppLogCallback, nullptr);
 
+    exports.AddFinalizer(addonFreeLlamaBackend, static_cast<int*>(nullptr));
+
     return exports;
 }
llama/gpuInfo/cuda-gpu-info.cu

Lines changed: 5 additions & 5 deletions
@@ -15,9 +15,9 @@
 #endif
 
 
-typedef void (*gpuInfoErrorLogCallback_t)(const char* message);
+typedef void (*gpuInfoCudaErrorLogCallback_t)(const char* message);
 
-bool gpuInfoSetCudaDevice(const int device, gpuInfoErrorLogCallback_t errorLogCallback) {
+bool gpuInfoSetCudaDevice(const int device, gpuInfoCudaErrorLogCallback_t errorLogCallback) {
     int current_device;
     auto getDeviceResult = cudaGetDevice(&current_device);
 
@@ -40,7 +40,7 @@ bool gpuInfoSetCudaDevice(const int device, gpuInfoErrorLogCallback_t errorLogCa
     return true;
 }
 
-bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
+bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback) {
     gpuInfoSetCudaDevice(device, errorLogCallback);
 
     size_t freeMem;
@@ -58,7 +58,7 @@ bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfo
     return true;
 }
 
-int gpuInfoGetCudaDeviceCount(gpuInfoErrorLogCallback_t errorLogCallback) {
+int gpuInfoGetCudaDeviceCount(gpuInfoCudaErrorLogCallback_t errorLogCallback) {
     int deviceCount;
     auto getDeviceCountResult = cudaGetDeviceCount(&deviceCount);
 
@@ -70,7 +70,7 @@ int gpuInfoGetCudaDeviceCount(gpuInfoErrorLogCallback_t errorLogCallback) {
     return deviceCount;
 }
 
-bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
+bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback) {
     int deviceCount = gpuInfoGetCudaDeviceCount(errorLogCallback);
 
     if (deviceCount < 0) {
