
Commit d161bcd

feat: Vulkan support (#171)

* feat: Vulkan support
* fix: free llama backend when garbage collected

1 parent e8687de · commit d161bcd

31 files changed (+361 −79 lines)

.github/ISSUE_TEMPLATE/bug-report.yml

Lines changed: 4 additions & 0 deletions
@@ -76,8 +76,12 @@ body:
          required: false
        - label: CUDA support
          required: false
+       - label: Vulkan support
+         required: false
        - label: Grammar
          required: false
+       - label: Function calling
+         required: false
   - type: dropdown
     id: pr
     attributes:

.github/workflows/build.yml

Lines changed: 26 additions & 3 deletions
@@ -1,7 +1,10 @@
 name: Build
 on:
   push:
-
+    branches:
+      - master
+      - beta
+  pull_request:
   workflow_dispatch:
 
 jobs:
@@ -116,6 +119,24 @@ jobs:
           cuda: '12.2.0'
           method: 'network'
 
+      - name: Install Vulkan SDK on Windows
+        if: startsWith(matrix.config.os, 'windows')
+        env:
+          VULKAN_VERSION: 1.3.261.1
+        run: |
+          curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
+          & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
+          Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
+          Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
+
+      - name: Install Vulkan SDK on Ubuntu
+        if: startsWith(matrix.config.name, 'Ubuntu GCC')
+        run: |
+          wget -qO- https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo tee /etc/apt/trusted.gpg.d/lunarg.asc
+          sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
+          sudo apt update
+          sudo apt install vulkan-sdk
+
       - name: Install dependencies on macOS
         if: startsWith(matrix.config.os, 'macos')
         run: |
@@ -179,10 +200,12 @@ jobs:
             if (process.env.ARTIFACT_NAME === "win") {
                 await buildBinary("x64");
                 await buildBinary("x64", ["--cuda"]);
+                await buildBinary("x64", ["--vulkan"]);
                 // await buildBinary("arm64", [], windowsOnArmNodeVersion); // disabled arm64 for now as compilation doesn't work
             } else if (process.env.ARTIFACT_NAME === "linux") {
                 await buildBinary("x64");
                 await buildBinary("x64", ["--cuda"]);
+                await buildBinary("x64", ["--vulkan"]);
                 await buildBinary("arm64");
                 await buildBinary("armv7l");
             } else if (process.env.ARTIFACT_NAME === "mac") {
@@ -299,7 +322,7 @@ jobs:
 
   release:
     name: Release
-    if: github.ref == 'refs/heads/master' || github.ref == 'refs/heads/beta'
+    if: github.event_name == 'push' && (github.ref == 'refs/heads/master' || github.ref == 'refs/heads/beta')
     runs-on: ubuntu-latest
     concurrency: release-${{ github.ref }}
     environment:
@@ -367,7 +390,7 @@ jobs:
           npm run docs:build
       - name: Upload docs to GitHub Pages
         if: steps.set-npm-url.outputs.npm-url != '' && github.ref == 'refs/heads/master'
-        uses: actions/upload-artifact@v4
+        uses: actions/upload-pages-artifact@v3
         with:
           name: pages-docs
           path: docs-site

.github/workflows/prLint.yml

Lines changed: 1 addition & 0 deletions
@@ -3,6 +3,7 @@ on:
   pull_request:
   pull_request_target:
     types: [opened, reopened, edited, synchronize]
+
 jobs:
   lint:
     name: Lint

.github/workflows/test.yml

Lines changed: 8 additions & 1 deletion
@@ -1,5 +1,12 @@
 name: Test
-on: [push]
+on:
+  push:
+    branches:
+      - master
+      - beta
+  pull_request:
+  workflow_dispatch:
+
 jobs:
   test:
     name: Test

README.md

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@
 
 ## Features
 * Run a text generation model locally on your machine
-* Metal and CUDA support
+* Metal, CUDA and Vulkan support
 * Pre-built binaries are provided, with a fallback to building from source without `node-gyp` or Python
 * Chat with a model using a chat wrapper
 * Use the CLI to chat with a model without writing any code

docs/guide/chat-prompt-wrapper.md

Lines changed: 2 additions & 2 deletions
@@ -7,8 +7,8 @@ and parse its response to know whether it finished answering, or should we tell
 For example, to prompt a model with "Where do llamas come from?" we can give the model a text like this to predict the completion of:
 ```txt
 You are a helpful, respectful and honest assistant. Always answer as helpfully as possible.
-If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct.
-If you don't know the answer to a question, please don't share false information.
+If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly.
+If you don't know the answer to a question, don't share false information.
 
 ### Human
 Where do llamas come from?

docs/index.md

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ hero:
 
 features:
   - icon: 🚀
-    title: Metal and CUDA support
+    title: Metal, CUDA and Vulkan support
     details: Utilize the power of your GPU to run AI models faster
     link: /guide/#cuda-and-metal-support
     linkText: Learn more

llama/CMakeLists.txt

Lines changed: 20 additions & 0 deletions
@@ -58,6 +58,26 @@ if (LLAMA_CUBLAS)
     endif()
 endif()
 
+if (LLAMA_VULKAN OR LLAMA_KOMPUTE)
+    find_package(Vulkan)
+    if (Vulkan_FOUND)
+        if (LLAMA_VULKAN)
+            message(STATUS "Using Vulkan for GPU info")
+        elseif (LLAMA_KOMPUTE)
+            message(STATUS "Using Vulkan for GPU info because Kompute is enabled")
+        endif()
+
+        set(GPU_INFO_HEADERS ${GPU_INFO_HEADERS} gpuInfo/vulkan-gpu-info.h)
+        set(GPU_INFO_SOURCES ${GPU_INFO_SOURCES} gpuInfo/vulkan-gpu-info.cpp)
+
+        add_compile_definitions(GPU_INFO_USE_VULKAN)
+
+        set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} Vulkan::Vulkan)
+    else()
+        message(WARNING "Vulkan not found. Not using it for GPU info")
+    endif()
+endif()
+
 if (LLAMA_HIPBLAS)
     list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
 
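
The new `gpuInfo/vulkan-gpu-info.h` and `gpuInfo/vulkan-gpu-info.cpp` files added to `GPU_INFO_HEADERS`/`GPU_INFO_SOURCES` are not shown in this view. Judging from the call site in `llama/addon.cpp` below, the header presumably declares an interface along these lines (a sketch inferred from usage, not the actual file):

```cpp
// Hypothetical reconstruction of gpuInfo/vulkan-gpu-info.h, inferred from
// the gpuInfoGetTotalVulkanDevicesInfo() call in addon.cpp.
#pragma once

#include <cstdint>

typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);

// Sums VRAM capacity and usage across all Vulkan devices.
// Returns false when the numbers could not be obtained (e.g. when the
// VK_EXT_memory_budget extension is unavailable), in which case the
// reported values should be ignored.
bool gpuInfoGetTotalVulkanDevicesInfo(uint64_t* total, uint64_t* used, gpuInfoVulkanWarningLogCallback_t warningLogCallback);
```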

llama/addon.cpp

Lines changed: 34 additions & 3 deletions
@@ -12,6 +12,9 @@
 #ifdef GPU_INFO_USE_CUBLAS
 #    include "gpuInfo/cuda-gpu-info.h"
 #endif
+#ifdef GPU_INFO_USE_VULKAN
+#    include "gpuInfo/vulkan-gpu-info.h"
+#endif
 #ifdef GPU_INFO_USE_METAL
 #    include "gpuInfo/metal-gpu-info.h"
 #endif
@@ -35,6 +38,7 @@ using AddonThreadSafeLogCallbackFunction =
 AddonThreadSafeLogCallbackFunction addonThreadSafeLoggerCallback;
 bool addonJsLoggerCallbackSet = false;
 int addonLoggerLogLevel = 5;
+bool backendInitialized = false;
 
 std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token) {
     std::vector<char> result(8, 0);
@@ -51,10 +55,15 @@ std::string addon_model_token_to_piece(const struct llama_model* model, llama_to
 }
 
 #ifdef GPU_INFO_USE_CUBLAS
-void lodCudaError(const char* message) {
+void logCudaError(const char* message) {
     addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
 }
 #endif
+#ifdef GPU_INFO_USE_VULKAN
+void logVulkanWarning(const char* message) {
+    addonLlamaCppLogCallback(GGML_LOG_LEVEL_WARN, (std::string("Vulkan warning: ") + std::string(message)).c_str(), nullptr);
+}
+#endif
 
 Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
     uint64_t total = 0;
@@ -63,14 +72,25 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
 #ifdef GPU_INFO_USE_CUBLAS
     size_t cudaDeviceTotal = 0;
     size_t cudaDeviceUsed = 0;
-    bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, lodCudaError);
+    bool cudeGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError);
 
     if (cudeGetInfoSuccess) {
         total += cudaDeviceTotal;
         used += cudaDeviceUsed;
     }
 #endif
 
+#ifdef GPU_INFO_USE_VULKAN
+    uint64_t vulkanDeviceTotal = 0;
+    uint64_t vulkanDeviceUsed = 0;
+    const bool vulkanDeviceSupportsMemoryBudgetExtension = gpuInfoGetTotalVulkanDevicesInfo(&vulkanDeviceTotal, &vulkanDeviceUsed, logVulkanWarning);
+
+    if (vulkanDeviceSupportsMemoryBudgetExtension) {
+        total += vulkanDeviceTotal;
+        used += vulkanDeviceUsed;
+    }
+#endif
+
 #ifdef GPU_INFO_USE_METAL
     uint64_t metalDeviceTotal = 0;
     uint64_t metalDeviceUsed = 0;
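
The flag name `vulkanDeviceSupportsMemoryBudgetExtension` suggests the new `vulkan-gpu-info.cpp` reads per-heap usage through the `VK_EXT_memory_budget` extension. Under that assumption, the core of such a query typically looks like this (an illustrative sketch, not the commit's actual implementation):

```cpp
// Sketch: accumulating device-local heap budget/usage for one physical
// device via VK_EXT_memory_budget (assumes the extension was verified
// with vkEnumerateDeviceExtensionProperties beforehand).
#include <vulkan/vulkan.h>
#include <cstdint>

static void accumulateVramUsage(VkPhysicalDevice device, uint64_t* total, uint64_t* used) {
    VkPhysicalDeviceMemoryBudgetPropertiesEXT budget = {};
    budget.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT;

    VkPhysicalDeviceMemoryProperties2 props = {};
    props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2;
    props.pNext = &budget;  // chain the budget query onto the base query

    vkGetPhysicalDeviceMemoryProperties2(device, &props);

    for (uint32_t i = 0; i < props.memoryProperties.memoryHeapCount; i++) {
        // Only device-local heaps correspond to dedicated VRAM.
        if (props.memoryProperties.memoryHeaps[i].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) {
            *total += budget.heapBudget[i];
            *used += budget.heapUsage[i];
        }
    }
}
```
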
@@ -950,7 +970,7 @@ void addonCallJsLogCallback(
             called = false;
         }
     }
-    
+
     if (!called && data != nullptr) {
         if (data->logLevelNumber == 2) {
             fputs(data->stringStream->str().c_str(), stderr);
@@ -1046,8 +1066,17 @@ Napi::Value setLoggerLogLevel(const Napi::CallbackInfo& info) {
     return info.Env().Undefined();
 }
 
+static void addonFreeLlamaBackend(Napi::Env env, int* data) {
+    if (backendInitialized) {
+        llama_backend_free();
+        backendInitialized = false;
+    }
+}
+
 Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
     llama_backend_init();
+    backendInitialized = true;
+
     exports.DefineProperties({
         Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
         Napi::PropertyDescriptor::Function("setLogger", setLogger),
@@ -1061,6 +1090,8 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
 
     llama_log_set(addonLlamaCppLogCallback, nullptr);
 
+    exports.AddFinalizer(addonFreeLlamaBackend, static_cast<int*>(nullptr));
+
     return exports;
 }
llama/gpuInfo/cuda-gpu-info.cu

Lines changed: 5 additions & 5 deletions
@@ -15,9 +15,9 @@
 #endif
 
 
-typedef void (*gpuInfoErrorLogCallback_t)(const char* message);
+typedef void (*gpuInfoCudaErrorLogCallback_t)(const char* message);
 
-bool gpuInfoSetCudaDevice(const int device, gpuInfoErrorLogCallback_t errorLogCallback) {
+bool gpuInfoSetCudaDevice(const int device, gpuInfoCudaErrorLogCallback_t errorLogCallback) {
     int current_device;
     auto getDeviceResult = cudaGetDevice(&current_device);
 
@@ -40,7 +40,7 @@ bool gpuInfoSetCudaDevice(const int device, gpuInfoErrorLogCallback_t errorLogCa
     return true;
 }
 
-bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
+bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback) {
     gpuInfoSetCudaDevice(device, errorLogCallback);
 
     size_t freeMem;
@@ -58,7 +58,7 @@ bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfo
     return true;
 }
 
-int gpuInfoGetCudaDeviceCount(gpuInfoErrorLogCallback_t errorLogCallback) {
+int gpuInfoGetCudaDeviceCount(gpuInfoCudaErrorLogCallback_t errorLogCallback) {
     int deviceCount;
     auto getDeviceCountResult = cudaGetDeviceCount(&deviceCount);
 
@@ -70,7 +70,7 @@ int gpuInfoGetCudaDeviceCount(gpuInfoErrorLogCallback_t errorLogCallback) {
     return deviceCount;
 }
 
-bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
+bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoCudaErrorLogCallback_t errorLogCallback) {
     int deviceCount = gpuInfoGetCudaDeviceCount(errorLogCallback);
 
     if (deviceCount < 0) {
