Commit 46235a2

feat: get VRAM state (#161)

* feat: get VRAM state
* feat: `chatWrapper` getter on a `LlamaChatSession`
* fix(`resolveChatWrapperBasedOnModel`): use llamaChat wrapper for llama models only if there's a `chat` sub-variant
* fix: update latest build on postinstall compilation

1 parent 61ea38f · commit 46235a2

24 files changed: +2050 -246 lines

llama/CMakeLists.txt (87 additions & 2 deletions)

@@ -19,17 +19,102 @@ execute_process(COMMAND node -p "require('node-addon-api').include.slice(1,-1)"
 include_directories(${NODE_ADDON_API_DIR} ${CMAKE_JS_INC})
 
 add_subdirectory("llama.cpp")
+include_directories("gpuInfo")
 include_directories("llama.cpp")
 include_directories("./llama.cpp/common")
 
-file(GLOB SOURCE_FILES "addon.cpp")
+if (LLAMA_CUBLAS)
+    cmake_minimum_required(VERSION 3.17)
 
-add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC})
+    find_package(CUDAToolkit)
+    if (CUDAToolkit_FOUND)
+        message(STATUS "Using cuBLAS for GPU info")
+
+        enable_language(CUDA)
+
+        set(GPU_INFO_HEADERS ${GPU_INFO_HEADERS} gpuInfo/cuda-gpu-info.h)
+        set(GPU_INFO_SOURCES ${GPU_INFO_SOURCES} gpuInfo/cuda-gpu-info.cu)
+
+        add_compile_definitions(GPU_INFO_USE_CUBLAS)
+
+        if (LLAMA_STATIC)
+            set(LLAMA_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cudart_static)
+        else()
+            set(LLAMA_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cudart)
+        endif()
+
+        set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cuda_driver)
+
+        if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+            # copied from llama.cpp/CMakeLists.txt under "if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)"
+            if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
+                set(CMAKE_CUDA_ARCHITECTURES "60;61;70")
+            else()
+                set(CMAKE_CUDA_ARCHITECTURES "52;61;70")
+            endif()
+        endif()
+    else()
+        message(WARNING "cuBLAS not found. Not using it for GPU info")
+    endif()
+endif()
+
+if (LLAMA_HIPBLAS)
+    list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
+
+    if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
+        message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang")
+    endif()
+    if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
+        message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
+    endif()
+
+    find_package(hip)
+    find_package(hipblas)
+    find_package(rocblas)
+
+    if (${hipblas_FOUND} AND ${hip_FOUND})
+        message(STATUS "Using HIP and hipBLAS for GPU info")
+        add_compile_definitions(GPU_INFO_USE_HIPBLAS GPU_INFO_USE_CUBLAS)
+        add_library(gpu-info-rocm OBJECT gpuInfo/cuda-gpu-info.cu gpuInfo/cuda-gpu-info.h)
+        set_source_files_properties(gpuInfo/cuda-gpu-info.cu PROPERTIES LANGUAGE CXX)
+        target_link_libraries(gpu-info-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas)
+
+        set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} gpu-info-rocm)
+    else()
+        message(WARNING "hipBLAS or HIP not found. Try setting CMAKE_PREFIX_PATH=/opt/rocm")
+    endif()
+endif()
+
+if (LLAMA_METAL)
+    find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
+    find_library(METAL_FRAMEWORK Metal REQUIRED)
+    find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
+
+    message(STATUS "Using Metal for GPU info")
+    set(GPU_INFO_HEADERS ${GPU_INFO_HEADERS} gpuInfo/metal-gpu-info.h)
+    set(GPU_INFO_SOURCES ${GPU_INFO_SOURCES} gpuInfo/metal-gpu-info.mm)
+
+    add_compile_definitions(GPU_INFO_USE_METAL)
+
+    set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS}
+        ${FOUNDATION_LIBRARY}
+        ${METAL_FRAMEWORK}
+        ${METALKIT_FRAMEWORK}
+    )
+endif()
+
+file(GLOB SOURCE_FILES "addon.cpp" ${GPU_INFO_SOURCES})
+
+add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC} ${GPU_INFO_HEADERS})
 set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")
 target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB})
 target_link_libraries(${PROJECT_NAME} "llama")
 target_link_libraries(${PROJECT_NAME} "common")
 
+if (DEFINED GPU_INFO_EXTRA_LIBS)
+    target_link_libraries(${PROJECT_NAME} ${GPU_INFO_EXTRA_LIBS})
+endif()
+
 if(MSVC AND CMAKE_JS_NODELIB_DEF AND CMAKE_JS_NODELIB_TARGET)
     # Generate node.lib
     execute_process(COMMAND ${CMAKE_AR} /def:${CMAKE_JS_NODELIB_DEF} /out:${CMAKE_JS_NODELIB_TARGET} ${CMAKE_STATIC_LINKER_FLAGS})
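
Each backend branch above defines a compile-time macro (GPU_INFO_USE_CUBLAS, GPU_INFO_USE_HIPBLAS, GPU_INFO_USE_METAL) that the addon sources use to select the matching VRAM query at build time. A minimal sketch of that dispatch pattern, not part of the commit:

    // Sketch only: reports which GPU-info backend the CMake logic above
    // compiled in. Mirrors the macro checks used in addon.cpp.
    #include <cstdio>

    static const char* gpuInfoBackendName() {
    #if defined(GPU_INFO_USE_HIPBLAS)
        return "HIP/hipBLAS";  // the ROCm branch also defines GPU_INFO_USE_CUBLAS
    #elif defined(GPU_INFO_USE_CUBLAS)
        return "CUDA/cuBLAS";
    #elif defined(GPU_INFO_USE_METAL)
        return "Metal";
    #else
        return "none";
    #endif
    }

    int main() {
        std::printf("GPU info backend: %s\n", gpuInfoBackendName());
        return 0;
    }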

llama/addon.cpp (62 additions & 5 deletions)

@@ -9,12 +9,22 @@
 #include "llama.h"
 #include "napi.h"
 
+#ifdef GPU_INFO_USE_CUBLAS
+#    include "gpuInfo/cuda-gpu-info.h"
+#endif
+#ifdef GPU_INFO_USE_METAL
+#    include "gpuInfo/metal-gpu-info.h"
+#endif
+
+
 struct addon_logger_log {
     public:
     const int logLevelNumber;
     const std::stringstream* stringStream;
 };
 
+static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data);
+
 using AddonThreadSafeLogCallbackFunctionContext = Napi::Reference<Napi::Value>;
 void addonCallJsLogCallback(
     Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
@@ -40,6 +50,43 @@ std::string addon_model_token_to_piece(const struct llama_model* model, llama_to
     return std::string(result.data(), result.size());
 }
 
+#ifdef GPU_INFO_USE_CUBLAS
+void logCudaError(const char* message) {
+    addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
+}
+#endif
+
+Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
+    uint64_t total = 0;
+    uint64_t used = 0;
+
+#ifdef GPU_INFO_USE_CUBLAS
+    size_t cudaDeviceTotal = 0;
+    size_t cudaDeviceUsed = 0;
+    bool cudaGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError);
+
+    if (cudaGetInfoSuccess) {
+        total += cudaDeviceTotal;
+        used += cudaDeviceUsed;
+    }
+#endif
+
+#ifdef GPU_INFO_USE_METAL
+    uint64_t metalDeviceTotal = 0;
+    uint64_t metalDeviceUsed = 0;
+    get_metal_gpu_info(&metalDeviceTotal, &metalDeviceUsed);
+
+    total += metalDeviceTotal;
+    used += metalDeviceUsed;
+#endif
+
+    Napi::Object result = Napi::Object::New(info.Env());
+    result.Set("total", Napi::Number::From(info.Env(), total));
+    result.Set("used", Napi::Number::From(info.Env(), used));
+
+    return result;
+}
+
 class AddonModel : public Napi::ObjectWrap<AddonModel> {
     public:
     llama_model_params model_params;
@@ -830,12 +877,21 @@ int addonGetGgmlLogLevelNumber(ggml_log_level level) {
 void addonCallJsLogCallback(
     Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
 ) {
+    bool called = false;
+
     if (env != nullptr && callback != nullptr) {
-        callback.Call({
-            Napi::Number::New(env, data->logLevelNumber),
-            Napi::String::New(env, data->stringStream->str()),
-        });
-    } else if (data != nullptr) {
+        try {
+            callback.Call({
+                Napi::Number::New(env, data->logLevelNumber),
+                Napi::String::New(env, data->stringStream->str()),
+            });
+            called = true;
+        } catch (const Napi::Error& e) {
+            called = false;
+        }
+    }
+
+    if (!called && data != nullptr) {
         if (data->logLevelNumber == 2) {
             fputs(data->stringStream->str().c_str(), stderr);
             fflush(stderr);
@@ -936,6 +992,7 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
         Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
         Napi::PropertyDescriptor::Function("setLogger", setLogger),
         Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
+        Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
     });
     AddonModel::init(exports);
     AddonGrammar::init(exports);
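
getGpuVramInfo hands the byte counts to JavaScript as Napi::Number, i.e. an IEEE-754 double. A small aside, not from the commit, on why that is lossless here: doubles represent integers exactly up to 2^53, far above any realistic VRAM total.

    // Aside: demonstrates that VRAM-sized byte counts survive the
    // uint64_t -> double -> uint64_t round-trip that Napi::Number implies.
    #include <cassert>
    #include <cstdint>

    int main() {
        const uint64_t exactLimit = 1ull << 53;                 // ~8 PiB
        const uint64_t vramBytes = 24ull * 1024 * 1024 * 1024;  // e.g. a 24 GiB GPU

        assert(vramBytes < exactLimit);
        assert(static_cast<uint64_t>(static_cast<double>(vramBytes)) == vramBytes);
        return 0;
    }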

llama/gpuInfo/cuda-gpu-info.cu (99 additions & 0 deletions)

@@ -0,0 +1,99 @@
+#include <stddef.h>
+
+#if defined(GPU_INFO_USE_HIPBLAS)
+#include <hip/hip_runtime.h>
+#include <hipblas/hipblas.h>
+#define cudaGetDevice hipGetDevice
+#define cudaGetDeviceCount hipGetDeviceCount
+#define cudaGetErrorString hipGetErrorString
+#define cudaMemGetInfo hipMemGetInfo
+#define cudaSetDevice hipSetDevice
+#define cudaSuccess hipSuccess
+#else
+#include <cuda_runtime.h>
+#include <cuda.h>
+#endif
+
+
+typedef void (*gpuInfoErrorLogCallback_t)(const char* message);
+
+bool gpuInfoSetCudaDevice(const int device, gpuInfoErrorLogCallback_t errorLogCallback) {
+    int current_device;
+    auto getDeviceResult = cudaGetDevice(&current_device);
+
+    if (getDeviceResult != cudaSuccess) {
+        errorLogCallback(cudaGetErrorString(getDeviceResult));
+        return false;
+    }
+
+    if (device == current_device) {
+        return true;
+    }
+
+    const auto setDeviceResult = cudaSetDevice(device);
+
+    if (setDeviceResult != cudaSuccess) {
+        errorLogCallback(cudaGetErrorString(setDeviceResult));
+        return false;
+    }
+
+    return true;
+}
+
+bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
+    gpuInfoSetCudaDevice(device, errorLogCallback);
+
+    size_t freeMem;
+    size_t totalMem;
+    auto getMemInfoResult = cudaMemGetInfo(&freeMem, &totalMem);
+
+    if (getMemInfoResult != cudaSuccess) {
+        errorLogCallback(cudaGetErrorString(getMemInfoResult));
+        return false;
+    }
+
+    *total = totalMem;
+    *used = totalMem - freeMem;
+
+    return true;
+}
+
+int gpuInfoGetCudaDeviceCount(gpuInfoErrorLogCallback_t errorLogCallback) {
+    int deviceCount;
+    auto getDeviceCountResult = cudaGetDeviceCount(&deviceCount);
+
+    if (getDeviceCountResult != cudaSuccess) {
+        errorLogCallback(cudaGetErrorString(getDeviceCountResult));
+        return -1;
+    }
+
+    return deviceCount;
+}
+
+bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
+    int deviceCount = gpuInfoGetCudaDeviceCount(errorLogCallback);
+
+    if (deviceCount < 0) {
+        return false;
+    }
+
+    size_t usedMem = 0;
+    size_t totalMem = 0;
+
+    for (int i = 0; i < deviceCount; i++) {
+        size_t deviceUsedMem;
+        size_t deviceTotalMem;
+
+        if (!gpuInfoGetCudaDeviceInfo(i, &deviceTotalMem, &deviceUsedMem, errorLogCallback)) {
+            return false;
+        }
+
+        usedMem += deviceUsedMem;
+        totalMem += deviceTotalMem;
+    }
+
+    *total = totalMem;
+    *used = usedMem;
+
+    return true;
+}
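
A standalone usage sketch (not part of the commit) for the aggregate query above; it assumes a CUDA toolchain and linking against this translation unit:

    // Prints total/used VRAM summed across all visible CUDA devices.
    #include <cstddef>
    #include <cstdio>

    typedef void (*gpuInfoErrorLogCallback_t)(const char* message);
    bool gpuInfoGetTotalCudaDevicesInfo(size_t* total, size_t* used, gpuInfoErrorLogCallback_t errorLogCallback);

    static void logError(const char* message) {
        std::fprintf(stderr, "CUDA error: %s\n", message);
    }

    int main() {
        size_t total = 0;
        size_t used = 0;

        if (gpuInfoGetTotalCudaDevicesInfo(&total, &used, logError)) {
            std::printf("VRAM: %zu used / %zu total bytes\n", used, total);
        }
        return 0;
    }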

llama/gpuInfo/cuda-gpu-info.h (7 additions & 0 deletions)

@@ -0,0 +1,7 @@
+#pragma once
+
+#include <stddef.h>
+
+typedef void (*gpuInfoErrorLogCallback_t)(const char* message);
+
+bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback);

llama/gpuInfo/metal-gpu-info.h (5 additions & 0 deletions)

@@ -0,0 +1,5 @@
+#pragma once
+
+#include <stdint.h>
+
+void get_metal_gpu_info(uint64_t * total, uint64_t * used);

llama/gpuInfo/metal-gpu-info.mm (17 additions & 0 deletions)

@@ -0,0 +1,17 @@
+#include <stdint.h>
+#import <Metal/Metal.h>
+
+void get_metal_gpu_info(uint64_t * total, uint64_t * used) {
+    id<MTLDevice> device = MTLCreateSystemDefaultDevice();
+
+    if (device) {
+        *total = device.recommendedMaxWorkingSetSize;
+        *used = device.currentAllocatedSize;
+    } else {
+        *total = 0;
+        *used = 0;
+    }
+
+    [device release];
+    device = nil;
+}
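
For completeness, a caller sketch (not from the commit) for the Metal variant; it assumes macOS with the file above compiled in and the Metal and Foundation frameworks linked:

    // get_metal_gpu_info never fails: without a Metal device it reports 0/0.
    #include <cstdint>
    #include <cstdio>

    void get_metal_gpu_info(uint64_t* total, uint64_t* used);

    int main() {
        uint64_t total = 0;
        uint64_t used = 0;

        get_metal_gpu_info(&total, &used);
        std::printf("Metal: %llu allocated of %llu recommended bytes\n",
                    (unsigned long long)used, (unsigned long long)total);
        return 0;
    }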
