89 changes: 87 additions & 2 deletions llama/CMakeLists.txt
@@ -19,17 +19,102 @@ execute_process(COMMAND node -p "require('node-addon-api').include.slice(1,-1)"
include_directories(${NODE_ADDON_API_DIR} ${CMAKE_JS_INC})

add_subdirectory("llama.cpp")
include_directories("gpuInfo")
include_directories("llama.cpp")
include_directories("./llama.cpp/common")

if (LLAMA_CUBLAS)
    cmake_minimum_required(VERSION 3.17)

    find_package(CUDAToolkit)
    if (CUDAToolkit_FOUND)
        message(STATUS "Using cuBLAS for GPU info")

        enable_language(CUDA)

        set(GPU_INFO_HEADERS ${GPU_INFO_HEADERS} gpuInfo/cuda-gpu-info.h)
        set(GPU_INFO_SOURCES ${GPU_INFO_SOURCES} gpuInfo/cuda-gpu-info.cu)

        add_compile_definitions(GPU_INFO_USE_CUBLAS)

        if (LLAMA_STATIC)
            set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cudart_static)
        else()
            set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cudart)
        endif()

        set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cuda_driver)

        if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
            # copied from llama.cpp/CMakeLists.txt under "if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)"
            if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
                set(CMAKE_CUDA_ARCHITECTURES "60;61;70")
            else()
                set(CMAKE_CUDA_ARCHITECTURES "52;61;70")
            endif()
        endif()
    else()
        message(WARNING "cuBLAS not found. Not using it for GPU info")
    endif()
endif()

if (LLAMA_HIPBLAS)
    list(APPEND CMAKE_PREFIX_PATH /opt/rocm)

    if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
        message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang")
    endif()
    if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
        message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
    endif()

    find_package(hip)
    find_package(hipblas)
    find_package(rocblas)

    if (${hipblas_FOUND} AND ${hip_FOUND})
        message(STATUS "Using HIP and hipBLAS for GPU info")
        add_compile_definitions(GPU_INFO_USE_HIPBLAS GPU_INFO_USE_CUBLAS)
        add_library(gpu-info-rocm OBJECT gpuInfo/cuda-gpu-info.cu gpuInfo/cuda-gpu-info.h)
        set_source_files_properties(gpuInfo/cuda-gpu-info.cu PROPERTIES LANGUAGE CXX)
        target_link_libraries(gpu-info-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas)

        set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} gpu-info-rocm)
    else()
        message(WARNING "hipBLAS or HIP not found. Try setting CMAKE_PREFIX_PATH=/opt/rocm")
    endif()
endif()

if (LLAMA_METAL)
    find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
    find_library(METAL_FRAMEWORK Metal REQUIRED)
    find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)

    message(STATUS "Using Metal for GPU info")
    set(GPU_INFO_HEADERS ${GPU_INFO_HEADERS} gpuInfo/metal-gpu-info.h)
    set(GPU_INFO_SOURCES ${GPU_INFO_SOURCES} gpuInfo/metal-gpu-info.mm)

    add_compile_definitions(GPU_INFO_USE_METAL)

    set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS}
        ${FOUNDATION_LIBRARY}
        ${METAL_FRAMEWORK}
        ${METALKIT_FRAMEWORK}
    )
endif()

file(GLOB SOURCE_FILES "addon.cpp" ${GPU_INFO_SOURCES})

add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC} ${GPU_INFO_HEADERS})
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")
target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB})
target_link_libraries(${PROJECT_NAME} "llama")
target_link_libraries(${PROJECT_NAME} "common")

if (DEFINED GPU_INFO_EXTRA_LIBS)
    target_link_libraries(${PROJECT_NAME} ${GPU_INFO_EXTRA_LIBS})
endif()

if(MSVC AND CMAKE_JS_NODELIB_DEF AND CMAKE_JS_NODELIB_TARGET)
    # Generate node.lib
    execute_process(COMMAND ${CMAKE_AR} /def:${CMAKE_JS_NODELIB_DEF} /out:${CMAKE_JS_NODELIB_TARGET} ${CMAKE_STATIC_LINKER_FLAGS})
67 changes: 62 additions & 5 deletions llama/addon.cpp
@@ -9,12 +9,22 @@
#include "llama.h"
#include "napi.h"

#ifdef GPU_INFO_USE_CUBLAS
# include "gpuInfo/cuda-gpu-info.h"
#endif
#ifdef GPU_INFO_USE_METAL
# include "gpuInfo/metal-gpu-info.h"
#endif


struct addon_logger_log {
  public:
    const int logLevelNumber;
    const std::stringstream* stringStream;
};

static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data);

using AddonThreadSafeLogCallbackFunctionContext = Napi::Reference<Napi::Value>;
void addonCallJsLogCallback(
    Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
@@ -40,6 +50,43 @@ std::string addon_model_token_to_piece(const struct llama_model* model, llama_to
    return std::string(result.data(), result.size());
}

#ifdef GPU_INFO_USE_CUBLAS
void logCudaError(const char* message) {
    addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
}
#endif

Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
    uint64_t total = 0;
    uint64_t used = 0;

#ifdef GPU_INFO_USE_CUBLAS
    size_t cudaDeviceTotal = 0;
    size_t cudaDeviceUsed = 0;
    bool cudaGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError);

    if (cudaGetInfoSuccess) {
        total += cudaDeviceTotal;
        used += cudaDeviceUsed;
    }
#endif

#ifdef GPU_INFO_USE_METAL
    uint64_t metalDeviceTotal = 0;
    uint64_t metalDeviceUsed = 0;
    get_metal_gpu_info(&metalDeviceTotal, &metalDeviceUsed);

    total += metalDeviceTotal;
    used += metalDeviceUsed;
#endif

    Napi::Object result = Napi::Object::New(info.Env());
    result.Set("total", Napi::Number::From(info.Env(), total));
    result.Set("used", Napi::Number::From(info.Env(), used));

    return result;
}

class AddonModel : public Napi::ObjectWrap<AddonModel> {
  public:
    llama_model_params model_params;
@@ -830,12 +877,21 @@ int addonGetGgmlLogLevelNumber(ggml_log_level level) {
void addonCallJsLogCallback(
    Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
) {
    bool called = false;

    if (env != nullptr && callback != nullptr) {
        try {
            callback.Call({
                Napi::Number::New(env, data->logLevelNumber),
                Napi::String::New(env, data->stringStream->str()),
            });
            called = true;
        } catch (const Napi::Error& e) {
            called = false;
        }
    }

    if (!called && data != nullptr) {
        if (data->logLevelNumber == 2) {
            fputs(data->stringStream->str().c_str(), stderr);
            fflush(stderr);
@@ -936,6 +992,7 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
Napi::PropertyDescriptor::Function("setLogger", setLogger),
Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
});
AddonModel::init(exports);
AddonGrammar::init(exports);
99 changes: 99 additions & 0 deletions llama/gpuInfo/cuda-gpu-info.cu
@@ -0,0 +1,99 @@
#include <stddef.h>

#if defined(GPU_INFO_USE_HIPBLAS)
#include <hip/hip_runtime.h>
#include <hipblas/hipblas.h>
#define cudaGetDevice hipGetDevice
#define cudaGetDeviceCount hipGetDeviceCount
#define cudaGetErrorString hipGetErrorString
#define cudaMemGetInfo hipMemGetInfo
#define cudaSetDevice hipSetDevice
#define cudaSuccess hipSuccess
#else
#include <cuda_runtime.h>
#include <cuda.h>
#endif


typedef void (*gpuInfoErrorLogCallback_t)(const char* message);

bool gpuInfoSetCudaDevice(const int device, gpuInfoErrorLogCallback_t errorLogCallback) {
    int current_device;
    auto getDeviceResult = cudaGetDevice(&current_device);

    if (getDeviceResult != cudaSuccess) {
        errorLogCallback(cudaGetErrorString(getDeviceResult));
        return false;
    }

    if (device == current_device) {
        return true;
    }

    const auto setDeviceResult = cudaSetDevice(device);

    if (setDeviceResult != cudaSuccess) {
        errorLogCallback(cudaGetErrorString(setDeviceResult));
        return false;
    }

    return true;
}

bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
    if (!gpuInfoSetCudaDevice(device, errorLogCallback)) {
        return false;
    }

    size_t freeMem;
    size_t totalMem;
    auto getMemInfoResult = cudaMemGetInfo(&freeMem, &totalMem);

    if (getMemInfoResult != cudaSuccess) {
        errorLogCallback(cudaGetErrorString(getMemInfoResult));
        return false;
    }

    *total = totalMem;
    *used = totalMem - freeMem;

    return true;
}

int gpuInfoGetCudaDeviceCount(gpuInfoErrorLogCallback_t errorLogCallback) {
    int deviceCount;
    auto getDeviceCountResult = cudaGetDeviceCount(&deviceCount);

    if (getDeviceCountResult != cudaSuccess) {
        errorLogCallback(cudaGetErrorString(getDeviceCountResult));
        return -1;
    }

    return deviceCount;
}

bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
    int deviceCount = gpuInfoGetCudaDeviceCount(errorLogCallback);

    if (deviceCount < 0) {
        return false;
    }

    size_t usedMem = 0;
    size_t totalMem = 0;

    for (int i = 0; i < deviceCount; i++) {
        size_t deviceUsedMem;
        size_t deviceTotalMem;

        if (!gpuInfoGetCudaDeviceInfo(i, &deviceTotalMem, &deviceUsedMem, errorLogCallback)) {
            return false;
        }

        usedMem += deviceUsedMem;
        totalMem += deviceTotalMem;
    }

    *total = totalMem;
    *used = usedMem;

    return true;
}
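
For reviewers who want to exercise the CUDA path in isolation, a minimal standalone driver could look like the sketch below; the main() harness and the printCudaError callback are illustrative assumptions, not part of this PR.

#include <cstdio>

#include "gpuInfo/cuda-gpu-info.h"

// Illustrative callback matching gpuInfoErrorLogCallback_t.
static void printCudaError(const char* message) {
    fprintf(stderr, "CUDA error: %s\n", message);
}

int main() {
    size_t total = 0;
    size_t used = 0;

    // Sums VRAM across all visible CUDA devices; returns false
    // (after logging through the callback) if any CUDA call fails.
    if (!gpuInfoGetTotalCudaDevicesInfo(&total, &used, printCudaError)) {
        return 1;
    }

    printf("VRAM used: %zu / %zu bytes\n", used, total);
    return 0;
}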
7 changes: 7 additions & 0 deletions llama/gpuInfo/cuda-gpu-info.h
@@ -0,0 +1,7 @@
#pragma once

#include <stddef.h>

typedef void (*gpuInfoErrorLogCallback_t)(const char* message);

bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback);
5 changes: 5 additions & 0 deletions llama/gpuInfo/metal-gpu-info.h
@@ -0,0 +1,5 @@
#pragma once

#include <stdint.h>

void get_metal_gpu_info(uint64_t * total, uint64_t * used);
17 changes: 17 additions & 0 deletions llama/gpuInfo/metal-gpu-info.mm
@@ -0,0 +1,17 @@
#include <stdint.h>
#import <Metal/Metal.h>

void get_metal_gpu_info(uint64_t * total, uint64_t * used) {
    id<MTLDevice> device = MTLCreateSystemDefaultDevice();

    if (device) {
        // recommendedMaxWorkingSetSize is Metal's upper bound for this
        // device's working set; currentAllocatedSize is what the device
        // has currently allocated.
        *total = device.recommendedMaxWorkingSetSize;
        *used = device.currentAllocatedSize;
    } else {
        *total = 0;
        *used = 0;
    }

    [device release];
    device = nil;
}
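
Since metal-gpu-info.h only depends on <stdint.h>, the Metal path can be smoke-tested from a plain C++ harness too (linked against the compiled .mm); the main() below is again an illustrative assumption:

#include <cinttypes>
#include <cstdio>

#include "gpuInfo/metal-gpu-info.h"

int main() {
    uint64_t total = 0;
    uint64_t used = 0;

    // Both values come back as 0 when no Metal device is available.
    get_metal_gpu_info(&total, &used);

    printf("VRAM used: %" PRIu64 " / %" PRIu64 " bytes\n", used, total);
    return 0;
}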