89 changes: 87 additions & 2 deletions llama/CMakeLists.txt
@@ -19,17 +19,102 @@ execute_process(COMMAND node -p "require('node-addon-api').include.slice(1,-1)"
include_directories(${NODE_ADDON_API_DIR} ${CMAKE_JS_INC})

add_subdirectory("llama.cpp")
include_directories("gpuInfo")
include_directories("llama.cpp")
include_directories("./llama.cpp/common")

if (LLAMA_CUBLAS)
    cmake_minimum_required(VERSION 3.17)

    find_package(CUDAToolkit)
    if (CUDAToolkit_FOUND)
        message(STATUS "Using cuBLAS for GPU info")

        enable_language(CUDA)

        set(GPU_INFO_HEADERS ${GPU_INFO_HEADERS} gpuInfo/cuda-gpu-info.h)
        set(GPU_INFO_SOURCES ${GPU_INFO_SOURCES} gpuInfo/cuda-gpu-info.cu)

        add_compile_definitions(GPU_INFO_USE_CUBLAS)

        if (LLAMA_STATIC)
            set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cudart_static)
        else()
            set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cudart)
        endif()

        set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} CUDA::cuda_driver)

        if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
            # copied from llama.cpp/CMakeLists.txt under "if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)"
            if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
                set(CMAKE_CUDA_ARCHITECTURES "60;61;70")
            else()
                set(CMAKE_CUDA_ARCHITECTURES "52;61;70")
            endif()
        endif()
    else()
        message(WARNING "cuBLAS not found. Not using it for GPU info")
    endif()
endif()

if (LLAMA_HIPBLAS)
    list(APPEND CMAKE_PREFIX_PATH /opt/rocm)

    if (NOT ${CMAKE_C_COMPILER_ID} MATCHES "Clang")
        message(WARNING "Only LLVM is supported for HIP, hint: CC=/opt/rocm/llvm/bin/clang")
    endif()
    if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")
        message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++")
    endif()

    find_package(hip)
    find_package(hipblas)
    find_package(rocblas)

    if (${hipblas_FOUND} AND ${hip_FOUND})
        message(STATUS "Using HIP and hipBLAS for GPU info")
        add_compile_definitions(GPU_INFO_USE_HIPBLAS GPU_INFO_USE_CUBLAS)
        add_library(gpu-info-rocm OBJECT gpuInfo/cuda-gpu-info.cu gpuInfo/cuda-gpu-info.h)
        set_source_files_properties(gpuInfo/cuda-gpu-info.cu PROPERTIES LANGUAGE CXX)
        target_link_libraries(gpu-info-rocm PRIVATE hip::device PUBLIC hip::host roc::rocblas roc::hipblas)

        set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS} gpu-info-rocm)
    else()
        message(WARNING "hipBLAS or HIP not found. Try setting CMAKE_PREFIX_PATH=/opt/rocm")
    endif()
endif()

if (LLAMA_METAL)
    find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
    find_library(METAL_FRAMEWORK Metal REQUIRED)
    find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)

    message(STATUS "Using Metal for GPU info")
    set(GPU_INFO_HEADERS ${GPU_INFO_HEADERS} gpuInfo/metal-gpu-info.h)
    set(GPU_INFO_SOURCES ${GPU_INFO_SOURCES} gpuInfo/metal-gpu-info.mm)

    add_compile_definitions(GPU_INFO_USE_METAL)

    set(GPU_INFO_EXTRA_LIBS ${GPU_INFO_EXTRA_LIBS}
        ${FOUNDATION_LIBRARY}
        ${METAL_FRAMEWORK}
        ${METALKIT_FRAMEWORK}
    )
endif()

file(GLOB SOURCE_FILES "addon.cpp" ${GPU_INFO_SOURCES})

add_library(${PROJECT_NAME} SHARED ${SOURCE_FILES} ${CMAKE_JS_SRC} ${GPU_INFO_HEADERS})
set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node")
target_link_libraries(${PROJECT_NAME} ${CMAKE_JS_LIB})
target_link_libraries(${PROJECT_NAME} "llama")
target_link_libraries(${PROJECT_NAME} "common")

if (DEFINED GPU_INFO_EXTRA_LIBS)
    target_link_libraries(${PROJECT_NAME} ${GPU_INFO_EXTRA_LIBS})
endif()

if(MSVC AND CMAKE_JS_NODELIB_DEF AND CMAKE_JS_NODELIB_TARGET)
    # Generate node.lib
    execute_process(COMMAND ${CMAKE_AR} /def:${CMAKE_JS_NODELIB_DEF} /out:${CMAKE_JS_NODELIB_TARGET} ${CMAKE_STATIC_LINKER_FLAGS})
67 changes: 62 additions & 5 deletions llama/addon.cpp
@@ -9,12 +9,22 @@
#include "llama.h"
#include "napi.h"

#ifdef GPU_INFO_USE_CUBLAS
# include "gpuInfo/cuda-gpu-info.h"
#endif
#ifdef GPU_INFO_USE_METAL
# include "gpuInfo/metal-gpu-info.h"
#endif


struct addon_logger_log {
  public:
    const int logLevelNumber;
    const std::stringstream* stringStream;
};

static void addonLlamaCppLogCallback(ggml_log_level level, const char* text, void* user_data);

using AddonThreadSafeLogCallbackFunctionContext = Napi::Reference<Napi::Value>;
void addonCallJsLogCallback(
    Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
@@ -40,6 +50,43 @@ std::string addon_model_token_to_piece(const struct llama_model* model, llama_to
    return std::string(result.data(), result.size());
}

#ifdef GPU_INFO_USE_CUBLAS
void logCudaError(const char* message) {
    addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
}
#endif

Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {
    uint64_t total = 0;
    uint64_t used = 0;

#ifdef GPU_INFO_USE_CUBLAS
    size_t cudaDeviceTotal = 0;
    size_t cudaDeviceUsed = 0;
    bool cudaGetInfoSuccess = gpuInfoGetTotalCudaDevicesInfo(&cudaDeviceTotal, &cudaDeviceUsed, logCudaError);

    if (cudaGetInfoSuccess) {
        total += cudaDeviceTotal;
        used += cudaDeviceUsed;
    }
#endif

#ifdef GPU_INFO_USE_METAL
    uint64_t metalDeviceTotal = 0;
    uint64_t metalDeviceUsed = 0;
    get_metal_gpu_info(&metalDeviceTotal, &metalDeviceUsed);

    total += metalDeviceTotal;
    used += metalDeviceUsed;
#endif

    Napi::Object result = Napi::Object::New(info.Env());
    result.Set("total", Napi::Number::From(info.Env(), total));
    result.Set("used", Napi::Number::From(info.Env(), used));

    return result;
}

class AddonModel : public Napi::ObjectWrap<AddonModel> {
  public:
    llama_model_params model_params;
@@ -830,12 +877,21 @@ int addonGetGgmlLogLevelNumber(ggml_log_level level) {
void addonCallJsLogCallback(
    Napi::Env env, Napi::Function callback, AddonThreadSafeLogCallbackFunctionContext* context, addon_logger_log* data
) {
    bool called = false;

    if (env != nullptr && callback != nullptr) {
        try {
            callback.Call({
                Napi::Number::New(env, data->logLevelNumber),
                Napi::String::New(env, data->stringStream->str()),
            });
            called = true;
        } catch (const Napi::Error& e) {
            called = false;
        }
    }

    if (!called && data != nullptr) {
        if (data->logLevelNumber == 2) {
            fputs(data->stringStream->str().c_str(), stderr);
            fflush(stderr);
@@ -936,6 +992,7 @@ Napi::Object registerCallback(Napi::Env env, Napi::Object exports) {
Napi::PropertyDescriptor::Function("systemInfo", systemInfo),
Napi::PropertyDescriptor::Function("setLogger", setLogger),
Napi::PropertyDescriptor::Function("setLoggerLogLevel", setLoggerLogLevel),
Napi::PropertyDescriptor::Function("getGpuVramInfo", getGpuVramInfo),
});
AddonModel::init(exports);
AddonGrammar::init(exports);
99 changes: 99 additions & 0 deletions llama/gpuInfo/cuda-gpu-info.cu
@@ -0,0 +1,99 @@
#include <stddef.h>

#if defined(GPU_INFO_USE_HIPBLAS)
#include <hip/hip_runtime.h>
#include <hipblas/hipblas.h>
#define cudaGetDevice hipGetDevice
#define cudaGetDeviceCount hipGetDeviceCount
#define cudaGetErrorString hipGetErrorString
#define cudaMemGetInfo hipMemGetInfo
#define cudaSetDevice hipSetDevice
#define cudaSuccess hipSuccess
#else
#include <cuda_runtime.h>
#include <cuda.h>
#endif


typedef void (*gpuInfoErrorLogCallback_t)(const char* message);

bool gpuInfoSetCudaDevice(const int device, gpuInfoErrorLogCallback_t errorLogCallback) {
    int current_device;
    auto getDeviceResult = cudaGetDevice(&current_device);

    if (getDeviceResult != cudaSuccess) {
        errorLogCallback(cudaGetErrorString(getDeviceResult));
        return false;
    }

    if (device == current_device) {
        return true;
    }

    const auto setDeviceResult = cudaSetDevice(device);

    if (setDeviceResult != cudaSuccess) {
        errorLogCallback(cudaGetErrorString(setDeviceResult));
        return false;
    }

    return true;
}

bool gpuInfoGetCudaDeviceInfo(int device, size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
    if (!gpuInfoSetCudaDevice(device, errorLogCallback)) {
        return false;
    }

    size_t freeMem;
    size_t totalMem;
    auto getMemInfoResult = cudaMemGetInfo(&freeMem, &totalMem);

    if (getMemInfoResult != cudaSuccess) {
        errorLogCallback(cudaGetErrorString(getMemInfoResult));
        return false;
    }

    *total = totalMem;
    *used = totalMem - freeMem;

    return true;
}

int gpuInfoGetCudaDeviceCount(gpuInfoErrorLogCallback_t errorLogCallback) {
    int deviceCount;
    auto getDeviceCountResult = cudaGetDeviceCount(&deviceCount);

    if (getDeviceCountResult != cudaSuccess) {
        errorLogCallback(cudaGetErrorString(getDeviceCountResult));
        return -1;
    }

    return deviceCount;
}

bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback) {
    int deviceCount = gpuInfoGetCudaDeviceCount(errorLogCallback);

    if (deviceCount < 0) {
        return false;
    }

    size_t usedMem = 0;
    size_t totalMem = 0;

    for (int i = 0; i < deviceCount; i++) {
        size_t deviceUsedMem;
        size_t deviceTotalMem;

        if (!gpuInfoGetCudaDeviceInfo(i, &deviceTotalMem, &deviceUsedMem, errorLogCallback)) {
            return false;
        }

        usedMem += deviceUsedMem;
        totalMem += deviceTotalMem;
    }

    *total = totalMem;
    *used = usedMem;

    return true;
}
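
For reviewers who want to exercise the CUDA path in isolation, a minimal standalone driver could look like the sketch below; the main() harness and the printCudaError callback are illustrative assumptions, not part of this PR.

#include <cstdio>

#include "gpuInfo/cuda-gpu-info.h"

// Illustrative callback matching gpuInfoErrorLogCallback_t.
static void printCudaError(const char* message) {
    fprintf(stderr, "CUDA error: %s\n", message);
}

int main() {
    size_t total = 0;
    size_t used = 0;

    // Sums VRAM across all visible CUDA devices; returns false
    // (after logging through the callback) if any CUDA call fails.
    if (!gpuInfoGetTotalCudaDevicesInfo(&total, &used, printCudaError)) {
        return 1;
    }

    printf("VRAM used: %zu / %zu bytes\n", used, total);
    return 0;
}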
7 changes: 7 additions & 0 deletions llama/gpuInfo/cuda-gpu-info.h
@@ -0,0 +1,7 @@
#pragma once

#include <stddef.h>

typedef void (*gpuInfoErrorLogCallback_t)(const char* message);

bool gpuInfoGetTotalCudaDevicesInfo(size_t * total, size_t * used, gpuInfoErrorLogCallback_t errorLogCallback);
5 changes: 5 additions & 0 deletions llama/gpuInfo/metal-gpu-info.h
@@ -0,0 +1,5 @@
#pragma once

#include <stdint.h>

void get_metal_gpu_info(uint64_t * total, uint64_t * used);
17 changes: 17 additions & 0 deletions llama/gpuInfo/metal-gpu-info.mm
@@ -0,0 +1,17 @@
#include <stdint.h>
#import <Metal/Metal.h>

void get_metal_gpu_info(uint64_t * total, uint64_t * used) {
    id<MTLDevice> device = MTLCreateSystemDefaultDevice();

    if (device) {
        // recommendedMaxWorkingSetSize is Metal's upper bound for this
        // device's working set; currentAllocatedSize is what the device
        // has currently allocated.
        *total = device.recommendedMaxWorkingSetSize;
        *used = device.currentAllocatedSize;
    } else {
        *total = 0;
        *used = 0;
    }

    [device release];
    device = nil;
}
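
Since metal-gpu-info.h only depends on <stdint.h>, the Metal path can be smoke-tested from a plain C++ harness too (linked against the compiled .mm); the main() below is again an illustrative assumption:

#include <cinttypes>
#include <cstdio>

#include "gpuInfo/metal-gpu-info.h"

int main() {
    uint64_t total = 0;
    uint64_t used = 0;

    // Both values come back as 0 when no Metal device is available.
    get_metal_gpu_info(&total, &used);

    printf("VRAM used: %" PRIu64 " / %" PRIu64 " bytes\n", used, total);
    return 0;
}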