Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
/CMakeSettings.json

build/
build-blas/
build-coreml/
build-em/
build-debug/
Expand Down
174 changes: 87 additions & 87 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,10 @@ if (APPLE)
option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
option(WHISPER_METAL_EMBED_LIBRARY "whisper: embed Metal library" OFF)
option(WHISPER_BLAS "whisper: use BLAS" ON)
set (WHISPER_BLAS_VENDOR "Apple" CACHE STRING
"whisper: BLAS library vendor")
else()
option(WHISPER_BLAS "whisper: use BLAS libraries" OFF)
option(WHISPER_BLAS_VENDOR "whisper: BLAS library vendor" Generic)
option(WHISPER_OPENBLAS "whisper: prefer OpenBLAS" OFF)
option(WHISPER_OPENBLAS_INTERFACE64 "whisper: use OpenBLAS w/ 64-bit interface" OFF)
option(WHISPER_CUDA "whisper: support for CUDA" OFF)
option(WHISPER_CUDA_FA_ALL_QUANTS "whisper: compile all quants for FlashAttention" OFF)
option(WHISPER_CUBLAS "whisper: support for CUDA (deprecated)" OFF)
Expand All @@ -93,6 +92,9 @@ else()
option(WHISPER_MKL "whisper: use Intel Math Kernel Library (MKL)" OFF)
option(WHISPER_SYCL "whisper: use SYCL" OFF)
option(WHISPER_SYCL_F16 "whisper: use 16 bit floats for sycl calculations" OFF)
option(WHISPER_BLAS "whisper: use BLAS" OFF)
set (WHISPER_BLAS_VENDOR "Generic" CACHE STRING
"whisper: BLAS library vendor")
endif()

option(WHISPER_PERF "whisper: enable perf timings" OFF)
Expand Down Expand Up @@ -246,93 +248,90 @@ if (APPLE)
endif()
endif()

if (WHISPER_OPENBLAS)
set(WHISPER_BLAS_VENDOR "OpenBLAS")
set(WHISPER_BLAS ON)
# BLA_PKGCONFIG_BLAS is supported since CMake 3.25.
# FindBLAS.cmake pkg-config logic seems incomplete, because when
# BLA_SIZEOF_INTEGER is 8, then it should search for blas64 instead of blas.
# blas.pc/blas64.pc are not always provided, so let's be more specific
# and go with openblas.pc/openblas64.pc if WHISPER_OPENBLAS is on.
if (WHISPER_OPENBLAS_INTERFACE64)
set(WHISPER_BLAS_LIB "openblas64")
else ()
set(WHISPER_BLAS_LIB "openblas")
endif ()
set(BLA_PKGCONFIG_BLAS ${WHISPER_BLAS_LIB})
# OpenBLAS prebuilt libraries for Windows do not have "64" suffix in filename.
# (But .pc file has "64" suffix in filename for USE_64BITINT=1 Windows build.)
if (MSVC)
set(WHISPER_BLAS_LIB "openblas")
endif ()
endif()

if (WHISPER_BLAS)
if (NOT "$ENV{OPENBLAS_PATH}" STREQUAL "")
if (WHISPER_STATIC)
set(WHISPER_BLAS_LIB_PREFIX ${CMAKE_STATIC_LIBRARY_PREFIX})
set(WHISPER_BLAS_LIB_SUFFIX ${CMAKE_STATIC_LIBRARY_SUFFIX})
else ()
if (CMAKE_IMPORT_LIBRARY_SUFFIX)
set(WHISPER_BLAS_LIB_PREFIX ${CMAKE_IMPORT_LIBRARY_PREFIX})
set(WHISPER_BLAS_LIB_SUFFIX ${CMAKE_IMPORT_LIBRARY_SUFFIX})
else ()
set(WHISPER_BLAS_LIB_PREFIX ${CMAKE_SHARED_LIBRARY_PREFIX})
set(WHISPER_BLAS_LIB_SUFFIX ${CMAKE_SHARED_LIBRARY_SUFFIX})
endif ()
endif ()
# OpenBLAS prebuilt libraries hardcode "lib" prefix in filename even on Windows
if (WHISPER_OPENBLAS)
set(WHISPER_BLAS_LIB_PREFIX "lib")
endif ()
message(STATUS "BLAS compatible library path provided")
set(BLAS_LIBRARIES "$ENV{OPENBLAS_PATH}/lib/${WHISPER_BLAS_LIB_PREFIX}${WHISPER_BLAS_LIB}${WHISPER_BLAS_LIB_SUFFIX}")
message(STATUS "Libraries ${BLAS_LIBRARIES}")
set(BLAS_INCLUDE_DIRS "$ENV{OPENBLAS_PATH}/include")
message(STATUS "Include dirs ${BLAS_INCLUDE_DIRS}")
if (NOT EXISTS "${BLAS_LIBRARIES}")
message(FATAL_ERROR "BLAS library was not found. Environment variable OPENBLAS_PATH misdefined.")
endif ()
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_OPENBLAS)
include_directories(${BLAS_INCLUDE_DIRS})
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${BLAS_LIBRARIES})
else ()
if (WHISPER_STATIC)
# FindBLAS.cmake pkg-config logic seems incomplete, because when
# BLA_STATIC is on, then it should use pkg_check_modules_static
# instead of pkg_check_modules.
# Some manual variable overriding may be necessary if you don't
# achieve desired results.
set(BLA_STATIC 1)
endif ()
set(BLA_VENDOR ${WHISPER_BLAS_VENDOR})
if (WHISPER_OPENBLAS_INTERFACE64)
set(BLA_SIZEOF_INTEGER 8)
else ()
set(BLA_SIZEOF_INTEGER 4)
endif()
set(BLA_PREFER_PKGCONFIG 1)
find_package(BLAS)

if(BLAS_FOUND)
message(STATUS "BLAS compatible library found")
message(STATUS "Libraries ${BLAS_LIBRARIES}")
if (NOT DEFINED BLAS_INCLUDE_DIRS)
if (PKGC_BLAS_FOUND)
set(BLAS_INCLUDE_DIRS "${PKGC_BLAS_INCLUDE_DIRS}")
else ()
find_path(BLAS_INCLUDE_DIRS cblas.h /usr/include/openblas)
if (WHISPER_STATIC)
set(BLA_STATIC ON)
endif()
#if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
# set(BLA_SIZEOF_INTEGER 8)
#endif()

set(BLA_VENDOR ${WHISPER_BLAS_VENDOR})
find_package(BLAS)

if (BLAS_FOUND)
message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")

if (("${BLAS_INCLUDE_DIRS}" STREQUAL "") AND NOT (${WHISPER_BLAS_VENDOR} MATCHES "Apple"))
# BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake.
# see https://gitlab.kitware.com/cmake/cmake/-/issues/20268
find_package(PkgConfig REQUIRED)
if (${WHISPER_BLAS_VENDOR} MATCHES "Generic")
pkg_check_modules(DepBLAS REQUIRED blas)
elseif (${WHISPER_BLAS_VENDOR} MATCHES "OpenBLAS")
# As of openblas v0.3.22, the 64-bit is named openblas64.pc
pkg_check_modules(DepBLAS openblas64)
if (NOT DepBLAS_FOUND)
pkg_check_modules(DepBLAS REQUIRED openblas)
endif()
elseif (${WHISPER_BLAS_VENDOR} MATCHES "FLAME")
pkg_check_modules(DepBLAS REQUIRED blis)
elseif (${WHISPER_BLAS_VENDOR} MATCHES "ATLAS")
pkg_check_modules(DepBLAS REQUIRED blas-atlas)
elseif (${WHISPER_BLAS_VENDOR} MATCHES "FlexiBLAS")
pkg_check_modules(DepBLAS REQUIRED flexiblas_api)
elseif (${WHISPER_BLAS_VENDOR} MATCHES "Intel")
# all Intel* libraries share the same include path
pkg_check_modules(DepBLAS REQUIRED mkl-sdl)
elseif (${WHISPER_BLAS_VENDOR} MATCHES "NVHPC")
# this doesn't provide pkg-config
# suggest to assign BLAS_INCLUDE_DIRS on your own
if ("${NVHPC_VERSION}" STREQUAL "")
message(WARNING "Better to set NVHPC_VERSION")
else()
set(DepBLAS_FOUND ON)
set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include")
endif()
endif()
message(STATUS "Include dirs ${BLAS_INCLUDE_DIRS}")
set(WHISPER_EXTRA_FLAGS ${WHISPER_EXTRA_FLAGS} -DGGML_USE_OPENBLAS)
include_directories(${BLAS_INCLUDE_DIRS})
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${BLAS_LIBRARIES})
else()
message(FATAL_ERROR "BLAS library was not found")
if (DepBLAS_FOUND)
set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS})
else()
message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically"
" detected by pkgconfig, trying to find cblas.h from possible paths...")
find_path(BLAS_INCLUDE_DIRS
NAMES cblas.h
HINTS
/usr/include
/usr/local/include
/usr/include/openblas
/opt/homebrew/opt/openblas/include
/usr/local/opt/openblas/include
/usr/include/x86_64-linux-gnu/openblas/include
)
endif()
endif()
endif ()
endif ()

message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}")

add_compile_options(${BLAS_LINKER_FLAGS})

add_compile_definitions(GGML_USE_BLAS)

# Define GGML_BLAS_USE_MKL when the detected BLAS include path mentions "mkl"
# and the requested vendor is "Generic" or one of the "Intel*" variants.
# Every expansion is quoted: BLAS_INCLUDE_DIRS may be empty (the include-dir
# lookup is skipped for the "Apple" vendor) or hold a ;-separated list, and an
# unquoted expansion would then hand if() zero or several arguments where
# MATCHES expects exactly one string.
if ("${BLAS_INCLUDE_DIRS}" MATCHES "mkl" AND ("${WHISPER_BLAS_VENDOR}" MATCHES "Generic" OR "${WHISPER_BLAS_VENDOR}" MATCHES "Intel"))
    add_compile_definitions(GGML_BLAS_USE_MKL)
endif()

set(GGML_HEADERS_BLAS ggml-blas.h)
set(GGML_SOURCES_BLAS ggml-blas.cpp)

set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} ${BLAS_LIBRARIES})
set(WHISPER_EXTRA_INCLUDES ${WHISPER_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS})
else()
message(WARNING "BLAS not found, please refer to "
"https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
" to set correct WHISPER_BLAS_VENDOR")
endif()
endif()

if (WHISPER_MKL)
find_package(MKL CONFIG REQUIRED PATHS $ENV{MKLROOT})
Expand Down Expand Up @@ -712,6 +711,7 @@ add_library(${TARGET}
${GGML_SOURCES_CUDA}
${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL}
${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM}
${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS}
whisper.h
whisper.cpp
)
Expand Down
65 changes: 38 additions & 27 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ CXXV := $(shell $(CXX) --version | head -n 1)
# Mac OS + Arm can report x86_64
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
ifeq ($(UNAME_S),Darwin)
WHISPER_NO_OPENMP := 1

ifneq ($(UNAME_P),arm)
SYSCTL_M := $(shell sysctl -n hw.optional.arm64)
ifeq ($(SYSCTL_M),1)
Expand Down Expand Up @@ -222,10 +224,14 @@ endif
ifndef WHISPER_NO_ACCELERATE
# Mac M1 - include Accelerate framework
ifeq ($(UNAME_S),Darwin)
CFLAGS += -DGGML_USE_ACCELERATE
CFLAGS += -DACCELERATE_NEW_LAPACK
CFLAGS += -DACCELERATE_LAPACK_ILP64
LDFLAGS += -framework Accelerate
CFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
CFLAGS += -DACCELERATE_NEW_LAPACK
CFLAGS += -DACCELERATE_LAPACK_ILP64
CXXFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
CXXFLAGS += -DACCELERATE_NEW_LAPACK
CXXFLAGS += -DACCELERATE_LAPACK_ILP64
LDFLAGS += -framework Accelerate
WHISPER_OBJ += ggml-blas.o
endif
endif

Expand All @@ -248,29 +254,31 @@ ifndef WHISPER_NO_METAL
endif
endif

ifneq ($(filter-out 0,$(WHISPER_OPENBLAS)),) # OpenBLAS
WHISPER_OPENBLAS_INTERFACE64 ?= 0 # use 32-bit interface by default
ifneq ($(filter-out 0,$(WHISPER_OPENBLAS_INTERFACE64)),)
WHISPER_BLAS_LIB := openblas64
else
WHISPER_BLAS_LIB := openblas
endif
ifneq ($(OPENBLAS_PATH),)
WHISPER_BLAS_CFLAGS := -I$(OPENBLAS_PATH)/include
WHISPER_BLAS_LDFLAGS := -L$(OPENBLAS_PATH)/lib -l$(WHISPER_BLAS_LIB)
else
WHISPER_BLAS_LIB_PC_EXISTS := $(shell pkg-config --exists $(WHISPER_BLAS_LIB) && echo 1)
ifneq ($(filter-out 0,$(WHISPER_BLAS_LIB_PC_EXISTS)),)
WHISPER_BLAS_CFLAGS := $(shell pkg-config --cflags $(WHISPER_BLAS_LIB))
WHISPER_BLAS_LDFLAGS := $(shell pkg-config --libs $(WHISPER_BLAS_LIB))
else
WHISPER_BLAS_CFLAGS := -I/usr/include/openblas
WHISPER_BLAS_LDFLAGS := -l$(WHISPER_BLAS_LIB)
endif
endif
CFLAGS += $(WHISPER_BLAS_CFLAGS) -DGGML_USE_OPENBLAS
LDFLAGS += $(WHISPER_BLAS_LDFLAGS)
endif
# Enable OpenMP unless WHISPER_NO_OPENMP is defined.
ifndef WHISPER_NO_OPENMP
# NOTE(review): the GGML_USE_OPENMP define is added to CXXFLAGS only, while
# -fopenmp is added to both CFLAGS and CXXFLAGS. C sources compiled with
# $(CFLAGS) will therefore not see the define -- confirm this is intended.
CXXFLAGS += -DGGML_USE_OPENMP
CFLAGS += -fopenmp
CXXFLAGS += -fopenmp
endif # WHISPER_NO_OPENMP

# OpenBLAS: compile and link flags come from the `openblas` pkg-config module,
# and ggml-blas.o is added to the objects to build.
# Note: `ifdef` is true for any non-empty value, so WHISPER_OPENBLAS=0 also
# enables this block.
ifdef WHISPER_OPENBLAS
# Include paths (-I) and the GGML_USE_BLAS define go to the C++ flags only;
# the remaining pkg-config cflags go to the C flags.
CXXFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
CFLAGS += $(shell pkg-config --cflags-only-other openblas)
LDFLAGS += $(shell pkg-config --libs openblas)
WHISPER_OBJ += ggml-blas.o
endif # WHISPER_OPENBLAS

# OpenBLAS via the `openblas64` pkg-config module: same flag layout as the
# WHISPER_OPENBLAS block, but querying `openblas64` instead of `openblas`.
# Note: defining both WHISPER_OPENBLAS and WHISPER_OPENBLAS64 appends
# ggml-blas.o to WHISPER_OBJ twice.
ifdef WHISPER_OPENBLAS64
CXXFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
LDFLAGS += $(shell pkg-config --libs openblas64)
WHISPER_OBJ += ggml-blas.o
endif # WHISPER_OPENBLAS64

# BLIS: no pkg-config lookup here; include and library search paths are
# hard-coded to the /usr/local and /usr locations listed below.
ifdef WHISPER_BLIS
CXXFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
LDFLAGS += -lblis -L/usr/local/lib
WHISPER_OBJ += ggml-blas.o
endif # WHISPER_BLIS

ifdef WHISPER_CUBLAS
# WHISPER_CUBLAS is deprecated and will be removed in the future
Expand Down Expand Up @@ -402,6 +410,9 @@ ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h
$(CC) $(CFLAGS) -c $< -o $@

ggml-blas.o: ggml-blas.cpp ggml-blas.h
$(CXX) $(CXXFLAGS) -c $< -o $@

WHISPER_OBJ += ggml.o ggml-alloc.o ggml-backend.o ggml-quants.o

whisper.o: whisper.cpp whisper.h whisper-mel.hpp ggml.h ggml-cuda.h
Expand Down
2 changes: 1 addition & 1 deletion examples/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ struct gpt_params {
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
int32_t n_predict = 200; // new tokens to predict
int32_t n_parallel = 1; // number of parallel streams
int32_t n_batch = 8; // batch size for prompt processing
int32_t n_batch = 32; // batch size for prompt processing
int32_t n_ctx = 2048; // context size (this is the KV cache max size)
int32_t n_gpu_layers = 0; // number of layers to offload to the GPU

Expand Down
15 changes: 13 additions & 2 deletions ggml-backend.c
Original file line number Diff line number Diff line change
Expand Up @@ -1706,14 +1706,16 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
bool backend_ids_changed = false;
for (int i = 0; i < sched->graph->n_nodes; i++) {
if (sched->node_backend_ids[i] != sched->prev_node_backend_ids[i]) {
if (sched->node_backend_ids[i] != sched->prev_node_backend_ids[i] &&
sched->bufts[sched->node_backend_ids[i]] != sched->bufts[sched->prev_node_backend_ids[i]]) {
backend_ids_changed = true;
break;
}
}
if (!backend_ids_changed) {
for (int i = 0; i < sched->graph->n_leafs; i++) {
if (sched->leaf_backend_ids[i] != sched->prev_leaf_backend_ids[i]) {
if (sched->leaf_backend_ids[i] != sched->prev_leaf_backend_ids[i] &&
sched->bufts[sched->leaf_backend_ids[i]] != sched->bufts[sched->prev_leaf_backend_ids[i]]) {
backend_ids_changed = true;
break;
}
Expand Down Expand Up @@ -1977,6 +1979,15 @@ int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched) {
return sched->n_copies;
}

// Returns the scheduler's backend count (the value stored in sched->n_backends).
int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched) {
    const int n_backends = sched->n_backends;
    return n_backends;
}

// Returns the backend stored at index `i` of the scheduler's backend array.
// `i` must lie in [0, sched->n_backends); GGML_ASSERT enforces this.
ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i) {
    // The assert expression is kept verbatim because it is part of the failure message.
    GGML_ASSERT(i >= 0 && i < sched->n_backends);
    ggml_backend_t selected = sched->backends[i];
    return selected;
}

size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend) {
int backend_index = ggml_backend_sched_backend_id(sched, backend);
GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
Expand Down
3 changes: 3 additions & 0 deletions ggml-backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,9 @@ extern "C" {
// Initialize backend buffers from a measure graph
GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);

// Get the number of backends held by the scheduler
GGML_API int ggml_backend_sched_get_n_backends(ggml_backend_sched_t sched);
// Get the backend at index i; i must satisfy 0 <= i < ggml_backend_sched_get_n_backends(sched)
GGML_API ggml_backend_t ggml_backend_sched_get_backend(ggml_backend_sched_t sched, int i);

// Get the number of splits of the last graph
GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched);
Expand Down
Loading