Skip to content

Commit 76318f0

Browse files
ggerganov authored and iThalay committed
sync : ggml (VMM, sync-ggml-am, dotprod ARM fixes, CUDA fixes) (ggml-org#1691)
* scripts : add sync-ggml-am.sh
* sync : ggml (VMM, ARM dot prod fix, etc.)
* build : fix CUDA build
* ggml : fix some mul mat cases + add tests for src1 F16

ggml-org/ggml@dbd0295
1 parent 5c1422d commit 76318f0

File tree

9 files changed

+721
-767
lines changed

9 files changed

+721
-767
lines changed

CMakeLists.txt

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -218,11 +218,17 @@ if (WHISPER_CUBLAS)
218218
add_compile_definitions(GGML_USE_CUBLAS)
219219

220220
if (WHISPER_STATIC)
221-
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
221+
if (WIN32)
222+
# As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library
223+
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
224+
else ()
225+
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
226+
endif()
222227
else()
223228
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
224229
endif()
225230

231+
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cuda_driver)
226232
else()
227233
message(FATAL_ERROR "cuBLAS not found")
228234
endif()

Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -206,7 +206,7 @@ ifdef WHISPER_CUBLAS
206206

207207
CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
208208
CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
209-
LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib
209+
LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib
210210
WHISPER_OBJ += ggml-cuda.o
211211
NVCC = nvcc
212212
NVCCFLAGS = --forward-unknown-to-host-compiler -arch=$(CUDA_ARCH_FLAG)

extra/sync-ggml-am.sh

Lines changed: 138 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,138 @@
1+
#!/bin/bash
2+
#
3+
# Synchronize ggml changes to whisper.cpp
4+
#
5+
# Usage:
6+
#
7+
# $ cd /path/to/whisper.cpp
8+
# $ ./extra/sync-ggml-am.sh
9+
#
10+
11+
set -e
12+
13+
sd=$(dirname $0)
14+
cd $sd/../
15+
16+
SRC_WHISPER=$(pwd)
17+
SRC_GGML=$(cd ../ggml; pwd)
18+
19+
if [ ! -d $SRC_GGML ]; then
20+
echo "ggml not found at $SRC_GGML"
21+
exit 1
22+
fi
23+
24+
lc=$(cat $SRC_WHISPER/extra/sync-ggml.last)
25+
echo "Syncing ggml changes since commit $lc"
26+
27+
cd $SRC_GGML
28+
29+
git log --oneline $lc..HEAD
30+
31+
git format-patch $lc --stdout -- \
32+
include/ggml/ggml*.h \
33+
src/ggml*.h \
34+
src/ggml*.c \
35+
src/ggml*.cpp \
36+
src/ggml*.m \
37+
src/ggml*.metal \
38+
src/ggml*.cu \
39+
tests/test-opt.cpp \
40+
tests/test-grad0.cpp \
41+
tests/test-quantize-fns.cpp \
42+
tests/test-quantize-perf.cpp \
43+
tests/test-backend-ops.cpp \
44+
> $SRC_WHISPER/ggml-src.patch
45+
46+
# delete files if empty
47+
if [ ! -s $SRC_WHISPER/ggml-src.patch ]; then
48+
rm -v $SRC_WHISPER/ggml-src.patch
49+
fi
50+
51+
cd $SRC_WHISPER
52+
53+
if [ -f $SRC_WHISPER/ggml-src.patch ]; then
54+
# replace PR numbers
55+
#
56+
# Subject: some text (#1234)
57+
# Subject: some text (ggml/1234)
58+
cat ggml-src.patch | sed -e 's/^Subject: \(.*\) (#\([0-9]*\))/Subject: \1 (ggml\/\2)/' > ggml-src.patch.tmp
59+
mv ggml-src.patch.tmp ggml-src.patch
60+
61+
cat ggml-src.patch | sed -e 's/^\(.*\) (#\([0-9]*\))$/\1 (ggml\/\2)/' > ggml-src.patch.tmp
62+
mv ggml-src.patch.tmp ggml-src.patch
63+
64+
# replace filenames:
65+
#
66+
# src/ggml.c -> ggml.c
67+
# src/ggml-alloc.c -> ggml-alloc.c
68+
# src/ggml-backend-impl.h -> ggml-backend-impl.h
69+
# src/ggml-backend.c -> ggml-backend.c
70+
# src/ggml-cuda.cu -> ggml-cuda.cu
71+
# src/ggml-cuda.h -> ggml-cuda.h
72+
# src/ggml-impl.h -> ggml-impl.h
73+
# src/ggml-metal.h -> ggml-metal.h
74+
# src/ggml-metal.m -> ggml-metal.m
75+
# src/ggml-metal.metal -> ggml-metal.metal
76+
# src/ggml-mpi.h -> ggml-mpi.h
77+
# src/ggml-mpi.c -> ggml-mpi.c
78+
# src/ggml-opencl.cpp -> ggml-opencl.cpp
79+
# src/ggml-opencl.h -> ggml-opencl.h
80+
# src/ggml-quants.c -> ggml-quants.c
81+
# src/ggml-quants.h -> ggml-quants.h
82+
# include/ggml/ggml.h -> ggml.h
83+
# include/ggml/ggml-alloc.h -> ggml-alloc.h
84+
# include/ggml/ggml-backend.h -> ggml-backend.h
85+
#
86+
# examples/common.h -> examples/common.h
87+
# examples/common.cpp -> examples/common.cpp
88+
# examples/common-ggml.h -> examples/common-ggml.h
89+
# examples/common-ggml.cpp -> examples/common-ggml.cpp
90+
#
91+
# examples/whisper/whisper.h -> whisper.h
92+
# examples/whisper/whisper.cpp -> whisper.cpp
93+
# examples/whisper/main.cpp -> examples/main/main.cpp
94+
# examples/whisper/quantize.cpp -> examples/quantize/quantize.cpp
95+
96+
cat ggml-src.patch | sed \
97+
-e 's/src\/ggml\.c/ggml.c/g' \
98+
-e 's/src\/ggml-alloc\.c/ggml-alloc.c/g' \
99+
-e 's/src\/ggml-backend-impl\.h/ggml-backend-impl.h/g' \
100+
-e 's/src\/ggml-backend\.c/ggml-backend.c/g' \
101+
-e 's/src\/ggml-cuda\.cu/ggml-cuda.cu/g' \
102+
-e 's/src\/ggml-cuda\.h/ggml-cuda.h/g' \
103+
-e 's/src\/ggml-impl\.h/ggml-impl.h/g' \
104+
-e 's/src\/ggml-metal\.h/ggml-metal.h/g' \
105+
-e 's/src\/ggml-metal\.m/ggml-metal.m/g' \
106+
-e 's/src\/ggml-metal\.metal/ggml-metal.metal/g' \
107+
-e 's/src\/ggml-mpi\.h/ggml-mpi.h/g' \
108+
-e 's/src\/ggml-mpi\.c/ggml-mpi.c/g' \
109+
-e 's/src\/ggml-opencl\.cpp/ggml-opencl.cpp/g' \
110+
-e 's/src\/ggml-opencl\.h/ggml-opencl.h/g' \
111+
-e 's/src\/ggml-quants\.c/ggml-quants.c/g' \
112+
-e 's/src\/ggml-quants\.h/ggml-quants.h/g' \
113+
-e 's/include\/ggml\/ggml\.h/ggml.h/g' \
114+
-e 's/include\/ggml\/ggml-alloc\.h/ggml-alloc.h/g' \
115+
-e 's/include\/ggml\/ggml-backend\.h/ggml-backend.h/g' \
116+
-e 's/examples\/common\.h/examples\/common.h/g' \
117+
-e 's/examples\/common\.cpp/examples\/common.cpp/g' \
118+
-e 's/examples\/common-ggml\.h/examples\/common-ggml.h/g' \
119+
-e 's/examples\/common-ggml\.cpp/examples\/common-ggml.cpp/g' \
120+
-e 's/examples\/whisper\/whisper\.h/whisper.h/g' \
121+
-e 's/examples\/whisper\/whisper\.cpp/whisper.cpp/g' \
122+
-e 's/examples\/whisper\/main\.cpp/examples\/main\/main.cpp/g' \
123+
-e 's/examples\/whisper\/quantize\.cpp/examples\/quantize\/quantize.cpp/g' \
124+
> ggml-src.patch.tmp
125+
mv ggml-src.patch.tmp ggml-src.patch
126+
127+
git am ggml-src.patch
128+
129+
rm -v $SRC_WHISPER/ggml-src.patch
130+
fi
131+
132+
# update last commit
133+
cd $SRC_GGML
134+
git log -1 --format=%H > $SRC_WHISPER/extra/sync-ggml.last
135+
136+
echo "Done"
137+
138+
exit 0

extra/sync-ggml.last

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
1467a4eb71bdb5ac316d248a7f3f26cdadc56b68

ggml-backend.c

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -297,7 +297,7 @@ static void ggml_backend_registry_init(void) {
297297
void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
298298
GGML_ASSERT(ggml_backend_registry_count < GGML_MAX_BACKENDS_REG);
299299

300-
int id = ggml_backend_registry_count;
300+
size_t id = ggml_backend_registry_count;
301301

302302
ggml_backend_registry[id] = (struct ggml_backend_reg) {
303303
/* .name = */ {0},
@@ -330,6 +330,8 @@ size_t ggml_backend_reg_find_by_name(const char * name) {
330330
return i;
331331
}
332332
}
333+
334+
// not found
333335
return SIZE_MAX;
334336
}
335337

@@ -340,15 +342,15 @@ ggml_backend_t ggml_backend_reg_init_backend_from_str(const char * backend_str)
340342
const char * params = strchr(backend_str, ':');
341343
char backend_name[128];
342344
if (params == NULL) {
343-
strcpy(backend_name, backend_str);
345+
snprintf(backend_name, sizeof(backend_name), "%s", backend_str);
344346
params = "";
345347
} else {
346-
strncpy(backend_name, backend_str, params - backend_str);
347-
backend_name[params - backend_str] = '\0';
348+
snprintf(backend_name, sizeof(backend_name), "%.*s", (int)(params - backend_str), backend_str);
348349
params++;
349350
}
350351

351352
size_t backend_i = ggml_backend_reg_find_by_name(backend_name);
353+
352354
if (backend_i == SIZE_MAX) {
353355
fprintf(stderr, "%s: backend %s not found\n", __func__, backend_name);
354356
return NULL;
@@ -396,18 +398,12 @@ static void ggml_backend_cpu_buffer_free_buffer(ggml_backend_buffer_t buffer) {
396398
}
397399

398400
static void ggml_backend_cpu_buffer_set_tensor(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
399-
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
400-
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
401-
402401
memcpy((char *)tensor->data + offset, data, size);
403402

404403
GGML_UNUSED(buffer);
405404
}
406405

407406
static void ggml_backend_cpu_buffer_get_tensor(ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
408-
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
409-
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
410-
411407
memcpy(data, (const char *)tensor->data + offset, size);
412408

413409
GGML_UNUSED(buffer);
@@ -618,10 +614,14 @@ static void ggml_backend_cpu_graph_compute(ggml_backend_t backend, struct ggml_c
618614
}
619615

620616
static bool ggml_backend_cpu_supports_op(ggml_backend_t backend, const struct ggml_tensor * op) {
621-
return true;
617+
switch (op->op) {
618+
case GGML_OP_MUL_MAT:
619+
return op->src[1]->type == GGML_TYPE_F32 || op->src[1]->type == ggml_internal_get_type_traits(op->src[0]->type).vec_dot_type;
620+
default:
621+
return true;
622+
}
622623

623624
GGML_UNUSED(backend);
624-
GGML_UNUSED(op);
625625
}
626626

627627
static struct ggml_backend_i cpu_backend_i = {

0 commit comments

Comments
 (0)