Merged

Commits (64)
04ff011
tests: add gradient tests for all backends (ggml/932)
JohannesGaessler Sep 3, 2024
1af890d
vulkan: correctly report support for OP_CONT (ggml/946)
smeso Sep 6, 2024
ae716f6
vulkan: add dryrun support to sin and cos ops (ggml/947)
smeso Sep 6, 2024
e8f550f
vulkan: fix compilation with GGML_VULKAN_DEBUG=ON (ggml/948)
smeso Sep 6, 2024
7883772
Threadpool: take 2 (llama/8672)
fmz Aug 29, 2024
53441ef
llama : support RWKV v6 models (llama/8980)
MollySophia Sep 1, 2024
e9e6641
ggml : add pthread includes on FreeBSD (llama/9258)
Sep 2, 2024
00f20db
Fix DMMV dequantization (llama/9279)
OuadiElfarouki Sep 4, 2024
d3393b1
ggml : AVX2 support for Q4_0_8_8 (llama/8713)
Srihari-mcw Sep 4, 2024
197141b
cuda : fix defrag with quantized KV (llama/9319)
slaren Sep 5, 2024
149112c
ggml-quants : ternary packing for TriLMs and BitNet b1.58 (llama/8151)
compilade Sep 6, 2024
376d0ec
Improve Vulkan shader build system (llama/9239)
mtavenrath Sep 6, 2024
37d02e0
ggml : fix missing `cpu_set_t` on emscripten (llama/9336)
ngxson Sep 7, 2024
9c9d7aa
ggml : always check bounds on get_rows operations (llama/9354)
slaren Sep 7, 2024
8de9666
metal : update support condition for im2col + fix warning (llama/0)
ggerganov Sep 8, 2024
329cc46
examples : add null threadpool args where needed (ggml/0)
ggerganov Sep 8, 2024
405264e
ggml/examples: add backend support for numerical optimization (ggml/949)
JohannesGaessler Sep 20, 2024
875a5ea
add check malloc result on device (llama/9346)
NeoZhangJianyu Sep 8, 2024
16f5517
cuda : fix FA Q src index (1 -> 0) (llama/9374)
ggerganov Sep 8, 2024
793bb46
Overlap cmdbuffer creation and cmdbuffer execution in Vulkan backend …
mtavenrath Sep 8, 2024
5924d9a
CUDA: fix variable name conflict for Windows build (llama/9382)
JohannesGaessler Sep 9, 2024
dbd164c
ggml : vector length agnostic SVE support (llama/9290)
Vithulep Sep 9, 2024
c00ff52
rpc : fix segfault with nkvo (llama/9389)
rgerganov Sep 9, 2024
705f30f
metal : fix compile warning with GGML_METAL_NDEBUG (llama/0)
ggerganov Sep 10, 2024
07ad5f9
sycl : update support conditions (llama/9394)
Alcpz Sep 11, 2024
0b65e7e
musa: remove Clang builtins mapping (llama/9421)
yeahdongcn Sep 11, 2024
66e69f6
CUDA: fix --split-mode row race condition (llama/9413)
JohannesGaessler Sep 11, 2024
87d979d
cann: Fix error when running a non-exist op (llama/9424)
bachelor-dou Sep 12, 2024
307c221
riscv : modify Makefile and add a RISCV_VECT to print log info (llama…
Tameem-10xE Sep 12, 2024
c614d88
cann: Add host buffer type for Ascend NPU (llama/9406)
bachelor-dou Sep 12, 2024
87b0f07
cmake : use list(APPEND ...) instead of set() + dedup linker (llama/9…
ggerganov Sep 14, 2024
aacf70d
ggml : ggml_type_name return "NONE" for invalid values (llama/9458)
ykhrustalev Sep 14, 2024
2c5adec
cmake : try to fix sycl+intel build (llama/9487)
Xarbirus Sep 15, 2024
c23f6e1
cmake : correct order of sycl flags (llama/9497)
Xarbirus Sep 15, 2024
3089f61
common : reimplement logging (llama/9418)
ggerganov Sep 15, 2024
372fb14
metal : handle zero-sized allocs (llama/9466)
ggerganov Sep 16, 2024
3f0861e
ggml : IQ4_NL sgemm + Q4_0 AVX optimization (llama/9422)
netrunnereve Sep 16, 2024
224fdd0
cmake : do not hide GGML options + rename option (llama/9465)
ggerganov Sep 16, 2024
2d520d9
ggml : link MATH_LIBRARY not by its full path (llama/9339)
Xarbirus Sep 16, 2024
0b98288
threadpool : skip polling for unused threads (llama/9461)
max-krasnyansky Sep 17, 2024
dd61e4a
ggml : fix n_threads_cur initialization with one thread (llama/9538)
slaren Sep 18, 2024
0dfb8b4
CUDA: fix sum.cu compilation for CUDA < 11.7 (llama/9562)
JohannesGaessler Sep 20, 2024
97e7971
ggml : fix trailing whitespace (llama/0)
ggerganov Sep 20, 2024
937f3aa
ggml : fix builds (llama/0)
ggerganov Sep 20, 2024
72e72e5
ggml : refactoring (llama/#0)
ggerganov Sep 20, 2024
eeea459
examples : adapt to ggml.h changes (ggml/0)
ggerganov Sep 20, 2024
6fb6220
Update CUDA graph on scale change plus clear nodes/params (llama/9550)
agray3 Sep 21, 2024
6207ec1
ggml-alloc : fix list of allocated tensors with GGML_ALLOCATOR_DEBUG …
slaren Sep 21, 2024
f4cb5d5
RWKV v6: RWKV_WKV op CUDA implementation (llama/9454)
MollySophia Sep 22, 2024
688ce6d
CUDA: enable Gemma FA for HIP/Pascal (llama/9581)
JohannesGaessler Sep 22, 2024
174af54
Fix merge error in #9454 (llama/9589)
MollySophia Sep 22, 2024
6e217a4
musa: enable building fat binaries, enable unified memory, and disabl…
yeahdongcn Sep 22, 2024
c3d1203
Revert "[SYCL] fallback mmvq (ggml/9088)" (llama/9579)
Sep 23, 2024
572c323
metal : use F32 prec for K*Q in vec FA (llama/9595)
ggerganov Sep 23, 2024
3bb490a
ggml : AVX512 gemm for Q4_0_8_8 (llama/9532)
Srihari-mcw Sep 23, 2024
8f4a846
threads: improve ggml_barrier scaling with large number of threads (l…
max-krasnyansky Sep 23, 2024
f841e81
cuda: add q8_0->f32 cpy operation (llama/9571)
Nekotekina Sep 24, 2024
5d85e06
threads: fix msvc build without openmp (llama/9615)
max-krasnyansky Sep 24, 2024
2c42639
log : add CONT level for continuing previous log entry (llama/9610)
ggerganov Sep 24, 2024
18beafe
ggml : add AVX512DQ requirement for AVX512 builds (llama/9622)
EZForever Sep 24, 2024
0be33ec
talk-llama : sync llama.cpp
ggerganov Sep 24, 2024
73d4421
sync : ggml
ggerganov Sep 24, 2024
5043478
ggml : add ggml-cpu-impl.h (skip) (#0)
ggerganov Sep 24, 2024
684e95c
make : remove "talk" target until updated
ggerganov Sep 24, 2024
Makefile (12 changes: 7 additions & 5 deletions)
@@ -141,8 +141,8 @@ else
command \
stream \
lsp \
talk \
talk-llama
# talk (TODO: disabled)
endif

default: $(BUILD_TARGETS)
@@ -1080,10 +1080,12 @@ lsp: examples/lsp/lsp.cpp \
$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)

talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp \
$(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
# TODO: disabled until update
# https://github.com/ggerganov/whisper.cpp/issues/1818
#talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp \
# $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
# $(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
# $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)

talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp examples/talk-llama/llama-vocab.cpp examples/talk-llama/llama-grammar.cpp examples/talk-llama/llama-sampling.cpp examples/talk-llama/unicode.cpp examples/talk-llama/unicode-data.cpp \
$(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
examples/CMakeLists.txt (10 changes: 6 additions & 4 deletions)
@@ -102,8 +102,8 @@ if (EMSCRIPTEN)
set_target_properties(libstream PROPERTIES FOLDER "libs")
add_subdirectory(command.wasm)
set_target_properties(libcommand PROPERTIES FOLDER "libs")
add_subdirectory(talk.wasm)
set_target_properties(libtalk PROPERTIES FOLDER "libs")
#add_subdirectory(talk.wasm)
#set_target_properties(libtalk PROPERTIES FOLDER "libs")
add_subdirectory(bench.wasm)
set_target_properties(libbench PROPERTIES FOLDER "libs")
elseif(CMAKE_JS_VERSION)
@@ -127,8 +127,10 @@ endif (WHISPER_SDL2)
add_subdirectory(quantize)
set_target_properties(quantize PROPERTIES FOLDER "examples")
if (WHISPER_SDL2)
add_subdirectory(talk)
set_target_properties(talk PROPERTIES FOLDER "examples")
# TODO: disabled until update
# https://github.com/ggerganov/whisper.cpp/issues/1818
#add_subdirectory(talk)
#set_target_properties(talk PROPERTIES FOLDER "examples")
add_subdirectory(talk-llama)
set_target_properties(talk-llama PROPERTIES FOLDER "examples")
add_subdirectory(lsp)
examples/common-ggml.cpp (2 changes: 2 additions & 0 deletions)
@@ -215,6 +215,8 @@ bool ggml_common_quantize_0(
case GGML_TYPE_Q4_0_4_4:
case GGML_TYPE_Q4_0_4_8:
case GGML_TYPE_Q4_0_8_8:
case GGML_TYPE_TQ1_0:
case GGML_TYPE_TQ2_0:
case GGML_TYPE_COUNT:
{
fprintf(stderr, "%s: unsupported quantization type %d (%s)\n", __func__, ttype, ggml_type_name((ggml_type) ttype));
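For context, the two new cases sit in the switch over the requested target type inside ggml_common_quantize_0: the ternary types TQ1_0 and TQ2_0 (introduced by commit 149112c, "ternary packing for TriLMs and BitNet b1.58") are grouped with the other types this helper refuses to quantize, so requesting them fails loudly instead of writing a broken model file. A minimal C sketch of the guard pattern, simplified and using a hypothetical helper name is_quantizable_target (the real function also converts tensor data and handles file I/O, and lists every type explicitly rather than using a default):

    #include <stdbool.h>
    #include <stdio.h>
    #include "ggml.h"

    // Hypothetical helper mirroring the guard above: target types without a
    // quantization path in ggml_common_quantize_0 are rejected up front.
    static bool is_quantizable_target(enum ggml_type ttype) {
        switch (ttype) {
            case GGML_TYPE_Q4_0:   // a few supported targets, for illustration
            case GGML_TYPE_Q5_0:
            case GGML_TYPE_Q8_0:
                return true;
            case GGML_TYPE_TQ1_0:  // ternary types from commit 149112c: defined
            case GGML_TYPE_TQ2_0:  // in ggml, but not quantizable via this helper
            default:
                fprintf(stderr, "unsupported quantization type %d (%s)\n",
                        ttype, ggml_type_name(ttype));
                return false;
        }
    }

Enumerating every ggml_type explicitly lets the compiler flag this switch whenever ggml grows a new type, which is presumably why TQ1_0 and TQ2_0 had to be added here even though they are simply rejected.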