Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
70 commits
Select commit Hold shift + click to select a range
05b3d1c
scripts : sync new files (#0)
ggerganov Aug 8, 2024
4fce5d4
cmake : only enable GGML_NATIVE and x86 flags if not crosscompiling (…
iboB Jul 12, 2024
f0de462
vulkan : initialize vk_buffer_struct members to VK_NULL_HANDLE (ggml/…
neobrain Jul 20, 2024
52be7d6
ggml: add support for float16 input tensors in pooling operations (gg…
vanaka11 Jul 22, 2024
bcf05d5
ggml : loop tiling optimizations for scalar path (ggml/898)
heshpdx Jul 25, 2024
12e31b1
sycl : fix powf call in device code (llama/8368)
Alcpz Jul 8, 2024
43816f9
sycl : Reenabled mmvq path for the SYCL Nvidia Backend (llama/8372)
Alcpz Jul 9, 2024
0fc4674
ggml : add AArch64 optimized GEMV and GEMM Q4 kernels (llama/5780)
Dibakar Jul 10, 2024
dc9508d
ggml : move sgemm sources to llamafile subfolder (llama/8394)
ggerganov Jul 10, 2024
6b733da
Use multi_ptr to clean up deprecated warnings (llama/8256)
Jul 10, 2024
aed5f19
CUDA: optimize and refactor MMQ (llama/8416)
JohannesGaessler Jul 11, 2024
c7bb780
cuda : suppress 'noreturn' warn in no_device_code (llama/8414)
danbev Jul 11, 2024
e298c16
ggml : add NVPL BLAS support (ggml/8329) (llama/8425)
nicholaiTukanov Jul 11, 2024
c46ee3e
fix the mul_mat_id ut issues (llama/8427)
ClarkChin08 Jul 12, 2024
33fe98b
ggml : minor naming changes (llama/8433)
ggerganov Jul 12, 2024
c4b504b
metal : template-ify some of the kernels (llama/8447)
ggerganov Jul 13, 2024
e88d9f6
vulkan : cmake integration (llama/8119)
bandoti Jul 13, 2024
df07dae
Vulkan MMQ Fix (llama/8479)
0cc4m Jul 15, 2024
fb2bd6f
add concat through dim 1/2 (llama/8483)
airMeng Jul 15, 2024
b946bf8
Refactor lora adapter support (llama/8332)
ngxson Jul 15, 2024
3c63419
make/cmake: add missing force MMQ/cuBLAS for HIP (llama/8515)
JohannesGaessler Jul 16, 2024
83f9da7
Add Ascend NPU backend (llama/6035)
hipudding Jul 17, 2024
53b9f09
cmake : install all ggml public headers (llama/8480)
65a Jul 18, 2024
c0760e2
CUDA: fix partial offloading for ne0 % 256 != 0 (llama/8572)
JohannesGaessler Jul 18, 2024
05a9656
ggml : add friendlier error message to fopen errors (llama/8575)
HanClinto Jul 19, 2024
027863f
ggml : fix quant dot product with odd number of blocks (llama/8549)
slaren Jul 19, 2024
a7e1d3a
gguf : handle null name during init (llama/8587)
ggerganov Jul 20, 2024
54d968f
CUDA: MMQ code deduplication + iquant support (llama/8495)
JohannesGaessler Jul 20, 2024
5b28839
ggml: fix compile error for RISC-V (llama/8623)
zqb-all Jul 22, 2024
8899f0b
fix scratch size of softmax (llama/8642)
luoyu-intel Jul 23, 2024
6ab790f
Allow all RDNA2 archs to use sdot4 intrinsic (llama/8629)
jeroen-mostert Jul 23, 2024
67a39e8
Vulkan IQ4_NL Support (llama/8613)
0cc4m Jul 23, 2024
5beea92
sycl : Add support for non-release DPC++ & oneMKL (llama/8644)
joeatodd Jul 23, 2024
41c25ae
Re-add erroneously removed -fsycl from GGML_EXTRA_LIBS (llama/8667)
joeatodd Jul 24, 2024
25ae1ae
ggml : add and use ggml_cpu_has_llamafile() (llama/8664)
ggerganov Jul 25, 2024
95a1378
fix multi-gpu issue on sycl (llama/8554)
ClarkChin08 Jul 25, 2024
5ad818e
ggml: handle ggml_init failure to fix NULL pointer deref (llama/8692)
DavidKorczynski Jul 25, 2024
10befde
ggml : reduce hash table reset cost (llama/8698)
slaren Jul 27, 2024
b6796f5
ggml : add ggml-aarch64 (ggml/0)
Dibakar Jul 27, 2024
6cd7612
common : handle new quant types (ggml/0)
ggerganov Jul 27, 2024
96cf53f
ggml : resolve sync conflicst (ggml/0)
ggerganov Jul 27, 2024
f57c22a
ggml : move c parameter comment to ggml_rope_ext (ggml/901)
danbev Jul 29, 2024
887537c
vulkan : implement Stable Diffusion operators (ggml/904)
0cc4m Aug 4, 2024
3f40857
metal : add abort callback (ggml/905)
conradev Aug 7, 2024
c626ad5
metal : fix struct name (ggml/912)
ggerganov Aug 7, 2024
2e4a575
ggml : ignore more msvc warnings (ggml/906)
iboB Aug 7, 2024
ded669b
feat: Support Moore Threads GPU (llama/8383)
yeahdongcn Jul 27, 2024
9ae2794
add conv support (llama/8688)
airMeng Jul 29, 2024
e6d4565
cuda : organize vendor-specific headers into vendors directory (llama…
yeahdongcn Jul 29, 2024
d153915
ggml: bugfix: fix the inactive elements is agnostic for risc-v vector…
CarterLi999 Jul 29, 2024
5eff773
Add `TIMESTEP_EMBEDDING` OP (llama/8707)
zhentaoyu Jul 30, 2024
7f82c97
cann: update cmake (llama/8765)
wangshuai09 Jul 30, 2024
362030c
added android implementation of ggml_print_backtrace_symbols (llama/8…
l3utterfly Jul 30, 2024
003db16
cuda : fix dmmv cols requirement to 2*GGML_CUDA_DMMV_X (llama/8800)
slaren Aug 1, 2024
a5550c9
Build: Only include execinfo.h on linux systems that support it (llam…
acon96 Aug 1, 2024
997ef81
ggml-cuda: Adding support for unified memory (llama/8035)
matteoserva Aug 1, 2024
94ea73e
Fixing wrong VDR iq4nl value (llama/8812)
OuadiElfarouki Aug 2, 2024
4996a13
Fix conversion of unnormalized BF16->BF16 weights (llama/7843)
CISC Aug 2, 2024
d044468
ggml : reading the runtime sve config of the cpu (llama/8709)
jdomke Aug 3, 2024
4898fae
ggml : fix overflows in elu function (llama/8866)
jart Aug 5, 2024
f26565a
ggml : add epsilon as a parameter for group_norm (llama/8818)
MollySophia Aug 6, 2024
5bc1f65
CUDA: fix padding logic for FP16/FP32 (llama/8884)
JohannesGaessler Aug 6, 2024
3b8a579
CUDA/HIP: fix tests/test-backend-ops (llama/8896)
JohannesGaessler Aug 7, 2024
4d1b494
Updated SYCL device filtering (llama/8901)
OuadiElfarouki Aug 7, 2024
69e9538
ggml-backend : fix async copy from CPU (llama/8897)
slaren Aug 7, 2024
17749ad
sync : ggml
ggerganov Aug 8, 2024
da43d55
talk-llama : sync llama.cpp
ggerganov Aug 8, 2024
0504fba
build : fix aarch64 (#0)
ggerganov Aug 8, 2024
224c75f
ci : try to fix FreeBSD (#0)
ggerganov Aug 8, 2024
bb21cb3
ci : disable ruby workflow (#0)
ggerganov Aug 8, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# TODO: fix this workflow file, disabled for now
name: Bindings Tests (Ruby)
on:
push:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ jobs:
uses: cross-platform-actions/[email protected]
with:
operating_system: freebsd
version: '13.2'
version: '13.3'
run: |
sudo pkg update
sudo pkg install -y gmake sdl2
Expand Down
12 changes: 10 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -785,7 +785,8 @@ OBJ_GGML += \
ggml/src/ggml.o \
ggml/src/ggml-alloc.o \
ggml/src/ggml-backend.o \
ggml/src/ggml-quants.o
ggml/src/ggml-quants.o \
ggml/src/ggml-aarch64.o

OBJ_WHISPER += \
src/whisper.o
Expand Down Expand Up @@ -916,6 +917,13 @@ ggml/src/ggml-quants.o: \
ggml/src/ggml-common.h
$(CC) $(CFLAGS) -c $< -o $@

ggml/src/ggml-aarch64.o: \
ggml/src/ggml-aarch64.c \
ggml/include/ggml.h \
ggml/src/ggml-aarch64.h \
ggml/src/ggml-common.h
$(CC) $(CFLAGS) -c $< -o $@

ggml/src/ggml-blas.o: \
ggml/src/ggml-blas.cpp \
ggml/include/ggml-blas.h
Expand Down Expand Up @@ -1076,7 +1084,7 @@ talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp \
$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)

talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp examples/talk-llama/unicode.cpp examples/talk-llama/unicode-data.cpp \
talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp examples/talk-llama/llama-vocab.cpp examples/talk-llama/llama-grammar.cpp examples/talk-llama/llama-sampling.cpp examples/talk-llama/unicode.cpp examples/talk-llama/unicode-data.cpp \
$(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
Expand Down
1 change: 1 addition & 0 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ let package = Package(
sources: [
"ggml/src/ggml.c",
"src/whisper.cpp",
"ggml/src/ggml-aarch64.c",
"ggml/src/ggml-alloc.c",
"ggml/src/ggml-backend.c",
"ggml/src/ggml-quants.c",
Expand Down
2 changes: 2 additions & 0 deletions bindings/ruby/ext/extconf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.c')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-impl.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-aarch64.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-aarch64.c')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-alloc.h')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-alloc.c')} .")
system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend-impl.h')} .")
Expand Down
6 changes: 6 additions & 0 deletions examples/common-ggml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ bool ggml_common_quantize_0(
case GGML_FTYPE_MOSTLY_IQ4_XS:
case GGML_FTYPE_MOSTLY_IQ1_M:
case GGML_FTYPE_MOSTLY_BF16:
case GGML_FTYPE_MOSTLY_Q4_0_4_4:
case GGML_FTYPE_MOSTLY_Q4_0_4_8:
case GGML_FTYPE_MOSTLY_Q4_0_8_8:
{
fprintf(stderr, "%s: invalid model type %d\n", __func__, ftype);
return false;
Expand Down Expand Up @@ -209,6 +212,9 @@ bool ggml_common_quantize_0(
case GGML_TYPE_IQ4_XS:
case GGML_TYPE_IQ1_M:
case GGML_TYPE_BF16:
case GGML_TYPE_Q4_0_4_4:
case GGML_TYPE_Q4_0_4_8:
case GGML_TYPE_Q4_0_8_8:
case GGML_TYPE_COUNT:
{
fprintf(stderr, "%s: unsupported quantization type %d (%s)\n", __func__, ttype, ggml_type_name((ggml_type) ttype));
Expand Down
8 changes: 7 additions & 1 deletion examples/talk-llama/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
if (WHISPER_SDL2)
# talk-llama
set(TARGET talk-llama)
add_executable(${TARGET} talk-llama.cpp llama.cpp unicode.cpp unicode-data.cpp)
add_executable(${TARGET} talk-llama.cpp
llama.cpp
llama-vocab.cpp
llama-grammar.cpp
llama-sampling.cpp
unicode.cpp
unicode-data.cpp)
target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})

if (WHISPER_CLBLAST)
Expand Down
Loading