ggml-org · ggerganov · Aug 8, 2024 · Aug 8, 2024 · Jul 12, 2024 · Jul 20, 2024
diff --git a/.github/workflows/bindings-ruby.yml → .github/workflows/bindings-ruby.yml.disabled b/.github/workflows/bindings-ruby.yml → .github/workflows/bindings-ruby.yml.disabled
@@ -1,3 +1,4 @@
+# TODO: fix this workflow file, disabled for now
 name: Bindings Tests (Ruby)
 on:
   push:

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -59,7 +59,7 @@ jobs:
         uses: cross-platform-actions/action@v0.24.0
         with:
           operating_system: freebsd
-          version: '13.2'
+          version: '13.3'
           run: |
             sudo pkg update
             sudo pkg install -y gmake sdl2

diff --git a/Makefile b/Makefile
@@ -785,7 +785,8 @@ OBJ_GGML += \
 	ggml/src/ggml.o \
 	ggml/src/ggml-alloc.o \
 	ggml/src/ggml-backend.o \
-	ggml/src/ggml-quants.o
+	ggml/src/ggml-quants.o \
+	ggml/src/ggml-aarch64.o
 
 OBJ_WHISPER += \
 	src/whisper.o
@@ -916,6 +917,13 @@ ggml/src/ggml-quants.o: \
 	ggml/src/ggml-common.h
 	$(CC) $(CFLAGS)    -c $< -o $@
 
+ggml/src/ggml-aarch64.o: \
+	ggml/src/ggml-aarch64.c \
+	ggml/include/ggml.h \
+	ggml/src/ggml-aarch64.h \
+	ggml/src/ggml-common.h
+	$(CC) $(CFLAGS)    -c $< -o $@
+
 ggml/src/ggml-blas.o: \
 	ggml/src/ggml-blas.cpp \
 	ggml/include/ggml-blas.h
@@ -1076,7 +1084,7 @@ talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp \
 	$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
 
-talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp examples/talk-llama/unicode.cpp examples/talk-llama/unicode-data.cpp \
+talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp examples/talk-llama/llama-vocab.cpp examples/talk-llama/llama-grammar.cpp examples/talk-llama/llama-sampling.cpp examples/talk-llama/unicode.cpp examples/talk-llama/unicode-data.cpp \
 	$(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
 	$(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)

diff --git a/Package.swift b/Package.swift
@@ -32,6 +32,7 @@ let package = Package(
             sources: [
                 "ggml/src/ggml.c",
                 "src/whisper.cpp",
+                "ggml/src/ggml-aarch64.c",
                 "ggml/src/ggml-alloc.c",
                 "ggml/src/ggml-backend.c",
                 "ggml/src/ggml-quants.c",

diff --git a/bindings/ruby/ext/extconf.rb b/bindings/ruby/ext/extconf.rb
@@ -5,6 +5,8 @@
 system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.h')} .")
 system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml.c')} .")
 system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-impl.h')} .")
+system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-aarch64.h')} .")
+system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-aarch64.c')} .")
 system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-alloc.h')} .")
 system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-alloc.c')} .")
 system("cp #{File.join(File.dirname(__FILE__),'..','..','..','ggml-backend-impl.h')} .")

diff --git a/examples/common-ggml.cpp b/examples/common-ggml.cpp
@@ -72,6 +72,9 @@ bool ggml_common_quantize_0(
         case GGML_FTYPE_MOSTLY_IQ4_XS:
         case GGML_FTYPE_MOSTLY_IQ1_M:
         case GGML_FTYPE_MOSTLY_BF16:
+        case GGML_FTYPE_MOSTLY_Q4_0_4_4:
+        case GGML_FTYPE_MOSTLY_Q4_0_4_8:
+        case GGML_FTYPE_MOSTLY_Q4_0_8_8:
                 {
                     fprintf(stderr, "%s: invalid model type %d\n", __func__, ftype);
                     return false;
@@ -209,6 +212,9 @@ bool ggml_common_quantize_0(
                 case GGML_TYPE_IQ4_XS:
                 case GGML_TYPE_IQ1_M:
                 case GGML_TYPE_BF16:
+                case GGML_TYPE_Q4_0_4_4:
+                case GGML_TYPE_Q4_0_4_8:
+                case GGML_TYPE_Q4_0_8_8:
                 case GGML_TYPE_COUNT:
                     {
                         fprintf(stderr, "%s: unsupported quantization type %d (%s)\n", __func__, ttype, ggml_type_name((ggml_type) ttype));

diff --git a/examples/talk-llama/CMakeLists.txt b/examples/talk-llama/CMakeLists.txt
@@ -1,7 +1,13 @@
 if (WHISPER_SDL2)
     # talk-llama
     set(TARGET talk-llama)
-    add_executable(${TARGET} talk-llama.cpp llama.cpp unicode.cpp unicode-data.cpp)
+    add_executable(${TARGET} talk-llama.cpp
+        llama.cpp
+        llama-vocab.cpp
+        llama-grammar.cpp
+        llama-sampling.cpp
+        unicode.cpp
+        unicode-data.cpp)
     target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
 
     if (WHISPER_CLBLAST)