bygreencn
diff --git a/‎.devops/main-cuda.Dockerfile‎
Lines changed: 4 additions & 0 deletions b/‎.devops/main-cuda.Dockerfile‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎.github/workflows/build.yml‎
Lines changed: 21 additions & 1 deletion b/‎.github/workflows/build.yml‎
Lines changed: 21 additions & 1 deletion
diff --git a/‎CMakeLists.txt‎
Lines changed: 10 additions & 4 deletions b/‎CMakeLists.txt‎
Lines changed: 10 additions & 4 deletions
diff --git a/‎Makefile‎
Lines changed: 12 additions & 2 deletions b/‎Makefile‎
Lines changed: 12 additions & 2 deletions
diff --git a/‎Package.swift‎
Lines changed: 4 additions & 6 deletions b/‎Package.swift‎
Lines changed: 4 additions & 6 deletions
diff --git a/‎README.md‎
Lines changed: 1 addition & 1 deletion b/‎README.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎bindings/go/params.go‎
Lines changed: 6 additions & 0 deletions b/‎bindings/go/params.go‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎bindings/go/pkg/whisper/context.go‎
Lines changed: 5 additions & 0 deletions b/‎bindings/go/pkg/whisper/context.go‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎bindings/go/pkg/whisper/interface.go‎
Lines changed: 12 additions & 11 deletions b/‎bindings/go/pkg/whisper/interface.go‎
Lines changed: 12 additions & 11 deletions
diff --git a/‎bindings/ios‎ b/‎bindings/ios‎
@@ -20,6 +20,10 @@ RUN apt-get update && \
     apt-get install -y build-essential \
     && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
 
+# Ref: https://stackoverflow.com/a/53464012
+ENV CUDA_MAIN_VERSION=12.3
+ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
+
 COPY .. .
 RUN make
 
 
@@ -223,9 +223,12 @@ jobs:
           - arch: Win32
             obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x86.zip
             s2arc: x86
+            clblast: OFF
           - arch: x64
             obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x64.zip
             s2arc: x64
+            clblast: ON
+            clver: 1.6.1
           - sdl2: ON
             s2ver: 2.28.5
 
@@ -252,13 +255,26 @@ jobs:
           7z x sdl2.zip
           echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
 
+      - name: Install OpenCL
+        if: matrix.clblast == 'ON'
+        run: vcpkg.exe --triplet=${{ matrix.arch }}-windows install opencl
+
+      - name: Fetch CLBlast and set CLBlast_DIR
+        if: matrix.clblast == 'ON'
+        run: |
+          C:/msys64/usr/bin/wget.exe -qO clblast.zip https://github.com/CNugteren/CLBlast/releases/download/${{ matrix.clver }}/CLBlast-${{ matrix.clver }}-windows-x64.zip
+          7z x clblast.zip
+          7z x CLBlast-${{ matrix.clver }}-windows-x64.7z
+          echo "CLBlast_DIR=$env:GITHUB_WORKSPACE/CLBlast-${{ matrix.clver }}-windows-x64/lib/cmake/CLBlast" >> $env:GITHUB_ENV
+
       - name: Configure
         run: >
           cmake -S . -B ./build -A ${{ matrix.arch }}
           -DCMAKE_BUILD_TYPE=${{ matrix.build }}
           -DWHISPER_OPENBLAS=${{ matrix.blas }}
           -DCMAKE_LIBRARY_PATH="$env:OPENBLAS_PATH/lib"
           -DWHISPER_SDL2=${{ matrix.sdl2 }}
+          -DWHISPER_CLBLAST=${{ matrix.clblast }}
 
       - name: Build
         run: |
@@ -273,11 +289,15 @@ jobs:
         if: matrix.sdl2 == 'ON'
         run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
 
+      - name: Copy clblast.dll
+        if: matrix.clblast == 'ON'
+        run: copy "$env:CLBlast_DIR/../../clblast.dll" build/bin/${{ matrix.build }}
+
       - name: Upload binaries
         if: matrix.blas == 'ON' && matrix.sdl2 == 'ON'
         uses: actions/upload-artifact@v1
         with:
-          name: whisper-blas-bin-${{ matrix.arch }}
+          name: whisper-blas${{ matrix.clblast == 'ON' && '-clblast' || ''}}-bin-${{ matrix.arch }}
           path: build/bin/${{ matrix.build }}
 
   windows-cublas:
 
@@ -1,6 +1,6 @@
 cmake_minimum_required (VERSION 3.5)
 
-project(whisper.cpp VERSION 1.5.2)
+project(whisper.cpp VERSION 1.5.4)
 
 # Add path to modules
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
@@ -223,11 +223,17 @@ if (WHISPER_CUBLAS)
         add_compile_definitions(GGML_USE_CUBLAS)
 
         if (WHISPER_STATIC)
-            set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
+            if (WIN32)
+                # As of 12.3.1 CUDA Tookit for Windows does not offer a static cublas library
+                set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
+            else ()
+                set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
+            endif()
         else()
             set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
         endif()
 
+        set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cuda_driver)
     else()
         message(FATAL_ERROR "cuBLAS not found")
     endif()
@@ -343,8 +349,8 @@ else()
         endif()
     else()
         if (EMSCRIPTEN)
-            set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -pthread")
-            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
+            set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -pthread -s TOTAL_STACK=5242880")
+            set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -s TOTAL_STACK=5242880")
         else()
             if(NOT WHISPER_NO_AVX)
                 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx")
 
@@ -99,6 +99,16 @@ ifeq ($(filter $(UNAME_S),Linux Darwin DragonFly FreeBSD NetBSD OpenBSD Haiku),$
 	CXXFLAGS += -pthread
 endif
 
+# detect Windows
+ifneq ($(findstring _NT,$(UNAME_S)),)
+	_WIN32 := 1
+endif
+
+# Windows Sockets 2 (Winsock) for network-capable apps
+ifeq ($(_WIN32),1)
+	LWINSOCK2 := -lws2_32
+endif
+
 # Architecture specific
 # TODO: probably these flags need to be tweaked on some architectures
 #       feel free to update the Makefile for your architecture and send a pull request or issue
@@ -206,7 +216,7 @@ ifdef WHISPER_CUBLAS
 
 	CFLAGS      += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
 	CXXFLAGS    += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
-	LDFLAGS     += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib
+	LDFLAGS     += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib
 	WHISPER_OBJ += ggml-cuda.o
 	NVCC        = nvcc
 	NVCCFLAGS   = --forward-unknown-to-host-compiler -arch=$(CUDA_ARCH_FLAG)
@@ -360,7 +370,7 @@ quantize: examples/quantize/quantize.cpp $(WHISPER_OBJ) $(SRC_COMMON)
 	$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o quantize $(LDFLAGS)
 
 server: examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ)
-	$(CXX) $(CXXFLAGS) examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o server $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o server $(LDFLAGS) $(LWINSOCK2)
 
 stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
 	$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)
 
@@ -13,9 +13,13 @@ let package = Package(
     products: [
         .library(name: "whisper", targets: ["whisper"]),
     ],
+    dependencies: [
+        .package(url: "https://github.com/ggerganov/ggml.git", .branch("release"))
+    ],
     targets: [
         .target(
             name: "whisper",
+            dependencies: ["ggml"],
             path: ".",
             exclude: [
                "bindings",
@@ -32,14 +36,8 @@ let package = Package(
                "Makefile"
             ],
             sources: [
-                "ggml.c",
                 "whisper.cpp",
-                "ggml-alloc.c",
-                "ggml-backend.c",
-                "ggml-quants.c",
-                "ggml-metal.m"
             ],
-            resources: [.process("ggml-metal.metal")],
             publicHeadersPath: "spm-headers",
             cSettings: [
                 .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
 
@@ -6,7 +6,7 @@
 [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
 [![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)
 
-Stable: [v1.5.2](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.5.2) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
+Stable: [v1.5.4](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.5.4) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
 
 High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
 
 
@@ -123,6 +123,11 @@ func (p *Params) SetAudioCtx(n int) {
 	p.audio_ctx = C.int(n)
 }
 
+// Set initial prompt
+func (p *Params) SetInitialPrompt(prompt string) {
+	p.initial_prompt = C.CString(prompt)
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 // PRIVATE METHODS
 
@@ -147,6 +152,7 @@ func (p *Params) String() string {
 	str += fmt.Sprintf(" offset_ms=%d", p.offset_ms)
 	str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
 	str += fmt.Sprintf(" audio_ctx=%d", p.audio_ctx)
+	str += fmt.Sprintf(" initial_prompt=%s", C.GoString(p.initial_prompt))
 	if p.translate {
 		str += " translate"
 	}
 
@@ -130,6 +130,11 @@ func (context *context) SetAudioCtx(n uint) {
 	context.params.SetAudioCtx(int(n))
 }
 
+// Set initial prompt
+func (context *context) SetInitialPrompt(prompt string) {
+	context.params.SetInitialPrompt(prompt)
+}
+
 // ResetTimings resets the mode timings. Should be called before processing
 func (context *context) ResetTimings() {
 	context.model.ctx.Whisper_reset_timings()
 
@@ -38,17 +38,18 @@ type Context interface {
 	IsMultilingual() bool     // Return true if the model is multilingual.
 	Language() string         // Get language
 
-	SetOffset(time.Duration)      // Set offset
-	SetDuration(time.Duration)    // Set duration
-	SetThreads(uint)              // Set number of threads to use
-	SetSpeedup(bool)              // Set speedup flag
-	SetSplitOnWord(bool)          // Set split on word flag
-	SetTokenThreshold(float32)    // Set timestamp token probability threshold
-	SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
-	SetMaxSegmentLength(uint)     // Set max segment length in characters
-	SetTokenTimestamps(bool)      // Set token timestamps flag
-	SetMaxTokensPerSegment(uint)  // Set max tokens per segment (0 = no limit)
-	SetAudioCtx(uint)             // Set audio encoder context
+	SetOffset(time.Duration)        // Set offset
+	SetDuration(time.Duration)      // Set duration
+	SetThreads(uint)                // Set number of threads to use
+	SetSpeedup(bool)                // Set speedup flag
+	SetSplitOnWord(bool)            // Set split on word flag
+	SetTokenThreshold(float32)      // Set timestamp token probability threshold
+	SetTokenSumThreshold(float32)   // Set timestamp token sum probability threshold
+	SetMaxSegmentLength(uint)       // Set max segment length in characters
+	SetTokenTimestamps(bool)        // Set token timestamps flag
+	SetMaxTokensPerSegment(uint)    // Set max tokens per segment (0 = no limit)
+	SetAudioCtx(uint)               // Set audio encoder context
+	SetInitialPrompt(prompt string) // Set initial prompt
 
 	// Process mono audio data and return any errors.
 	// If defined, newly generated segments are passed to the