Skip to content

Commit eda6990

Browse files
committed
Merge remote-tracking branch 'ggerganov/master'
* ggerganov/master: (86 commits) server : fix building and simplify lib deps on Windows (ggml-org#1772) talk-llama : sync llama.cpp talk-llama : llama.cpp sync : ggml metal : correctly set SIMD support flags on iOS (llama/4923) 2-bit quantizations (llama/4897) scripts : sync-ggml-am.sh add option to skip commits talk-llama : sync llama.cpp sync : ggml examples : adapt to metal API ggml: cache sin/cos for RoPE (llama/4908) metal : remove old API (llama/4919) metal : disable log for loaded kernels (llama/4794) gguf : fix potential infinite for-loop (llama/4600) metal : refactor kernel loading code (llama/4794) CUDA: faster q8_0 -> f16 dequantization (llama/4895) talk-llama : add optional CLI arg to set the bot name (ggml-org#1764) examples : add python example for transcription (ggml-org#1744) whisper : load the model into multiple buffers of max size 1GB (ggml-org#1763) talk-llama : sync llama.cpp ...
2 parents b87e0b8 + f5f159c commit eda6990

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+9076
-4642
lines changed

.devops/main-cuda.Dockerfile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ RUN apt-get update && \
2020
apt-get install -y build-essential \
2121
&& rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
2222

23+
# Ref: https://stackoverflow.com/a/53464012
24+
ENV CUDA_MAIN_VERSION=12.3
25+
ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
26+
2327
COPY .. .
2428
RUN make
2529

.github/workflows/build.yml

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,9 +223,12 @@ jobs:
223223
- arch: Win32
224224
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x86.zip
225225
s2arc: x86
226+
clblast: OFF
226227
- arch: x64
227228
obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x64.zip
228229
s2arc: x64
230+
clblast: ON
231+
clver: 1.6.1
229232
- sdl2: ON
230233
s2ver: 2.28.5
231234

@@ -252,13 +255,26 @@ jobs:
252255
7z x sdl2.zip
253256
echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
254257
258+
- name: Install OpenCL
259+
if: matrix.clblast == 'ON'
260+
run: vcpkg.exe --triplet=${{ matrix.arch }}-windows install opencl
261+
262+
- name: Fetch CLBlast and set CLBlast_DIR
263+
if: matrix.clblast == 'ON'
264+
run: |
265+
C:/msys64/usr/bin/wget.exe -qO clblast.zip https://github.com/CNugteren/CLBlast/releases/download/${{ matrix.clver }}/CLBlast-${{ matrix.clver }}-windows-x64.zip
266+
7z x clblast.zip
267+
7z x CLBlast-${{ matrix.clver }}-windows-x64.7z
268+
echo "CLBlast_DIR=$env:GITHUB_WORKSPACE/CLBlast-${{ matrix.clver }}-windows-x64/lib/cmake/CLBlast" >> $env:GITHUB_ENV
269+
255270
- name: Configure
256271
run: >
257272
cmake -S . -B ./build -A ${{ matrix.arch }}
258273
-DCMAKE_BUILD_TYPE=${{ matrix.build }}
259274
-DWHISPER_OPENBLAS=${{ matrix.blas }}
260275
-DCMAKE_LIBRARY_PATH="$env:OPENBLAS_PATH/lib"
261276
-DWHISPER_SDL2=${{ matrix.sdl2 }}
277+
-DWHISPER_CLBLAST=${{ matrix.clblast }}
262278
263279
- name: Build
264280
run: |
@@ -273,11 +289,15 @@ jobs:
273289
if: matrix.sdl2 == 'ON'
274290
run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
275291

292+
- name: Copy clblast.dll
293+
if: matrix.clblast == 'ON'
294+
run: copy "$env:CLBlast_DIR/../../clblast.dll" build/bin/${{ matrix.build }}
295+
276296
- name: Upload binaries
277297
if: matrix.blas == 'ON' && matrix.sdl2 == 'ON'
278298
uses: actions/upload-artifact@v1
279299
with:
280-
name: whisper-blas-bin-${{ matrix.arch }}
300+
name: whisper-blas${{ matrix.clblast == 'ON' && '-clblast' || ''}}-bin-${{ matrix.arch }}
281301
path: build/bin/${{ matrix.build }}
282302

283303
windows-cublas:

CMakeLists.txt

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
cmake_minimum_required (VERSION 3.5)
22

3-
project(whisper.cpp VERSION 1.5.2)
3+
project(whisper.cpp VERSION 1.5.4)
44

55
# Add path to modules
66
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
@@ -223,11 +223,17 @@ if (WHISPER_CUBLAS)
223223
add_compile_definitions(GGML_USE_CUBLAS)
224224

225225
if (WHISPER_STATIC)
226-
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
226+
if (WIN32)
227+
# As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library
228+
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
229+
else ()
230+
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
231+
endif()
227232
else()
228233
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
229234
endif()
230235

236+
set(WHISPER_EXTRA_LIBS ${WHISPER_EXTRA_LIBS} CUDA::cuda_driver)
231237
else()
232238
message(FATAL_ERROR "cuBLAS not found")
233239
endif()
@@ -343,8 +349,8 @@ else()
343349
endif()
344350
else()
345351
if (EMSCRIPTEN)
346-
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread")
347-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
352+
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -s TOTAL_STACK=5242880")
353+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -s TOTAL_STACK=5242880")
348354
else()
349355
if(NOT WHISPER_NO_AVX)
350356
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx")

Makefile

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,16 @@ ifeq ($(filter $(UNAME_S),Linux Darwin DragonFly FreeBSD NetBSD OpenBSD Haiku),$
9999
CXXFLAGS += -pthread
100100
endif
101101

102+
# detect Windows
103+
ifneq ($(findstring _NT,$(UNAME_S)),)
104+
_WIN32 := 1
105+
endif
106+
107+
# Windows Sockets 2 (Winsock) for network-capable apps
108+
ifeq ($(_WIN32),1)
109+
LWINSOCK2 := -lws2_32
110+
endif
111+
102112
# Architecture specific
103113
# TODO: probably these flags need to be tweaked on some architectures
104114
# feel free to update the Makefile for your architecture and send a pull request or issue
@@ -206,7 +216,7 @@ ifdef WHISPER_CUBLAS
206216

207217
CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
208218
CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
209-
LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib
219+
LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib
210220
WHISPER_OBJ += ggml-cuda.o
211221
NVCC = nvcc
212222
NVCCFLAGS = --forward-unknown-to-host-compiler -arch=$(CUDA_ARCH_FLAG)
@@ -360,7 +370,7 @@ quantize: examples/quantize/quantize.cpp $(WHISPER_OBJ) $(SRC_COMMON)
360370
$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o quantize $(LDFLAGS)
361371

362372
server: examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ)
363-
$(CXX) $(CXXFLAGS) examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o server $(LDFLAGS)
373+
$(CXX) $(CXXFLAGS) examples/server/server.cpp $(SRC_COMMON) $(WHISPER_OBJ) -o server $(LDFLAGS) $(LWINSOCK2)
364374

365375
stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ)
366376
$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) $(WHISPER_OBJ) -o stream $(CC_SDL) $(LDFLAGS)

Package.swift

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,13 @@ let package = Package(
1313
products: [
1414
.library(name: "whisper", targets: ["whisper"]),
1515
],
16+
dependencies: [
17+
.package(url: "https://github.com/ggerganov/ggml.git", .branch("release"))
18+
],
1619
targets: [
1720
.target(
1821
name: "whisper",
22+
dependencies: ["ggml"],
1923
path: ".",
2024
exclude: [
2125
"bindings",
@@ -32,14 +36,8 @@ let package = Package(
3236
"Makefile"
3337
],
3438
sources: [
35-
"ggml.c",
3639
"whisper.cpp",
37-
"ggml-alloc.c",
38-
"ggml-backend.c",
39-
"ggml-quants.c",
40-
"ggml-metal.m"
4140
],
42-
resources: [.process("ggml-metal.metal")],
4341
publicHeadersPath: "spm-headers",
4442
cSettings: [
4543
.unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
77
[![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)
88

9-
Stable: [v1.5.2](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.5.2) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
9+
Stable: [v1.5.4](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.5.4) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
1010

1111
High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
1212

bindings/go/params.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,11 @@ func (p *Params) SetAudioCtx(n int) {
123123
p.audio_ctx = C.int(n)
124124
}
125125

126+
// Set initial prompt
127+
func (p *Params) SetInitialPrompt(prompt string) {
128+
p.initial_prompt = C.CString(prompt)
129+
}
130+
126131
///////////////////////////////////////////////////////////////////////////////
127132
// PRIVATE METHODS
128133

@@ -147,6 +152,7 @@ func (p *Params) String() string {
147152
str += fmt.Sprintf(" offset_ms=%d", p.offset_ms)
148153
str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
149154
str += fmt.Sprintf(" audio_ctx=%d", p.audio_ctx)
155+
str += fmt.Sprintf(" initial_prompt=%s", C.GoString(p.initial_prompt))
150156
if p.translate {
151157
str += " translate"
152158
}

bindings/go/pkg/whisper/context.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,11 @@ func (context *context) SetAudioCtx(n uint) {
130130
context.params.SetAudioCtx(int(n))
131131
}
132132

133+
// Set initial prompt
134+
func (context *context) SetInitialPrompt(prompt string) {
135+
context.params.SetInitialPrompt(prompt)
136+
}
137+
133138
// ResetTimings resets the model timings. Should be called before processing
134139
func (context *context) ResetTimings() {
135140
context.model.ctx.Whisper_reset_timings()

bindings/go/pkg/whisper/interface.go

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -38,17 +38,18 @@ type Context interface {
3838
IsMultilingual() bool // Return true if the model is multilingual.
3939
Language() string // Get language
4040

41-
SetOffset(time.Duration) // Set offset
42-
SetDuration(time.Duration) // Set duration
43-
SetThreads(uint) // Set number of threads to use
44-
SetSpeedup(bool) // Set speedup flag
45-
SetSplitOnWord(bool) // Set split on word flag
46-
SetTokenThreshold(float32) // Set timestamp token probability threshold
47-
SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
48-
SetMaxSegmentLength(uint) // Set max segment length in characters
49-
SetTokenTimestamps(bool) // Set token timestamps flag
50-
SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
51-
SetAudioCtx(uint) // Set audio encoder context
41+
SetOffset(time.Duration) // Set offset
42+
SetDuration(time.Duration) // Set duration
43+
SetThreads(uint) // Set number of threads to use
44+
SetSpeedup(bool) // Set speedup flag
45+
SetSplitOnWord(bool) // Set split on word flag
46+
SetTokenThreshold(float32) // Set timestamp token probability threshold
47+
SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
48+
SetMaxSegmentLength(uint) // Set max segment length in characters
49+
SetTokenTimestamps(bool) // Set token timestamps flag
50+
SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
51+
SetAudioCtx(uint) // Set audio encoder context
52+
SetInitialPrompt(prompt string) // Set initial prompt
5253

5354
// Process mono audio data and return any errors.
5455
// If defined, newly generated segments are passed to the

0 commit comments

Comments
 (0)