From f1729267c9ce7488097ef7e351ecaa24cb75d423 Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Thu, 3 Apr 2025 14:03:01 +0200 Subject: [PATCH 1/2] whisper.wasm : fix unknown language issue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit addresses an issue with whisper.wasm where the following error was being displayed when running the application on GitHub Pages: ``` whisper_lang_id: unknown language 'д=␙c' ``` This turned out to be a memory corruption issue; further details can be found in the issue referenced below. Refs: https://github.com/ggerganov/whisper.cpp/issues/2998 --- examples/whisper.wasm/emscripten.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/whisper.wasm/emscripten.cpp b/examples/whisper.wasm/emscripten.cpp index b84893dee73..45e0ac0d653 100644 --- a/examples/whisper.wasm/emscripten.cpp +++ b/examples/whisper.wasm/emscripten.cpp @@ -71,7 +71,7 @@ EMSCRIPTEN_BINDINGS(whisper) { params.print_timestamps = true; params.print_special = false; params.translate = translate; - params.language = whisper_is_multilingual(g_contexts[index]) ? lang.c_str() : "en"; + params.language = whisper_is_multilingual(g_contexts[index]) ? strdup(lang.c_str()) : "en"; params.n_threads = std::min(nthreads, std::min(16, mpow2(std::thread::hardware_concurrency()))); params.offset_ms = 0; @@ -106,6 +106,9 @@ EMSCRIPTEN_BINDINGS(whisper) { whisper_reset_timings(g_contexts[index]); whisper_full(g_contexts[index], params, pcmf32.data(), pcmf32.size()); whisper_print_timings(g_contexts[index]); + if (params.language != nullptr && strcmp(params.language, "en") != 0) { + free((void*)params.language); + } }); } From e6dd84527e8116783bf0949a559bf72be431060e Mon Sep 17 00:00:00 2001 From: Daniel Bevenius Date: Thu, 3 Apr 2025 16:01:09 +0200 Subject: [PATCH 2/2] squash! whisper.wasm : fix unknown language issue Make sure that we always free the language pointer for multilingual models. 
--- examples/whisper.wasm/emscripten.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/whisper.wasm/emscripten.cpp b/examples/whisper.wasm/emscripten.cpp index 45e0ac0d653..03bf41329e4 100644 --- a/examples/whisper.wasm/emscripten.cpp +++ b/examples/whisper.wasm/emscripten.cpp @@ -65,13 +65,14 @@ EMSCRIPTEN_BINDINGS(whisper) { } struct whisper_full_params params = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY); + bool is_multilingual = whisper_is_multilingual(g_contexts[index]); params.print_realtime = true; params.print_progress = false; params.print_timestamps = true; params.print_special = false; params.translate = translate; - params.language = whisper_is_multilingual(g_contexts[index]) ? strdup(lang.c_str()) : "en"; + params.language = is_multilingual ? strdup(lang.c_str()) : "en"; params.n_threads = std::min(nthreads, std::min(16, mpow2(std::thread::hardware_concurrency()))); params.offset_ms = 0; @@ -102,11 +103,11 @@ EMSCRIPTEN_BINDINGS(whisper) { // run the worker { - g_worker = std::thread([index, params, pcmf32 = std::move(pcmf32)]() { + g_worker = std::thread([index, params, pcmf32 = std::move(pcmf32), is_multilingual]() { whisper_reset_timings(g_contexts[index]); whisper_full(g_contexts[index], params, pcmf32.data(), pcmf32.size()); whisper_print_timings(g_contexts[index]); - if (params.language != nullptr && strcmp(params.language, "en") != 0) { + if (is_multilingual) { free((void*)params.language); } });