From 6464269f83176aad9389e405a680fe3a72dd0544 Mon Sep 17 00:00:00 2001 From: Reese Levine Date: Tue, 23 Sep 2025 21:18:58 -0700 Subject: [PATCH 1/3] implement soft_max --- ggml/src/ggml-webgpu/ggml-webgpu.cpp | 7 +++++++ ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp index de68c5689bba7..ad2554f09ac63 100644 --- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -1060,6 +1060,9 @@ static bool ggml_webgpu_encode_node(webgpu_context ctx, ggml_tensor * node) { case GGML_OP_SCALE: ggml_webgpu_scale(ctx, src0, node); break; + case GGML_OP_SOFT_MAX: + ggml_webgpu_soft_max(ctx, src0, src1, src2, node); + break; default: return false; } @@ -1806,6 +1809,9 @@ static bool ggml_backend_webgpu_device_supports_op(ggml_backend_dev_t dev, const case GGML_OP_SCALE: supports_op = op->type == GGML_TYPE_F32; break; + case GGML_OP_SOFT_MAX: + supports_op = op->type == GGML_TYPE_F32; + break; default: break; } @@ -1949,6 +1955,7 @@ static ggml_backend_dev_t ggml_backend_webgpu_reg_get_device(ggml_backend_reg_t ggml_webgpu_init_rope_pipeline(ctx); ggml_webgpu_init_glu_pipeline(ctx); ggml_webgpu_init_scale_pipeline(ctx); + ggml_webgpu_init_soft_max_pipeline(ctx); #ifdef GGML_WEBGPU_DEBUG // Initialize debug buffers diff --git a/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl b/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl index 4f72bb1c851ec..712b921f1abb9 100644 --- a/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl +++ b/ggml/src/ggml-webgpu/wgsl-shaders/rms_norm.wgsl @@ -84,7 +84,7 @@ fn main(@builtin(workgroup_id) wid: vec3<u32>, let i2 = i / params.ne1; let i1 = i % params.ne1; let i_src_row = params.offset_src + i3 * params.stride_src3 + i2 * params.stride_src2 + i1 * params.stride_src1; - let i_dst_row = params.offset_src + i3 * params.stride_dst3 + i2 * params.stride_dst2 + i1 * params.stride_dst1; + let 
i_dst_row = params.offset_dst + i3 * params.stride_dst3 + i2 * params.stride_dst2 + i1 * params.stride_dst1; let elems = (params.ne0 + wg_size - 1) / wg_size; From ee47adb3353a76dbdfb363d7b726a048791f1d74 Mon Sep 17 00:00:00 2001 From: Reese Levine Date: Fri, 3 Oct 2025 13:08:02 -0700 Subject: [PATCH 2/3] Fix soft_max data race --- ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl | 1 + 1 file changed, 1 insertion(+) diff --git a/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl b/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl index 64ab576c08354..c74dc4cc9238a 100644 --- a/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl +++ b/ggml/src/ggml-webgpu/wgsl-shaders/soft_max.tmpl.wgsl @@ -300,6 +300,7 @@ fn main(@builtin(workgroup_id) wid: vec3<u32>, workgroupBarrier(); } let row_max = scratch[0]; + workgroupBarrier(); var sum = 0.0f; col = lid.x; From 84770b4968cf3addf24481080ad5f96b1f41ae74 Mon Sep 17 00:00:00 2001 From: Reese Levine Date: Sat, 4 Oct 2025 17:38:09 -0700 Subject: [PATCH 3/3] Temporary fix, wait on each submit --- ggml/src/ggml-webgpu/ggml-webgpu.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp index ad2554f09ac63..e795ca3fd92fd 100644 --- a/ggml/src/ggml-webgpu/ggml-webgpu.cpp +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -424,6 +424,7 @@ static void ggml_backend_webgpu_build_and_enqueue(webgpu_context & ctx->staged_param_bufs.push_back(params_bufs); if (ctx->staged_command_bufs.size() == WEBGPU_COMMAND_SUBMIT_BATCH_SIZE) { ggml_backend_webgpu_submit_queue(ctx); + ggml_backend_webgpu_wait_on_submission(ctx); } } }