diff --git a/python/sglang/srt/layers/moe/cutlass_moe.py b/python/sglang/srt/layers/moe/cutlass_moe.py index 2a5a5dccbd6..3774afac2d3 100755 --- a/python/sglang/srt/layers/moe/cutlass_moe.py +++ b/python/sglang/srt/layers/moe/cutlass_moe.py @@ -209,7 +209,7 @@ def cutlass_fused_experts_fp8( ) result = torch.empty((m, k), device=device, dtype=out_dtype) - apply_shuffle_mul_sum(c2, result, c_map, topk_weights) + apply_shuffle_mul_sum(c2, result, c_map, topk_weights.to(out_dtype)) return result