Skip to content
Closed
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
326361e
[opt] enable trtllm fp4
Alcanderian Jul 2, 2025
acccd2f
2nd
Alcanderian Jul 2, 2025
15240d2
3
Alcanderian Jul 2, 2025
e12859f
add test
Alcanderian Jul 2, 2025
e652b36
readability
Alcanderian Jul 2, 2025
c313cca
4
Alcanderian Jul 2, 2025
4f92e77
Merge branch 'main' into trtllm-fp4
Alcanderian Jul 2, 2025
5ebc8ad
fix acc
Alcanderian Jul 2, 2025
3331814
lint
Alcanderian Jul 2, 2025
6560082
router_gemm
Alcanderian Jul 2, 2025
794a37c
lint
Alcanderian Jul 2, 2025
b16dd1a
Merge branch 'main' into trtllm-fp4
Alcanderian Jul 3, 2025
5a8bb9b
refine for merging
Alcanderian Jul 3, 2025
3c6b49b
Merge branch 'main' into trtllm-fp4
zhyncs Jul 3, 2025
1f09807
Merge branch 'main' into trtllm-fp4
Alcanderian Jul 3, 2025
88ad6d1
Merge branch 'main' into trtllm-fp4
Alcanderian Jul 3, 2025
38e652f
Merge branch 'main' into trtllm-fp4
Alcanderian Jul 4, 2025
02fa3ad
support EP-TP combine
Alcanderian Jul 4, 2025
1ea5ef9
Merge branch 'main' into trtllm-fp4
zhyncs Jul 5, 2025
b7d66cc
use trt fp4 linear
Alcanderian Jul 5, 2025
56fd5cb
Merge branch 'main' into trtllm-fp4
Alcanderian Jul 5, 2025
7f1a2e8
use trt router gemm and disable it for nextn
Alcanderian Jul 5, 2025
7e75c19
Merge branch 'main' into trtllm-fp4
Alcanderian Jul 5, 2025
1dda8a0
Merge branch 'main' into trtllm-fp4
Alcanderian Jul 6, 2025
e2d0675
Merge branch 'main' into trtllm-fp4
zhyncs Jul 7, 2025
ca3a714
Merge branch 'main' into trtllm-fp4
BBuf Jul 14, 2025
3c77c42
lint
BBuf Jul 14, 2025
4707908
Merge branch 'main' into trtllm-fp4
Alcanderian Jul 16, 2025
4fdd5a7
Merge branch 'main' into trtllm-fp4
zhyncs Jul 16, 2025
bf0a000
upd
zhyncs Jul 16, 2025
ee5e054
Merge branch 'main' into trtllm-fp4
Alcanderian Jul 20, 2025
f08c44c
Merge branch 'main' into trtllm-fp4
Alcanderian Jul 20, 2025
34d11dd
fix
Alcanderian Jul 20, 2025
3369c71
Merge branch 'main' into trtllm-fp4
zhyncs Jul 21, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions python/sglang/srt/layers/moe/fused_moe_triton/layer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from sglang.srt.utils import (
cpu_has_amx_support,
get_bool_env_var,
get_int_env_var,
is_cpu,
is_hip,
set_weight_attrs,
Expand Down Expand Up @@ -56,6 +57,8 @@
from aiter.fused_moe_bf16_asm import ck_moe_2stages
from aiter.ops.shuffle import shuffle_weight

TRTLMM_GEN_MOE_EP_SIZE = get_int_env_var("SGLANG_TRTLMM_GEN_MOE_EP_SIZE", "0")

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -468,10 +471,13 @@ def __init__(
assert (
self.enable_flashinfer_moe
), "FusedMoE only supports EP with --enable-flashinfer-moe"
self.ep_size = self.tp_size
self.ep_rank = self.tp_rank
self.tp_size = 1
self.tp_rank = 0
etp_ep_size = self.tp_size
if TRTLMM_GEN_MOE_EP_SIZE > 0:
etp_ep_size = TRTLMM_GEN_MOE_EP_SIZE
self.ep_size = etp_ep_size
self.ep_rank = self.tp_rank % etp_ep_size
self.tp_size = self.tp_size // etp_ep_size
self.tp_rank = self.tp_rank // etp_ep_size
# Create a tensor of size num_experts filled with -1
self.expert_map = torch.full((self.num_experts,), -1, dtype=torch.int32)
# Create a expert map for the local experts
Expand Down Expand Up @@ -722,8 +728,7 @@ def weight_loader(
if expert_id == -1:
return

# TP rank is set to 0 if EP is enabled
tp_rank = 0 if self.ep_size > 1 else get_tensor_model_parallel_rank()
tp_rank = self.tp_rank

# compressed-tensors checkpoints with packed weights are stored flipped
# TODO (mgoin): check self.quant_method.quant_config.quant_format
Expand Down
Loading
Loading