Skip to content

Commit 25ef410

Browse files
lbh2001ssssnow
authored and committed
[bugfix] Add 'disaggregation_mode' parameter to warmup function when compile deep_gemm manually (#8618)
1 parent e3ae750 commit 25ef410

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

python/sglang/compile_deep_gemm.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import requests
1919

20+
from sglang.srt.disaggregation.utils import FAKE_BOOTSTRAP_HOST
2021
from sglang.srt.entrypoints.http_server import launch_server
2122
from sglang.srt.managers.io_struct import GenerateReqInput
2223
from sglang.srt.managers.tokenizer_manager import TokenizerManager
@@ -52,7 +53,9 @@ def from_cli_args(cls, args: argparse.Namespace):
5253

5354

5455
@warmup("compile-deep-gemm")
55-
async def warm_up_compile(tokenizer_manager: TokenizerManager):
56+
async def warm_up_compile(
57+
disaggregation_mode: str, tokenizer_manager: TokenizerManager
58+
):
5659
print("\nGenerate warm up request for compiling DeepGEMM...\n")
5760
generate_req_input = GenerateReqInput(
5861
input_ids=[0, 1, 2, 3],
@@ -62,6 +65,10 @@ async def warm_up_compile(tokenizer_manager: TokenizerManager):
6265
"ignore_eos": True,
6366
},
6467
)
68+
if disaggregation_mode != "null":
69+
generate_req_input.bootstrap_room = 0
70+
generate_req_input.bootstrap_host = FAKE_BOOTSTRAP_HOST
71+
6572
await tokenizer_manager.generate_request(generate_req_input, None).__anext__()
6673

6774

0 commit comments

Comments
 (0)