1
1
"""
2
2
Usage:
3
3
# replay from a folder
4
- python3 replay_request_dump.py --file-number 100 --parallel 512 --input-folder /data/folder
4
+ python3 replay_request_dump.py --file-number 100 --parallel 512 --input-folder /data/lianmin/sglang_request_dump/grok-mini-0220-engine-5756f8f94-28bm6/
5
5
6
6
# replay from a single file
7
- python3 replay_request_dump.py --parallel 512 --input-file /data/file.pkl
7
+ python3 replay_request_dump.py --parallel 512 --input-file /data/sglang_crash_dump/memx-cti-34-sr1.xpop.twttr.net/crash_dump_2025-06-04_20-13-18.pkl
8
8
"""
9
9
10
10
import argparse
@@ -38,9 +38,19 @@ def run_one_request_internal(record):
38
38
(req , output , replay_init_time , start_time , end_time , idx ) = record
39
39
time .sleep (max (0 , start_time - (time .time () - replay_init_time )))
40
40
41
+ if "completion_tokens" in output .get ("meta_info" , {}):
42
+ recorded_completion_tokens = output ["meta_info" ]["completion_tokens" ]
43
+ else :
44
+ recorded_completion_tokens = ""
45
+
41
46
json_data = asdict (req )
42
47
stream = json_data ["stream" ]
43
48
49
+ if args .ignore_eos :
50
+ json_data ["sampling_params" ]["ignore_eos" ] = True
51
+ if recorded_completion_tokens :
52
+ json_data ["sampling_params" ]["max_new_tokens" ] = recorded_completion_tokens
53
+
44
54
response = requests .post (
45
55
f"http://{ args .host } :{ args .port } /generate" ,
46
56
json = json_data ,
@@ -59,10 +69,6 @@ def run_one_request_internal(record):
59
69
60
70
prompt_tokens = ret ["meta_info" ]["prompt_tokens" ]
61
71
completion_tokens = ret ["meta_info" ]["completion_tokens" ]
62
- if "completion_tokens" in ret ["meta_info" ]:
63
- recorded_completion_tokens = ret ["meta_info" ]["completion_tokens" ]
64
- else :
65
- recorded_completion_tokens = ""
66
72
print (
67
73
f"{ idx = } , { start_time = :.2f} , { prompt_tokens = } , "
68
74
f"{ completion_tokens = } , { recorded_completion_tokens = } "
@@ -114,6 +120,7 @@ def main(records):
114
120
parser .add_argument ("--req-start" , type = int , default = 0 )
115
121
parser .add_argument ("--parallel" , type = int , default = 512 )
116
122
parser .add_argument ("--idx" , type = int , default = None )
123
+ parser .add_argument ("--ignore-eos" , action = "store_true" )
117
124
args = parser .parse_args ()
118
125
119
126
set_ulimit ()
0 commit comments