File tree (Expand file tree / Collapse file tree): 2 files changed, +15 -2 lines changed
python/sglang/srt/managers Expand file tree Collapse file tree 2 files changed +15
-2
lines changed Original file line number Diff line number Diff line change @@ -431,6 +431,7 @@ def __init__(
431
431
bootstrap_port : Optional [int ] = None ,
432
432
bootstrap_room : Optional [int ] = None ,
433
433
data_parallel_rank : Optional [int ] = None ,
434
+ vocab_size : Optional [int ] = None ,
434
435
):
435
436
# Input and output info
436
437
self .rid = rid
@@ -480,6 +481,7 @@ def __init__(
480
481
self .to_abort_message : str = None
481
482
self .stream = stream
482
483
self .eos_token_ids = eos_token_ids
484
+ self .vocab_size = vocab_size
483
485
484
486
# For incremental decoding
485
487
# ----- | --------- read_ids -------|
@@ -713,6 +715,14 @@ def check_finished(self):
713
715
self .finished_reason = FINISH_MATCHED_TOKEN (matched = last_token_id )
714
716
return
715
717
718
+ if last_token_id > self .vocab_size or last_token_id < 0 :
719
+ if self .sampling_params .stop_token_ids :
720
+ self .output_ids [- 1 ] = next (iter (self .sampling_params .stop_token_ids ))
721
+ if self .eos_token_ids :
722
+ self .output_ids [- 1 ] = next (iter (self .eos_token_ids ))
723
+ self .finished_reason = FINISH_MATCHED_STR (matched = "NaN happened" )
724
+ return
725
+
716
726
# Check stop strings
717
727
if len (self .sampling_params .stop_strs ) > 0 :
718
728
tail_str = self .tokenizer .decode (
Original file line number Diff line number Diff line change @@ -1129,6 +1129,7 @@ def handle_generate_request(
1129
1129
bootstrap_port = recv_req .bootstrap_port ,
1130
1130
bootstrap_room = recv_req .bootstrap_room ,
1131
1131
data_parallel_rank = recv_req .data_parallel_rank ,
1132
+ vocab_size = self .model_config .vocab_size ,
1132
1133
)
1133
1134
req .tokenizer = self .tokenizer
1134
1135
@@ -1395,8 +1396,10 @@ def log_prefill_stats(
1395
1396
logger .info (f )
1396
1397
1397
1398
if self .enable_metrics :
1398
- cache_hit_rate = adder .log_hit_tokens / (
1399
- adder .log_input_tokens + adder .log_hit_tokens
1399
+ total_tokens = adder .log_input_tokens + adder .log_hit_tokens
1400
+
1401
+ cache_hit_rate = (
1402
+ adder .log_hit_tokens / total_tokens if total_tokens > 0 else 0.0
1400
1403
)
1401
1404
self .stats .num_running_reqs = running_bs
1402
1405
self .stats .num_used_tokens = num_used
You can’t perform that action at this time.
0 commit comments