File tree Expand file tree Collapse file tree 1 file changed +7
-6
lines changed
python/sglang/srt/managers Expand file tree Collapse file tree 1 file changed +7
-6
lines changed Original file line number Diff line number Diff line change @@ -532,14 +532,11 @@ def stream_output_generation(
532
532
if self .model_config .is_multimodal_gen and req .to_abort :
533
533
continue
534
534
535
- # If this is the unconditioned half, skip it; if it's finished, drop the pair.
536
- if req in uncond_req_to_cond_rid :
537
- if req . finished () :
535
+ if req . finished ():
536
+ # If this is the unconditioned half of a CFG request, drop the pair from bookkeeping
537
+ if req in uncond_req_to_cond_rid :
538
538
del self .cfg_rid_to_uncond [uncond_req_to_cond_rid [req ]]
539
539
540
- continue
541
-
542
- if req .finished ():
543
540
if req .finished_output :
544
541
# With the overlap schedule, a request will try to output twice and hit this line twice
545
542
# because of the one additional delayed token. This "continue" prevented the dummy output.
@@ -564,6 +561,10 @@ def stream_output_generation(
564
561
else False
565
562
)
566
563
564
+ # If this is the unconditioned half of a CFG request, don't add to output
565
+ if req in uncond_req_to_cond_rid :
566
+ continue
567
+
567
568
if should_output :
568
569
send_token_offset = req .send_token_offset
569
570
send_output_token_logprobs_offset = (
You can’t perform that action at this time.
0 commit comments