Skip to content

Commit 9a1424c

Browse files
Merge pull request sgl-project#1 from glenliu-monoid/preview
change req.finished checking in stream_output_generation to fix missi…
2 parents df12fcf + 253777d commit 9a1424c

File tree

1 file changed

+7
-6
lines changed

1 file changed

+7
-6
lines changed

python/sglang/srt/managers/scheduler_output_processor_mixin.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -532,14 +532,11 @@ def stream_output_generation(
532532
if self.model_config.is_multimodal_gen and req.to_abort:
533533
continue
534534

535-
# If this is the unconditioned half, skip it; if it's finished, drop the pair.
536-
if req in uncond_req_to_cond_rid:
537-
if req.finished():
535+
if req.finished():
536+
# If this is the unconditioned half of a CFG request, drop the pair from bookkeeping
537+
if req in uncond_req_to_cond_rid:
538538
del self.cfg_rid_to_uncond[uncond_req_to_cond_rid[req]]
539539

540-
continue
541-
542-
if req.finished():
543540
if req.finished_output:
544541
# With the overlap schedule, a request will try to output twice and hit this line twice
545542
# because of the one additional delayed token. This "continue" prevents the dummy output.
@@ -564,6 +561,10 @@ def stream_output_generation(
564561
else False
565562
)
566563

564+
# If this is the unconditioned half of a CFG request, don't add it to the output
565+
if req in uncond_req_to_cond_rid:
566+
continue
567+
567568
if should_output:
568569
send_token_offset = req.send_token_offset
569570
send_output_token_logprobs_offset = (

0 commit comments

Comments
 (0)