Merge pull request sgl-project#1 from glenliu-monoid/preview

glenliu-monoid · web-flow · commit 9a1424c592eb · 2025-08-14T15:32:10.000-04:00
Change the req.finished() check in stream_output_generation to fix missi…
diff --git a/python/sglang/srt/managers/scheduler_output_processor_mixin.py b/python/sglang/srt/managers/scheduler_output_processor_mixin.py
@@ -532,14 +532,11 @@ def stream_output_generation(
             if self.model_config.is_multimodal_gen and req.to_abort:
                 continue
 
-            # If this is the unconditioned half, skip it; if it's finished, drop the pair.
-            if req in uncond_req_to_cond_rid:
-                if req.finished():
+            if req.finished():
+                # If this is the unconditioned half of a CFG request, drop the pair from bookkeeping
+                if req in uncond_req_to_cond_rid:
                     del self.cfg_rid_to_uncond[uncond_req_to_cond_rid[req]]
 
-                continue
-
-            if req.finished():
                 if req.finished_output:
                     # With the overlap schedule, a request will try to output twice and hit this line twice
                     # because of the one additional delayed token. This "continue" prevented the dummy output.
@@ -564,6 +561,10 @@ def stream_output_generation(
                         else False
                     )
 
+            # If this is the unconditioned half of a CFG request, don't add to output
+            if req in uncond_req_to_cond_rid:
+                continue
+
             if should_output:
                 send_token_offset = req.send_token_offset
                 send_output_token_logprobs_offset = (