Fix Bug vllm-project#4: Add debug logging for nucleus top-p check

yuz207 · yuz207 · commit a38f70d67256 · 2025-09-27T18:39:11.000-07:00
Add diagnostic logging to verify the draft_top_p value and whether nucleus
(top-p) filtering will execute.

This will help diagnose why nucleus shows 32000 survivors (the full vocab)
instead of a filtered set.

Expected log output:
[NUCLEUS_DEBUG] draft_top_p from config: 0.95, will run nucleus: True

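If we see 'will run nucleus: False', we'll know the resolved draft_top_p is
outside (0, 1) (for example, the config supplies 1.0 or the expected value
isn't loaded) or there's a logic bug in the condition. Note that a missing or
falsy draft_top_p falls back to 0.95, which would still print 'True'.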
diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py
@@ -262,6 +262,8 @@ def _sample_draft_tokens(
 
             # --- top-p (nucleus) ---
             tp = float(getattr(self.opt_config, "draft_top_p", 0.95) or 0.95)
+            print(f"[NUCLEUS_DEBUG] draft_top_p from config: {tp}, will run nucleus: {0.0 < tp < 1.0}",
+                  file=sys.stderr, flush=True)
 
             if 0.0 < tp < 1.0:
                 p = torch.softmax(x, dim=-1)