We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 2d96b83 commit eede58e · Copy full SHA for eede58e
python/sglang/srt/managers/schedule_policy.py
@@ -455,7 +455,9 @@ def add_req_state(r, insert_sort=False):
455
if not self.is_hybrid:
456
# Skip this logic for swa. The SWA has different memory management, and
457
# this mechanism is underestimating the memory usage.
458
- cur_rem_tokens = self.cur_rem_tokens - len(req.origin_input_ids)
+ cur_rem_tokens = self.cur_rem_tokens - self.ceil_paged_tokens(
459
+ req.extend_input_len
460
+ )
461
tokens_freed = 0
462
for i, (tokens_left, tokens_occupied) in enumerate(self.req_states):
463
# tokens_left gives a conservative calculation as the last token is not stored
0 commit comments