Skip to content

Commit cee9f32

Browse files
authored
[minor fix] llama4 hybrid memory (#7950)
1 parent eb118d8 commit cee9f32

File tree

1 file changed: +6 additions, -1 deletion

python/sglang/srt/mem_cache/memory_pool.py

Lines changed: 6 additions & 1 deletion

@@ -520,8 +520,13 @@ def __init__(
             self.layers_mapping[global_layer_id] = (swa_layer_id, True)
         self.full_to_swa_index_mapping: Optional[torch.Tensor] = None

+        k_size, v_size = self.get_kv_size_bytes()
+        self.mem_usage = (k_size + v_size) / GB
+
     def get_kv_size_bytes(self):
-        raise NotImplementedError
+        k_size, v_size = self.full_kv_pool.get_kv_size_bytes()
+        k_size_swa, v_size_swa = self.swa_kv_pool.get_kv_size_bytes()
+        return k_size + k_size_swa, v_size + v_size_swa

     def get_contiguous_buf_infos(self):
         full_kv_data_ptrs, full_kv_data_lens, full_kv_item_lens = (

Commit comments (0)