We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent eb118d8, commit cee9f32 (Copy full SHA for cee9f32)
python/sglang/srt/mem_cache/memory_pool.py
@@ -520,8 +520,13 @@ def __init__(
520
self.layers_mapping[global_layer_id] = (swa_layer_id, True)
521
self.full_to_swa_index_mapping: Optional[torch.Tensor] = None
522
523
+ k_size, v_size = self.get_kv_size_bytes()
524
+ self.mem_usage = (k_size + v_size) / GB
525
+
526
def get_kv_size_bytes(self):
- raise NotImplementedError
527
+ k_size, v_size = self.full_kv_pool.get_kv_size_bytes()
528
+ k_size_swa, v_size_swa = self.swa_kv_pool.get_kv_size_bytes()
529
+ return k_size + k_size_swa, v_size + v_size_swa
530
531
def get_contiguous_buf_infos(self):
532
full_kv_data_ptrs, full_kv_data_lens, full_kv_item_lens = (
0 commit comments