|
40 | 40 | import vllm.envs as envs
|
41 | 41 | from vllm.distributed.device_communicators.base_device_communicator import (
|
42 | 42 | DeviceCommunicatorBase)
|
43 |
| -from vllm.distributed.kv_transfer.kv_connector.base import KVConnectorBase |
44 |
| -from vllm.distributed.kv_transfer.kv_connector.v1 import KVConnectorBase_V1 |
45 | 43 | from vllm.distributed.utils import StatelessProcessGroup
|
46 | 44 | from vllm.logger import init_logger
|
47 | 45 | from vllm.utils import (direct_register_custom_op, resolve_obj_by_qualname,
|
48 | 46 | supports_custom_op)
|
49 | 47 |
|
50 | 48 | if TYPE_CHECKING:
|
51 |
| - from vllm.config import VllmConfig |
| 49 | + pass |
52 | 50 |
|
53 | 51 |
|
54 | 52 | @dataclass
|
@@ -771,44 +769,6 @@ def get_pp_group() -> GroupCoordinator:
|
771 | 769 | # kept for backward compatibility
|
772 | 770 | get_pipeline_model_parallel_group = get_pp_group
|
773 | 771 |
|
774 |
| -# TODO: once we deprecate V0 KV transfer, we can move this to |
775 |
| -# be a non-global object. |
776 |
| -_KV_CONNECTOR_AGENT: Union[KVConnectorBase, KVConnectorBase_V1, None] = None |
777 |
| - |
778 |
| - |
779 |
| -def get_kv_transfer_group() -> Union[KVConnectorBase, KVConnectorBase_V1]: |
780 |
| - assert _KV_CONNECTOR_AGENT is not None, ( |
781 |
| - "disaggregated KV cache transfer parallel group is not initialized") |
782 |
| - return _KV_CONNECTOR_AGENT |
783 |
| - |
784 |
| - |
785 |
| -def has_kv_transfer_group() -> bool: |
786 |
| - return _KV_CONNECTOR_AGENT is not None |
787 |
| - |
788 |
| - |
789 |
| -def is_v1_kv_transfer_group( |
790 |
| - connector: Union[KVConnectorBase_V1, KVConnectorBase, |
791 |
| - None] = None) -> bool: |
792 |
| - """Check if the KV connector is the v1 connector. |
793 |
| - If the argument is None, it will check the global KV connector |
794 |
| -
|
795 |
| - Args: |
796 |
| - connector: The KV connector to check. If None, it will check the |
797 |
| - global KV connector. |
798 |
| -
|
799 |
| - Note: |
800 |
| - This function will no-longer be needed after the v1 KV connector |
801 |
| - becomes the default. |
802 |
| - """ |
803 |
| - if connector is None: |
804 |
| - connector = _KV_CONNECTOR_AGENT |
805 |
| - |
806 |
| - if connector is None: |
807 |
| - # Global KV connector is not set |
808 |
| - return False |
809 |
| - |
810 |
| - return isinstance(connector, KVConnectorBase_V1) |
811 |
| - |
812 | 772 |
|
813 | 773 | @contextmanager
|
814 | 774 | def graph_capture(device: torch.device):
|
@@ -991,37 +951,6 @@ def initialize_model_parallel(
|
991 | 951 | _DP.rank_in_group, _PP.rank_in_group, _TP.rank_in_group)
|
992 | 952 |
|
993 | 953 |
|
994 |
| -def ensure_kv_transfer_initialized(vllm_config: "VllmConfig") -> None: |
995 |
| - """ |
996 |
| - Initialize KV cache transfer parallel group. |
997 |
| - """ |
998 |
| - |
999 |
| - global _KV_CONNECTOR_AGENT |
1000 |
| - |
1001 |
| - if vllm_config.kv_transfer_config is None: |
1002 |
| - return |
1003 |
| - |
1004 |
| - if all([ |
1005 |
| - vllm_config.kv_transfer_config.is_kv_transfer_instance, |
1006 |
| - _KV_CONNECTOR_AGENT is None |
1007 |
| - ]): |
1008 |
| - from vllm.distributed.kv_transfer.kv_connector.factory import ( |
1009 |
| - KVConnectorFactory) |
1010 |
| - from vllm.distributed.kv_transfer.kv_connector.v1 import ( |
1011 |
| - KVConnectorRole as KVConnectorRole_V1) |
1012 |
| - |
1013 |
| - kwargs = { |
1014 |
| - "rank": get_world_group().rank, |
1015 |
| - "local_rank": get_world_group().local_rank, |
1016 |
| - "config": vllm_config, |
1017 |
| - # NOTE(Kuntai): |
1018 |
| - # Parallel state is initialized in v1 worker, |
1019 |
| - # so this connector is for sure worker connector. |
1020 |
| - "role": KVConnectorRole_V1.WORKER, |
1021 |
| - } |
1022 |
| - _KV_CONNECTOR_AGENT = KVConnectorFactory.create_connector(**kwargs) |
1023 |
| - |
1024 |
| - |
1025 | 954 | def ensure_model_parallel_initialized(
|
1026 | 955 | tensor_model_parallel_size: int,
|
1027 | 956 | pipeline_model_parallel_size: int,
|
|
0 commit comments