     get_tokenizer,
     get_tokenizer_from_processor,
 )
+from sglang.srt.lora.lora_registry import LoRARef, LoRARegistry
 from sglang.srt.managers.io_struct import (
     AbortReq,
     BatchEmbeddingOut,
@@ -242,11 +243,11 @@ def __init__(
                     revision=server_args.revision,
                 )

-        # Initialize loaded loRA adapters with the initial lora paths in the server_args.
-        # This list will be updated when new LoRA adapters are loaded or unloaded dynamically.
-        self.loaded_lora_adapters: Dict[str, str] = dict(
-            self.server_args.lora_paths or {}
-        )
+        # Initialize the `LoRARegistry` with initial LoRA adapter paths provided in `server_args`.
+        # The registry dynamically updates as adapters are loaded / unloaded during runtime. It
+        # serves as the source of truth for available adapters and maps user-friendly LoRA names
+        # to internally used unique LoRA IDs.
+        self.lora_registry = LoRARegistry(self.server_args.lora_paths or {})

         # Store states
         self.no_create_loop = False
@@ -523,6 +524,10 @@ async def _tokenize_one_request(
         else:
             mm_inputs = None

+        if self.server_args.enable_lora and obj.lora_path:
+            # Replace the user-friendly LoRA names in `lora_path` with their corresponding unique LoRA IDs.
+            obj.lora_path = await self.lora_registry.acquire(obj.lora_path)
+
         self._validate_one_request(obj, input_ids)
         return self._create_tokenized_object(
             obj, input_text, input_ids, input_embeds, mm_inputs, token_type_ids
@@ -574,8 +579,6 @@ def _validate_one_request(
                 "The server is not configured to enable custom logit processor. "
                 "Please set `--enable-custom-logits-processor` to enable this feature."
             )
-        if self.server_args.enable_lora and obj.lora_path:
-            self._validate_lora_adapters(obj)

     def _validate_input_ids_in_vocab(
         self, input_ids: List[int], vocab_size: int
@@ -689,21 +692,6 @@ def _validate_batch_tokenization_constraints(
                 "Batch tokenization is not needed for input_embeds. Do not set `enable_tokenizer_batch_encode`."
             )

-    def _validate_lora_adapters(self, obj: GenerateReqInput):
-        """Validate that the requested LoRA adapters are loaded."""
-        requested_adapters = (
-            set(obj.lora_path) if isinstance(obj.lora_path, list) else {obj.lora_path}
-        )
-        loaded_adapters = (
-            self.loaded_lora_adapters.keys() if self.loaded_lora_adapters else set()
-        )
-        unloaded_adapters = requested_adapters - loaded_adapters
-        if unloaded_adapters:
-            raise ValueError(
-                f"The following requested LoRA adapters are not loaded: {unloaded_adapters}\n"
-                f"Loaded adapters: {loaded_adapters}."
-            )
-
     def _send_one_request(
         self,
         obj: Union[GenerateReqInput, EmbeddingReqInput],
@@ -1054,8 +1042,18 @@ async def load_lora_adapter(
             )

         async with self.model_update_lock.writer_lock:
+            # Generate a new uniquely identifiable LoRARef object.
+            new_adapter = LoRARef(
+                lora_name=obj.lora_name,
+                lora_path=obj.lora_path,
+            )
+
+            # Pass the unique LoRA ID to the backend; register only after loading succeeds.
+            obj.lora_id = new_adapter.lora_id
             result = (await self.update_lora_adapter_communicator(obj))[0]
-            self.loaded_lora_adapters = result.loaded_adapters
+            if result.success:
+                await self.lora_registry.register(new_adapter)
+
         return result

     async def unload_lora_adapter(
@@ -1069,6 +1067,10 @@ async def unload_lora_adapter(
                 "LoRA is not enabled. Please set `--enable-lora` to enable LoRA."
             )

+        assert (
+            obj.lora_name is not None
+        ), "lora_name must be provided to unload LoRA adapter"
+
         # TODO (lifuhuang): Remove this after we verify that dynamic lora loading works
         # with dp_size > 1.
         assert (
@@ -1080,8 +1082,9 @@ async def unload_lora_adapter(
             )

         async with self.model_update_lock.writer_lock:
+            obj.lora_id = await self.lora_registry.unregister(obj.lora_name)
             result = (await self.update_lora_adapter_communicator(obj))[0]
-            self.loaded_lora_adapters = result.loaded_adapters
+
         return result

     async def get_weights_by_name(
@@ -1309,7 +1312,7 @@ def dump_requests_before_crash(self):
         filename = os.path.join(
             self.crash_dump_folder,
             os.getenv("HOSTNAME", None),
-            f'crash_dump_{datetime.now().strftime("%Y-%m-%d_%H-%M-%S")}.pkl',
+            f"crash_dump_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.pkl",
         )

         os.makedirs(os.path.dirname(filename), exist_ok=True)
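
For context, below is a minimal sketch of the registry contract the diff above relies on. The real `LoRARef` and `LoRARegistry` live in `sglang/srt/lora/lora_registry.py`; the field defaults, error messages, and internals here are assumptions for illustration, not the actual implementation.

import uuid
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Union


@dataclass(frozen=True)
class LoRARef:
    # Pairs a user-facing adapter name with an immutable, unique ID (illustrative).
    lora_name: str
    lora_path: str
    lora_id: str = field(default_factory=lambda: uuid.uuid4().hex)


class LoRARegistry:
    # Source of truth for loaded adapters; maps user-friendly names to LoRARefs.
    def __init__(self, lora_paths: Optional[Dict[str, str]] = None):
        self._registry: Dict[str, LoRARef] = {
            name: LoRARef(lora_name=name, lora_path=path)
            for name, path in (lora_paths or {}).items()
        }

    async def register(self, lora_ref: LoRARef) -> None:
        # Called only after the backend reports a successful load.
        if lora_ref.lora_name in self._registry:
            raise ValueError(f"LoRA adapter '{lora_ref.lora_name}' is already loaded.")
        self._registry[lora_ref.lora_name] = lora_ref

    async def unregister(self, lora_name: str) -> str:
        # Remove the adapter and return its unique ID for the backend unload call.
        lora_ref = self._registry.pop(lora_name, None)
        if lora_ref is None:
            raise ValueError(f"LoRA adapter '{lora_name}' is not loaded.")
        return lora_ref.lora_id

    async def acquire(self, lora_path: Union[str, List[str]]) -> Union[str, List[str]]:
        # Translate one user-friendly name (or a list of them) into unique IDs,
        # failing loudly for names that were never loaded; this subsumes the
        # removed `_validate_lora_adapters` check.
        if isinstance(lora_path, list):
            return [await self.acquire(name) for name in lora_path]
        if lora_path not in self._registry:
            raise ValueError(f"LoRA adapter '{lora_path}' is not loaded.")
        return self._registry[lora_path].lora_id

Keying requests by an immutable unique ID rather than the mutable user-facing name lets the registry remain the single source of truth while adapters are loaded and unloaded concurrently.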