Skip to content

Commit e2d66f6

Browse files
ispobock and mickqian authored
Skip llama4 vision module loading when multimodal disabled (#8272)
Co-authored-by: Mick <[email protected]>
1 parent 01c0000 commit e2d66f6

File tree

2 files changed

+11
-3
lines changed

2 files changed

+11
-3
lines changed

python/sglang/srt/managers/schedule_batch.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,7 @@
106106
"num_reserved_decode_tokens",
107107
"weight_loader_disable_mmap",
108108
"enable_triton_kernel_moe",
109+
"enable_multimodal",
109110
]
110111

111112
# Put some global args for easy access

python/sglang/srt/models/mllama4.py

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
Modality,
2424
MultimodalDataItem,
2525
MultimodalInputs,
26+
global_server_args_dict,
2627
)
2728
from sglang.srt.model_executor.forward_batch_info import ForwardBatch
2829
from sglang.srt.model_loader.weight_utils import default_weight_loader
@@ -55,13 +56,17 @@ def __init__(
5556
self.quant_config = quant_config
5657

5758
# Check if this is a text-only model (modelopt fp8 llama4 has no vision components)
58-
self.has_vision = self._has_vision_weights(config)
59-
if not self.has_vision:
59+
self.has_vision_weights = self._has_vision_weights(config)
60+
if not self.has_vision_weights:
6061
logger.warning(
6162
"No vision weights found in checkpoint. Model will run in text-only mode. "
6263
"Multimodal capabilities (image processing) will be unavailable."
6364
)
6465

66+
self.has_vision = (
67+
self.has_vision_weights and global_server_args_dict["enable_multimodal"]
68+
)
69+
6570
if self.has_vision:
6671
self.vision_model = Llama4VisionModel(config.vision_config)
6772
self.multi_modal_projector = Llama4MultiModalProjector(config)
@@ -269,7 +274,9 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]) -> Set[str]:
269274

270275
def _should_skip_weight(self, name: str) -> bool:
271276
"""Check if we should skip loading this weight."""
272-
return "vision" in name and not self.has_vision
277+
return not self.has_vision and (
278+
"vision" in name or "multi_modal_projector" in name
279+
)
273280

274281
def _transform_weight_name(self, name: str) -> str:
275282
"""Transform weight name by adding language_model prefix if needed."""

0 commit comments

Comments
 (0)