-
Notifications
You must be signed in to change notification settings - Fork 3.1k
Fix MTP with Deepseek R1 Fp4 #7376
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
65490a2
c653b1b
bf8af63
241d55f
9d6da0f
357bbf8
1baaaea
6c2c8a7
055ee31
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2201,7 +2201,7 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]], is_nextn=Fal | |
q_a_proj_weight = cached_a_proj[q_a_proj_name] | ||
kv_a_proj_weight = cached_a_proj[kv_a_proj_name] | ||
cat_dim = 0 | ||
if ( | ||
if self.quant_config is not None and ( | ||
self.quant_config.get_name() == "awq" | ||
or self.quant_config.get_name() == "moe_wna16" | ||
): | ||
|
@@ -2232,6 +2232,13 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]], is_nextn=Fal | |
for scale in ["k_scale", "v_scale"]: | ||
if scale in name: | ||
name = name.replace(f"{scale[0]}_proj", "attn_mqa") | ||
break | ||
|
||
if name not in params_dict: | ||
# modelopt ckpt contains not needed weights for MTP module: | ||
# model.decoder.self_attn.attn_mqa.v_scale and | ||
# model.decoder.self_attn.attn_mqa.k_scale | ||
logger.warning(f"{name} not found in params_dict.") | ||
continue | ||
|
||
param = params_dict[name] | ||
weight_loader = getattr( | ||
param, "weight_loader", default_weight_loader | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good addition of the `self.quant_config is not None` check. This prevents a potential `AttributeError` if `self.quant_config` happens to be `None`, which is now possible due to the changes in `python/sglang/srt/model_loader/loader.py` for the Deepseek R1 Fp4 model.