Skip to content

Commit 48ddb75

Browse files
committed
[fix] use the NEOX rope type, and remove the permute & split steps from the conversion process
1 parent 3709961 commit 48ddb75

File tree

2 files changed

+5
-34
lines changed

2 files changed

+5
-34
lines changed

convert_hf_to_gguf.py

Lines changed: 4 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -7828,28 +7828,6 @@ def prepare_tensors(self):
78287828
class BailingMoeV2Model(TextModel):
78297829
model_arch = gguf.MODEL_ARCH.BAILINGMOE_V2
78307830

7831-
@staticmethod
7832-
def permute(
7833-
weights: Tensor, n_head: int, n_head_kv: int | None, rope_dim: int | None
7834-
):
7835-
if n_head_kv is not None and n_head != n_head_kv:
7836-
n_head = n_head_kv
7837-
if rope_dim is None:
7838-
rope_dim = weights.shape[0] // n_head
7839-
weights_rope, weights_nope = weights.reshape(
7840-
n_head, weights.shape[0] // n_head, *weights.shape[1:]
7841-
).split([rope_dim, weights.shape[0] // n_head - rope_dim], dim=1)
7842-
return torch.cat(
7843-
[
7844-
weights_rope.reshape(
7845-
n_head, 2, rope_dim // 2, *weights_rope.shape[2:]
7846-
)
7847-
.swapaxes(1, 2)
7848-
.reshape(weights_rope.shape),
7849-
weights_nope,
7850-
], dim=1
7851-
).reshape(weights.shape)
7852-
78537831
def set_vocab(self):
78547832
self._set_vocab_gpt2()
78557833

@@ -7889,7 +7867,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
78897867
if match and int(match.group(1)) >= block_count:
78907868
return []
78917869

7892-
rope_dim = int(self.hparams['partial_rotary_factor'] * self.hparams['head_dim'])
78937870
if name.endswith("query_key_value.weight"):
78947871
n_head = self.hparams["num_attention_heads"]
78957872
n_kv_head = self.hparams.get("num_key_value_heads")
@@ -7899,18 +7876,10 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
78997876
q, k, v = data_torch.split([n_head * head_dim, n_kv_head * head_dim, n_kv_head * head_dim], dim=-2)
79007877

79017878
return [
7902-
(self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid), BailingMoeV2Model.permute(q, n_head, n_head, rope_dim)),
7903-
(self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid), BailingMoeV2Model.permute(k, n_head, n_kv_head, rope_dim)),
7879+
(self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_Q, bid), q),
7880+
(self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_K, bid), k),
79047881
(self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_V, bid), v)
79057882
]
7906-
elif "attention.key_layernorm" in name or "attention.query_layernorm" in name:
7907-
mapping = {
7908-
"attention.key_layernorm": "self_attn.key_layernorm",
7909-
"attention.query_layernorm": "self_attn.query_layernorm",
7910-
}
7911-
for k, v in mapping.items():
7912-
name = name.replace(k, v)
7913-
return [(self.map_tensor_name(name), BailingMoeV2Model.permute(data_torch, 1, 1, rope_dim))]
79147883
elif name.find("mlp.experts") != -1:
79157884
n_experts = self.hparams["num_experts"]
79167885
assert bid is not None
@@ -7945,6 +7914,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
79457914
pre_tensor_name_mapping = {
79467915
"attention.dense": "self_attn.dense",
79477916
"mlp.gate.expert_bias": "mlp.gate.e_score_correction.bias",
7917+
"attention.key_layernorm": "self_attn.key_layernorm",
7918+
"attention.query_layernorm": "self_attn.query_layernorm",
79487919
}
79497920
for k, v in pre_tensor_name_mapping.items():
79507921
name = name.replace(k, v)

src/llama-model.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19485,7 +19485,6 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
1948519485
case LLM_ARCH_GRANITE_HYBRID:
1948619486
case LLM_ARCH_CHAMELEON:
1948719487
case LLM_ARCH_BAILINGMOE:
19488-
case LLM_ARCH_BAILINGMOE_V2:
1948919488
case LLM_ARCH_NEO_BERT:
1949019489
case LLM_ARCH_SMOLLM3:
1949119490
case LLM_ARCH_ARCEE:
@@ -19539,6 +19538,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
1953919538
case LLM_ARCH_SMALLTHINKER:
1954019539
case LLM_ARCH_GLM4_MOE:
1954119540
case LLM_ARCH_SEED_OSS:
19541+
case LLM_ARCH_BAILINGMOE_V2:
1954219542
return LLAMA_ROPE_TYPE_NEOX;
1954319543

1954419544
case LLM_ARCH_QWEN2VL:

0 commit comments

Comments (0)