@@ -7828,28 +7828,6 @@ def prepare_tensors(self):
7828
7828
class BailingMoeV2Model (TextModel ):
7829
7829
model_arch = gguf .MODEL_ARCH .BAILINGMOE_V2
7830
7830
7831
- @staticmethod
7832
- def permute (
7833
- weights : Tensor , n_head : int , n_head_kv : int | None , rope_dim : int | None
7834
- ):
7835
- if n_head_kv is not None and n_head != n_head_kv :
7836
- n_head = n_head_kv
7837
- if rope_dim is None :
7838
- rope_dim = weights .shape [0 ] // n_head
7839
- weights_rope , weights_nope = weights .reshape (
7840
- n_head , weights .shape [0 ] // n_head , * weights .shape [1 :]
7841
- ).split ([rope_dim , weights .shape [0 ] // n_head - rope_dim ], dim = 1 )
7842
- return torch .cat (
7843
- [
7844
- weights_rope .reshape (
7845
- n_head , 2 , rope_dim // 2 , * weights_rope .shape [2 :]
7846
- )
7847
- .swapaxes (1 , 2 )
7848
- .reshape (weights_rope .shape ),
7849
- weights_nope ,
7850
- ], dim = 1
7851
- ).reshape (weights .shape )
7852
-
7853
7831
def set_vocab (self ):
7854
7832
self ._set_vocab_gpt2 ()
7855
7833
@@ -7889,7 +7867,6 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
7889
7867
if match and int (match .group (1 )) >= block_count :
7890
7868
return []
7891
7869
7892
- rope_dim = int (self .hparams ['partial_rotary_factor' ] * self .hparams ['head_dim' ])
7893
7870
if name .endswith ("query_key_value.weight" ):
7894
7871
n_head = self .hparams ["num_attention_heads" ]
7895
7872
n_kv_head = self .hparams .get ("num_key_value_heads" )
@@ -7899,18 +7876,10 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
7899
7876
q , k , v = data_torch .split ([n_head * head_dim , n_kv_head * head_dim , n_kv_head * head_dim ], dim = - 2 )
7900
7877
7901
7878
return [
7902
- (self .format_tensor_name (gguf .MODEL_TENSOR .ATTN_Q , bid ), BailingMoeV2Model . permute ( q , n_head , n_head , rope_dim ) ),
7903
- (self .format_tensor_name (gguf .MODEL_TENSOR .ATTN_K , bid ), BailingMoeV2Model . permute ( k , n_head , n_kv_head , rope_dim ) ),
7879
+ (self .format_tensor_name (gguf .MODEL_TENSOR .ATTN_Q , bid ), q ),
7880
+ (self .format_tensor_name (gguf .MODEL_TENSOR .ATTN_K , bid ), k ),
7904
7881
(self .format_tensor_name (gguf .MODEL_TENSOR .ATTN_V , bid ), v )
7905
7882
]
7906
- elif "attention.key_layernorm" in name or "attention.query_layernorm" in name :
7907
- mapping = {
7908
- "attention.key_layernorm" : "self_attn.key_layernorm" ,
7909
- "attention.query_layernorm" : "self_attn.query_layernorm" ,
7910
- }
7911
- for k , v in mapping .items ():
7912
- name = name .replace (k , v )
7913
- return [(self .map_tensor_name (name ), BailingMoeV2Model .permute (data_torch , 1 , 1 , rope_dim ))]
7914
7883
elif name .find ("mlp.experts" ) != - 1 :
7915
7884
n_experts = self .hparams ["num_experts" ]
7916
7885
assert bid is not None
@@ -7945,6 +7914,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
7945
7914
pre_tensor_name_mapping = {
7946
7915
"attention.dense" : "self_attn.dense" ,
7947
7916
"mlp.gate.expert_bias" : "mlp.gate.e_score_correction.bias" ,
7917
+ "attention.key_layernorm" : "self_attn.key_layernorm" ,
7918
+ "attention.query_layernorm" : "self_attn.query_layernorm" ,
7948
7919
}
7949
7920
for k , v in pre_tensor_name_mapping .items ():
7950
7921
name = name .replace (k , v )
0 commit comments