
Commit bb98b31

feat: Add conversion support in GraniteHybrid for non-hybrid (all attn)
This handles a configuration of the hparams in the GraniteHybrid architecture that devolves to the Granite (or GraniteMoe) architecture (i.e., Granite 3.x). It may be used for some models in the Granite 4 family, with the GraniteHybrid architecture acting as a superset arch. Rather than supporting it directly in the C++ graph, we simply coerce the architecture flag back to the correct "granite" or "granitemoe" architecture.

Branch: gabe-l-hart/GraniteNonHybridConversion

Signed-off-by: Gabe Goodhart <[email protected]>
1 parent 432cf43
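The interesting wrinkle is that some GGUF metadata may already carry the old architecture name by the time the coercion happens, so the commit also rewrites any string KV values that embed it. A minimal standalone sketch of that rewrite (the dict and values here are illustrative, not the converter's real state):

# Hypothetical metadata dict, for illustration only; the real writer
# stores GGUFValue objects inside gguf_writer.kv_data.
old_arch, new_arch = "granitehybrid", "granite"
kv = {
    "general.architecture": old_arch,  # string value embedding the old arch
    "general.file_type": 1,            # non-string value, left untouched
}
# Same idea as the loop in the diff below: rewrite any string value
# that embeds the old architecture name.
for key, val in kv.items():
    if isinstance(val, str) and old_arch in val:
        kv[key] = val.replace(old_arch, new_arch)
print(kv["general.architecture"])  # -> "granite"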

File tree

1 file changed: +23 −2 lines changed


convert_hf_to_gguf.py

Lines changed: 23 additions & 2 deletions
@@ -7656,6 +7656,24 @@ def __init__(self, *args, **kwargs):
             if i not in self._attn_layers
         ]

+        # There are some models in this family that are non-hybrid, but keep the
+        # same parent class by setting all layers to "attention." If this is the
+        # case, the model architecture needs to be updated to a standard
+        # "granite" or "granitemoe" model
+        if not self._ssm_layers:
+            old_arch = self.gguf_writer.arch
+            has_experts = self.find_hparam(["num_experts_per_tok"])
+            new_arch = (
+                gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.GRANITE_MOE]
+                if has_experts else
+                gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.GRANITE]
+            )
+            self.gguf_writer.arch = new_arch
+            for kv_entry in self.gguf_writer.kv_data:
+                for kv_val in kv_entry.values():
+                    if isinstance(kv_val.value, str) and old_arch in kv_val.value:
+                        kv_val.value = kv_val.value.replace(old_arch, new_arch)
+
         # n_group and d_inner are used during reshape_tensors for mamba2
         # NOTE: Explicitly include hparam prefix prefix for d_model to
         # disambiguate with top-level head_dim
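Taken in isolation, the selection in the hunk above is a two-way switch on whether the config defines experts. A runnable sketch of the same decision (the function name and arch strings are hypothetical; the converter reads the real values from the HF config via find_hparam):

def pick_arch(ssm_layers: list[int], num_experts_per_tok: int | None) -> str:
    """Mirror of the coercion logic: hybrid stays hybrid, otherwise
    pick the MoE or dense Granite architecture."""
    if ssm_layers:
        return "granitehybrid"  # assumed name for the hybrid arch
    return "granitemoe" if num_experts_per_tok else "granite"

assert pick_arch([], None) == "granite"
assert pick_arch([], 8) == "granitemoe"
assert pick_arch([1, 2, 5], None) == "granitehybrid"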
@@ -7740,8 +7758,11 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_rope_dimension_count(rope_dim)
         self.gguf_writer.add_head_count_kv(head_count_kv_vec)

-        ## If Bamba, use rope, otherwise don't
-        use_rope = "BambaForCausalLM" in self.hparams["architectures"]
+        ## If Bamba or non-hybrid, use rope, otherwise don't
+        use_rope = (
+            "BambaForCausalLM" in self.hparams["architectures"]
+            or not self._ssm_layers
+        )
         self.gguf_writer.add_rope_scaling_finetuned(use_rope)
         if not use_rope:
             self.gguf_writer.add_context_length(2**20)
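A consequence of this second hunk: non-hybrid models now take the rope path, so they keep their trained context length instead of the 2**20-token default applied when rope is off. A small sketch of the predicate (the config values are made up for illustration):

architectures = ["GraniteMoeHybridForCausalLM"]  # hypothetical HF config entry
ssm_layers: list[int] = []                       # all-attention => non-hybrid

use_rope = "BambaForCausalLM" in architectures or not ssm_layers
assert use_rope  # rope stays on; context length is left at the trained value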
