Merged

41 commits
c92be89
Add `serde` alias for `gelu_pytorch_tanh` to Gelu
alvarobartt Jul 25, 2025
bf99ea2
Restructure some GTE imports
alvarobartt Jul 25, 2025
3db49ca
Add Gemma3 implementation (WIP)
alvarobartt Jul 28, 2025
7979209
Sort imports and add Gemma3 loading
alvarobartt Jul 28, 2025
9eb10c8
Remove `use_bidirectional_attention` as sliding attention is always b…
alvarobartt Jul 28, 2025
8e1710b
Rename `Gemma3` to `TinyGemma`
alvarobartt Jul 28, 2025
fca9b1c
Use `query_pre_attn_scalar` for attention scaling
alvarobartt Jul 28, 2025
6ef6733
Add `rope_local_base_freq` base for local attention (`sliding_attenti…
alvarobartt Jul 28, 2025
1bdb605
Skip `DType::F16` for TinyGemma (fails on Transformers too)
alvarobartt Jul 28, 2025
6dc1b3f
Use `pad_token_id` instead of `eos_token_id`
alvarobartt Jul 28, 2025
247ed64
Add `TinyGemmaLayerNorm` (without bias)
alvarobartt Jul 28, 2025
3f0ec54
Use `broadcast_add` when applying attention bias masking
alvarobartt Jul 28, 2025
5bf4740
Fix `attention_type` match-clause (it was reversed)
alvarobartt Jul 28, 2025
e950f57
Replace `LayerNorm` with `RMSNorm` in decoder
alvarobartt Jul 28, 2025
ccf7e85
Fix `full_attention` (attend to all tokens)
alvarobartt Jul 28, 2025
6bbc181
Remove unused Gemma3 pooling methods (CLS, Last Token)
alvarobartt Jul 30, 2025
62a3d86
Use right-padding instead of left-padding
alvarobartt Jul 30, 2025
c8e31c3
Rename TinyGemma to Gemma3 (again)
alvarobartt Jul 30, 2025
235fffd
Add CUDA support for Gemma3 in FP32 (non-flash)
alvarobartt Jul 30, 2025
8ea46f5
Optimize attention by fusing KQV projection (see the sketch after this commit list)
alvarobartt Jul 30, 2025
6439582
Update `version` to 1.8.0 (#686)
alvarobartt Aug 5, 2025
4de5e18
Squash merge main into new-model-addition
alvarobartt Aug 5, 2025
1a985ad
Add `position_ids` as `ort::inputs!` for inference
alvarobartt Aug 6, 2025
f4b1172
Conditionally add `position_ids` if required for `ort`
alvarobartt Aug 7, 2025
527ce0a
Adjust HPU warmup: use dummy inputs with shape more close to real sce…
kaixuanliu Aug 8, 2025
ce48fb6
Add `extra_args` to `trufflehog` to exclude unverified results (#696)
alvarobartt Aug 11, 2025
6b415aa
Update GitHub templates & fix mentions to Text Embeddings Inference (…
alvarobartt Aug 11, 2025
b3f136a
Disable Flash Attention with `USE_FLASH_ATTENTION` (#692)
alvarobartt Aug 15, 2025
15b9ce5
Add support for `position_ids` and `past_key_values` in `OrtBackend` …
alvarobartt Aug 20, 2025
b1e48bd
HPU upgrade to Synapse 1.21.3 (#703)
kaixuanliu Aug 20, 2025
5e870c0
Upgrade to IPEX 2.8 (#702)
kaixuanliu Aug 21, 2025
57c811e
Parse `modules.json` to identify default `Dense` modules (#701)
alvarobartt Aug 21, 2025
35c0f77
Add `padding_side` and `pad_token_id` in `OrtBackend` (#705)
alvarobartt Sep 1, 2025
5dcc614
Merge `main` branch into `new-model-addition`
alvarobartt Sep 1, 2025
1b97bd9
Fix conflicts in `backends/candle/src/lib.rs`
alvarobartt Sep 1, 2025
b56a6c2
Merge branch 'main' into new-model-addition
alvarobartt Sep 1, 2025
47a2b05
Place `serde::Deserialize` import under `candle` feature
alvarobartt Sep 1, 2025
893ed3e
Handle `sentence_embedding` among `SessionOutputs`
alvarobartt Sep 1, 2025
725c241
Fix `Gemma3Model::load` for CUDA
alvarobartt Sep 2, 2025
674797b
Set default `DType::Float32` for Gemma3
alvarobartt Sep 2, 2025
dc89bc3
Merge branch 'main' into new-model-addition
alvarobartt Sep 3, 2025
1 change: 1 addition & 0 deletions backends/candle/src/layers/linear.rs
@@ -5,6 +5,7 @@ use serde::Deserialize;
 #[derive(Debug, Deserialize, PartialEq, Clone)]
 #[serde(rename_all = "lowercase")]
 pub enum HiddenAct {
+    #[serde(alias = "gelu_pytorch_tanh")]
     Gelu,
     Relu,
     Silu,
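With this alias, serde accepts the Transformers-style `gelu_pytorch_tanh` spelling alongside the canonical `gelu` when deserializing the activation from `config.json`. A minimal, self-contained sketch — the enum is trimmed to the variants visible in the hunk, and `serde`/`serde_json` as dependencies are an assumption:

use serde::Deserialize;

#[derive(Debug, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "lowercase")]
pub enum HiddenAct {
    #[serde(alias = "gelu_pytorch_tanh")]
    Gelu,
    Relu,
    Silu,
}

fn main() {
    // Both the canonical name and the Transformers-style alias map to Gelu.
    let canonical: HiddenAct = serde_json::from_str(r#""gelu""#).unwrap();
    let aliased: HiddenAct = serde_json::from_str(r#""gelu_pytorch_tanh""#).unwrap();
    assert_eq!(canonical, HiddenAct::Gelu);
    assert_eq!(aliased, HiddenAct::Gelu);
}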
29 changes: 26 additions & 3 deletions backends/candle/src/lib.rs
@@ -23,9 +23,9 @@ use crate::compute_cap::{
 };
 use crate::models::{
     BertConfig, BertModel, Dense, DenseConfig, DenseLayer, DistilBertConfig, DistilBertModel,
-    GTEConfig, GTEModel, JinaBertModel, JinaCodeBertModel, MPNetConfig, MPNetModel, MistralConfig,
-    Model, ModernBertConfig, ModernBertModel, NomicBertModel, NomicConfig, Qwen2Config,
-    Qwen3Config, Qwen3Model,
+    GTEConfig, GTEModel, Gemma3Config, Gemma3Model, JinaBertModel, JinaCodeBertModel, MPNetConfig,
+    MPNetModel, MistralConfig, Model, ModernBertConfig, ModernBertModel, NomicBertModel,
+    NomicConfig, Qwen2Config, Qwen3Config, Qwen3Model,
 };
 #[cfg(feature = "cuda")]
 use crate::models::{
@@ -95,6 +95,8 @@ enum Config {
     Camembert(BertConfig),
     #[serde(rename(deserialize = "distilbert"))]
     DistilBert(DistilBertConfig),
+    #[serde(rename(deserialize = "gemma3_text"))]
+    Gemma3(Gemma3Config),
     #[serde(alias = "new")]
     Gte(GTEConfig),
     #[serde(rename = "mpnet")]
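The `deserialize` rename keys the new variant off the `model_type` field of a checkpoint's `config.json`. A trimmed-down sketch of that routing — the `tag = "model_type"` attribute and the `Gemma3Config` fields are assumptions here, since neither appears in the hunk:

use serde::Deserialize;

// Hypothetical stand-in: the real Gemma3Config has many more fields.
#[derive(Debug, Deserialize)]
struct Gemma3Config {
    hidden_size: usize,
}

// Internally tagged enum: serde reads "model_type" and deserializes the
// remaining fields into the matching variant's config struct.
#[derive(Debug, Deserialize)]
#[serde(tag = "model_type")]
enum Config {
    #[serde(rename(deserialize = "gemma3_text"))]
    Gemma3(Gemma3Config),
}

fn main() {
    // The hidden_size value is illustrative only.
    let raw = r#"{ "model_type": "gemma3_text", "hidden_size": 1152 }"#;
    let Config::Gemma3(config) = serde_json::from_str::<Config>(raw).unwrap();
    println!("hidden_size = {}", config.hidden_size);
}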
@@ -263,6 +265,16 @@ impl CandleBackend {
                     DistilBertModel::load(vb, &config, model_type).s()?,
                 ))
             }
+            (Config::Gemma3(config), Device::Cpu | Device::Metal(_)) => {
+                if dtype != DType::F32 {
+                    Err(BackendError::Start(
+                        "Gemma3 is only supported in fp32 precision".to_string(),
+                    ))
+                } else {
+                    tracing::info!("Starting Gemma3 model on {:?}", device);
+                    Ok(Box::new(Gemma3Model::load(vb, &config, model_type).s()?))
+                }
+            }
             (Config::Gte(config), Device::Cpu | Device::Metal(_)) => {
                 tracing::info!("Starting GTE model on {:?}", device);
                 Ok(Box::new(GTEModel::load(vb, &config, model_type).s()?))
@@ -381,6 +393,17 @@ impl CandleBackend {
                 }
             }
+            #[cfg(feature = "cuda")]
+            (Config::Gemma3(config), Device::Cuda(_)) => {
+                if dtype != DType::F32 {
+                    Err(BackendError::Start(
+                        "Gemma3 is only supported in fp32 precision".to_string(),
+                    ))
+                } else {
+                    tracing::info!("Starting Gemma3 model on {:?}", device);
+                    Ok(Box::new(Gemma3Model::load(vb, &config, model_type).s()?))
+                }
+            }
             #[cfg(feature = "cuda")]
             (Config::Gte(config), Device::Cuda(_)) => {
                 if dtype != DType::F16
                     || !cfg!(any(feature = "flash-attn", feature = "flash-attn-v1"))
4 changes: 3 additions & 1 deletion backends/candle/src/models/flash_gte.rs
@@ -1,6 +1,8 @@
 use crate::flash_attn::flash_attn_varlen;
 use crate::layers::{get_cos_sin, get_inv_freqs, LayerNorm, Linear};
-use crate::models::{GTEClassificationHead, GTEConfig, Model, PositionEmbeddingType, GTEMLP};
+use crate::models::gte::{GTEClassificationHead, GTEConfig, GTEMLP};
+use crate::models::{Model, PositionEmbeddingType};
+
 use candle::{DType, Device, IndexOp, Result, Tensor};
 use candle_nn::{Embedding, Module, VarBuilder};
 use candle_rotary::apply_rotary_inplace;