Add support for embeddings pooling type

donderom · donderom · commit 02f24ca65450 · 2025-02-24T17:59:45.000+01:00
diff --git a/src/main/scala/com/donderom/llm4s/Llm.scala b/src/main/scala/com/donderom/llm4s/Llm.scala
@@ -47,7 +47,7 @@ object Llm:
         for
           llm <- llm
           config <- LlmParams.parse(params)
-          ctx <- createContext(llm, config.context, false)
+          ctx <- createContext(llm, config.context, llamaParams(false))
           _ <- loadLora(llm, ctx, config.lora)
         yield SlincLlm(ctx).generate(prompt, config)
 
@@ -56,9 +56,18 @@ object Llm:
           params: EmbeddingParams
       ): Result[Array[Float]] =
         for
+          _ <- Either.cond(
+            params.poolingType != Llama.PoolingType.RANK,
+            params,
+            LlmError.ConfigError("Rank pooling type is not supported")
+          )
           llm <- llm
           config <- EmbeddingParams.parse(params)
-          ctx <- createContext(llm, config.context, true)
+          ctx <- createContext(
+            llm,
+            config.context,
+            embeddingParams(params.poolingType)
+          )
         yield SlincLlm(ctx).embeddings(prompt, config)
 
       def close(): Unit =
@@ -104,19 +113,19 @@ object Llm:
       private def createContext(
           llm: Llama.Model,
           params: ContextParams,
-          embedding: Boolean
+          nativeParams: (
+              Llama.ContextParams,
+              ContextParams
+          ) => Llama.ContextParams
       ): Result[Llama.Ctx] =
         val error = s"Cannot initialize model context ($params)"
         for
           llama <- api
           ctx <- catchNonFatal(
             llama.llama_init_from_model(
               model = llm,
-              params = llamaParams(
-                llama.llama_context_default_params(),
-                params,
-                embedding
-              )
+              params =
+                nativeParams(llama.llama_context_default_params(), params)
             )
           )(error).filterOrElse(notNull, ModelError(error))
         yield ctx
@@ -156,9 +165,10 @@ object Llm:
         yield ()
 
       private def llamaParams(
-          defaultParams: Llama.ContextParams,
-          params: ContextParams,
           embedding: Boolean
+      )(
+          defaultParams: Llama.ContextParams,
+          params: ContextParams
       ): Llama.ContextParams =
         defaultParams.copy(
           n_ctx = params.size,
@@ -178,6 +188,15 @@ object Llm:
           embeddings = embedding
         )
 
+      private def embeddingParams(
+          poolingType: Llama.PoolingType
+      )(
+          defaultParams: Llama.ContextParams,
+          params: ContextParams
+      ): Llama.ContextParams =
+        llamaParams(true)(defaultParams, params)
+          .copy(pooling_type = poolingType)
+
   private def catchNonFatal[A](f: => A)(reason: => String): Result[A] =
     try Right(f)
     catch
diff --git a/src/main/scala/com/donderom/llm4s/Params.scala b/src/main/scala/com/donderom/llm4s/Params.scala
@@ -218,6 +218,7 @@ enum Norm:
 
 final case class EmbeddingParams(
     context: ContextParams = ContextParams(),
+    poolingType: Llama.PoolingType = Llama.PoolingType.NONE,
     // Normalisation for embeddings
     norm: Option[Norm] = None
 )
diff --git a/src/main/scala/com/donderom/llm4s/SlincLlm.scala b/src/main/scala/com/donderom/llm4s/SlincLlm.scala
@@ -161,7 +161,10 @@ private class SlincLlm private[llm4s] (private[llm4s] val ctx: Llama.Ctx):
     val ids = encode(prompt)
     val _ = evaluate(ids, Evaluated.none, params.context.batch)
     val size = llama.llama_model_n_embd(model)
-    val embeddings = llama.llama_get_embeddings(ctx).asArray(size).unsafeArray
+    val embeddings =
+      if params.poolingType == Llama.PoolingType.NONE then
+        llama.llama_get_embeddings(ctx).asArray(size).unsafeArray
+      else llama.llama_get_embeddings_seq(ctx, 0).asArray(size).unsafeArray
     llama.llama_free(ctx)
 
     def normalized(

Original file line number	Diff line number	Diff line change
`@@ -218,6 +218,7 @@ enum Norm:`
`218`	`218`
`219`	`219`	`final case class EmbeddingParams(`
`220`	`220`	`context: ContextParams = ContextParams(),`
	`221`	`+ poolingType: Llama.PoolingType = Llama.PoolingType.NONE,`
`221`	`222`	`// Normalisation for embeddings`
`222`	`223`	`norm: Option[Norm] = None`
`223`	`224`	`)`