@@ -4,55 +4,66 @@ import fr.hammons.slinc.runtime.given
 import fr.hammons.slinc.types.SizeT
 import fr.hammons.slinc.{FSet, Ptr, Scope, Slinc}
 
-import java.nio.file.Path
-
-import scala.util.{Success, Try}
+import java.nio.file.{Files, Path}
 
 final case class Logprob(token: String, value: Double)
 final case class Probability(logprob: Logprob, candidates: Array[Logprob])
 final case class Token(value: String, probs: Vector[Probability] = Vector.empty)
 final case class Usage(promptSize: Int, tokens: LazyList[Token])
 
+enum LlmError(message: String) extends Exception(message):
+  case ModelError(message: String) extends LlmError(message)
+  case ConfigError(message: String) extends LlmError(message)
+
+import LlmError.ModelError
+
+type Result[A] = Either[LlmError, A]
+object Result:
+  def unit: Result[Unit] = Right(())
+
 trait Llm(val modelPath: Path) extends AutoCloseable:
-  def generate(prompt: String, params: LlmParams): Try[Usage]
+  def generate(prompt: String, params: LlmParams): Result[Usage]
 
-  def embeddings(prompt: String): Try[Array[Float]] =
+  def embeddings(prompt: String): Result[Array[Float]] =
     embeddings(prompt, EmbeddingParams())
 
-  def embeddings(prompt: String, params: EmbeddingParams): Try[Array[Float]]
+  def embeddings(prompt: String, params: EmbeddingParams): Result[Array[Float]]
 
-  def apply(prompt: String): Try[LazyList[String]] = apply(prompt, LlmParams())
+  def apply(prompt: String): Result[LazyList[String]] =
+    apply(prompt, LlmParams())
 
-  def apply(prompt: String, params: LlmParams): Try[LazyList[String]] =
+  def apply(prompt: String, params: LlmParams): Result[LazyList[String]] =
     generate(prompt, params).map(_.tokens.map(_.value))
 
 object Llm:
   def apply(model: Path): Llm = apply(model, ModelParams())
 
   def apply(model: Path, params: ModelParams): Llm =
     new Llm(model):
-      val binding = Try(FSet.instance[Llama])
+      val api = catchNonFatal(FSet.instance[Llama])("Cannot load libllama")
       val llm = createModel(model, params)
 
-      def generate(prompt: String, params: LlmParams): Try[Usage] =
+      def generate(prompt: String, params: LlmParams): Result[Usage] =
         for
           llm <- llm
-          ctx <- createContext(llm, params.context, false)
-          _ <- loadLora(llm, ctx, params.lora)
-        yield SlincLlm(ctx).generate(prompt, params)
+          config <- LlmParams.parse(params)
+          ctx <- createContext(llm, config.context, false)
+          _ <- loadLora(llm, ctx, config.lora)
+        yield SlincLlm(ctx).generate(prompt, config)
 
       def embeddings(
           prompt: String,
           params: EmbeddingParams
-      ): Try[Array[Float]] =
+      ): Result[Array[Float]] =
         for
           llm <- llm
-          ctx <- createContext(llm, params.context, true)
-        yield SlincLlm(ctx).embeddings(prompt, params)
+          config <- EmbeddingParams.parse(params)
+          ctx <- createContext(llm, config.context, true)
+        yield SlincLlm(ctx).embeddings(prompt, config)
 
       def close(): Unit =
         for
-          llama <- binding
+          llama <- api
           llm <- llm
         do
           llama.llama_model_free(llm)
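
The hunk above moves the public API from `scala.util.Try` to `Result[A] = Either[LlmError, A]`, so failures arrive as typed `ModelError`/`ConfigError` values rather than bare `Throwable`s. A minimal caller-side sketch of the new surface (the model path and prompt are illustrative placeholders, not part of this commit):

```scala
import java.nio.file.Path

@main def demo(): Unit =
  val llm = Llm(Path.of("models/llama.gguf")) // hypothetical model file
  try
    llm("Why is the sky blue?") match
      case Right(tokens)                 => tokens.foreach(print) // LazyList is consumed lazily
      case Left(LlmError.ModelError(m))  => Console.err.println(s"model error: $m")
      case Left(LlmError.ConfigError(m)) => Console.err.println(s"config error: $m")
  finally llm.close() // releases the native model via llama_model_free
```
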
@@ -61,70 +72,88 @@ object Llm:
   private def createModel(
       model: Path,
       params: ModelParams
-  ): Try[Llama.Model] =
-    binding.map: llama =>
-      llama.llama_backend_init()
-      llama.llama_numa_init(params.numa)
-      Scope.confined:
-        llama.llama_model_load_from_file(
-          path_model = Ptr.copy(model.toAbsolutePath.toString),
-          params = llama.llama_model_default_params().copy(
-            n_gpu_layers = params.gpuLayers,
-            main_gpu = params.mainGpu,
-            use_mmap = params.mmap,
-            use_mlock = params.mlock
+  ): Result[Llama.Model] =
+    val error = s"Cannot load the model $model"
+    for
+      llama <- api
+      path <- Either.cond(
+        Files.exists(model),
+        model,
+        ModelError(s"Model file $model does not exist")
+      )
+      _ <- catchNonFatal(llama.llama_backend_init())(
+        "Cannot load libllama backend"
+      )
+      _ <- catchNonFatal(llama.llama_numa_init(params.numa))(
+        s"Cannot init Numa (${params.numa})"
+      )
+      m <- catchNonFatal(
+        Scope.confined:
+          llama.llama_model_load_from_file(
+            path_model = Ptr.copy(path.toAbsolutePath.toString),
+            params = llama.llama_model_default_params().copy(
+              n_gpu_layers = params.gpuLayers,
+              main_gpu = params.mainGpu,
+              use_mmap = params.mmap,
+              use_mlock = params.mlock
+            )
           )
-        )
+      )(error).filterOrElse(notNull, ModelError(error))
+    yield m
 
   private def createContext(
       llm: Llama.Model,
-      contextParams: ContextParams,
+      params: ContextParams,
       embedding: Boolean
-  ): Try[Llama.Ctx] =
+  ): Result[Llama.Ctx] =
+    val error = s"Cannot initialize model context ($params)"
     for
-      llama <- binding
-      ctx = llama.llama_init_from_model(
-        model = llm,
-        params = llamaParams(
-          llama.llama_context_default_params(),
-          contextParams,
-          embedding
+      llama <- api
+      ctx <- catchNonFatal(
+        llama.llama_init_from_model(
+          model = llm,
+          params = llamaParams(
+            llama.llama_context_default_params(),
+            params,
+            embedding
+          )
         )
-      ) if ctx != Slinc.getRuntime().Null
+      )(error).filterOrElse(notNull, ModelError(error))
     yield ctx
 
   private def loadLora(
       llm: Llama.Model,
       ctx: Llama.Ctx,
       lora: List[AdapterParams]
-  ): Try[Unit] =
-    lora.map(loadAdapter(llm, ctx, _)).foldLeft(Try(())):
-      case (acc, Success(_)) => acc
-      case (_, failure)      => failure
+  ): Result[Unit] =
+    lora.map(loadAdapter(llm, ctx, _)).foldLeft(Result.unit):
+      case (acc, Right(_)) => acc
+      case (_, failure)    => failure
 
   private def loadAdapter(
       llm: Llama.Model,
       ctx: Llama.Ctx,
       params: AdapterParams
-  ): Try[Unit] =
-    Scope.confined:
-      for
-        llama <- binding
-        adapter <- Try(
+  ): Result[Unit] =
+    val error = s"Cannot initialize LoRA adapter ($params)"
+    for
+      llama <- api
+      config <- AdapterParams.parse(params)
+      adapter <- catchNonFatal(
+        Scope.confined:
           llama.llama_adapter_lora_init(
             model = llm,
-            path_lora = Ptr.copy(params.path.toAbsolutePath.toString)
-          )
-        )
-        if adapter != Slinc.getRuntime().Null
-        _ <- Try(
-          llama.llama_set_adapter_lora(
-            ctx = ctx,
-            adapter = adapter,
-            scale = params.scale
+            path_lora = Ptr.copy(config.path.toAbsolutePath.toString)
           )
+      )(error).filterOrElse(notNull, ModelError(error))
+      _ <- catchNonFatal(
+        llama.llama_set_adapter_lora(
+          ctx = ctx,
+          adapter = adapter,
+          scale = config.scale
         )
-      yield ()
+      )(error)
+    yield ()
 
   private def llamaParams(
       defaultParams: Llama.ContextParams,
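
With every native call wrapped individually, the `for` comprehensions above short-circuit at the first `Left`: a missing model file stops before `llama_backend_init` runs, and a null handle from llama.cpp is demoted to a `ModelError` before any later step. A self-contained sketch of the two stdlib building blocks used here, `Either.cond` and `filterOrElse` (paths and messages are illustrative):

```scala
import java.nio.file.{Files, Path}

val model = Path.of("missing.gguf") // hypothetical path

// Either.cond: Right(model) when the test passes, otherwise the supplied Left.
val checked: Either[String, Path] =
  Either.cond(Files.exists(model), model, s"Model file $model does not exist")

// filterOrElse: demotes a Right to a Left when the predicate fails,
// the same shape as the .filterOrElse(notNull, ModelError(error)) guard above.
val guarded: Either[String, Long] =
  Right(0L).filterOrElse(_ != 0L, "Cannot load the model")

// checked == Left("Model file missing.gguf does not exist")
// guarded == Left("Cannot load the model")
```
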
@@ -148,3 +177,11 @@ object Llm:
       flash_attn = params.flashAttention,
       embeddings = embedding
     )
+
+  private def catchNonFatal[A](f: => A)(reason: => String): Result[A] =
+    try Right(f)
+    catch
+      case t if scala.util.control.NonFatal(t) =>
+        Left(ModelError(s"$reason: ${t.getMessage}"))
+
+  private def notNull(ptr: Ptr[Any]): Boolean = ptr != Slinc.getRuntime().Null
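
The two new helpers close the loop: `catchNonFatal` turns a throwing native call into a `Result`, and `NonFatal` deliberately lets fatal throwables (`OutOfMemoryError`, `StackOverflowError`, ...) keep propagating, while `notNull` screens the null handles returned on failure. A standalone restatement of the capture behavior with plain `String` errors (names here are illustrative, not from the commit):

```scala
import scala.util.control.NonFatal

def capture[A](f: => A)(reason: => String): Either[String, A] =
  try Right(f) // success becomes Right
  catch case t if NonFatal(t) => // fatal errors still propagate
    Left(s"$reason: ${t.getMessage}")

capture(1 / 0)("Generation failed") // Left("Generation failed: / by zero")
capture(42)("unused reason")        // Right(42)
```
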