@@ -4,55 +4,66 @@ import fr.hammons.slinc.runtime.given
 import fr.hammons.slinc.types.SizeT
 import fr.hammons.slinc.{FSet, Ptr, Scope, Slinc}
 
-import java.nio.file.Path
-
-import scala.util.{Success, Try}
+import java.nio.file.{Files, Path}
 
 final case class Logprob(token: String, value: Double)
 final case class Probability(logprob: Logprob, candidates: Array[Logprob])
 final case class Token(value: String, probs: Vector[Probability] = Vector.empty)
 final case class Usage(promptSize: Int, tokens: LazyList[Token])
 
+enum LlmError(message: String) extends Exception(message):
+  case ModelError(message: String) extends LlmError(message)
+  case ConfigError(message: String) extends LlmError(message)
+
+import LlmError.ModelError
+
+type Result[A] = Either[LlmError, A]
+object Result:
+  def unit: Result[Unit] = Right(())
+
 trait Llm(val modelPath: Path) extends AutoCloseable:
-  def generate(prompt: String, params: LlmParams): Try[Usage]
+  def generate(prompt: String, params: LlmParams): Result[Usage]
 
-  def embeddings(prompt: String): Try[Array[Float]] =
+  def embeddings(prompt: String): Result[Array[Float]] =
     embeddings(prompt, EmbeddingParams())
 
-  def embeddings(prompt: String, params: EmbeddingParams): Try[Array[Float]]
+  def embeddings(prompt: String, params: EmbeddingParams): Result[Array[Float]]
 
-  def apply(prompt: String): Try[LazyList[String]] = apply(prompt, LlmParams())
+  def apply(prompt: String): Result[LazyList[String]] =
+    apply(prompt, LlmParams())
 
-  def apply(prompt: String, params: LlmParams): Try[LazyList[String]] =
+  def apply(prompt: String, params: LlmParams): Result[LazyList[String]] =
     generate(prompt, params).map(_.tokens.map(_.value))
 
 object Llm:
   def apply(model: Path): Llm = apply(model, ModelParams())
 
   def apply(model: Path, params: ModelParams): Llm =
     new Llm(model):
-      val binding = Try(FSet.instance[Llama])
+      val api = catchNonFatal(FSet.instance[Llama])("Cannot load libllama")
       val llm = createModel(model, params)
 
-      def generate(prompt: String, params: LlmParams): Try[Usage] =
+      def generate(prompt: String, params: LlmParams): Result[Usage] =
         for
           llm <- llm
-          ctx <- createContext(llm, params.context, false)
-          _ <- loadLora(llm, ctx, params.lora)
-        yield SlincLlm(ctx).generate(prompt, params)
+          config <- LlmParams.parse(params)
+          ctx <- createContext(llm, config.context, false)
+          _ <- loadLora(llm, ctx, config.lora)
+        yield SlincLlm(ctx).generate(prompt, config)
 
       def embeddings(
           prompt: String,
           params: EmbeddingParams
-      ): Try[Array[Float]] =
+      ): Result[Array[Float]] =
         for
           llm <- llm
-          ctx <- createContext(llm, params.context, true)
-        yield SlincLlm(ctx).embeddings(prompt, params)
+          config <- EmbeddingParams.parse(params)
+          ctx <- createContext(llm, config.context, true)
+        yield SlincLlm(ctx).embeddings(prompt, config)
 
       def close(): Unit =
         for
-          llama <- binding
+          llama <- api
           llm <- llm
         do
           llama.llama_model_free(llm)
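
The hunk above moves the public API from `scala.util.Try` to `Result[A] = Either[LlmError, A]`, so failures arrive as typed `ModelError`/`ConfigError` values rather than bare `Throwable`s. A minimal caller-side sketch of the new surface (the model path and prompt are illustrative placeholders, not part of this commit):

```scala
import java.nio.file.Path

@main def demo(): Unit =
  val llm = Llm(Path.of("models/llama.gguf")) // hypothetical model file
  try
    llm("Why is the sky blue?") match
      case Right(tokens)                 => tokens.foreach(print) // LazyList is consumed lazily
      case Left(LlmError.ModelError(m))  => Console.err.println(s"model error: $m")
      case Left(LlmError.ConfigError(m)) => Console.err.println(s"config error: $m")
  finally llm.close() // releases the native model via llama_model_free
```
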
@@ -61,70 +72,88 @@ object Llm:
   private def createModel(
       model: Path,
       params: ModelParams
-  ): Try[Llama.Model] =
-    binding.map: llama =>
-      llama.llama_backend_init()
-      llama.llama_numa_init(params.numa)
-      Scope.confined:
-        llama.llama_model_load_from_file(
-          path_model = Ptr.copy(model.toAbsolutePath.toString),
-          params = llama.llama_model_default_params().copy(
-            n_gpu_layers = params.gpuLayers,
-            main_gpu = params.mainGpu,
-            use_mmap = params.mmap,
-            use_mlock = params.mlock
+  ): Result[Llama.Model] =
+    val error = s"Cannot load the model $model"
+    for
+      llama <- api
+      path <- Either.cond(
+        Files.exists(model),
+        model,
+        ModelError(s"Model file $model does not exist")
+      )
+      _ <- catchNonFatal(llama.llama_backend_init())(
+        "Cannot load libllama backend"
+      )
+      _ <- catchNonFatal(llama.llama_numa_init(params.numa))(
+        s"Cannot init Numa (${params.numa})"
+      )
+      m <- catchNonFatal(
+        Scope.confined:
+          llama.llama_model_load_from_file(
+            path_model = Ptr.copy(path.toAbsolutePath.toString),
+            params = llama.llama_model_default_params().copy(
+              n_gpu_layers = params.gpuLayers,
+              main_gpu = params.mainGpu,
+              use_mmap = params.mmap,
+              use_mlock = params.mlock
+            )
           )
-        )
+      )(error).filterOrElse(notNull, ModelError(error))
+    yield m
 
   private def createContext(
       llm: Llama.Model,
-      contextParams: ContextParams,
+      params: ContextParams,
       embedding: Boolean
-  ): Try[Llama.Ctx] =
+  ): Result[Llama.Ctx] =
+    val error = s"Cannot initialize model context ($params)"
     for
-      llama <- binding
-      ctx = llama.llama_init_from_model(
-        model = llm,
-        params = llamaParams(
-          llama.llama_context_default_params(),
-          contextParams,
-          embedding
+      llama <- api
+      ctx <- catchNonFatal(
+        llama.llama_init_from_model(
+          model = llm,
+          params = llamaParams(
+            llama.llama_context_default_params(),
+            params,
+            embedding
+          )
         )
-      ) if ctx != Slinc.getRuntime().Null
+      )(error).filterOrElse(notNull, ModelError(error))
     yield ctx
 
   private def loadLora(
       llm: Llama.Model,
       ctx: Llama.Ctx,
       lora: List[AdapterParams]
-  ): Try[Unit] =
-    lora.map(loadAdapter(llm, ctx, _)).foldLeft(Try(())):
-      case (acc, Success(_)) => acc
-      case (_, failure)      => failure
+  ): Result[Unit] =
+    lora.map(loadAdapter(llm, ctx, _)).foldLeft(Result.unit):
+      case (acc, Right(_)) => acc
+      case (_, failure)    => failure
 
   private def loadAdapter(
       llm: Llama.Model,
       ctx: Llama.Ctx,
       params: AdapterParams
-  ): Try[Unit] =
-    Scope.confined:
-      for
-        llama <- binding
-        adapter <- Try(
+  ): Result[Unit] =
+    val error = s"Cannot initialize LoRA adapter ($params)"
+    for
+      llama <- api
+      config <- AdapterParams.parse(params)
+      adapter <- catchNonFatal(
+        Scope.confined:
           llama.llama_adapter_lora_init(
             model = llm,
-            path_lora = Ptr.copy(params.path.toAbsolutePath.toString)
-          )
-        )
-        if adapter != Slinc.getRuntime().Null
-        _ <- Try(
-          llama.llama_set_adapter_lora(
-            ctx = ctx,
-            adapter = adapter,
-            scale = params.scale
+            path_lora = Ptr.copy(config.path.toAbsolutePath.toString)
           )
+      )(error).filterOrElse(notNull, ModelError(error))
+      _ <- catchNonFatal(
+        llama.llama_set_adapter_lora(
+          ctx = ctx,
+          adapter = adapter,
+          scale = config.scale
         )
-      yield ()
+      )(error)
+    yield ()
 
   private def llamaParams(
       defaultParams: Llama.ContextParams,
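
With every native call wrapped individually, the `for` comprehensions above short-circuit at the first `Left`: a missing model file stops before `llama_backend_init` runs, and a null handle from llama.cpp is demoted to a `ModelError` before any later step. A self-contained sketch of the two stdlib building blocks used here, `Either.cond` and `filterOrElse` (paths and messages are illustrative):

```scala
import java.nio.file.{Files, Path}

val model = Path.of("missing.gguf") // hypothetical path

// Either.cond: Right(model) when the test passes, otherwise the supplied Left.
val checked: Either[String, Path] =
  Either.cond(Files.exists(model), model, s"Model file $model does not exist")

// filterOrElse: demotes a Right to a Left when the predicate fails,
// the same shape as the .filterOrElse(notNull, ModelError(error)) guard above.
val guarded: Either[String, Long] =
  Right(0L).filterOrElse(_ != 0L, "Cannot load the model")

// checked == Left("Model file missing.gguf does not exist")
// guarded == Left("Cannot load the model")
```
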
@@ -148,3 +177,11 @@ object Llm:
       flash_attn = params.flashAttention,
       embeddings = embedding
     )
+
+  private def catchNonFatal[A](f: => A)(reason: => String): Result[A] =
+    try Right(f)
+    catch
+      case t if scala.util.control.NonFatal(t) =>
+        Left(ModelError(s"$reason: ${t.getMessage}"))
+
+  private def notNull(ptr: Ptr[Any]): Boolean = ptr != Slinc.getRuntime().Null
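
The two new helpers close the loop: `catchNonFatal` turns a throwing native call into a `Result`, and `NonFatal` deliberately lets fatal throwables (`OutOfMemoryError`, `StackOverflowError`, ...) keep propagating, while `notNull` screens the null handles returned on failure. A standalone restatement of the capture behavior with plain `String` errors (names here are illustrative, not from the commit):

```scala
import scala.util.control.NonFatal

def capture[A](f: => A)(reason: => String): Either[String, A] =
  try Right(f) // success becomes Right
  catch case t if NonFatal(t) => // fatal errors still propagate
    Left(s"$reason: ${t.getMessage}")

capture(1 / 0)("Generation failed") // Left("Generation failed: / by zero")
capture(42)("unused reason")        // Right(42)
```
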