@@ -54,7 +54,7 @@ export class Llama {
54
54
public readonly onDispose = new EventRelay < void > ( ) ;
55
55
56
56
private constructor ( {
57
- bindings, logLevel, logger, buildType, cmakeOptions, llamaCppRelease, vramPadding , debug
57
+ bindings, logLevel, logger, buildType, cmakeOptions, llamaCppRelease, debug , gpu , vramOrchestrator , vramPadding
58
58
} : {
59
59
bindings : BindingModule ,
60
60
logLevel : LlamaLogLevel ,
@@ -65,32 +65,20 @@ export class Llama {
65
65
repo : string ,
66
66
release : string
67
67
} ,
68
- vramPadding : number | ( ( totalVram : number ) => number ) ,
69
- debug : boolean
68
+ debug : boolean ,
69
+ gpu : BuildGpu ,
70
+ vramOrchestrator : MemoryOrchestrator ,
71
+ vramPadding : MemoryReservation
70
72
} ) {
71
73
this . _bindings = bindings ;
72
- this . _gpu = bindings . getGpuType ( ) ?? false ;
74
+ this . _gpu = gpu ;
73
75
this . _supportsGpuOffloading = bindings . getSupportsGpuOffloading ( ) ;
74
76
this . _supportsMmap = bindings . getSupportsMmap ( ) ;
75
77
this . _supportsMlock = bindings . getSupportsMlock ( ) ;
76
78
this . _consts = bindings . getConsts ( ) ;
77
79
this . _debug = debug ;
78
-
79
- this . _vramOrchestrator = new MemoryOrchestrator ( ( ) => {
80
- const { total, used} = bindings . getGpuVramInfo ( ) ;
81
-
82
- return {
83
- total,
84
- free : Math . max ( 0 , total - used )
85
- } ;
86
- } ) ;
87
-
88
- if ( this . _gpu === false || vramPadding === 0 )
89
- this . _vramPadding = this . _vramOrchestrator . reserveMemory ( 0 ) ;
90
- else if ( vramPadding instanceof Function )
91
- this . _vramPadding = this . _vramOrchestrator . reserveMemory ( vramPadding ( this . _vramOrchestrator . getMemoryState ( ) . total ) ) ;
92
- else
93
- this . _vramPadding = this . _vramOrchestrator . reserveMemory ( vramPadding ) ;
80
+ this . _vramOrchestrator = vramOrchestrator ;
81
+ this . _vramPadding = vramPadding ;
94
82
95
83
this . _logLevel = this . _debug
96
84
? LlamaLogLevel . debug
@@ -204,7 +192,7 @@ export class Llama {
204
192
return this . _vramPadding . size ;
205
193
}
206
194
207
- public getVramState ( ) {
195
+ public async getVramState ( ) {
208
196
this . _ensureNotDisposed ( ) ;
209
197
210
198
const { total, used} = this . _bindings . getGpuVramInfo ( ) ;
@@ -216,7 +204,7 @@ export class Llama {
216
204
} ;
217
205
}
218
206
219
- public getGpuDeviceNames ( ) {
207
+ public async getGpuDeviceNames ( ) {
220
208
this . _ensureNotDisposed ( ) ;
221
209
222
210
const { deviceNames} = this . _bindings . getGpuDeviceInfo ( ) ;
@@ -360,6 +348,24 @@ export class Llama {
360
348
skipLlamaInit ?: boolean ,
361
349
debug : boolean
362
350
} ) {
351
+ const gpu = bindings . getGpuType ( ) ?? false ;
352
+ const vramOrchestrator = new MemoryOrchestrator ( ( ) => {
353
+ const { total, used} = bindings . getGpuVramInfo ( ) ;
354
+
355
+ return {
356
+ total,
357
+ free : Math . max ( 0 , total - used )
358
+ } ;
359
+ } ) ;
360
+
361
+ let resolvedVramPadding : MemoryReservation ;
362
+ if ( gpu === false || vramPadding === 0 )
363
+ resolvedVramPadding = vramOrchestrator . reserveMemory ( 0 ) ;
364
+ else if ( vramPadding instanceof Function )
365
+ resolvedVramPadding = vramOrchestrator . reserveMemory ( vramPadding ( ( await vramOrchestrator . getMemoryState ( ) ) . total ) ) ;
366
+ else
367
+ resolvedVramPadding = vramOrchestrator . reserveMemory ( vramPadding ) ;
368
+
363
369
const llama = new Llama ( {
364
370
bindings,
365
371
buildType,
@@ -370,8 +376,10 @@ export class Llama {
370
376
} ,
371
377
logLevel,
372
378
logger,
373
- vramPadding,
374
- debug
379
+ debug,
380
+ gpu,
381
+ vramOrchestrator,
382
+ vramPadding : resolvedVramPadding
375
383
} ) ;
376
384
377
385
if ( ! skipLlamaInit )
0 commit comments