
Commit 56749c7

fix: async gpu info getters (#232)
* fix: make GPU info getters async
* fix: Electron example build
1 parent 73cabcd commit 56749c7

20 files changed (+266 additions, -227 deletions)
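
At its core, the commit turns llama.getVramState() and llama.getGpuDeviceNames() into async methods, so every call site now has to await them. A minimal caller-side sketch of the migration (the surrounding setup is illustrative, not taken from this diff):

    import {getLlama} from "node-llama-cpp";

    const llama = await getLlama();

    // previously synchronous: llama.getVramState()
    const vramState = await llama.getVramState();
    const gpuDeviceNames = await llama.getGpuDeviceNames();

    console.info(`VRAM: ${vramState.used} used of ${vramState.total} total`);
    console.info(`GPU devices: ${gpuDeviceNames.join(", ")}`);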

.github/workflows/build.yml

Lines changed: 9 additions & 2 deletions
@@ -471,6 +471,13 @@ jobs:
         with:
           node-version: "20"

+      - name: Install dependencies on Ubuntu
+        if: startsWith(matrix.config.name, 'Ubuntu')
+        run: |
+          sudo apt-get update
+          sudo apt-get install libarchive-tools rpm
+          sudo snap install snapcraft --classic
+
       - name: Install modules
         run: npm ci

@@ -508,8 +515,8 @@ jobs:
           shopt -s nullglob

           for file in ./electron-app-example/release/*.{dmg,zip,exe,appx,AppImage,snap,deb,tar.gz}; do
-            echo "Adding $file to release"
-            gh release upload "$RELEASE_TAG" "$file"
+            echo "Adding $file to release $RELEASE_TAG"
+            gh release upload "v$RELEASE_TAG" "$file"
           done

           shopt -u nullglob

src/bindings/Llama.ts

Lines changed: 32 additions & 24 deletions
@@ -54,7 +54,7 @@ export class Llama {
     public readonly onDispose = new EventRelay<void>();

     private constructor({
-        bindings, logLevel, logger, buildType, cmakeOptions, llamaCppRelease, vramPadding, debug
+        bindings, logLevel, logger, buildType, cmakeOptions, llamaCppRelease, debug, gpu, vramOrchestrator, vramPadding
     }: {
         bindings: BindingModule,
         logLevel: LlamaLogLevel,
@@ -65,32 +65,20 @@
             repo: string,
             release: string
         },
-        vramPadding: number | ((totalVram: number) => number),
-        debug: boolean
+        debug: boolean,
+        gpu: BuildGpu,
+        vramOrchestrator: MemoryOrchestrator,
+        vramPadding: MemoryReservation
     }) {
         this._bindings = bindings;
-        this._gpu = bindings.getGpuType() ?? false;
+        this._gpu = gpu;
         this._supportsGpuOffloading = bindings.getSupportsGpuOffloading();
         this._supportsMmap = bindings.getSupportsMmap();
         this._supportsMlock = bindings.getSupportsMlock();
         this._consts = bindings.getConsts();
         this._debug = debug;
-
-        this._vramOrchestrator = new MemoryOrchestrator(() => {
-            const {total, used} = bindings.getGpuVramInfo();
-
-            return {
-                total,
-                free: Math.max(0, total - used)
-            };
-        });
-
-        if (this._gpu === false || vramPadding === 0)
-            this._vramPadding = this._vramOrchestrator.reserveMemory(0);
-        else if (vramPadding instanceof Function)
-            this._vramPadding = this._vramOrchestrator.reserveMemory(vramPadding(this._vramOrchestrator.getMemoryState().total));
-        else
-            this._vramPadding = this._vramOrchestrator.reserveMemory(vramPadding);
+        this._vramOrchestrator = vramOrchestrator;
+        this._vramPadding = vramPadding;

         this._logLevel = this._debug
             ? LlamaLogLevel.debug
@@ -204,7 +192,7 @@
         return this._vramPadding.size;
     }

-    public getVramState() {
+    public async getVramState() {
         this._ensureNotDisposed();

         const {total, used} = this._bindings.getGpuVramInfo();
@@ -216,7 +204,7 @@
         };
     }

-    public getGpuDeviceNames() {
+    public async getGpuDeviceNames() {
         this._ensureNotDisposed();

         const {deviceNames} = this._bindings.getGpuDeviceInfo();
@@ -360,6 +348,24 @@
         skipLlamaInit?: boolean,
         debug: boolean
     }) {
+        const gpu = bindings.getGpuType() ?? false;
+        const vramOrchestrator = new MemoryOrchestrator(() => {
+            const {total, used} = bindings.getGpuVramInfo();
+
+            return {
+                total,
+                free: Math.max(0, total - used)
+            };
+        });
+
+        let resolvedVramPadding: MemoryReservation;
+        if (gpu === false || vramPadding === 0)
+            resolvedVramPadding = vramOrchestrator.reserveMemory(0);
+        else if (vramPadding instanceof Function)
+            resolvedVramPadding = vramOrchestrator.reserveMemory(vramPadding((await vramOrchestrator.getMemoryState()).total));
+        else
+            resolvedVramPadding = vramOrchestrator.reserveMemory(vramPadding);
+
         const llama = new Llama({
             bindings,
             buildType,
@@ -370,8 +376,10 @@
             },
             logLevel,
             logger,
-            vramPadding,
-            debug
+            debug,
+            gpu,
+            vramOrchestrator,
+            vramPadding: resolvedVramPadding
         });

         if (!skipLlamaInit)
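
The VRAM padding resolution moves out of the constructor and into the static factory because resolving it now requires awaiting getMemoryState(), and a constructor cannot await; the factory builds the MemoryReservation first and hands the finished object to the still-synchronous constructor. For callers, the function form of vramPadding keeps working unchanged; a hedged sketch (the option shape is taken from the signature in this diff, the 5% figure is arbitrary):

    import {getLlama} from "node-llama-cpp";

    // Reserve 5% of total VRAM as padding. With this commit, the factory
    // awaits the orchestrator's memory state before invoking the callback.
    const llama = await getLlama({
        vramPadding: (totalVram) => Math.floor(totalVram * 0.05)
    });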

src/bindings/utils/MemoryOrchestrator.ts

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ export class MemoryOrchestrator {
         });
     }

-    public getMemoryState() {
+    public async getMemoryState() {
         const {free, total} = this._getMemoryState();

         return {

src/cli/commands/ChatCommand.ts

Lines changed: 1 addition & 1 deletion
@@ -435,7 +435,7 @@ async function RunChat({
    }

    const padTitle = "Context".length + 1;
-    printCommonInfoLines({
+    await printCommonInfoLines({
        context,
        minTitleLength: padTitle,
        printBos: true,

src/cli/commands/CompleteCommand.ts

Lines changed: 1 addition & 1 deletion
@@ -324,7 +324,7 @@ async function RunCompletion({
    await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing

    const padTitle = "Complete".length + 1;
-    printCommonInfoLines({
+    await printCommonInfoLines({
        context,
        minTitleLength: padTitle,
        logBatchSize,

src/cli/commands/DebugCommand.ts

Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@ export const DebugCommand: CommandModule<object, DebugCommand> = {
 async function DebugVramFunction() {
     const llama = await getLlama("lastBuild");

-    const vramStatus = llama.getVramState();
+    const vramStatus = await llama.getVramState();
     const totalMemory = os.totalmem();
     const freeMemory = os.freemem();
     const usedMemory = totalMemory - freeMemory;

src/cli/commands/InfillCommand.ts

Lines changed: 1 addition & 1 deletion
@@ -348,7 +348,7 @@ async function RunInfill({
    await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing

    const padTitle = "Context".length + 1;
-    printCommonInfoLines({
+    await printCommonInfoLines({
        context,
        minTitleLength: padTitle,
        logBatchSize,

src/cli/commands/inspect/commands/InspectGpuCommand.ts

Lines changed: 2 additions & 2 deletions
@@ -93,8 +93,8 @@ async function logGpuVramUsage(gpu: BuildGpu) {
        skipLlamaInit: true
    });
    const gpuName = getPrettyBuildGpuName(gpu);
-    const vramStatus = llama.getVramState();
-    const gpuDeviceNames = llama.getGpuDeviceNames();
+    const vramStatus = await llama.getVramState();
+    const gpuDeviceNames = await llama.getGpuDeviceNames();

    if (gpuDeviceNames.length > 0)
        console.info(`${chalk.yellow(`${gpuName} device${gpuDeviceNames.length > 1 ? "s" : ""}:`)} ${gpuDeviceNames.join(", ")}`);

src/cli/commands/inspect/commands/InspectMeasureCommand.ts

Lines changed: 5 additions & 5 deletions
@@ -147,7 +147,7 @@ export const InspectMeasureCommand: CommandModule<object, InspectMeasureCommand>
        sourceType: "filesystem"
    });
    const ggufInsights = await GgufInsights.from(ggufMetadata, llama);
-    const totalVram = llama.getVramState().total;
+    const totalVram = (await llama.getVramState()).total;

    let lastGpuLayers = maxLayers ?? ggufInsights.totalLayers;
    let previousContextSizeCheck: undefined | number = undefined;
@@ -588,7 +588,7 @@ async function runTestWorkerLogic() {
            currentContextSizeCheck = null;

        try {
-            const preContextVramUsage = llama.getVramState().used;
+            const preContextVramUsage = (await llama.getVramState()).used;
            const context = await model.createContext({
                contextSize: currentContextSizeCheck ?? undefined,
                ignoreMemorySafetyChecks: currentContextSizeCheck != null
@@ -599,7 +599,7 @@ async function runTestWorkerLogic() {
                await sequence.evaluateWithoutGeneratingNewTokens(model.tokenize(evaluateText));
            }

-            const postContextVramUsage = llama.getVramState().used;
+            const postContextVramUsage = (await llama.getVramState()).used;

            sendInfoBack({
                type: "stats",
@@ -638,13 +638,13 @@ async function runTestWorkerLogic() {
        evaluateText?: string
    }) {
        try {
-            const preModelVramUsage = llama.getVramState().used;
+            const preModelVramUsage = (await llama.getVramState()).used;
            const model = await llama.loadModel({
                modelPath,
                gpuLayers,
                ignoreMemorySafetyChecks: true
            });
-            const postModelVramUsage = llama.getVramState().used;
+            const postModelVramUsage = (await llama.getVramState()).used;

            sendInfoBack({
                type: "stats",

src/cli/utils/interactivelyAskForModel.ts

Lines changed: 10 additions & 10 deletions
@@ -39,7 +39,7 @@ type ModelOption = {
    selectedUrl?: {
        url: string,
        ggufInsights: GgufInsights,
-        compatibilityScore: ReturnType<typeof GgufInsightsConfigurationResolver.prototype.scoreModelConfigurationCompatibility>
+        compatibilityScore: Awaited<ReturnType<typeof GgufInsightsConfigurationResolver.prototype.scoreModelConfigurationCompatibility>>
    },
    urlSelectionLoadingState?: "done" | "loading"
 } | {
@@ -67,8 +67,8 @@ export async function interactivelyAskForModel({
    const recommendedModelOptions: (ModelOption & { type: "recommendedModel" })[] = [];
    const activeInteractionController = new AbortController();
    let scheduledTitleRerenderTimeout: ReturnType<typeof setTimeout> | undefined = undefined;
-    let lastVramState: { used: number, total: number } = llama.getVramState();
-    const canUseGpu = lastVramState.total > 0;
+    let vramState = await llama.getVramState();
+    const canUseGpu = vramState.total > 0;

    if (allowLocalModels && modelsDirectory != null && await fs.existsSync(modelsDirectory)) {
        const ggufFileNames = (await fs.readdir(modelsDirectory))
@@ -112,7 +112,7 @@ export async function interactivelyAskForModel({
            readItems++;
            progressUpdater.setProgress(readItems / ggufFileNames.length, renderProgress());

-            const compatibilityScore = ggufInsights?.configurationResolver.scoreModelConfigurationCompatibility();
+            const compatibilityScore = await ggufInsights?.configurationResolver.scoreModelConfigurationCompatibility();

            return {
                type: "localModel",
@@ -216,7 +216,6 @@ export async function interactivelyAskForModel({
        title(item, rerender) {
            const title = chalk.bold("Select a model:") + " ";

-            const vramState = llama.getVramState();
            const vramStateText = vramState.total === 0
                ? chalk.bgGray(
                    " " +
@@ -241,12 +240,13 @@

            const pad = Math.max(0, minWidth - (stripAnsi(title).length + stripAnsi(vramStateText).length));

-            lastVramState = vramState;
            clearTimeout(scheduledTitleRerenderTimeout);
-            scheduledTitleRerenderTimeout = setTimeout(() => {
-                const vramState = llama.getVramState();
-                if (lastVramState.used !== vramState.used || lastVramState.total !== vramState.total)
+            scheduledTitleRerenderTimeout = setTimeout(async () => {
+                const newVramState = await llama.getVramState();
+                if (vramState.used !== newVramState.used || vramState.total !== newVramState.total) {
+                    vramState = newVramState;
                    rerender();
+                }
            }, vramStateUpdateInterval);

            return [
@@ -567,7 +567,7 @@ async function selectFileForModelRecommendation({
            if (abortSignal.aborted)
                return;

-            const compatibilityScore = ggufInsights.configurationResolver.scoreModelConfigurationCompatibility();
+            const compatibilityScore = await ggufInsights.configurationResolver.scoreModelConfigurationCompatibility();

            if (bestScore == null || compatibilityScore.compatibilityScore > bestScore) {
                bestScore = compatibilityScore.compatibilityScore;
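
One detail worth noting in the first hunk: because scoreModelConfigurationCompatibility() now returns a promise, the stored compatibilityScore type is unwrapped with TypeScript's built-in Awaited utility rather than rewritten by hand. A standalone illustration of the idiom (example code, not from the project):

    async function score(): Promise<{compatibilityScore: number}> {
        return {compatibilityScore: 1};
    }

    // ReturnType<typeof score>          -> Promise<{compatibilityScore: number}>
    // Awaited<ReturnType<typeof score>> -> {compatibilityScore: number}
    type Score = Awaited<ReturnType<typeof score>>;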
