
Commit 56749c7

fix: async gpu info getters (#232)
* fix: make GPU info getters async
* fix: Electron example build
1 parent 73cabcd commit 56749c7

20 files changed (+266 additions, -227 deletions)
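
At its core, the commit turns llama.getVramState() and llama.getGpuDeviceNames() into async methods, so every call site now has to await them. A minimal caller-side sketch of the migration (the surrounding setup is illustrative, not taken from this diff):

    import {getLlama} from "node-llama-cpp";

    const llama = await getLlama();

    // previously synchronous: llama.getVramState()
    const vramState = await llama.getVramState();
    const gpuDeviceNames = await llama.getGpuDeviceNames();

    console.info(`VRAM: ${vramState.used} used of ${vramState.total} total`);
    console.info(`GPU devices: ${gpuDeviceNames.join(", ")}`);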

.github/workflows/build.yml

Lines changed: 9 additions & 2 deletions
@@ -471,6 +471,13 @@ jobs:
         with:
           node-version: "20"

+      - name: Install dependencies on Ubuntu
+        if: startsWith(matrix.config.name, 'Ubuntu')
+        run: |
+          sudo apt-get update
+          sudo apt-get install libarchive-tools rpm
+          sudo snap install snapcraft --classic
+
       - name: Install modules
         run: npm ci

@@ -508,8 +515,8 @@ jobs:
           shopt -s nullglob

           for file in ./electron-app-example/release/*.{dmg,zip,exe,appx,AppImage,snap,deb,tar.gz}; do
-            echo "Adding $file to release"
-            gh release upload "$RELEASE_TAG" "$file"
+            echo "Adding $file to release $RELEASE_TAG"
+            gh release upload "v$RELEASE_TAG" "$file"
           done

           shopt -u nullglob

src/bindings/Llama.ts

Lines changed: 32 additions & 24 deletions
@@ -54,7 +54,7 @@ export class Llama {
     public readonly onDispose = new EventRelay<void>();

     private constructor({
-        bindings, logLevel, logger, buildType, cmakeOptions, llamaCppRelease, vramPadding, debug
+        bindings, logLevel, logger, buildType, cmakeOptions, llamaCppRelease, debug, gpu, vramOrchestrator, vramPadding
     }: {
         bindings: BindingModule,
         logLevel: LlamaLogLevel,
@@ -65,32 +65,20 @@
             repo: string,
             release: string
         },
-        vramPadding: number | ((totalVram: number) => number),
-        debug: boolean
+        debug: boolean,
+        gpu: BuildGpu,
+        vramOrchestrator: MemoryOrchestrator,
+        vramPadding: MemoryReservation
     }) {
         this._bindings = bindings;
-        this._gpu = bindings.getGpuType() ?? false;
+        this._gpu = gpu;
         this._supportsGpuOffloading = bindings.getSupportsGpuOffloading();
         this._supportsMmap = bindings.getSupportsMmap();
         this._supportsMlock = bindings.getSupportsMlock();
         this._consts = bindings.getConsts();
         this._debug = debug;
-
-        this._vramOrchestrator = new MemoryOrchestrator(() => {
-            const {total, used} = bindings.getGpuVramInfo();
-
-            return {
-                total,
-                free: Math.max(0, total - used)
-            };
-        });
-
-        if (this._gpu === false || vramPadding === 0)
-            this._vramPadding = this._vramOrchestrator.reserveMemory(0);
-        else if (vramPadding instanceof Function)
-            this._vramPadding = this._vramOrchestrator.reserveMemory(vramPadding(this._vramOrchestrator.getMemoryState().total));
-        else
-            this._vramPadding = this._vramOrchestrator.reserveMemory(vramPadding);
+        this._vramOrchestrator = vramOrchestrator;
+        this._vramPadding = vramPadding;

         this._logLevel = this._debug
             ? LlamaLogLevel.debug
@@ -204,7 +192,7 @@
         return this._vramPadding.size;
     }

-    public getVramState() {
+    public async getVramState() {
         this._ensureNotDisposed();

         const {total, used} = this._bindings.getGpuVramInfo();
@@ -216,7 +204,7 @@
         };
     }

-    public getGpuDeviceNames() {
+    public async getGpuDeviceNames() {
         this._ensureNotDisposed();

         const {deviceNames} = this._bindings.getGpuDeviceInfo();
@@ -360,6 +348,24 @@
         skipLlamaInit?: boolean,
         debug: boolean
     }) {
+        const gpu = bindings.getGpuType() ?? false;
+        const vramOrchestrator = new MemoryOrchestrator(() => {
+            const {total, used} = bindings.getGpuVramInfo();
+
+            return {
+                total,
+                free: Math.max(0, total - used)
+            };
+        });
+
+        let resolvedVramPadding: MemoryReservation;
+        if (gpu === false || vramPadding === 0)
+            resolvedVramPadding = vramOrchestrator.reserveMemory(0);
+        else if (vramPadding instanceof Function)
+            resolvedVramPadding = vramOrchestrator.reserveMemory(vramPadding((await vramOrchestrator.getMemoryState()).total));
+        else
+            resolvedVramPadding = vramOrchestrator.reserveMemory(vramPadding);
+
         const llama = new Llama({
             bindings,
             buildType,
@@ -370,8 +376,10 @@
             },
             logLevel,
             logger,
-            vramPadding,
-            debug
+            debug,
+            gpu,
+            vramOrchestrator,
+            vramPadding: resolvedVramPadding
         });

         if (!skipLlamaInit)
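
The VRAM padding resolution moves out of the constructor and into the static factory because resolving it now requires awaiting getMemoryState(), and a constructor cannot await; the factory builds the MemoryReservation first and hands the finished object to the still-synchronous constructor. For callers, the function form of vramPadding keeps working unchanged; a hedged sketch (the option shape is taken from the signature in this diff, the 5% figure is arbitrary):

    import {getLlama} from "node-llama-cpp";

    // Reserve 5% of total VRAM as padding. With this commit, the factory
    // awaits the orchestrator's memory state before invoking the callback.
    const llama = await getLlama({
        vramPadding: (totalVram) => Math.floor(totalVram * 0.05)
    });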

src/bindings/utils/MemoryOrchestrator.ts

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ export class MemoryOrchestrator {
         });
     }

-    public getMemoryState() {
+    public async getMemoryState() {
         const {free, total} = this._getMemoryState();

         return {

src/cli/commands/ChatCommand.ts

Lines changed: 1 addition & 1 deletion
@@ -435,7 +435,7 @@ async function RunChat({
    }

    const padTitle = "Context".length + 1;
-    printCommonInfoLines({
+    await printCommonInfoLines({
        context,
        minTitleLength: padTitle,
        printBos: true,

src/cli/commands/CompleteCommand.ts

Lines changed: 1 addition & 1 deletion
@@ -324,7 +324,7 @@ async function RunCompletion({
    await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing

    const padTitle = "Complete".length + 1;
-    printCommonInfoLines({
+    await printCommonInfoLines({
        context,
        minTitleLength: padTitle,
        logBatchSize,

src/cli/commands/DebugCommand.ts

Lines changed: 1 addition & 1 deletion
@@ -36,7 +36,7 @@ export const DebugCommand: CommandModule<object, DebugCommand> = {
 async function DebugVramFunction() {
     const llama = await getLlama("lastBuild");

-    const vramStatus = llama.getVramState();
+    const vramStatus = await llama.getVramState();
     const totalMemory = os.totalmem();
     const freeMemory = os.freemem();
     const usedMemory = totalMemory - freeMemory;

src/cli/commands/InfillCommand.ts

Lines changed: 1 addition & 1 deletion
@@ -348,7 +348,7 @@ async function RunInfill({
    await new Promise((accept) => setTimeout(accept, 0)); // wait for logs to finish printing

    const padTitle = "Context".length + 1;
-    printCommonInfoLines({
+    await printCommonInfoLines({
        context,
        minTitleLength: padTitle,
        logBatchSize,

src/cli/commands/inspect/commands/InspectGpuCommand.ts

Lines changed: 2 additions & 2 deletions
@@ -93,8 +93,8 @@ async function logGpuVramUsage(gpu: BuildGpu) {
        skipLlamaInit: true
    });
    const gpuName = getPrettyBuildGpuName(gpu);
-    const vramStatus = llama.getVramState();
-    const gpuDeviceNames = llama.getGpuDeviceNames();
+    const vramStatus = await llama.getVramState();
+    const gpuDeviceNames = await llama.getGpuDeviceNames();

    if (gpuDeviceNames.length > 0)
        console.info(`${chalk.yellow(`${gpuName} device${gpuDeviceNames.length > 1 ? "s" : ""}:`)} ${gpuDeviceNames.join(", ")}`);

src/cli/commands/inspect/commands/InspectMeasureCommand.ts

Lines changed: 5 additions & 5 deletions
@@ -147,7 +147,7 @@ export const InspectMeasureCommand: CommandModule<object, InspectMeasureCommand>
        sourceType: "filesystem"
    });
    const ggufInsights = await GgufInsights.from(ggufMetadata, llama);
-    const totalVram = llama.getVramState().total;
+    const totalVram = (await llama.getVramState()).total;

    let lastGpuLayers = maxLayers ?? ggufInsights.totalLayers;
    let previousContextSizeCheck: undefined | number = undefined;
@@ -588,7 +588,7 @@ async function runTestWorkerLogic() {
            currentContextSizeCheck = null;

        try {
-            const preContextVramUsage = llama.getVramState().used;
+            const preContextVramUsage = (await llama.getVramState()).used;
            const context = await model.createContext({
                contextSize: currentContextSizeCheck ?? undefined,
                ignoreMemorySafetyChecks: currentContextSizeCheck != null
@@ -599,7 +599,7 @@ async function runTestWorkerLogic() {
                await sequence.evaluateWithoutGeneratingNewTokens(model.tokenize(evaluateText));
            }

-            const postContextVramUsage = llama.getVramState().used;
+            const postContextVramUsage = (await llama.getVramState()).used;

            sendInfoBack({
                type: "stats",
@@ -638,13 +638,13 @@ async function runTestWorkerLogic() {
        evaluateText?: string
    }) {
        try {
-            const preModelVramUsage = llama.getVramState().used;
+            const preModelVramUsage = (await llama.getVramState()).used;
            const model = await llama.loadModel({
                modelPath,
                gpuLayers,
                ignoreMemorySafetyChecks: true
            });
-            const postModelVramUsage = llama.getVramState().used;
+            const postModelVramUsage = (await llama.getVramState()).used;

            sendInfoBack({
                type: "stats",

src/cli/utils/interactivelyAskForModel.ts

Lines changed: 10 additions & 10 deletions
@@ -39,7 +39,7 @@ type ModelOption = {
    selectedUrl?: {
        url: string,
        ggufInsights: GgufInsights,
-        compatibilityScore: ReturnType<typeof GgufInsightsConfigurationResolver.prototype.scoreModelConfigurationCompatibility>
+        compatibilityScore: Awaited<ReturnType<typeof GgufInsightsConfigurationResolver.prototype.scoreModelConfigurationCompatibility>>
    },
    urlSelectionLoadingState?: "done" | "loading"
 } | {
@@ -67,8 +67,8 @@ export async function interactivelyAskForModel({
    const recommendedModelOptions: (ModelOption & { type: "recommendedModel" })[] = [];
    const activeInteractionController = new AbortController();
    let scheduledTitleRerenderTimeout: ReturnType<typeof setTimeout> | undefined = undefined;
-    let lastVramState: { used: number, total: number } = llama.getVramState();
-    const canUseGpu = lastVramState.total > 0;
+    let vramState = await llama.getVramState();
+    const canUseGpu = vramState.total > 0;

    if (allowLocalModels && modelsDirectory != null && await fs.existsSync(modelsDirectory)) {
        const ggufFileNames = (await fs.readdir(modelsDirectory))
@@ -112,7 +112,7 @@ export async function interactivelyAskForModel({
            readItems++;
            progressUpdater.setProgress(readItems / ggufFileNames.length, renderProgress());

-            const compatibilityScore = ggufInsights?.configurationResolver.scoreModelConfigurationCompatibility();
+            const compatibilityScore = await ggufInsights?.configurationResolver.scoreModelConfigurationCompatibility();

            return {
                type: "localModel",
@@ -216,7 +216,6 @@ export async function interactivelyAskForModel({
        title(item, rerender) {
            const title = chalk.bold("Select a model:") + " ";

-            const vramState = llama.getVramState();
            const vramStateText = vramState.total === 0
                ? chalk.bgGray(
                    " " +
@@ -241,12 +240,13 @@

            const pad = Math.max(0, minWidth - (stripAnsi(title).length + stripAnsi(vramStateText).length));

-            lastVramState = vramState;
            clearTimeout(scheduledTitleRerenderTimeout);
-            scheduledTitleRerenderTimeout = setTimeout(() => {
-                const vramState = llama.getVramState();
-                if (lastVramState.used !== vramState.used || lastVramState.total !== vramState.total)
+            scheduledTitleRerenderTimeout = setTimeout(async () => {
+                const newVramState = await llama.getVramState();
+                if (vramState.used !== newVramState.used || vramState.total !== newVramState.total) {
+                    vramState = newVramState;
                    rerender();
+                }
            }, vramStateUpdateInterval);

            return [
@@ -567,7 +567,7 @@ async function selectFileForModelRecommendation({
            if (abortSignal.aborted)
                return;

-            const compatibilityScore = ggufInsights.configurationResolver.scoreModelConfigurationCompatibility();
+            const compatibilityScore = await ggufInsights.configurationResolver.scoreModelConfigurationCompatibility();

            if (bestScore == null || compatibilityScore.compatibilityScore > bestScore) {
                bestScore = compatibilityScore.compatibilityScore;
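
One detail worth noting in the first hunk: because scoreModelConfigurationCompatibility() now returns a promise, the stored compatibilityScore type is unwrapped with TypeScript's built-in Awaited utility rather than rewritten by hand. A standalone illustration of the idiom (example code, not from the project):

    async function score(): Promise<{compatibilityScore: number}> {
        return {compatibilityScore: 1};
    }

    // ReturnType<typeof score>          -> Promise<{compatibilityScore: number}>
    // Awaited<ReturnType<typeof score>> -> {compatibilityScore: number}
    type Score = Awaited<ReturnType<typeof score>>;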
