Skip to content

Commit 9cdbce9

Browse files
authored
feat(JSON Schema Grammar): $defs and $ref support with full inferred types (#472)
* feat(JSON Schema Grammar): `$defs` and `$ref` support with full inferred types
* feat(`inspect gguf` command): format and print the Jinja chat template with `--key .chatTemplate`
* fix(`JinjaTemplateChatWrapper`): first function call prefix detection
* fix(`QwenChatWrapper`): improve Qwen chat template detection
* fix: apply `maxTokens` on function calling parameters
* fix: adjust default prompt completion length based on SWA size when relevant
* fix: improve thought segmentation syntax extraction
* fix: adapt to `llama.cpp` changes
1 parent ea8d904 commit 9cdbce9

31 files changed

+1463
-139
lines changed

llama/addon/AddonContext.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -587,7 +587,7 @@ Napi::Value AddonContext::DisposeSequence(const Napi::CallbackInfo& info) {
587587

588588
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
589589

590-
bool result = llama_kv_self_seq_rm(ctx, sequenceId, -1, -1);
590+
bool result = llama_memory_seq_rm(llama_get_memory(ctx), sequenceId, -1, -1);
591591

592592
if (!result) {
593593
Napi::Error::New(info.Env(), "Failed to dispose sequence").ThrowAsJavaScriptException();
@@ -606,7 +606,7 @@ Napi::Value AddonContext::RemoveTokenCellsFromSequence(const Napi::CallbackInfo&
606606
int32_t startPos = info[1].As<Napi::Number>().Int32Value();
607607
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
608608

609-
bool result = llama_kv_self_seq_rm(ctx, sequenceId, startPos, endPos);
609+
bool result = llama_memory_seq_rm(llama_get_memory(ctx), sequenceId, startPos, endPos);
610610

611611
return Napi::Boolean::New(info.Env(), result);
612612
}
@@ -621,7 +621,7 @@ Napi::Value AddonContext::ShiftSequenceTokenCells(const Napi::CallbackInfo& info
621621
int32_t endPos = info[2].As<Napi::Number>().Int32Value();
622622
int32_t shiftDelta = info[3].As<Napi::Number>().Int32Value();
623623

624-
llama_kv_self_seq_add(ctx, sequenceId, startPos, endPos, shiftDelta);
624+
llama_memory_seq_add(llama_get_memory(ctx), sequenceId, startPos, endPos, shiftDelta);
625625

626626
return info.Env().Undefined();
627627
}
@@ -634,7 +634,7 @@ Napi::Value AddonContext::GetSequenceKvCacheMinPosition(const Napi::CallbackInfo
634634
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
635635

636636

637-
const auto minPosition = llama_kv_self_seq_pos_min(ctx, sequenceId);
637+
const auto minPosition = llama_memory_seq_pos_min(llama_get_memory(ctx), sequenceId);
638638

639639
return Napi::Number::New(info.Env(), minPosition);
640640
}
@@ -647,7 +647,7 @@ Napi::Value AddonContext::GetSequenceKvCacheMaxPosition(const Napi::CallbackInfo
647647
int32_t sequenceId = info[0].As<Napi::Number>().Int32Value();
648648

649649

650-
const auto maxPosition = llama_kv_self_seq_pos_max(ctx, sequenceId);
650+
const auto maxPosition = llama_memory_seq_pos_max(llama_get_memory(ctx), sequenceId);
651651

652652
return Napi::Number::New(info.Env(), maxPosition);
653653
}

package-lock.json

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@
197197
"ignore": "^7.0.4",
198198
"ipull": "^3.9.2",
199199
"is-unicode-supported": "^2.1.0",
200-
"lifecycle-utils": "^2.0.0",
200+
"lifecycle-utils": "^2.0.1",
201201
"log-symbols": "^7.0.0",
202202
"nanoid": "^5.1.5",
203203
"node-addon-api": "^8.3.1",

src/bindings/Llama.ts

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import {DisposedError, EventRelay, withLock} from "lifecycle-utils";
55
import {getConsoleLogPrefix} from "../utils/getConsoleLogPrefix.js";
66
import {LlamaModel, LlamaModelOptions} from "../evaluator/LlamaModel/LlamaModel.js";
77
import {DisposeGuard} from "../utils/DisposeGuard.js";
8-
import {GbnfJsonSchema} from "../utils/gbnfJson/types.js";
8+
import {GbnfJsonDefList, GbnfJsonSchema} from "../utils/gbnfJson/types.js";
99
import {LlamaJsonSchemaGrammar} from "../evaluator/LlamaJsonSchemaGrammar.js";
1010
import {LlamaGrammar, LlamaGrammarOptions} from "../evaluator/LlamaGrammar.js";
1111
import {ThreadsSplitter} from "../utils/ThreadsSplitter.js";
@@ -345,8 +345,11 @@ export class Llama {
345345
* @see [Using a JSON Schema Grammar](https://node-llama-cpp.withcat.ai/guide/grammar#json-schema) tutorial
346346
* @see [Reducing Hallucinations When Using JSON Schema Grammar](https://node-llama-cpp.withcat.ai/guide/grammar#reducing-json-schema-hallucinations) tutorial
347347
*/
348-
public async createGrammarForJsonSchema<const T extends GbnfJsonSchema>(schema: Readonly<T>) {
349-
return new LlamaJsonSchemaGrammar<T>(this, schema);
348+
public async createGrammarForJsonSchema<
349+
const T extends GbnfJsonSchema<Defs>,
350+
const Defs extends GbnfJsonDefList<Defs> = Record<any, any>
351+
>(schema: Readonly<T> & GbnfJsonSchema<Defs>) {
352+
return new LlamaJsonSchemaGrammar<T, Defs>(this, schema);
350353
}
351354
/* eslint-enable @stylistic/max-len */
352355

src/chatWrappers/QwenChatWrapper.ts

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,8 @@ export class QwenChatWrapper extends ChatWrapper {
8484
segments: {
8585
reiterateStackAfterFunctionCalls: true,
8686
thought: {
87-
prefix: LlamaText(new SpecialTokensText("<think>")),
88-
suffix: LlamaText(new SpecialTokensText("</think>"))
87+
prefix: LlamaText(new SpecialTokensText("<think>\n")),
88+
suffix: LlamaText(new SpecialTokensText("\n</think>"))
8989
}
9090
}
9191
};
@@ -247,7 +247,9 @@ export class QwenChatWrapper extends ChatWrapper {
247247
public static override _getOptionConfigurationsToTestIfCanSupersedeJinjaTemplate(): ChatWrapperJinjaMatchConfiguration<typeof this> {
248248
return [
249249
[{}, {}, {_requireFunctionCallSettingsExtraction: true}],
250-
[{_lineBreakBeforeFunctionCallPrefix: true}, {}, {_requireFunctionCallSettingsExtraction: true}]
250+
[{_lineBreakBeforeFunctionCallPrefix: true}, {}, {_requireFunctionCallSettingsExtraction: true}],
251+
[{thoughts: "discourage"}, {}, {_requireFunctionCallSettingsExtraction: true}],
252+
[{thoughts: "discourage", _lineBreakBeforeFunctionCallPrefix: true}, {}, {_requireFunctionCallSettingsExtraction: true}]
251253
];
252254
}
253255
}

src/chatWrappers/generic/JinjaTemplateChatWrapper.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -671,7 +671,7 @@ export class JinjaTemplateChatWrapper extends ChatWrapper {
671671
return res;
672672
};
673673

674-
const validateThatAllMessageIdsAreUsed = (parts: ReturnType<typeof splitText<string[]>>) => {
674+
const validateThatAllMessageIdsAreUsed = (parts: ReturnType<typeof splitText<string>>) => {
675675
const messageIdsLeft = new Set(messageIds);
676676

677677
for (const part of parts) {

src/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.ts

Lines changed: 97 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,22 @@ export function extractFunctionCallSettingsFromJinjaTemplate({
9494
modelMessage2
9595
]
9696
}];
97+
const chatHistoryOnlyCall: ChatHistoryItem[] = [...baseChatHistory, {
98+
type: "model",
99+
response: [
100+
{
101+
type: "functionCall",
102+
name: func1name,
103+
104+
// convert to number since this will go through JSON.stringify,
105+
// and we want to avoid escaping characters in the rendered output
106+
params: Number(func1params),
107+
result: Number(func1result),
108+
startsNewChunk: true
109+
},
110+
modelMessage2
111+
]
112+
}];
97113
const chatHistory2Calls: ChatHistoryItem[] = [...baseChatHistory, {
98114
type: "model",
99115
response: [
@@ -257,6 +273,17 @@ export function extractFunctionCallSettingsFromJinjaTemplate({
257273
stringifyFunctionResults: stringifyResult,
258274
combineModelMessageAndToolCalls
259275
});
276+
const renderedOnlyCall = getFirstValidResult([
277+
() => renderTemplate({
278+
chatHistory: chatHistoryOnlyCall,
279+
functions: functions1,
280+
additionalParams,
281+
stringifyFunctionParams: stringifyParams,
282+
stringifyFunctionResults: stringifyResult,
283+
combineModelMessageAndToolCalls
284+
}),
285+
() => undefined
286+
]);
260287
const rendered2Calls = getFirstValidResult([
261288
() => renderTemplate({
262289
chatHistory: chatHistory2Calls,
@@ -411,14 +438,46 @@ export function extractFunctionCallSettingsFromJinjaTemplate({
411438
parallelismResultPrefix
412439
} = resolveParallelismBetweenSectionsParts(func2ParamsToFunc1Result.text.slice(callSuffixLength, -resultPrefixLength));
413440

441+
let revivedCallPrefix = reviveSeparatorText(callPrefixText, idToStaticContent, contentIds);
442+
const revivedParallelismCallSectionPrefix = removeCommonRevivedPrefix(
443+
reviveSeparatorText(parallelismCallPrefix, idToStaticContent, contentIds),
444+
!combineModelMessageAndToolCalls
445+
? textBetween2TextualModelResponses
446+
: LlamaText()
447+
);
448+
let revivedParallelismCallBetweenCalls = reviveSeparatorText(parallelismBetweenCallsText, idToStaticContent, contentIds);
449+
450+
if (revivedParallelismCallSectionPrefix.values.length === 0 && renderedOnlyCall != null) {
451+
const userMessage1ToModelMessage1Start = getTextBetweenIds(rendered1Call, userMessage1, modelMessage1);
452+
const onlyCallUserMessage1ToFunc1Name = getTextBetweenIds(renderedOnlyCall, userMessage1, func1name);
453+
454+
if (userMessage1ToModelMessage1Start.text != null && onlyCallUserMessage1ToFunc1Name.text != null) {
455+
const onlyCallModelMessagePrefixLength = findCommandStartLength(
456+
userMessage1ToModelMessage1Start.text,
457+
onlyCallUserMessage1ToFunc1Name.text
458+
);
459+
const onlyCallCallPrefixText = onlyCallUserMessage1ToFunc1Name.text.slice(onlyCallModelMessagePrefixLength);
460+
const revivedOnlyCallCallPrefixText = reviveSeparatorText(onlyCallCallPrefixText, idToStaticContent, contentIds);
461+
462+
const optionalCallPrefix = removeCommonRevivedSuffix(revivedCallPrefix, revivedOnlyCallCallPrefixText);
463+
if (optionalCallPrefix.values.length > 0) {
464+
revivedCallPrefix = removeCommonRevivedPrefix(revivedCallPrefix, optionalCallPrefix);
465+
revivedParallelismCallBetweenCalls = LlamaText([
466+
optionalCallPrefix,
467+
revivedParallelismCallBetweenCalls
468+
]);
469+
}
470+
}
471+
}
472+
414473
return {
415474
stringifyParams,
416475
stringifyResult,
417476
combineModelMessageAndToolCalls,
418477
settings: {
419478
call: {
420479
optionalPrefixSpace: true,
421-
prefix: reviveSeparatorText(callPrefixText, idToStaticContent, contentIds),
480+
prefix: revivedCallPrefix,
422481
paramsPrefix: reviveSeparatorText(callParamsPrefixText, idToStaticContent, contentIds),
423482
suffix: reviveSeparatorText(callSuffixText, idToStaticContent, contentIds),
424483
emptyCallParamsPlaceholder: {}
@@ -445,13 +504,8 @@ export function extractFunctionCallSettingsFromJinjaTemplate({
445504
},
446505
parallelism: {
447506
call: {
448-
sectionPrefix: removeCommonRevivedPrefix(
449-
reviveSeparatorText(parallelismCallPrefix, idToStaticContent, contentIds),
450-
!combineModelMessageAndToolCalls
451-
? textBetween2TextualModelResponses
452-
: LlamaText()
453-
),
454-
betweenCalls: reviveSeparatorText(parallelismBetweenCallsText, idToStaticContent, contentIds),
507+
sectionPrefix: revivedParallelismCallSectionPrefix,
508+
betweenCalls: revivedParallelismCallBetweenCalls,
455509
sectionSuffix: reviveSeparatorText(parallelismCallSuffixText, idToStaticContent, contentIds)
456510
},
457511
result: {
@@ -524,14 +578,48 @@ function removeCommonRevivedPrefix(target: LlamaText, matchStart: LlamaText) {
524578
} else if (targetValue instanceof SpecialToken && matchStartValue instanceof SpecialToken) {
525579
if (targetValue.value === matchStartValue.value)
526580
continue;
527-
}
581+
} else if (LlamaText(targetValue ?? "").compare(LlamaText(matchStartValue ?? "")))
582+
continue;
528583

529584
return LlamaText(target.values.slice(commonStartLength));
530585
}
531586

532587
return LlamaText(target.values.slice(matchStart.values.length));
533588
}
534589

590+
function removeCommonRevivedSuffix(target: LlamaText, matchEnd: LlamaText) {
591+
for (
592+
let commonEndLength = 0;
593+
commonEndLength < target.values.length && commonEndLength < matchEnd.values.length;
594+
commonEndLength++
595+
) {
596+
const targetValue = target.values[target.values.length - commonEndLength - 1];
597+
const matchEndValue = matchEnd.values[matchEnd.values.length - commonEndLength - 1];
598+
599+
if (typeof targetValue === "string" && typeof matchEndValue === "string") {
600+
if (targetValue === matchEndValue)
601+
continue;
602+
} else if (targetValue instanceof SpecialTokensText && matchEndValue instanceof SpecialTokensText) {
603+
const commonLength = findCommonEndLength(targetValue.value, matchEndValue.value);
604+
if (commonLength === targetValue.value.length && commonLength === matchEndValue.value.length)
605+
continue;
606+
607+
return LlamaText([
608+
...target.values.slice(0, target.values.length - commonEndLength - 1),
609+
new SpecialTokensText(targetValue.value.slice(0, targetValue.value.length - commonLength))
610+
]);
611+
} else if (targetValue instanceof SpecialToken && matchEndValue instanceof SpecialToken) {
612+
if (targetValue.value === matchEndValue.value)
613+
continue;
614+
} else if (LlamaText(targetValue ?? "").compare(LlamaText(matchEndValue ?? "")))
615+
continue;
616+
617+
return LlamaText(target.values.slice(0, target.values.length - commonEndLength - 1));
618+
}
619+
620+
return LlamaText(target.values.slice(0, target.values.length - matchEnd.values.length));
621+
}
622+
535623
function findCommandStartLength(text1: string, text2: string) {
536624
let commonStartLength = 0;
537625
while (commonStartLength < text1.length && commonStartLength < text2.length) {

src/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.ts

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,42 @@ export function extractSegmentSettingsFromTokenizerAndChatTemplate(
88
function tryMatchPrefixSuffixPair(tryMatchGroups: [prefix: string, suffix: string][]) {
99
if (chatTemplate != null) {
1010
for (const [prefix, suffix] of tryMatchGroups) {
11+
if (
12+
(
13+
hasAll(chatTemplate.replaceAll(prefix + "\\n\\n" + suffix, ""), [
14+
prefix + "\\n\\n",
15+
"\\n\\n" + suffix
16+
])
17+
) || (
18+
hasAll(chatTemplate.replaceAll(prefix + "\n\n" + suffix, ""), [
19+
prefix + "\n\n",
20+
"\n\n" + suffix
21+
])
22+
)
23+
)
24+
return {
25+
prefix: LlamaText(new SpecialTokensText(prefix + "\n\n")),
26+
suffix: LlamaText(new SpecialTokensText("\n\n" + suffix))
27+
};
28+
29+
if (
30+
(
31+
hasAll(chatTemplate.replaceAll(prefix + "\\n" + suffix, ""), [
32+
prefix + "\\n",
33+
"\\n" + suffix
34+
])
35+
) || (
36+
hasAll(chatTemplate.replaceAll(prefix + "\n" + suffix, ""), [
37+
prefix + "\n",
38+
"\n" + suffix
39+
])
40+
)
41+
)
42+
return {
43+
prefix: LlamaText(new SpecialTokensText(prefix + "\n")),
44+
suffix: LlamaText(new SpecialTokensText("\n" + suffix))
45+
};
46+
1147
if (chatTemplate.includes(prefix) && chatTemplate.includes(suffix))
1248
return {
1349
prefix: LlamaText(new SpecialTokensText(prefix)),
@@ -46,3 +82,7 @@ export function extractSegmentSettingsFromTokenizerAndChatTemplate(
4682
])
4783
});
4884
}
85+
86+
function hasAll(text: string, matches: string[]) {
87+
return matches.every((match) => text.includes(match));
88+
}

src/cli/commands/ChatCommand.ts

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import {defineChatSessionFunction} from "../../evaluator/LlamaChatSession/utils/
1212
import {getLlama} from "../../bindings/getLlama.js";
1313
import {LlamaGrammar} from "../../evaluator/LlamaGrammar.js";
1414
import {LlamaChatSession} from "../../evaluator/LlamaChatSession/LlamaChatSession.js";
15-
import {LlamaJsonSchemaGrammar} from "../../evaluator/LlamaJsonSchemaGrammar.js";
1615
import {
1716
BuildGpu, LlamaLogLevel, LlamaLogLevelGreaterThan, nodeLlamaCppGpuOptions, parseNodeLlamaCppGpuOption
1817
} from "../../bindings/types.js";
@@ -529,8 +528,7 @@ async function RunChat({
529528
});
530529

531530
const grammar = jsonSchemaGrammarFilePath != null
532-
? new LlamaJsonSchemaGrammar(
533-
llama,
531+
? await llama.createGrammarForJsonSchema(
534532
await fs.readJson(
535533
path.resolve(process.cwd(), jsonSchemaGrammarFilePath)
536534
)

0 commit comments

Comments (0)