diff --git a/.vitepress/config/apiReferenceSidebar.ts b/.vitepress/config/apiReferenceSidebar.ts
index 8cb8aa9d..6b8c35ed 100644
--- a/.vitepress/config/apiReferenceSidebar.ts
+++ b/.vitepress/config/apiReferenceSidebar.ts
@@ -44,8 +44,12 @@ const chatWrappersOrder = [
"GeneralChatWrapper",
"TemplateChatWrapper",
"JinjaTemplateChatWrapper",
+ "QwenChatWrapper",
+ "HarmonyChatWrapper",
+ "SeedChatWrapper",
"DeepSeekChatWrapper",
"Llama3_1ChatWrapper",
+ "Llama3_2LightweightChatWrapper",
"Llama3ChatWrapper",
"Llama2ChatWrapper",
"MistralChatWrapper",
diff --git a/package-lock.json b/package-lock.json
index 138ef557..c2541230 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -53,7 +53,7 @@
"@nolebase/vitepress-plugin-og-image": "^2.17.0",
"@resvg/resvg-js": "^2.6.2",
"@semantic-release/exec": "^7.1.0",
- "@semantic-release/github": "11.0.4",
+ "@semantic-release/github": "11.0.5",
"@semantic-release/npm": "12.0.2",
"@shikijs/vitepress-twoslash": "^3.4.0",
"@stylistic/eslint-plugin": "^4.2.0",
@@ -106,12 +106,14 @@
"@node-llama-cpp/linux-armv7l": "0.1.0",
"@node-llama-cpp/linux-x64": "0.1.0",
"@node-llama-cpp/linux-x64-cuda": "0.1.0",
+ "@node-llama-cpp/linux-x64-cuda-ext": "0.1.0",
"@node-llama-cpp/linux-x64-vulkan": "0.1.0",
"@node-llama-cpp/mac-arm64-metal": "0.1.0",
"@node-llama-cpp/mac-x64": "0.1.0",
"@node-llama-cpp/win-arm64": "0.1.0",
"@node-llama-cpp/win-x64": "0.1.0",
"@node-llama-cpp/win-x64-cuda": "0.1.0",
+ "@node-llama-cpp/win-x64-cuda-ext": "0.1.0",
"@node-llama-cpp/win-x64-vulkan": "0.1.0"
},
"peerDependencies": {
@@ -2301,6 +2303,9 @@
"node_modules/@node-llama-cpp/linux-x64-cuda": {
"optional": true
},
+ "node_modules/@node-llama-cpp/linux-x64-cuda-ext": {
+ "optional": true
+ },
"node_modules/@node-llama-cpp/linux-x64-vulkan": {
"optional": true
},
@@ -2319,6 +2324,9 @@
"node_modules/@node-llama-cpp/win-x64-cuda": {
"optional": true
},
+ "node_modules/@node-llama-cpp/win-x64-cuda-ext": {
+ "optional": true
+ },
"node_modules/@node-llama-cpp/win-x64-vulkan": {
"optional": true
},
@@ -3622,9 +3630,9 @@
}
},
"node_modules/@semantic-release/github": {
- "version": "11.0.4",
- "resolved": "https://registry.npmjs.org/@semantic-release/github/-/github-11.0.4.tgz",
- "integrity": "sha512-fU/nLSjkp9DmB0h7FVO5imhhWJMvq2LjD4+3lz3ZAzpDLY9+KYwC+trJ+g7LbZeJv9y3L9fSFSg2DduUpiT6bw==",
+ "version": "11.0.5",
+ "resolved": "https://registry.npmjs.org/@semantic-release/github/-/github-11.0.5.tgz",
+ "integrity": "sha512-wJamzHteXwBdopvkTD6BJjPz1UHLm20twlVCSMA9zpd3B5KrOQX137jfTbNJT6ZVz3pXtg0S1DroQl4wifJ4WQ==",
"dev": true,
"license": "MIT",
"dependencies": {
diff --git a/package.json b/package.json
index ac229831..17b3a234 100644
--- a/package.json
+++ b/package.json
@@ -143,7 +143,7 @@
"@nolebase/vitepress-plugin-og-image": "^2.17.0",
"@resvg/resvg-js": "^2.6.2",
"@semantic-release/exec": "^7.1.0",
- "@semantic-release/github": "11.0.4",
+ "@semantic-release/github": "11.0.5",
"@semantic-release/npm": "12.0.2",
"@shikijs/vitepress-twoslash": "^3.4.0",
"@stylistic/eslint-plugin": "^4.2.0",
diff --git a/src/chatWrappers/SeedChatWrapper.ts b/src/chatWrappers/SeedChatWrapper.ts
new file mode 100644
index 00000000..3c8de7e2
--- /dev/null
+++ b/src/chatWrappers/SeedChatWrapper.ts
@@ -0,0 +1,255 @@
+import {ChatWrapper} from "../ChatWrapper.js";
+import {
+ ChatModelFunctions, ChatWrapperGenerateContextStateOptions, ChatWrapperGeneratedContextState, ChatWrapperSettings,
+ isChatModelResponseSegment
+} from "../types.js";
+import {SpecialToken, LlamaText, SpecialTokensText} from "../utils/LlamaText.js";
+import {ChatModelFunctionsDocumentationGenerator} from "./utils/ChatModelFunctionsDocumentationGenerator.js";
+
+const defaultThinkingBudget = null;
+
+// source: https://huggingface.co/ByteDance-Seed/Seed-OSS-36B-Instruct/blob/main/chat_template.jinja
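+/**
+ * Chat wrapper for ByteDance's Seed-OSS models.
+ *
+ * A minimal usage sketch (the model file name here is illustrative):
+ * @example
+ * ```typescript
+ * import {getLlama, LlamaChatSession, SeedChatWrapper} from "node-llama-cpp";
+ *
+ * const llama = await getLlama();
+ * const model = await llama.loadModel({modelPath: "Seed-OSS-36B-Instruct.Q4_K_M.gguf"});
+ * const context = await model.createContext();
+ * const session = new LlamaChatSession({
+ *     contextSequence: context.getSequence(),
+ *     chatWrapper: new SeedChatWrapper({thinkingBudget: 1024})
+ * });
+ *
+ * const answer = await session.prompt("What is 6+6?");
+ * ```
+ */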
+export class SeedChatWrapper extends ChatWrapper {
+ public readonly wrapperName: string = "Seed";
+
+ public readonly thinkingBudget: number | 0 | null;
+
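+ // With the function call settings below, a call to a hypothetical `getWeather`
+ // function with `{"city": "London"}` is serialized roughly as:
+ // <seed:tool_call>
+ // <function=getWeather>{"city": "London"}
+ // </function>
+ // </seed:tool_call>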
+ public override readonly settings: ChatWrapperSettings = {
+ supportsSystemMessages: true,
+ functions: {
+ call: {
+ optionalPrefixSpace: true,
+ prefix: LlamaText(new SpecialTokensText("\n"), "")),
+ suffix: LlamaText(new SpecialTokensText("\n\n\n")),
+ emptyCallParamsPlaceholder: {}
+ },
+ result: {
+ prefix: LlamaText(new SpecialTokensText("tool\n")),
+ suffix: LlamaText(new SpecialTokensText(""))
+ }
+ },
+ segments: {
+ thought: {
+ prefix: LlamaText(new SpecialTokensText("")),
+ suffix: LlamaText(new SpecialTokensText("")),
+ reopenAfterFunctionCalls: true
+ }
+ }
+ };
+
+ public constructor(options: {
+ /**
+ * The thinking budget to instruct the model to conform to.
+ *
+ * This is purely a request, the model may ignore it.
+ *
+ * Set to `0` to instruct the model to not use any reasoning.
+ *
+ * When set to `null`, the instruction will be omitted (unlimited reasoning).
+ *
+ * Defaults to `null`.
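+ *
+ * For example, `{thinkingBudget: 4096}` asks the model to finish its reasoning within roughly 4096 tokens.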
+ */
+ thinkingBudget?: number | 0 | null
+ } = {}) {
+ super();
+
+ const {
+ thinkingBudget = defaultThinkingBudget
+ } = options;
+
+ this.thinkingBudget = thinkingBudget;
+ }
+
+ public override generateContextState({
+ chatHistory, availableFunctions, documentFunctionParams
+ }: ChatWrapperGenerateContextStateOptions): ChatWrapperGeneratedContextState {
+ const hasFunctions = Object.keys(availableFunctions ?? {}).length > 0;
+ const modifiedChatHistory = chatHistory.slice();
+
+ let systemMessage: LlamaText = LlamaText();
+ if (modifiedChatHistory[0]?.type === "system") {
+ systemMessage = LlamaText.fromJSON(modifiedChatHistory[0].text);
+ modifiedChatHistory.shift();
+ }
+
+ const contextContent: LlamaText[] = [];
+
+ if (systemMessage.values.length > 0 || hasFunctions)
+ contextContent.push(
+ LlamaText([
+ new SpecialTokensText("system\n"),
+ this._getFirstSystemMessage(systemMessage, availableFunctions, {documentParams: documentFunctionParams}),
+ new SpecialTokensText("\n")
+ ])
+ );
+
+ const thinkingBudgetSystemMessage = this._getThinkingBudgetSystemMessage();
+ if (thinkingBudgetSystemMessage.values.length > 0)
+ contextContent.push(
+ LlamaText([
+ new SpecialTokensText("system\n"),
+ thinkingBudgetSystemMessage,
+ new SpecialTokensText("\n")
+ ])
+ );
+
+ for (let i = 0; i < modifiedChatHistory.length; i++) {
+ const isLastItem = i === modifiedChatHistory.length - 1;
+ const item = modifiedChatHistory[i];
+
+ if (item == null)
+ continue;
+
+ if (item.type === "system") {
+ contextContent.push(
+ LlamaText([
+ new SpecialTokensText("system\n"),
+ LlamaText.fromJSON(item.text),
+ isLastItem
+ ? LlamaText([])
+ : new SpecialTokensText("\n")
+ ])
+ );
+ } else if (item.type === "user") {
+ contextContent.push(
+ LlamaText([
+ new SpecialTokensText("system\n"),
+ item.text,
+ isLastItem
+ ? LlamaText([])
+ : new SpecialTokensText("\n")
+ ])
+ );
+ } else if (item.type === "model") {
+ const injectNoThinkingThought = this.thinkingBudget === 0 && (
+ isLastItem ||
+ !item.response.some(
+ (item) => (
+ isChatModelResponseSegment(item) && item.segmentType === "thought"
+ )
+ )
+ );
+
+ contextContent.push(
+ LlamaText([
+ new SpecialTokensText("assistant\n"),
+ !injectNoThinkingThought
+ ? []
+ : [
+ new SpecialTokensText("\n"),
+ [
+ new SpecialTokensText(""),
+ "The current thinking budget is 0, so I will directly start answering the question.",
+ new SpecialTokensText("")
+ ],
+ new SpecialTokensText("\n")
+ ],
+ this.generateModelResponseText(item.response, true),
+ isLastItem
+ ? LlamaText([])
+ : new SpecialTokensText("\n")
+ ])
+ );
+ } else
+ void (item satisfies never);
+ }
+
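+ // At this point the rendered context looks roughly like this (special tokens shown literally):
+ // <seed:bos>system
+ // ...system text...<seed:eos>
+ // <seed:bos>user
+ // ...user text...<seed:eos>
+ // <seed:bos>assistant
+ // ...model response...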
+ const contextText = LlamaText(contextContent);
+
+ return {
+ contextText,
+ stopGenerationTriggers: [
+ LlamaText(new SpecialToken("EOS")),
+ LlamaText(new SpecialTokensText("")),
+ LlamaText("")
+ ]
+ };
+ }
+
+ public override generateAvailableFunctionsSystemText(availableFunctions: ChatModelFunctions, {documentParams = true}: {
+ documentParams?: boolean
+ }) {
+ const functionsDocumentationGenerator = new ChatModelFunctionsDocumentationGenerator(availableFunctions);
+
+ if (!functionsDocumentationGenerator.hasAnyFunctions)
+ return LlamaText([]);
+
+ return LlamaText.joinValues("\n", [
+ "",
+ "Tool List:",
+ (
+ "You are authorized to use the following tools (described in JSON Schema format). " +
+ "Before performing any task, you must decide how to call them based on the descriptions and parameters of these tools."
+ ),
+ functionsDocumentationGenerator.getSeedFunctionSignatures({documentParams}),
+ "When invoking tools, strictly adhere to the following format:", // the original text for this is in Chinese, translated to English here
+ new SpecialTokensText("\n\n{\"example_parameter_1\": \"value_1\", \"example_parameter_2\": \"This is the value for the second parameter\"}\n")
+ ]);
+ }
+
+ /** @internal */
+ private _getFirstSystemMessage(
+ systemPrompt: LlamaText,
+ availableFunctions?: ChatModelFunctions,
+ {documentParams = true}: {documentParams?: boolean} = {}
+ ) {
+ const res: LlamaText[] = [];
+
+ const functionsDocumentationGenerator = new ChatModelFunctionsDocumentationGenerator(availableFunctions);
+
+ if (systemPrompt.values.length === 0 && functionsDocumentationGenerator.hasAnyFunctions)
+ res.push(
+ LlamaText("You are Doubao, a helpful AI assistant. You may call one or more functions to assist with the user query.")
+ );
+ else if (systemPrompt.values.length > 0)
+ res.push(systemPrompt);
+
+ if (functionsDocumentationGenerator.hasAnyFunctions)
+ res.push(this.generateAvailableFunctionsSystemText(availableFunctions!, {documentParams}));
+
+ return LlamaText(res);
+ }
+
+ /** @internal */
+ private _getThinkingBudgetSystemMessage() {
+ if (this.thinkingBudget == null || this.thinkingBudget < 0)
+ return LlamaText([]);
+
+ if (this.thinkingBudget === 0)
+ return LlamaText([
+ "You are an intelligent assistant that can answer questions in one step without the need for reasoning and thinking, " +
+ "that is, your thinking budget is 0. " +
+ "Next, please skip the thinking process and directly start answering the user's questions."
+ ]);
+
+ let reflectionInterval: number = 1024;
+ const reflectionIntervals = new Map<number, number>([
+ [16384, 1024],
+ [8192, 1024],
+ [4096, 512],
+ [2048, 512],
+ [1024, 256],
+ [512, 128],
+ [0, 0]
+ ]);
+ // pick the interval of the smallest budget tier that the thinking budget fits into
+ for (const [maxBudget, interval] of reflectionIntervals.entries()) {
+ if (this.thinkingBudget <= maxBudget)
+ reflectionInterval = interval;
+ }
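+ // e.g. a thinkingBudget of 4096 resolves to a reflection interval of 512,
+ // and a thinkingBudget of 600 resolves to 256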
+
+ return LlamaText([
+ new SpecialTokensText("system\n"),
+ "You are an intelligent assistant with reflective ability. In the process of thinking and reasoning, you need to strictly follow the thinking budget, which is ",
+ this.thinkingBudget,
+ ". That is, you need to complete your thinking within ",
+ this.thinkingBudget,
+ " tokens and start answering the user's questions. You will reflect on your thinking process every ",
+ reflectionInterval,
+ " tokens, stating how many tokens have been used and how many are left.",
+ new SpecialTokensText("\n")
+ ]);
+ }
+}
diff --git a/src/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.ts b/src/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.ts
index c92bec3c..1b797bd8 100644
--- a/src/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.ts
+++ b/src/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.ts
@@ -191,6 +191,10 @@ export class ChatModelFunctionsDocumentationGenerator {
.join("\n");
}

+ public getSeedFunctionSignatures({documentParams = true}: {documentParams?: boolean} = {}) {
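+ // a JSON dump of the function definitions, in the same OpenAI-style
+ // `{"type": "function", "function": {...}}` shape that is passed to Jinja templates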
+ return jsonDumps(this._convertToJinjaTools({documentParams}));
+ }
+
/** @internal */
private _convertToJinjaTools({documentParams = true}: {documentParams?: boolean} = {}) {
const chatModelFunctions = this.chatModelFunctions;
diff --git a/src/chatWrappers/utils/resolveChatWrapper.ts b/src/chatWrappers/utils/resolveChatWrapper.ts
index abc25c46..8cadf9e8 100644
--- a/src/chatWrappers/utils/resolveChatWrapper.ts
+++ b/src/chatWrappers/utils/resolveChatWrapper.ts
@@ -19,6 +19,7 @@ import {includesText} from "../../utils/includesText.js";
import {LlamaModel} from "../../evaluator/LlamaModel/LlamaModel.js";
import {QwenChatWrapper} from "../QwenChatWrapper.js";
import {HarmonyChatWrapper} from "../HarmonyChatWrapper.js";
+import {SeedChatWrapper} from "../SeedChatWrapper.js";
import {isJinjaTemplateEquivalentToSpecializedChatWrapper} from "./isJinjaTemplateEquivalentToSpecializedChatWrapper.js";
import {getModelLinageNames} from "./getModelLinageNames.js";
import type {GgufFileInfo} from "../../gguf/types/GgufFileInfoTypes.js";
@@ -26,7 +27,7 @@ import type {GgufFileInfo} from "../../gguf/types/GgufFileInfoTypes.js";
export const specializedChatWrapperTypeNames = Object.freeze([
"general", "deepSeek", "qwen", "llama3.2-lightweight", "llama3.1", "llama3", "llama2Chat", "mistral", "alpacaChat", "functionary",
- "chatML", "falconChat", "gemma", "harmony"
+ "chatML", "falconChat", "gemma", "harmony", "seed"
] as const);
export type SpecializedChatWrapperTypeName = (typeof specializedChatWrapperTypeNames)[number];
@@ -57,6 +58,7 @@ export const chatWrappers = Object.freeze({
"falconChat": FalconChatWrapper,
"gemma": GemmaChatWrapper,
"harmony": HarmonyChatWrapper,
+ "seed": SeedChatWrapper,
"template": TemplateChatWrapper,
"jinjaTemplate": JinjaTemplateChatWrapper
} as const satisfies Record<SpecializedChatWrapperTypeName | TemplateChatWrapperTypeName, any>);
@@ -366,12 +368,18 @@ export function resolveChatWrapper(
return createSpecializedChatWrapper(GemmaChatWrapper);
else if (includesText(modelNames, ["gpt-oss", "Gpt Oss", "Gpt-Oss", "openai_gpt-oss", "Openai_Gpt Oss", "openai.gpt-oss", "Openai.Gpt Oss"]))
return createSpecializedChatWrapper(HarmonyChatWrapper);
+ else if (includesText(modelNames, ["seed-oss", "Seed Oss", "Seed OSS", "Seed-Oss", "Seed-OSS", "ByteDance-Seed_Seed-OSS", "ByteDance-Seed.Seed-OSS"]))
+ return createSpecializedChatWrapper(SeedChatWrapper);
}

// try to find a pattern in the Jinja template to resolve to a specialized chat wrapper,
// with a logic similar to `llama.cpp`'s `llama_chat_apply_template_internal` function
if (modelJinjaTemplate != null && modelJinjaTemplate.trim() !== "") {
- if (modelJinjaTemplate.includes("<|start|>") && modelJinjaTemplate.includes("<|channel|>"))
+ if (modelJinjaTemplate.includes("") || (
+ modelJinjaTemplate.includes("") && modelJinjaTemplate.includes("")
+ ))
+ return createSpecializedChatWrapper(SeedChatWrapper);
+ else if (modelJinjaTemplate.includes("<|start|>") && modelJinjaTemplate.includes("<|channel|>"))
return createSpecializedChatWrapper(HarmonyChatWrapper);
else if (modelJinjaTemplate.includes("<|im_start|>"))
return createSpecializedChatWrapper(ChatMLChatWrapper);
diff --git a/src/cli/recommendedModels.ts b/src/cli/recommendedModels.ts
index 458c9b3b..47b06166 100644
--- a/src/cli/recommendedModels.ts
+++ b/src/cli/recommendedModels.ts
@@ -92,6 +92,20 @@ export const recommendedModels: ModelRecommendation[] = [{
fileOptions: [
"hf:Qwen/Qwen3-0.6B-GGUF:Q8_0"
]
+}, {
+ name: "Seed OSS 36B",
+ abilities: ["chat", "complete", "functionCalling", "reasoning"],
+ description: "The Seed OSS model was created by ByteDance and is using chain of though (CoT) to reason across a wide variety of topics.\n" +
+ "It's optimized for agentic use cases, with native support for function calling and flexible control of the thinking budget (via `SeedChatWrapper` options).\n" +
+ "This model can support a context size of up to 512K tokens (if you have enough VRAM to accommodate it).\n" +
+ "This is a 36 billion parameters model.",
+
+ fileOptions: [
+ "hf:giladgd/Seed-OSS-36B-Instruct-GGUF:Q8_0",
+ "hf:giladgd/Seed-OSS-36B-Instruct-GGUF:Q6_K",
+ "hf:giladgd/Seed-OSS-36B-Instruct-GGUF:Q5_K_M",
+ "hf:giladgd/Seed-OSS-36B-Instruct-GGUF:Q4_K_M"
+ ]
}, {
name: "DeepSeek R1 Distill Qwen 7B",
abilities: ["chat", "complete", "functionCalling", "reasoning"],