diff --git a/.vitepress/config/apiReferenceSidebar.ts b/.vitepress/config/apiReferenceSidebar.ts
index 8cb8aa9d..6b8c35ed 100644
--- a/.vitepress/config/apiReferenceSidebar.ts
+++ b/.vitepress/config/apiReferenceSidebar.ts
@@ -44,8 +44,12 @@ const chatWrappersOrder = [
     "GeneralChatWrapper",
     "TemplateChatWrapper",
     "JinjaTemplateChatWrapper",
+    "QwenChatWrapper",
+    "HarmonyChatWrapper",
+    "SeedChatWrapper",
     "DeepSeekChatWrapper",
     "Llama3_1ChatWrapper",
+    "Llama3_2LightweightChatWrapper",
     "Llama3ChatWrapper",
     "Llama2ChatWrapper",
     "MistralChatWrapper",
diff --git a/package-lock.json b/package-lock.json
index 138ef557..c2541230 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -53,7 +53,7 @@
         "@nolebase/vitepress-plugin-og-image": "^2.17.0",
         "@resvg/resvg-js": "^2.6.2",
         "@semantic-release/exec": "^7.1.0",
-        "@semantic-release/github": "11.0.4",
+        "@semantic-release/github": "11.0.5",
         "@semantic-release/npm": "12.0.2",
         "@shikijs/vitepress-twoslash": "^3.4.0",
         "@stylistic/eslint-plugin": "^4.2.0",
@@ -106,12 +106,14 @@
         "@node-llama-cpp/linux-armv7l": "0.1.0",
         "@node-llama-cpp/linux-x64": "0.1.0",
         "@node-llama-cpp/linux-x64-cuda": "0.1.0",
+        "@node-llama-cpp/linux-x64-cuda-ext": "0.1.0",
         "@node-llama-cpp/linux-x64-vulkan": "0.1.0",
         "@node-llama-cpp/mac-arm64-metal": "0.1.0",
         "@node-llama-cpp/mac-x64": "0.1.0",
         "@node-llama-cpp/win-arm64": "0.1.0",
         "@node-llama-cpp/win-x64": "0.1.0",
         "@node-llama-cpp/win-x64-cuda": "0.1.0",
+        "@node-llama-cpp/win-x64-cuda-ext": "0.1.0",
         "@node-llama-cpp/win-x64-vulkan": "0.1.0"
       },
       "peerDependencies": {
@@ -2301,6 +2303,9 @@
     "node_modules/@node-llama-cpp/linux-x64-cuda": {
       "optional": true
     },
+    "node_modules/@node-llama-cpp/linux-x64-cuda-ext": {
+      "optional": true
+    },
     "node_modules/@node-llama-cpp/linux-x64-vulkan": {
       "optional": true
     },
@@ -2319,6 +2324,9 @@
     "node_modules/@node-llama-cpp/win-x64-cuda": {
      "optional": true
    },
+    "node_modules/@node-llama-cpp/win-x64-cuda-ext": {
+      "optional": true
+    },
     "node_modules/@node-llama-cpp/win-x64-vulkan": {
       "optional": true
     },
@@ -3622,9 +3630,9 @@
       }
     },
     "node_modules/@semantic-release/github": {
-      "version": "11.0.4",
-      "resolved": "https://registry.npmjs.org/@semantic-release/github/-/github-11.0.4.tgz",
-      "integrity": "sha512-fU/nLSjkp9DmB0h7FVO5imhhWJMvq2LjD4+3lz3ZAzpDLY9+KYwC+trJ+g7LbZeJv9y3L9fSFSg2DduUpiT6bw==",
+      "version": "11.0.5",
+      "resolved": "https://registry.npmjs.org/@semantic-release/github/-/github-11.0.5.tgz",
+      "integrity": "sha512-wJamzHteXwBdopvkTD6BJjPz1UHLm20twlVCSMA9zpd3B5KrOQX137jfTbNJT6ZVz3pXtg0S1DroQl4wifJ4WQ==",
       "dev": true,
       "license": "MIT",
       "dependencies": {
diff --git a/package.json b/package.json
index ac229831..17b3a234 100644
--- a/package.json
+++ b/package.json
@@ -143,7 +143,7 @@
     "@nolebase/vitepress-plugin-og-image": "^2.17.0",
     "@resvg/resvg-js": "^2.6.2",
     "@semantic-release/exec": "^7.1.0",
-    "@semantic-release/github": "11.0.4",
+    "@semantic-release/github": "11.0.5",
     "@semantic-release/npm": "12.0.2",
     "@shikijs/vitepress-twoslash": "^3.4.0",
     "@stylistic/eslint-plugin": "^4.2.0",
"../utils/LlamaText.js"; +import {ChatModelFunctionsDocumentationGenerator} from "./utils/ChatModelFunctionsDocumentationGenerator.js"; + +const defaultThinkingBudget = null; + +// source: https://huggingface.co/ByteDance-Seed/Seed-OSS-36B-Instruct/blob/main/chat_template.jinja +export class SeedChatWrapper extends ChatWrapper { + public readonly wrapperName: string = "Seed"; + + public readonly thinkingBudget: number | 0 | null; + + public override readonly settings: ChatWrapperSettings = { + supportsSystemMessages: true, + functions: { + call: { + optionalPrefixSpace: true, + prefix: LlamaText(new SpecialTokensText("\n"), "")), + suffix: LlamaText(new SpecialTokensText("\n\n\n")), + emptyCallParamsPlaceholder: {} + }, + result: { + prefix: LlamaText(new SpecialTokensText("tool\n")), + suffix: LlamaText(new SpecialTokensText("")) + } + }, + segments: { + thought: { + prefix: LlamaText(new SpecialTokensText("")), + suffix: LlamaText(new SpecialTokensText("")), + reopenAfterFunctionCalls: true + } + } + }; + + public constructor(options: { + /** + * The thinking budget to instruct the model to conform to. + * + * This is purely a request, the model may ignore it. + * + * Set to `0` to instruct the model to not use any reasoning. + * + * When set to `null`, the instruction will be omitted (unlimited reasoning). + * + * Defaults to `null`. + */ + thinkingBudget?: number | 0 | null + } = {}) { + super(); + + const { + thinkingBudget = defaultThinkingBudget + } = options; + + this.thinkingBudget = thinkingBudget; + } + + public override generateContextState({ + chatHistory, availableFunctions, documentFunctionParams + }: ChatWrapperGenerateContextStateOptions): ChatWrapperGeneratedContextState { + const hasFunctions = Object.keys(availableFunctions ?? {}).length > 0; + const modifiedChatHistory = chatHistory.slice(); + + let systemMessage: LlamaText = LlamaText(); + if (modifiedChatHistory[0]?.type === "system") { + systemMessage = LlamaText.fromJSON(modifiedChatHistory[0].text); + modifiedChatHistory.shift(); + } + + const contextContent: LlamaText[] = []; + + if (systemMessage.values.length > 0 || hasFunctions) + contextContent.push( + LlamaText([ + new SpecialTokensText("system\n"), + this._getFirstSystemMessage(systemMessage, availableFunctions, {documentParams: documentFunctionParams}), + new SpecialTokensText("\n") + ]) + ); + + const thinkingBudgetSystemMessage = this._getThinkingBudgetSystemMessage(); + if (thinkingBudgetSystemMessage.values.length > 0) + contextContent.push( + LlamaText([ + new SpecialTokensText("system\n"), + thinkingBudgetSystemMessage, + new SpecialTokensText("\n") + ]) + ); + + for (let i = 0; i < modifiedChatHistory.length; i++) { + const isLastItem = i === modifiedChatHistory.length - 1; + const item = modifiedChatHistory[i]; + + if (item == null) + continue; + + if (item.type === "system") { + contextContent.push( + LlamaText([ + new SpecialTokensText("system\n"), + LlamaText.fromJSON(item.text), + isLastItem + ? LlamaText([]) + : new SpecialTokensText("\n") + ]) + ); + } else if (item.type === "user") { + contextContent.push( + LlamaText([ + new SpecialTokensText("system\n"), + item.text, + isLastItem + ? 
+            } else if (item.type === "model") {
+                const injectNoThinkingThought = this.thinkingBudget === 0 && (
+                    isLastItem ||
+                    !item.response.some(
+                        (item) => (
+                            isChatModelResponseSegment(item) && item.segmentType === "thought"
+                        )
+                    )
+                );
+
+                contextContent.push(
+                    LlamaText([
+                        new SpecialTokensText("<seed:bos>assistant\n"),
+                        !injectNoThinkingThought
+                            ? []
+                            : [
+                                new SpecialTokensText("<seed:think>\n"),
+                                [
+                                    new SpecialTokensText("<seed:cot_budget_reflect>"),
+                                    "The current thinking budget is 0, so I will directly start answering the question.",
+                                    new SpecialTokensText("</seed:cot_budget_reflect>")
+                                ],
+                                new SpecialTokensText("</seed:think>\n")
+                            ],
+                        this.generateModelResponseText(item.response, true),
+                        isLastItem
+                            ? LlamaText([])
+                            : new SpecialTokensText("<seed:eos>\n")
+                    ])
+                );
+            } else
+                void (item satisfies never);
+        }
+
+        const contextText = LlamaText(contextContent);
+
+        return {
+            contextText,
+            stopGenerationTriggers: [
+                LlamaText(new SpecialToken("EOS")),
+                LlamaText(new SpecialTokensText("<seed:eos>")),
+                LlamaText("<seed:eos>")
+            ]
+        };
+    }
+
+    public override generateAvailableFunctionsSystemText(availableFunctions: ChatModelFunctions, {documentParams = true}: {
+        documentParams?: boolean
+    }) {
+        const functionsDocumentationGenerator = new ChatModelFunctionsDocumentationGenerator(availableFunctions);
+
+        if (!functionsDocumentationGenerator.hasAnyFunctions)
+            return LlamaText([]);
+
+        return LlamaText.joinValues("\n", [
+            "",
+            "Tool List:",
+            (
+                "You are authorized to use the following tools (described in JSON Schema format). " +
+                "Before performing any task, you must decide how to call them based on the descriptions and parameters of these tools."
+            ),
+            functionsDocumentationGenerator.getSeedFunctionSignatures({documentParams}),
+            "When invoking tools, strictly adhere to the following format:", // the original text for this is in Chinese, translated to English here
+            new SpecialTokensText("<seed:tool_call>\n<function=example_function_name>\n{\"example_parameter_1\": \"value_1\", \"example_parameter_2\": \"This is the value for the second parameter\"}\n</function></seed:tool_call>")
+        ]);
+    }
+
+    /** @internal */
+    private _getFirstSystemMessage(
+        systemPrompt: LlamaText,
+        availableFunctions?: ChatModelFunctions,
+        {documentParams = true}: {documentParams?: boolean} = {}
+    ) {
+        const res: LlamaText[] = [];
+
+        const functionsDocumentationGenerator = new ChatModelFunctionsDocumentationGenerator(availableFunctions);
+
+        if (systemPrompt.values.length === 0 && functionsDocumentationGenerator.hasAnyFunctions)
+            res.push(
+                LlamaText("You are Doubao, a helpful AI assistant. You may call one or more functions to assist with the user query.")
+            );
+        else if (systemPrompt.values.length > 0)
+            res.push(systemPrompt);
+
+        if (functionsDocumentationGenerator.hasAnyFunctions)
+            res.push(this.generateAvailableFunctionsSystemText(availableFunctions!, {documentParams}));
+
+        return LlamaText(res);
+    }
+
+    /** @internal */
+    private _getThinkingBudgetSystemMessage() {
+        if (this.thinkingBudget == null || this.thinkingBudget < 0)
+            return LlamaText([]);
+
+        if (this.thinkingBudget === 0)
+            return LlamaText([
+                "You are an intelligent assistant that can answer questions in one step without the need for reasoning and thinking, " +
+                "that is, your thinking budget is 0. " +
+                "Next, please skip the thinking process and directly start answering the user's questions."
+            ]);
+
+        // ordered from the smallest budget to the largest, so the first match
+        // yields the tightest reflection interval for the configured budget
+        let reflectionInterval: number = 1024;
+        const reflectionIntervals = new Map([
+            [0, 0],
+            [512, 128],
+            [1024, 256],
+            [2048, 512],
+            [4096, 512],
+            [8192, 1024],
+            [16384, 1024]
+        ]);
+        for (const [maxBudget, interval] of reflectionIntervals.entries()) {
+            if (this.thinkingBudget <= maxBudget) {
+                reflectionInterval = interval;
+                break;
+            }
+        }
+
+        return LlamaText([
+            "You are an intelligent assistant with reflective ability. In the process of thinking and reasoning, you need to strictly follow the thinking budget, which is ",
+            this.thinkingBudget,
+            ". That is, you need to complete your thinking within ",
+            this.thinkingBudget,
+            " tokens and start answering the user's questions. You will reflect on your thinking process every ",
+            reflectionInterval,
+            " tokens, stating how many tokens have been used and how many are left."
+        ]);
+    }
+}
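For reference, here's a minimal sketch of how the new wrapper is meant to be used. It assumes `SeedChatWrapper` is re-exported from the package root like the other chat wrappers, and it borrows the model URI from the `recommendedModels.ts` entry added below:

```ts
import {getLlama, resolveModelFile, LlamaChatSession, SeedChatWrapper} from "node-llama-cpp";

// downloads the model file on the first run, reuses it afterwards
const modelPath = await resolveModelFile("hf:giladgd/Seed-OSS-36B-Instruct-GGUF:Q4_K_M");

const llama = await getLlama();
const model = await llama.loadModel({modelPath});
const context = await model.createContext();

const session = new LlamaChatSession({
    contextSequence: context.getSequence(),

    // ask the model to keep its reasoning within ~4K tokens;
    // `thinkingBudget: 0` requests no reasoning at all,
    // and omitting the option leaves reasoning unbounded
    chatWrapper: new SeedChatWrapper({thinkingBudget: 4096})
});

console.log(await session.prompt("Hi there, how are you?"));
```

As the constructor doc comment notes, the budget is only an instruction embedded into a system message (see `_getThinkingBudgetSystemMessage` above), so the model may still overrun it.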
diff --git a/src/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.ts b/src/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.ts
index c92bec3c..1b797bd8 100644
--- a/src/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.ts
+++ b/src/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.ts
@@ -191,6 +191,10 @@ export class ChatModelFunctionsDocumentationGenerator {
             .join("\n");
     }
 
+    public getSeedFunctionSignatures({documentParams = true}: {documentParams?: boolean} = {}) {
+        return jsonDumps(this._convertToJinjaTools({documentParams}));
+    }
+
     /** @internal */
     private _convertToJinjaTools({documentParams = true}: {documentParams?: boolean} = {}) {
         const chatModelFunctions = this.chatModelFunctions;
diff --git a/src/chatWrappers/utils/resolveChatWrapper.ts b/src/chatWrappers/utils/resolveChatWrapper.ts
index abc25c46..8cadf9e8 100644
--- a/src/chatWrappers/utils/resolveChatWrapper.ts
+++ b/src/chatWrappers/utils/resolveChatWrapper.ts
@@ -19,6 +19,7 @@ import {includesText} from "../../utils/includesText.js";
 import {LlamaModel} from "../../evaluator/LlamaModel/LlamaModel.js";
 import {QwenChatWrapper} from "../QwenChatWrapper.js";
 import {HarmonyChatWrapper} from "../HarmonyChatWrapper.js";
+import {SeedChatWrapper} from "../SeedChatWrapper.js";
 import {isJinjaTemplateEquivalentToSpecializedChatWrapper} from "./isJinjaTemplateEquivalentToSpecializedChatWrapper.js";
 import {getModelLinageNames} from "./getModelLinageNames.js";
 import type {GgufFileInfo} from "../../gguf/types/GgufFileInfoTypes.js";
@@ -26,7 +27,7 @@ import type {GgufFileInfo} from "../../gguf/types/GgufFileInfoTypes.js";
 
 export const specializedChatWrapperTypeNames = Object.freeze([
     "general", "deepSeek", "qwen", "llama3.2-lightweight", "llama3.1", "llama3", "llama2Chat", "mistral", "alpacaChat", "functionary",
-    "chatML", "falconChat", "gemma", "harmony"
+    "chatML", "falconChat", "gemma", "harmony", "seed"
 ] as const);
 export type SpecializedChatWrapperTypeName = (typeof specializedChatWrapperTypeNames)[number];
@@ -57,6 +58,7 @@ export const chatWrappers = Object.freeze({
     "falconChat": FalconChatWrapper,
     "gemma": GemmaChatWrapper,
     "harmony": HarmonyChatWrapper,
+    "seed": SeedChatWrapper,
     "template": TemplateChatWrapper,
     "jinjaTemplate": JinjaTemplateChatWrapper
 } as const satisfies Record<SpecializedChatWrapperTypeName | TemplateChatWrapperTypeName, any>);
@@ -366,12 +368,18 @@ export function resolveChatWrapper(
             return createSpecializedChatWrapper(GemmaChatWrapper);
         else if (includesText(modelNames, ["gpt-oss", "Gpt Oss", "Gpt-Oss", "openai_gpt-oss", "Openai_Gpt Oss", "openai.gpt-oss", "Openai.Gpt Oss"]))
             return createSpecializedChatWrapper(HarmonyChatWrapper);
+        else if (includesText(modelNames, ["seed-oss", "Seed Oss", "Seed OSS", "Seed-Oss", "Seed-OSS", "ByteDance-Seed_Seed-OSS", "ByteDance-Seed.Seed-OSS"]))
+            return createSpecializedChatWrapper(SeedChatWrapper);
     }
 
     // try to find a pattern in the Jinja template to resolve to a specialized chat wrapper,
     // with a logic similar to `llama.cpp`'s `llama_chat_apply_template_internal` function
     if (modelJinjaTemplate != null && modelJinjaTemplate.trim() !== "") {
-        if (modelJinjaTemplate.includes("<|start|>") && modelJinjaTemplate.includes("<|channel|>"))
+        if (modelJinjaTemplate.includes("<seed:bos>") || (
+            modelJinjaTemplate.includes("<seed:think>") && modelJinjaTemplate.includes("<seed:eos>")
+        ))
+            return createSpecializedChatWrapper(SeedChatWrapper);
+        else if (modelJinjaTemplate.includes("<|start|>") && modelJinjaTemplate.includes("<|channel|>"))
             return createSpecializedChatWrapper(HarmonyChatWrapper);
         else if (modelJinjaTemplate.includes("<|im_start|>"))
             return createSpecializedChatWrapper(ChatMLChatWrapper);
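The new Jinja checks are inserted ahead of the existing Harmony and ChatML checks, so the more specific `<seed:...>` markers are tested first. For reference, a sketch of the expected automatic resolution for a Seed-OSS GGUF, assuming the `resolveChatWrapper` overload that accepts a loaded `LlamaModel` (the model path here is hypothetical):

```ts
import {getLlama, resolveChatWrapper, SeedChatWrapper} from "node-llama-cpp";

const llama = await getLlama();
const model = await llama.loadModel({modelPath: "path/to/Seed-OSS-36B-Instruct-Q4_K_M.gguf"});

// resolution first tries the model name heuristics ("seed-oss", "Seed OSS", ...)
// read from the GGUF metadata, then falls back to scanning the
// model's Jinja template for the Seed markers
const chatWrapper = resolveChatWrapper(model);
console.log(chatWrapper instanceof SeedChatWrapper); // expected: true
```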
diff --git a/src/cli/recommendedModels.ts b/src/cli/recommendedModels.ts
index 458c9b3b..47b06166 100644
--- a/src/cli/recommendedModels.ts
+++ b/src/cli/recommendedModels.ts
@@ -92,6 +92,20 @@ export const recommendedModels: ModelRecommendation[] = [{
     fileOptions: [
         "hf:Qwen/Qwen3-0.6B-GGUF:Q8_0"
     ]
+}, {
+    name: "Seed OSS 36B",
+    abilities: ["chat", "complete", "functionCalling", "reasoning"],
+    description: "The Seed OSS model was created by ByteDance and is using chain of thought (CoT) to reason across a wide variety of topics.\n" +
+        "It's optimized for agentic use cases, with native support for function calling and flexible control of the thinking budget (via `SeedChatWrapper` options).\n" +
+        "This model can support a context size of up to 512K tokens (if you have enough VRAM to accommodate it).\n" +
+        "This is a 36 billion parameters model.",
+
+    fileOptions: [
+        "hf:giladgd/Seed-OSS-36B-Instruct-GGUF:Q8_0",
+        "hf:giladgd/Seed-OSS-36B-Instruct-GGUF:Q6_K",
+        "hf:giladgd/Seed-OSS-36B-Instruct-GGUF:Q5_K_M",
+        "hf:giladgd/Seed-OSS-36B-Instruct-GGUF:Q4_K_M"
+    ]
 }, {
     name: "DeepSeek R1 Distill Qwen 7B",
     abilities: ["chat", "complete", "functionCalling", "reasoning"],
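Since the new entry advertises `functionCalling`, here's a sketch of exercising it through the standard function-calling API; the `getCurrentWeather` function is hypothetical, and the wrapper renders the calls using the `<seed:tool_call>`/`<function=...>` syntax defined in `SeedChatWrapper.settings`:

```ts
import {
    getLlama, resolveModelFile, LlamaChatSession, defineChatSessionFunction, SeedChatWrapper
} from "node-llama-cpp";

const modelPath = await resolveModelFile("hf:giladgd/Seed-OSS-36B-Instruct-GGUF:Q4_K_M");
const llama = await getLlama();
const model = await llama.loadModel({modelPath});
const context = await model.createContext();

const session = new LlamaChatSession({
    contextSequence: context.getSequence(),
    chatWrapper: new SeedChatWrapper()
});

const functions = {
    // hypothetical function, for illustration only
    getCurrentWeather: defineChatSessionFunction({
        description: "Get the current weather in a given location",
        params: {
            type: "object",
            properties: {
                location: {type: "string"}
            }
        },
        handler({location}) {
            return {location, temperature: "24°C", condition: "Sunny"};
        }
    })
};

console.log(await session.prompt("What's the weather like in Paris?", {functions}));
```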