Skip to content

Commit ea8d904

Browse files
authored
feat: reasoning budget (#468)
* feat: reasoning budget * feat: SWA (Sliding Window Attention) support - greatly reduced context memory consumption on supported models * fix: prompt completion edge cases * fix: adapt to `llama.cpp` changes * fix: bugs * docs: generate an `llms.txt` file
1 parent 1799127 commit ea8d904

38 files changed

+1518
-470
lines changed

.vitepress/config.ts

Lines changed: 20 additions & 129 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@ import {rehype} from "rehype";
1212
import sharp from "sharp";
1313
import {GitChangelog, GitChangelogMarkdownSection} from "@nolebase/vitepress-plugin-git-changelog/vite";
1414
import {buildEndGenerateOpenGraphImages} from "@nolebase/vitepress-plugin-og-image/vitepress";
15+
import llmstxt from "vitepress-plugin-llms";
1516
import {Resvg, initWasm as initResvgWasm, type ResvgRenderOptions} from "@resvg/resvg-wasm";
1617
import {BlogPageInfoPlugin} from "./config/BlogPageInfoPlugin.js";
17-
import {getApiReferenceSidebar} from "./config/apiReferenceSidebar.js";
1818
import {ensureLocalImage} from "./utils/ensureLocalImage.js";
1919
import {getExcerptFromMarkdownFile} from "./utils/getExcerptFromMarkdownFile.js";
20+
import {getVitepressSidebar, getVitepressSidebarWithBlog} from "./config/sidebar.js";
21+
import {getBlogPosts} from "./config/getBlogPosts.js";
2022
import type {Element as HastElement, Parent} from "hast";
2123

2224
import type {Node as UnistNode} from "unist";
@@ -365,6 +367,12 @@ export default defineConfig({
365367
}) as VitepressPlugin,
366368
BlogPageInfoPlugin({
367369
include: (id) => id.includes(path.sep + "blog" + path.sep) && !id.endsWith(path.sep + "blog" + path.sep + "index.md")
370+
}),
371+
llmstxt({
372+
ignoreFiles: ["index.md"],
373+
domain: resolveHref("/test").slice(0, -"/test".length) || undefined,
374+
excludeBlog: false,
375+
sidebar: () => getVitepressSidebarWithBlog(true, false)
368376
})
369377
],
370378
build: {
@@ -434,6 +442,9 @@ export default defineConfig({
434442
}, {
435443
text: "GitHub Discussions",
436444
link: "https://github.com/withcatai/node-llama-cpp/discussions"
445+
}, {
446+
text: "Awesome List",
447+
link: "/guide/awesome"
437448
}, {
438449
text: "Contribute",
439450
link: "/guide/contributing"
@@ -469,100 +480,14 @@ export default defineConfig({
469480
}
470481
}
471482
},
472-
sidebar: {
473-
"/guide/": [{
474-
text: "Guide",
475-
base: "/guide",
476-
items: [
477-
{text: "Getting Started", link: "/"},
478-
{text: "Chat Session", link: "/chat-session"},
479-
{text: "Chat Wrapper", link: "/chat-wrapper"},
480-
{text: "Grammar", link: "/grammar"},
481-
{text: "Function Calling", link: "/function-calling"},
482-
{text: "Embedding", link: "/embedding"},
483-
{text: "Text Completion", link: "/text-completion"},
484-
{text: "Choosing a Model", link: "/choosing-a-model"},
485-
{text: "Downloading Models", link: "/downloading-models"}
486-
]
487-
}, {
488-
text: "Advanced",
489-
base: "/guide",
490-
items: [
491-
{text: "Building From Source", link: "/building-from-source"},
492-
{text: "Metal Support", link: "/Metal"},
493-
{text: "CUDA Support", link: "/CUDA"},
494-
{text: "Vulkan Support", link: "/Vulkan"},
495-
{text: "Electron Support", link: "/electron"},
496-
{text: "Using in Docker", link: "/docker"},
497-
{text: "Using Tokens", link: "/tokens"},
498-
{text: "LlamaText", link: "/llama-text"},
499-
{text: "External Chat State", link: "/external-chat-state"},
500-
{text: "Token Bias", link: "/token-bias"},
501-
{text: "Objects Lifecycle", link: "/objects-lifecycle"},
502-
{text: "Chat Context Shift", link: "/chat-context-shift"},
503-
{text: "Batching", link: "/batching"},
504-
{text: "Token Prediction", link: "/token-prediction"},
505-
{text: "Low Level API", link: "/low-level-api"},
506-
{text: "Awesome List", link: "/awesome"},
507-
{text: "Troubleshooting", link: "/troubleshooting"},
508-
{text: "Tips and Tricks", link: "/tips-and-tricks"}
509-
]
510-
}, {
511-
text: "Contributing",
512-
base: "/guide",
513-
items: [
514-
{text: "Setting Up a Dev Environment", link: "/development"},
515-
{text: "Pull Request Guidelines", link: "/contributing"}
516-
]
517-
}],
518-
519-
"/cli/": [{
520-
text: "CLI",
521-
base: "/cli",
522-
link: "/",
523-
items: [
524-
{text: "Init", link: "/init"},
525-
{text: "Chat", link: "/chat"},
526-
{text: "Pull", link: "/pull"},
527-
{
528-
text: "Source",
529-
link: "/source",
530-
collapsed: true,
531-
items: [
532-
{text: "Download", link: "/source/download"},
533-
{text: "Build", link: "/source/build"},
534-
{text: "Clear", link: "/source/clear"}
535-
]
536-
},
537-
{text: "Complete", link: "/complete"},
538-
{text: "Infill", link: "/infill"},
539-
{
540-
text: "Inspect",
541-
link: "/inspect",
542-
collapsed: true,
543-
items: [
544-
{text: "GPU", link: "/inspect/gpu"},
545-
{text: "GGUF", link: "/inspect/gguf"},
546-
{text: "Measure", link: "/inspect/measure"},
547-
{text: "Estimate", link: "/inspect/estimate"}
548-
]
549-
}
550-
]
551-
}],
552-
553-
"/api/": getApiReferenceSidebar()
554-
},
483+
sidebar: getVitepressSidebar(),
555484
socialLinks: [
556485
{icon: "npm", link: "https://www.npmjs.com/package/node-llama-cpp"},
557486
{icon: "github", link: "https://github.com/withcatai/node-llama-cpp"}
558487
]
559488
},
560489
async buildEnd(siteConfig) {
561-
const blogPosts = await createContentLoader("blog/*.md", {
562-
excerpt: true,
563-
render: true
564-
})
565-
.load();
490+
const blogPosts = await getBlogPosts(false);
566491

567492
async function loadSvgFontBuffers() {
568493
const interFontFilesDirectoryPath = path.join(require.resolve("@fontsource/inter"), "..", "files");
@@ -699,24 +624,7 @@ export default defineConfig({
699624
...siteConfig.site,
700625
themeConfig: {
701626
...siteConfig.site.themeConfig,
702-
sidebar: {
703-
...siteConfig.site.themeConfig.sidebar,
704-
"/_blog/": {
705-
text: "Blog",
706-
link: "/blog/",
707-
items: blogPosts
708-
.filter((post) => {
709-
const hasCoverImage = typeof post.frontmatter?.image === "string" ||
710-
typeof post.frontmatter?.image?.url === "string";
711-
712-
return !hasCoverImage;
713-
})
714-
.map((post) => ({
715-
text: post.frontmatter.title,
716-
link: post.url
717-
}))
718-
}
719-
}
627+
sidebar: await getVitepressSidebarWithBlog(true, true)
720628
}
721629
}
722630
});
@@ -744,22 +652,6 @@ export default defineConfig({
744652
hub: "https://pubsubhubbub.appspot.com/"
745653
});
746654

747-
blogPosts.sort((a, b) => {
748-
const aDate = a.frontmatter.date
749-
? new Date(a.frontmatter.date)
750-
: null;
751-
const bDate = b.frontmatter.date
752-
? new Date(b.frontmatter.date)
753-
: null;
754-
755-
if (aDate == null)
756-
return -1;
757-
if (bDate == null)
758-
return 1;
759-
760-
return bDate.getTime() - aDate.getTime();
761-
});
762-
763655
for (const {url, frontmatter, html, src, excerpt: originalExcerpt} of blogPosts) {
764656
const ogImageElement = findElementInHtml(html, (element) => (
765657
element.tagName === "meta" && (element.properties?.name === "og:image" || element.properties?.property === "og:image")
@@ -819,12 +711,6 @@ export default defineConfig({
819711

820712
await addOgImages();
821713

822-
const indexPageIndex = blogPosts.findIndex((post) => post.url === "/blog/");
823-
if (indexPageIndex < 0)
824-
throw new Error("Blog index page not found");
825-
826-
blogPosts.splice(indexPageIndex, 1);
827-
828714
await addBlogRssFeed();
829715

830716
try {
@@ -853,6 +739,11 @@ export default defineConfig({
853739
path.join(siteConfig.outDir, "logo.preview.avif"),
854740
24
855741
);
742+
743+
await Promise.all([
744+
fs.copy(path.join(siteConfig.outDir, "llms.txt"), path.join(siteConfig.outDir, "llms.md")),
745+
fs.copy(path.join(siteConfig.outDir, "llms-full.txt"), path.join(siteConfig.outDir, "llms-full.md"))
746+
]);
856747
}
857748
});
858749

.vitepress/config/getBlogPosts.ts

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import {ContentData, createContentLoader} from "vitepress";
2+
3+
// Module-level cache of the loaded blog posts; stays `undefined` until `_getBlogPosts` fills it.
let blogPosts: ContentData[] | undefined = undefined;
4+
/**
 * Get the blog posts, in the order `_getBlogPosts` returns them.
 *
 * A fresh array is returned on every call, so callers may reorder or splice the result
 * without affecting the shared array that `_getBlogPosts` caches.
 * @param includeIndex - when `false` (the default), the post whose URL is `/blog/` is removed from the result
 * @returns a new array of blog posts
 * @throws {Error} when `includeIndex` is `false` and no post with the `/blog/` URL exists
 */
export async function getBlogPosts(includeIndex: boolean = false) {
    // Copy before doing anything else: `_getBlogPosts` hands out the cached array itself
    const posts = (await _getBlogPosts()).slice();

    if (includeIndex)
        return posts;

    const indexPageIndex = posts.findIndex((post) => post.url === "/blog/");
    if (indexPageIndex < 0)
        throw new Error("Blog index page not found");

    posts.splice(indexPageIndex, 1);

    return posts;
}
18+
19+
/**
 * Load the `blog/*.md` pages (with the `excerpt` and `render` loader options enabled),
 * sort them, and store them in the module-level `blogPosts` cache.
 * Later calls return the cached array without loading again.
 *
 * Posts are ordered newest first by `frontmatter.date`.
 * Posts with a missing or unparsable date are placed before dated posts and keep their relative order.
 * @returns the cached array itself (not a copy)
 */
async function _getBlogPosts() {
    if (blogPosts != null)
        return blogPosts;

    /** Timestamp of a post's `frontmatter.date`, or `null` when the date is missing or cannot be parsed. */
    function getPostTime(post: ContentData): number | null {
        const date = post.frontmatter.date;
        if (!date)
            return null;

        const time = new Date(date).getTime();
        return Number.isNaN(time)
            ? null
            : time;
    }

    const loadedPosts = await createContentLoader("blog/*.md", {
        excerpt: true,
        render: true
    })
        .load();

    loadedPosts.sort((a, b) => {
        const aTime = getPostTime(a);
        const bTime = getPostTime(b);

        // A comparator must be consistent: two undated posts compare as equal
        if (aTime == null && bTime == null)
            return 0;
        if (aTime == null)
            return -1;
        if (bTime == null)
            return 1;

        return bTime - aTime;
    });

    blogPosts = loadedPosts;

    return blogPosts;
}

.vitepress/config/sidebar.ts

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
import {DefaultTheme} from "vitepress";
2+
import {getApiReferenceSidebar} from "./apiReferenceSidebar.js";
3+
import {getBlogPosts} from "./getBlogPosts.js";
4+
5+
// Built once when this module is evaluated; `getVitepressSidebar` hands out a `structuredClone` of it on every call.
const apiReferenceSidebar = getApiReferenceSidebar();
6+
7+
/**
 * Build the VitePress sidebar configuration for the documentation site.
 *
 * The result always contains the `/guide/`, `/cli/` and `/api/` sections.
 * A `/_blog/` section is added between `/guide/` and `/cli/` only when `blog` is provided.
 * @param blog - sidebar items to list under the "Blog" group; omit to leave the blog section out
 * @returns a newly created sidebar object; its `/api/` section is a deep clone of `apiReferenceSidebar`
 */
export function getVitepressSidebar(blog?: DefaultTheme.SidebarItem[]): DefaultTheme.Sidebar {
    const guideSections: DefaultTheme.SidebarItem[] = [{
        text: "Guide",
        base: "/guide",
        items: [
            {text: "Getting Started", link: "/"},
            {text: "Chat Session", link: "/chat-session"},
            {text: "Chat Wrapper", link: "/chat-wrapper"},
            {text: "Grammar", link: "/grammar"},
            {text: "Function Calling", link: "/function-calling"},
            {text: "Embedding", link: "/embedding"},
            {text: "Text Completion", link: "/text-completion"},
            {text: "Choosing a Model", link: "/choosing-a-model"},
            {text: "Downloading Models", link: "/downloading-models"}
        ]
    }, {
        text: "Advanced",
        base: "/guide",
        items: [
            {text: "Building From Source", link: "/building-from-source"},
            {text: "Metal Support", link: "/Metal"},
            {text: "CUDA Support", link: "/CUDA"},
            {text: "Vulkan Support", link: "/Vulkan"},
            {text: "Electron Support", link: "/electron"},
            {text: "Using in Docker", link: "/docker"},
            {text: "Using Tokens", link: "/tokens"},
            {text: "LlamaText", link: "/llama-text"},
            {text: "External Chat State", link: "/external-chat-state"},
            {text: "Token Bias", link: "/token-bias"},
            {text: "Objects Lifecycle", link: "/objects-lifecycle"},
            {text: "Chat Context Shift", link: "/chat-context-shift"},
            {text: "Batching", link: "/batching"},
            {text: "Token Prediction", link: "/token-prediction"},
            {text: "Low Level API", link: "/low-level-api"},
            {text: "Awesome List", link: "/awesome"},
            {text: "Troubleshooting", link: "/troubleshooting"},
            {text: "Tips and Tricks", link: "/tips-and-tricks"}
        ]
    }, {
        text: "Contributing",
        base: "/guide",
        items: [
            {text: "Setting Up a Dev Environment", link: "/development"},
            {text: "Pull Request Guidelines", link: "/contributing"}
        ]
    }];

    const cliSections: DefaultTheme.SidebarItem[] = [{
        text: "CLI",
        base: "/cli",
        link: "/",
        items: [
            {text: "Init", link: "/init"},
            {text: "Chat", link: "/chat"},
            {text: "Pull", link: "/pull"},
            {
                text: "Source",
                link: "/source",
                collapsed: true,
                items: [
                    {text: "Download", link: "/source/download"},
                    {text: "Build", link: "/source/build"},
                    {text: "Clear", link: "/source/clear"}
                ]
            },
            {text: "Complete", link: "/complete"},
            {text: "Infill", link: "/infill"},
            {
                text: "Inspect",
                link: "/inspect",
                collapsed: true,
                items: [
                    {text: "GPU", link: "/inspect/gpu"},
                    {text: "GGUF", link: "/inspect/gguf"},
                    {text: "Measure", link: "/inspect/measure"},
                    {text: "Estimate", link: "/inspect/estimate"}
                ]
            }
        ]
    }];

    // Empty when no blog items were passed, so spreading it below adds no key at all
    const blogSection = blog == null
        ? {}
        : {
            "/_blog/": [{
                text: "Blog",
                link: "/blog/",
                items: blog
            }]
        };

    return {
        "/guide/": guideSections,
        ...blogSection,
        "/cli/": cliSections,
        "/api/": structuredClone(apiReferenceSidebar)
    };
}
104+
105+
/**
 * Turn the blog posts into sidebar items, using each post's `frontmatter.title` as the text
 * and its URL as the link.
 * @param includeIndex - forwarded to `getBlogPosts`
 * @param onlyItemsWithoutCoverImage - when `true`, posts whose `frontmatter.image` is a string,
 * or an object with a string `url`, are left out
 * @returns the sidebar items, in the order `getBlogPosts` returned the posts
 */
export async function getSidebarBlogPostItems(
    includeIndex: boolean = false,
    onlyItemsWithoutCoverImage: boolean = false
): Promise<DefaultTheme.SidebarItem[]> {
    const posts = await getBlogPosts(includeIndex);
    const sidebarItems: DefaultTheme.SidebarItem[] = [];

    for (const post of posts) {
        if (onlyItemsWithoutCoverImage) {
            const coverImage = post.frontmatter?.image;

            if (typeof coverImage === "string" || typeof coverImage?.url === "string")
                continue;
        }

        sidebarItems.push({
            text: post.frontmatter.title,
            link: post.url
        });
    }

    return sidebarItems;
}
126+
127+
/**
 * Build the full sidebar, including a blog section generated from the blog posts.
 *
 * Both parameters are passed through unchanged to `getSidebarBlogPostItems`.
 * @param includeIndex - passed to `getSidebarBlogPostItems`
 * @param onlyItemsWithoutCoverImage - passed to `getSidebarBlogPostItems`
 * @returns the result of `getVitepressSidebar` called with the generated blog items
 */
export async function getVitepressSidebarWithBlog(
    includeIndex: boolean = false,
    onlyItemsWithoutCoverImage: boolean = false
) {
    return getVitepressSidebar(
        await getSidebarBlogPostItems(includeIndex, onlyItemsWithoutCoverImage)
    );
}

0 commit comments

Comments
 (0)