Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions cpp/actions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -649,13 +649,14 @@ glue_msg_get_kv_remove_res action_kv_remove(app_t &app, const char *req_raw)
PARSE_REQ(glue_msg_get_kv_remove_req);
const int n_keep = req.n_keep.value;
const int n_discard = req.n_discard.value;
auto * mem = llama_get_memory(app.ctx);

if (n_discard > 0)
{
// TODO: this code branch is kinda broken, to be fixed later
const int n_past = app.tokens.size();
llama_kv_self_seq_rm(app.ctx, 0, n_keep, n_keep + n_discard);
llama_kv_self_seq_add(app.ctx, 0, n_keep + n_discard, n_past, -n_discard);
llama_memory_seq_rm(mem, 0, n_keep, n_keep + n_discard);
llama_memory_seq_add(mem, 0, n_keep + n_discard, n_past, -n_discard);
app.tokens.erase(
app.tokens.begin() + n_keep,
app.tokens.begin() + n_keep + n_discard);
Expand All @@ -664,11 +665,11 @@ glue_msg_get_kv_remove_res action_kv_remove(app_t &app, const char *req_raw)
{
if (n_keep == 0)
{
llama_kv_self_clear(app.ctx);
llama_memory_clear(mem, true);
}
else
{
llama_kv_self_seq_rm(app.ctx, 0, n_keep, -1);
llama_memory_seq_rm(mem, 0, n_keep, -1);
app.tokens.erase(
app.tokens.begin() + n_keep,
app.tokens.end());
Expand All @@ -685,7 +686,8 @@ glue_msg_get_kv_remove_res action_kv_remove(app_t &app, const char *req_raw)
glue_msg_get_kv_clear_res action_kv_clear(app_t &app, const char *req_raw)
{
PARSE_REQ(glue_msg_get_kv_clear_req);
llama_kv_self_clear(app.ctx);
auto * mem = llama_get_memory(app.ctx);
llama_memory_clear(mem, true);
app.tokens.clear();

glue_msg_get_kv_clear_res res;
Expand Down Expand Up @@ -766,7 +768,7 @@ glue_msg_test_benchmark_res action_test_benchmark(app_t &app, const char *req_ra
std::string type = req.type.value; // "pp" (prompt proc) or "tg" (tok gen)
int n_samples = req.n_samples.value; // n_batch in pp and n_predict in tg

llama_kv_self_clear(app.ctx);
llama_memory_clear(llama_get_memory(app.ctx), true);
int n_vocab = llama_vocab_n_tokens(app.vocab);
int64_t t_start = ggml_time_ms();

Expand Down Expand Up @@ -837,7 +839,7 @@ glue_msg_test_perplexity_res action_test_perplexity(app_t &app, const char *req_
}

// Clear existing context to start fresh
llama_kv_self_clear(app.ctx);
llama_memory_clear(llama_get_memory(app.ctx), true);
app.tokens.clear();

const int32_t n_vocab = llama_vocab_n_tokens(app.vocab);
Expand Down
2 changes: 1 addition & 1 deletion llama.cpp
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@wllama/wllama",
"version": "2.3.1",
"version": "2.3.2",
"description": "WebAssembly binding for llama.cpp - Enabling on-browser LLM inference",
"main": "index.js",
"type": "module",
Expand Down
2 changes: 1 addition & 1 deletion src/multi-thread/wllama.js

Large diffs are not rendered by default.

Binary file modified src/multi-thread/wllama.wasm
Binary file not shown.
2 changes: 1 addition & 1 deletion src/single-thread/wllama.js

Large diffs are not rendered by default.

Binary file modified src/single-thread/wllama.wasm
Binary file not shown.
4 changes: 2 additions & 2 deletions src/wasm-from-cdn.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
// Do not edit this file directly

const WasmFromCDN = {
'single-thread/wllama.wasm': 'https://cdn.jsdelivr.net/npm/@wllama/wllama@2.3.1/src/single-thread/wllama.wasm',
'multi-thread/wllama.wasm': 'https://cdn.jsdelivr.net/npm/@wllama/wllama@2.3.1/src/multi-thread/wllama.wasm',
'single-thread/wllama.wasm': 'https://cdn.jsdelivr.net/npm/@wllama/wllama@2.3.2/src/single-thread/wllama.wasm',
'multi-thread/wllama.wasm': 'https://cdn.jsdelivr.net/npm/@wllama/wllama@2.3.2/src/multi-thread/wllama.wasm',
};

export default WasmFromCDN;
4 changes: 2 additions & 2 deletions src/workers-code/generated.ts

Large diffs are not rendered by default.