@@ -108,12 +108,12 @@ static void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size) {
     }
 }
 
-std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token) {
+std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token, bool specialTokens) {
     std::vector<char> result(8, 0);
-    const int n_tokens = llama_token_to_piece(model, token, result.data(), result.size());
+    const int n_tokens = llama_token_to_piece(model, token, result.data(), result.size(), specialTokens);
     if (n_tokens < 0) {
         result.resize(-n_tokens);
-        int check = llama_token_to_piece(model, token, result.data(), result.size());
+        int check = llama_token_to_piece(model, token, result.data(), result.size(), specialTokens);
         GGML_ASSERT(check == -n_tokens);
     } else {
         result.resize(n_tokens);
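
For context, llama_token_to_piece returns the number of bytes written, or the negative of the required buffer size when the initial 8-byte buffer is too small; the second call above is the retry with an exact-size buffer. A minimal caller-side sketch of the updated helper, assuming `model` is a valid llama_model* loaded elsewhere and the token id is a placeholder:

    // Hypothetical usage of the helper above; 42 is a placeholder token id.
    llama_token token = 42;
    std::string piece = addon_model_token_to_piece(model, token, /*specialTokens=*/true);
    printf("token %d -> \"%s\"\n", token, piece.c_str());
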
@@ -378,13 +378,16 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
         }
 
         Napi::Uint32Array tokens = info[0].As<Napi::Uint32Array>();
+        bool decodeSpecialTokens = info.Length() > 1
+            ? info[1].As<Napi::Boolean>().Value()
+            : false;
 
         // Create a stringstream for accumulating the decoded string.
         std::stringstream ss;
 
         // Decode each token and accumulate the result.
         for (size_t i = 0; i < tokens.ElementLength(); i++) {
-            const std::string piece = addon_model_token_to_piece(model, (llama_token)tokens[i]);
+            const std::string piece = addon_model_token_to_piece(model, (llama_token)tokens[i], decodeSpecialTokens);
 
             if (piece.empty()) {
                 continue;
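
Note that the length guard must check `info.Length() > 1` before reading `info[1]`: node-addon-api returns `undefined` for an out-of-range index, and calling `.Value()` on an unchecked `As<Napi::Boolean>()` cast of `undefined` is invalid. A defensive sketch of the same optional-argument pattern (names are illustrative):

    // Sketch: read an optional boolean second argument, defaulting to false.
    bool decodeSpecialTokens = false;
    if (info.Length() > 1 && info[1].IsBoolean()) {
        decodeSpecialTokens = info[1].As<Napi::Boolean>().Value();
    }
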
@@ -534,6 +537,20 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
 
         return Napi::Number::From(info.Env(), int32_t(tokenType));
     }
+    Napi::Value IsEogToken(const Napi::CallbackInfo& info) {
+        if (disposed) {
+            Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
+            return info.Env().Undefined();
+        }
+
+        if (info[0].IsNumber() == false) {
+            return Napi::Boolean::New(info.Env(), false);
+        }
+
+        int token = info[0].As<Napi::Number>().Int32Value();
+
+        return Napi::Boolean::New(info.Env(), llama_token_is_eog(model, token));
+    }
     Napi::Value GetVocabularyType(const Napi::CallbackInfo& info) {
         if (disposed) {
             Napi::Error::New(info.Env(), "Model is disposed").ThrowAsJavaScriptException();
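
Unlike a direct comparison with `llama_token_eos`, `llama_token_is_eog` reports true for any token the model treats as end-of-generation (EOS, EOT, and similar), so it also covers chat models that end turns with EOT. A minimal sketch of a native generation loop built on it, assuming `model` is valid and `sample_next()` stands in for the sampling code later in this file:

    // Sketch: stop on any end-of-generation token, not just EOS.
    while (true) {
        llama_token next = sample_next();      // hypothetical sampler
        if (llama_token_is_eog(model, next)) {
            break;                             // EOS, EOT, etc.
        }
        // ... append `next` to the output and keep decoding ...
    }
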
@@ -581,6 +598,7 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
             InstanceMethod("eotToken", &AddonModel::EotToken),
             InstanceMethod("getTokenString", &AddonModel::GetTokenString),
             InstanceMethod("getTokenType", &AddonModel::GetTokenType),
+            InstanceMethod("isEogToken", &AddonModel::IsEogToken),
             InstanceMethod("getVocabularyType", &AddonModel::GetVocabularyType),
             InstanceMethod("shouldPrependBosToken", &AddonModel::ShouldPrependBosToken),
             InstanceMethod("getModelSize", &AddonModel::GetModelSize),
@@ -1054,6 +1072,30 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
         return info.Env().Undefined();
     }
 
+    Napi::Value CanBeNextTokenForGrammarEvaluationState(const Napi::CallbackInfo& info) {
+        AddonGrammarEvaluationState* grammar_evaluation_state =
+            Napi::ObjectWrap<AddonGrammarEvaluationState>::Unwrap(info[0].As<Napi::Object>());
+        llama_token tokenId = info[1].As<Napi::Number>().Int32Value();
+
+        if ((grammar_evaluation_state)->grammar != nullptr) {
+            std::vector<llama_token_data> candidates;
+            candidates.reserve(1);
+            candidates.emplace_back(llama_token_data { tokenId, 1, 0.0f });
+
+            llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
+
+            llama_sample_grammar(ctx, &candidates_p, (grammar_evaluation_state)->grammar);
+
+            if (candidates_p.size == 0 || candidates_p.data[0].logit == -INFINITY) {
+                return Napi::Boolean::New(info.Env(), false);
+            }
+
+            return Napi::Boolean::New(info.Env(), true);
+        }
+
+        return Napi::Boolean::New(info.Env(), false);
+    }
+
     Napi::Value GetEmbedding(const Napi::CallbackInfo& info) {
         if (disposed) {
             Napi::Error::New(info.Env(), "Context is disposed").ThrowAsJavaScriptException();
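
The probe above is non-destructive: `llama_sample_grammar` only filters the candidate array (rejected tokens get a logit of -INFINITY), while grammar state advances only through `llama_grammar_accept_token`, so the same evaluation state can be probed repeatedly. A hedged sketch that reuses the pattern for several candidate token ids (`ctx` and `grammar` are assumed valid, and the ids are placeholders):

    // Sketch: test which of a few candidate tokens the grammar currently allows.
    std::vector<llama_token> probes = { 10, 11, 12 };
    for (llama_token t : probes) {
        std::vector<llama_token_data> cand = { llama_token_data { t, 1, 0.0f } };
        llama_token_data_array cand_p = { cand.data(), cand.size(), false };
        llama_sample_grammar(ctx, &cand_p, grammar);   // filters, does not advance
        bool allowed = cand_p.size > 0 && cand_p.data[0].logit != -INFINITY;
        printf("token %d allowed: %s\n", t, allowed ? "yes" : "no");
    }
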
@@ -1118,6 +1160,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
             InstanceMethod("decodeBatch", &AddonContext::DecodeBatch),
             InstanceMethod("sampleToken", &AddonContext::SampleToken),
             InstanceMethod("acceptGrammarEvaluationStateToken", &AddonContext::AcceptGrammarEvaluationStateToken),
+            InstanceMethod("canBeNextTokenForGrammarEvaluationState", &AddonContext::CanBeNextTokenForGrammarEvaluationState),
             InstanceMethod("getEmbedding", &AddonContext::GetEmbedding),
             InstanceMethod("getStateSize", &AddonContext::GetStateSize),
             InstanceMethod("printTimings", &AddonContext::PrintTimings),
@@ -1442,7 +1485,6 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
         // Select the best prediction.
         auto logits = llama_get_logits_ith(ctx->ctx, batchLogitIndex);
         auto n_vocab = llama_n_vocab(ctx->model->model);
-        auto eos_token = llama_token_eos(ctx->model->model);
 
         std::vector<llama_token_data> candidates;
         candidates.reserve(n_vocab);
@@ -1455,7 +1497,7 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
         if (hasTokenBias) {
             auto logitBias = tokenBiases.at(token_id);
             if (logitBias == -INFINITY || logitBias < -INFINITY) {
-                if (token_id != eos_token) {
+                if (!llama_token_is_eog(ctx->model->model, token_id)) {
                     logit = -INFINITY;
                 }
             } else {
@@ -1513,7 +1555,7 @@ class AddonContextSampleTokenWorker : public Napi::AsyncWorker {
             new_token_id = llama_sample_token(ctx->ctx, &candidates_p);
         }
 
-        if (new_token_id != eos_token && use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
+        if (!llama_token_is_eog(ctx->model->model, new_token_id) && use_grammar && (grammar_evaluation_state)->grammar != nullptr) {
             llama_grammar_accept_token(ctx->ctx, (grammar_evaluation_state)->grammar, new_token_id);
         }
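
Both sampler call sites now use `llama_token_is_eog` instead of comparing against the single EOS id: a -INFINITY bias never silences an end-of-generation token (so generation can always terminate), and end-of-generation tokens are never pushed into the grammar state. A compact sketch of that bias rule, assuming `logits`, `token_id`, and a hypothetical bias map as placeholders:

    // Sketch: apply a token bias, but never mute end-of-generation tokens.
    float logit = logits[token_id];
    auto it = tokenBiases.find(token_id);   // hypothetical std::map<llama_token, float>
    if (it != tokenBiases.end()) {
        if (it->second <= -INFINITY) {
            if (!llama_token_is_eog(model, token_id)) logit = -INFINITY;
        } else {
            logit += it->second;
        }
    }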