Skip to content

Commit 5c3251a

Browse files
pavel-esir and Copilot authored
preserve properties from allowlist in Tokenizer (#2604)
Co-authored-by: Copilot <[email protected]>
1 parent ddd7f85 commit 5c3251a

File tree

2 files changed

+27
-4
lines changed

2 files changed

+27
-4
lines changed

.github/workflows/windows.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ env:
2323
CMAKE_C_COMPILER_LAUNCHER: ccache
2424
CCACHE_MAXSIZE: 500Mi
2525
HF_HOME: C:/mount/caches/huggingface/win
26-
OV_CACHE: C:/mount/caches/huggingface/.ov_cache/win/194c936
26+
OV_CACHE: C:/mount/caches/huggingface/.ov_cache/win/
2727
ARTIFACTS_SHARE: '/mount/build-artifacts'
2828
BASE_PRODUCT_TYPE: public_windows_vs2022
2929
GENAI_WHEELS_ARTIFACT_NAME: 'genai_wheels'

src/cpp/src/tokenizer/tokenizer.cpp

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,29 @@ class Tokenizer::TokenizerImpl {
280280
setup_tokenizer(models, properties);
281281
}
282282

283+
/// Removes from `properties` every entry whose key is not on the allowlist
/// of property names permitted for the tokenizer/detokenizer on CPU.
///
/// @param properties Map to prune in place; entries with allowlisted keys
///                   are left untouched.
void filter_properties(ov::AnyMap& properties) {
    // Property names that may be kept for the tokenizer/detokenizer on CPU.
    const std::set<std::string> permitted_names = {
        ov::hint::performance_mode.name(),
        ov::hint::num_requests.name(),
        ov::hint::enable_cpu_pinning.name(),
        ov::hint::execution_mode.name(),
        ov::hint::compiled_blob.name(),
        ov::hint::enable_hyper_threading.name(),
        ov::hint::enable_cpu_reservation.name(),
        ov::enable_profiling.name(),
    };

    auto entry = properties.begin();
    while (entry != properties.end()) {
        const bool is_permitted = permitted_names.count(entry->first) != 0;
        if (is_permitted) {
            ++entry;
            continue;
        }
        // erase() hands back the iterator following the removed entry,
        // so the traversal continues without touching an invalidated one.
        entry = properties.erase(entry);
    }
}
305+
283306
void setup_tokenizer(const std::filesystem::path& models_path, const ov::AnyMap& properties) {
284307
ScopedVar env_manager(tokenizers_relative_to_genai());
285308
auto core = get_core_singleton();
@@ -369,9 +392,9 @@ class Tokenizer::TokenizerImpl {
369392
two_input_requested = it->second.as<bool>();
370393
properties.erase(it);
371394
}
372-
373-
// Pass no addtional properties to tokenizer/detokenizer models since it was not used by default
374-
properties = {};
395+
396+
// Filter properties by leaving only params from the allowlist
397+
filter_properties(properties);
375398

376399
is_paired_input = ov_tokenizer && ov_tokenizer->get_parameters().size() == 2;
377400

0 commit comments

Comments
 (0)