Skip to content

Commit e6f12e3

Browse files
authored
Fix paths with unicode for tokenizers (#2337)
Ticket: CVS-169069
1 parent 5561b73 commit e6f12e3

File tree

3 files changed

+45
-17
lines changed

3 files changed

+45
-17
lines changed

src/cpp/src/tokenizer/tokenizer.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,17 @@ constexpr char pad_token_key_name[] = "pad_token";
3838

3939
/// Creates an ov::Core instance and registers the openvino_tokenizers
/// extension on it.
///
/// The extension location is read from the environment variable named by
/// ScopedVar::ENVIRONMENT_VARIABLE_NAME (its wide-character twin on Windows).
/// OPENVINO_ASSERT fires with "openvino_tokenizers path is not set" when the
/// variable is absent.
///
/// @return the core with the tokenizers extension added.
ov::Core core_with_extension() {
    ov::Core extended_core;

#ifdef _WIN32
    // Use the wide-character environment lookup on Windows so that a path
    // containing non-ASCII characters is handed over as a std::wstring.
    const wchar_t* const tokenizers_location_w = _wgetenv(ScopedVar::ENVIRONMENT_VARIABLE_NAME_W);
    OPENVINO_ASSERT(tokenizers_location_w, "openvino_tokenizers path is not set");
    extended_core.add_extension(std::wstring(tokenizers_location_w));
#else
    const char* const tokenizers_location = getenv(ScopedVar::ENVIRONMENT_VARIABLE_NAME);
    OPENVINO_ASSERT(tokenizers_location, "openvino_tokenizers path is not set");
    extended_core.add_extension(tokenizers_location);
#endif

    return extended_core;
}
4654

@@ -252,8 +260,14 @@ class Tokenizer::TokenizerImpl {
252260
filtered_properties = {};
253261
if (is_gguf_model(models_path)) {
254262
std::map<std::string, GGUFMetaData> tokenizer_config{};
263+
std::filesystem::path ov_tokenizer_filesystem_path;
264+
#ifdef _WIN32
265+
const wchar_t* ov_tokenizer_path_w = _wgetenv(ScopedVar::ENVIRONMENT_VARIABLE_NAME_W);
266+
ov_tokenizer_filesystem_path = std::filesystem::path(std::wstring(ov_tokenizer_path_w));
267+
#else
255268
const char* ov_tokenizer_path = getenv(ScopedVar::ENVIRONMENT_VARIABLE_NAME);
256-
auto ov_tokenizer_filesystem_path = std::filesystem::path(ov_tokenizer_path);
269+
ov_tokenizer_filesystem_path = std::filesystem::path(ov_tokenizer_path);
270+
#endif
257271
m_shared_object_ov_tokenizers = load_shared_object(ov_tokenizer_filesystem_path);
258272
std::tie(ov_tokenizer, ov_detokenizer, tokenizer_config) =
259273
create_tokenizer_from_config(m_shared_object_ov_tokenizers, models_path);

src/cpp/src/tokenizer/tokenizers_path.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "tokenizer/tokenizers_path.hpp"
55

66
#include <sstream>
7+
78
#ifdef _WIN32
89
# include <windows.h>
910
# define MAX_ABS_PATH _MAX_PATH
@@ -34,23 +35,28 @@ std::string get_absolute_file_path(const std::string& path) {
3435
}
3536
#endif
3637

37-
std::string get_ov_genai_library_path() {
38+
std::filesystem::path get_ov_genai_library_path() {
3839
#ifdef _WIN32
39-
CHAR genai_library_path[MAX_PATH];
40+
WCHAR genai_library_path_w[MAX_PATH];
4041
HMODULE hm = NULL;
41-
if (!GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
42-
reinterpret_cast<LPSTR>(get_ov_genai_library_path),
42+
if (!GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
43+
reinterpret_cast<LPCWSTR>(get_ov_genai_library_path),
4344
&hm)) {
4445
std::stringstream ss;
45-
ss << "GetModuleHandle returned " << GetLastError();
46+
ss << "GetModuleHandleExW returned " << GetLastError();
47+
throw std::runtime_error(ss.str());
48+
}
49+
DWORD result = GetModuleFileNameW(hm, (LPWSTR)genai_library_path_w, sizeof(genai_library_path_w) / sizeof(genai_library_path_w[0]));
50+
if (result == 0) {
51+
std::stringstream ss;
52+
ss << "GetModuleFileNameW failed with error " << GetLastError();
4653
throw std::runtime_error(ss.str());
4754
}
48-
GetModuleFileNameA(hm, (LPSTR)genai_library_path, sizeof(genai_library_path));
49-
return std::string(genai_library_path);
55+
return std::filesystem::path(std::wstring(genai_library_path_w));
5056
#elif defined(__APPLE__) || defined(__linux__) || defined(__EMSCRIPTEN__)
5157
Dl_info info;
5258
dladdr(reinterpret_cast<void*>(get_ov_genai_library_path), &info);
53-
return get_absolute_file_path(info.dli_fname).c_str();
59+
return std::filesystem::path(get_absolute_file_path(info.dli_fname));
5460
#else
5561
# error "Unsupported OS"
5662
#endif // _WIN32

src/cpp/src/tokenizer/tokenizers_path.hpp

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,25 +25,33 @@ class ScopedVar {
2525
bool was_already_set{false};
2626
public:
2727
static constexpr char ENVIRONMENT_VARIABLE_NAME[] = "OPENVINO_TOKENIZERS_PATH_GENAI";
28+
static constexpr wchar_t ENVIRONMENT_VARIABLE_NAME_W[] = L"OPENVINO_TOKENIZERS_PATH_GENAI";
2829

2930
explicit ScopedVar(const std::filesystem::path& environment_variable_value) {
31+
3032
#ifdef _WIN32
31-
char* value = nullptr;
33+
wchar_t* value = nullptr;
3234
size_t len = 0;
33-
_dupenv_s(&value, &len, ENVIRONMENT_VARIABLE_NAME);
34-
if (value == nullptr)
35-
_putenv_s(ENVIRONMENT_VARIABLE_NAME, environment_variable_value.string().c_str());
35+
_wdupenv_s(&value, &len, ENVIRONMENT_VARIABLE_NAME_W);
36+
if (value == nullptr) {
37+
_wputenv_s(ENVIRONMENT_VARIABLE_NAME_W, environment_variable_value.wstring().c_str());
38+
} else {
39+
was_already_set = true;
40+
free(value);
41+
}
3642
#else
37-
if (!getenv(ENVIRONMENT_VARIABLE_NAME))
43+
if (!getenv(ENVIRONMENT_VARIABLE_NAME)) {
3844
setenv(ENVIRONMENT_VARIABLE_NAME, environment_variable_value.string().c_str(), 1);
39-
#endif
40-
else
45+
} else {
4146
was_already_set = true;
47+
}
48+
#endif
4249
}
50+
4351
~ScopedVar() {
4452
if (!was_already_set) {
4553
#ifdef _WIN32
46-
_putenv_s(ENVIRONMENT_VARIABLE_NAME, "");
54+
_wputenv_s(ENVIRONMENT_VARIABLE_NAME_W, L"");
4755
#else
4856
unsetenv(ENVIRONMENT_VARIABLE_NAME);
4957
#endif

0 commit comments

Comments
 (0)