From bffaa940b694536880c672a55d1e57e4e59daa87 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Mon, 12 May 2025 17:39:15 +0800 Subject: [PATCH 01/27] support multi images for vlm test --- .../cpp/text_generation/benchmark_genai.cpp | 25 +++++++++++++++- .../visual_language_chat/benchmark_vlm.cpp | 29 +++++++++++++++---- .../llm_bench/llm_bench_utils/model_utils.py | 14 +++++---- .../task/visual_language_generation.py | 9 ++++-- 4 files changed, 64 insertions(+), 13 deletions(-) diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp index 4a8c8d0723..fb199d3abc 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -3,6 +3,20 @@ #include "openvino/genai/llm_pipeline.hpp" #include +#include +#include + +std::string read_prompt(const std::string& file_path) { + std::string prompt; + std::ifstream file(file_path); + if (file.is_open()) { + std::stringstream buffer; + buffer << file.rdbuf(); + prompt = buffer.str(); + file.close(); + } + return prompt; +} int main(int argc, char* argv[]) try { cxxopts::Options options("benchmark_vanilla_genai", "Help command"); @@ -10,6 +24,7 @@ int main(int argc, char* argv[]) try { options.add_options() ("m,model", "Path to model and tokenizers base directory", cxxopts::value()) ("p,prompt", "Prompt", cxxopts::value()->default_value("The Sky is blue because")) + ("pf,promptfile", "Prompt from file") ("nw,num_warmup", "Number of warmup iterations", cxxopts::value()->default_value(std::to_string(1))) ("n,num_iter", "Number of iterations", cxxopts::value()->default_value(std::to_string(3))) ("mt,max_new_tokens", "Maximal number of new tokens", cxxopts::value()->default_value(std::to_string(20))) @@ -36,10 +51,18 @@ int main(int argc, char* argv[]) try { size_t num_warmup = result["num_warmup"].as(); size_t num_iter = result["num_iter"].as(); + if (result.count("promptfile")) { + prompt = read_prompt(result["promptfile"].as()); + } + ov::genai::GenerationConfig config; config.max_new_tokens = result["max_new_tokens"].as(); - ov::genai::LLMPipeline pipe(models_path, device); + ov::genai::SchedulerConfig scheduler_config; + scheduler_config.enable_prefix_caching = false; + scheduler_config.max_num_batched_tokens = 2147483647; + + ov::genai::LLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); for (size_t i = 0; i < num_warmup; i++) pipe.generate(prompt, config); diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index 8467738307..ccb5dfc23f 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -3,10 +3,23 @@ #include #include +#include +#include #include "load_image.hpp" #include +std::vector parse_all_images(const std::string &input) { + std::vector images; + std::stringstream ss(input); + std::string image_path; + while (std::getline(ss, image_path, ';')) { + ov::Tensor image = utils::load_image(image_path); + images.push_back(image); + std::cout << "input image:" << image_path << std::endl; + } + return images; +} int main(int argc, char* argv[]) try { cxxopts::Options options("benchmark_vlm", "Help command"); @@ -41,20 +54,26 @@ int main(int argc, char* argv[]) try { std::string device = result["device"].as(); size_t num_warmup = result["num_warmup"].as(); size_t num_iter = result["num_iter"].as(); - ov::Tensor image = utils::load_image(image_path); + std::vector images; + images = 
parse_all_images(image_path); ov::genai::GenerationConfig config; config.max_new_tokens = result["max_new_tokens"].as(); + config.ignore_eos = true; + + ov::genai::SchedulerConfig scheduler_config; + scheduler_config.enable_prefix_caching = false; + scheduler_config.max_num_batched_tokens = 2147483647; - ov::genai::VLMPipeline pipe(models_path, device); + ov::genai::VLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); for (size_t i = 0; i < num_warmup; i++) - pipe.generate(prompt, ov::genai::image(image), ov::genai::generation_config(config)); + pipe.generate(prompt, ov::genai::image(images), ov::genai::generation_config(config)); - auto res = pipe.generate(prompt, ov::genai::image(image), ov::genai::generation_config(config)); + auto res = pipe.generate(prompt, ov::genai::image(images), ov::genai::generation_config(config)); auto metrics = res.perf_metrics; for (size_t i = 0; i < num_iter - 1; i++) { - res = pipe.generate(prompt, ov::genai::image(image), ov::genai::generation_config(config)); + res = pipe.generate(prompt, ov::genai::image(images), ov::genai::generation_config(config)); metrics = metrics + res.perf_metrics; } diff --git a/tools/llm_bench/llm_bench_utils/model_utils.py b/tools/llm_bench/llm_bench_utils/model_utils.py index 8ca59fe2eb..8a5e12a04f 100644 --- a/tools/llm_bench/llm_bench_utils/model_utils.py +++ b/tools/llm_bench/llm_bench_utils/model_utils.py @@ -321,11 +321,15 @@ def init_timestamp(num_iters, prompt_list, prompt_idx_list): def resolve_media_file_path(file_path, prompt_file_path): - if not file_path: - return file_path - if not (file_path.startswith("http://") or file_path.startswith("https://")): - return os.path.join(os.path.dirname(prompt_file_path), file_path.replace("./", "")) - return file_path + paths_ori = file_path.split(';') + paths_new = [] + for path in paths_ori: + if not path: + continue + if not (path.startswith("http://") or path.startswith("https://")): + paths_new.append(os.path.join(os.path.dirname(prompt_file_path), path.replace("./", ""))) + new_file_path = ";".join(paths_new) + return new_file_path def get_version_in_format_to_pars(version): diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py index d48239cfa3..8d51a57409 100644 --- a/tools/llm_bench/task/visual_language_generation.py +++ b/tools/llm_bench/task/visual_language_generation.py @@ -201,7 +201,9 @@ def run_visual_language_generation_genai( inputs = [inputs] if not isinstance(inputs, (list, tuple)) else inputs for input_data in inputs: if "media" in input_data: - images.append(load_image_genai(input_data["media"])) + image_paths = input_data["media"].split(';') + for path in image_paths: + images.append(load_image_genai(path)) prompts.append(input_data["prompt"]) if args["output_dir"] is not None and num == 0: for bs_index, in_text in enumerate(prompts): @@ -221,7 +223,10 @@ def run_visual_language_generation_genai( if hasattr(gen_config, 'apply_chat_template'): gen_config.apply_chat_template = False kwargs = {} - if len(images) >= 1: + if len(images) > 1: + # multi images + kwargs["images"] = images + elif len(images) == 1: kwargs["images"] = images[0] start = time.perf_counter() generation_result = model.generate(prompts[0], generation_config=gen_config, **kwargs) From 9edc62845d3847b563db4e6ec5545685180ff747 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Tue, 13 May 2025 09:54:00 +0800 Subject: [PATCH 02/27] code format --- tools/llm_bench/task/visual_language_generation.py | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py index 8d51a57409..1dcf3532ed 100644 --- a/tools/llm_bench/task/visual_language_generation.py +++ b/tools/llm_bench/task/visual_language_generation.py @@ -224,7 +224,7 @@ def run_visual_language_generation_genai( gen_config.apply_chat_template = False kwargs = {} if len(images) > 1: - # multi images + # multi images kwargs["images"] = images elif len(images) == 1: kwargs["images"] = images[0] From e512c3105feebbda6515dd52e1a8c27ddbf00098 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Tue, 13 May 2025 14:58:27 +0800 Subject: [PATCH 03/27] using ov::genai::images to convert images --- samples/cpp/visual_language_chat/benchmark_vlm.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index ccb5dfc23f..56021374a1 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -64,16 +64,15 @@ int main(int argc, char* argv[]) try { ov::genai::SchedulerConfig scheduler_config; scheduler_config.enable_prefix_caching = false; scheduler_config.max_num_batched_tokens = 2147483647; - ov::genai::VLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); for (size_t i = 0; i < num_warmup; i++) - pipe.generate(prompt, ov::genai::image(images), ov::genai::generation_config(config)); + pipe.generate(prompt, ov::genai::images(images), ov::genai::generation_config(config)); - auto res = pipe.generate(prompt, ov::genai::image(images), ov::genai::generation_config(config)); + auto res = pipe.generate(prompt, ov::genai::images(images), ov::genai::generation_config(config)); auto metrics = res.perf_metrics; for (size_t i = 0; i < num_iter - 1; i++) { - res = pipe.generate(prompt, ov::genai::image(images), ov::genai::generation_config(config)); + res = pipe.generate(prompt, ov::genai::images(images), ov::genai::generation_config(config)); metrics = metrics + res.perf_metrics; } From eed1dd702ccdf87f5ca8edcd9c32a21a04dd586f Mon Sep 17 00:00:00 2001 From: wgzintel Date: Tue, 13 May 2025 16:14:25 +0800 Subject: [PATCH 04/27] fix none Type --- .../llm_bench/llm_bench_utils/model_utils.py | 22 +++++++++++-------- .../task/visual_language_generation.py | 7 +++--- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/tools/llm_bench/llm_bench_utils/model_utils.py b/tools/llm_bench/llm_bench_utils/model_utils.py index 63c5277772..4b559b2d33 100644 --- a/tools/llm_bench/llm_bench_utils/model_utils.py +++ b/tools/llm_bench/llm_bench_utils/model_utils.py @@ -338,15 +338,19 @@ def init_timestamp(num_iters, prompt_list, prompt_idx_list): def resolve_media_file_path(file_path, prompt_file_path): - paths_ori = file_path.split(';') - paths_new = [] - for path in paths_ori: - if not path: - continue - if not (path.startswith("http://") or path.startswith("https://")): - paths_new.append(os.path.join(os.path.dirname(prompt_file_path), path.replace("./", ""))) - new_file_path = ";".join(paths_new) - return new_file_path + file_path = None + if file_path is not None: + paths_ori = file_path.split(';') + paths_new = [] + for path in paths_ori: + if not path: + continue + if not (path.startswith("http://") or path.startswith("https://")): + paths_new.append(os.path.join(os.path.dirname(prompt_file_path), path.replace("./", ""))) + new_file_path = 
";".join(paths_new) + return new_file_path + else: + return file_path def get_version_in_format_to_pars(version): diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py index 1dcf3532ed..58ad815158 100644 --- a/tools/llm_bench/task/visual_language_generation.py +++ b/tools/llm_bench/task/visual_language_generation.py @@ -201,9 +201,10 @@ def run_visual_language_generation_genai( inputs = [inputs] if not isinstance(inputs, (list, tuple)) else inputs for input_data in inputs: if "media" in input_data: - image_paths = input_data["media"].split(';') - for path in image_paths: - images.append(load_image_genai(path)) + if input_data["media"] is not None: + image_paths = input_data["media"].split(';') + for path in image_paths: + images.append(load_image_genai(path)) prompts.append(input_data["prompt"]) if args["output_dir"] is not None and num == 0: for bs_index, in_text in enumerate(prompts): From 503b74e3d64051d8fb6cc580bd1cd856ddc870c0 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Tue, 13 May 2025 23:28:49 +0800 Subject: [PATCH 05/27] fix NoneTyPE in optimim-intel pipeline --- tools/llm_bench/task/visual_language_generation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py index 58ad815158..56e0ca9eb2 100644 --- a/tools/llm_bench/task/visual_language_generation.py +++ b/tools/llm_bench/task/visual_language_generation.py @@ -38,7 +38,8 @@ def run_visual_language_generation_optimum( inputs = [inputs] if not isinstance(inputs, (list, tuple)) else inputs for input_data in inputs: if "media" in input_data: - images.append(load_image(input_data["media"])) + if input_data["media"] is not None: + images.append(load_image(input_data["media"])) prompts.append(input_data["prompt"]) if args["output_dir"] is not None and num == 0: From fa68faa22d8901ca3e5b601f50b50b1881ec0ad6 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Wed, 14 May 2025 23:41:55 +0800 Subject: [PATCH 06/27] Support read images from dir --- samples/cpp/text_generation/CMakeLists.txt | 2 +- .../cpp/text_generation/benchmark_genai.cpp | 30 +++++++--------- samples/cpp/utils/read_prompt_from_file.cpp | 17 +++++++++ samples/cpp/utils/read_prompt_from_file.h | 11 ++++++ .../cpp/visual_language_chat/CMakeLists.txt | 2 +- .../visual_language_chat/benchmark_vlm.cpp | 35 ++++++++++--------- .../llm_bench/llm_bench_utils/model_utils.py | 16 +++------ .../task/visual_language_generation.py | 20 +++++++---- 8 files changed, 80 insertions(+), 53 deletions(-) create mode 100644 samples/cpp/utils/read_prompt_from_file.cpp create mode 100644 samples/cpp/utils/read_prompt_from_file.h diff --git a/samples/cpp/text_generation/CMakeLists.txt b/samples/cpp/text_generation/CMakeLists.txt index 4ed269d737..b4928dc2ea 100644 --- a/samples/cpp/text_generation/CMakeLists.txt +++ b/samples/cpp/text_generation/CMakeLists.txt @@ -46,7 +46,7 @@ FetchContent_Declare(cxxopts URL_HASH SHA256=523175f792eb0ff04f9e653c90746c12655f10cb70f1d5e6d6d9491420298a08) FetchContent_MakeAvailable(cxxopts) -add_executable(benchmark_genai benchmark_genai.cpp) +add_executable(benchmark_genai benchmark_genai.cpp ../utils/read_prompt_from_file.cpp) target_link_libraries(benchmark_genai PRIVATE openvino::genai cxxopts::cxxopts) set_target_properties(benchmark_genai PROPERTIES # Ensure out of box LC_RPATH on macOS with SIP diff --git a/samples/cpp/text_generation/benchmark_genai.cpp 
b/samples/cpp/text_generation/benchmark_genai.cpp index fb199d3abc..2f4ce1dc31 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -3,20 +3,7 @@ #include "openvino/genai/llm_pipeline.hpp" #include -#include -#include - -std::string read_prompt(const std::string& file_path) { - std::string prompt; - std::ifstream file(file_path); - if (file.is_open()) { - std::stringstream buffer; - buffer << file.rdbuf(); - prompt = buffer.str(); - file.close(); - } - return prompt; -} +#include "../utils/read_prompt_from_file.h" int main(int argc, char* argv[]) try { cxxopts::Options options("benchmark_vanilla_genai", "Help command"); @@ -24,7 +11,7 @@ int main(int argc, char* argv[]) try { options.add_options() ("m,model", "Path to model and tokenizers base directory", cxxopts::value()) ("p,prompt", "Prompt", cxxopts::value()->default_value("The Sky is blue because")) - ("pf,promptfile", "Prompt from file") + ("pf,prompt_file", "Read prompt from file", cxxopts::value()) ("nw,num_warmup", "Number of warmup iterations", cxxopts::value()->default_value(std::to_string(1))) ("n,num_iter", "Number of iterations", cxxopts::value()->default_value(std::to_string(3))) ("mt,max_new_tokens", "Maximal number of new tokens", cxxopts::value()->default_value(std::to_string(20))) @@ -51,8 +38,8 @@ int main(int argc, char* argv[]) try { size_t num_warmup = result["num_warmup"].as(); size_t num_iter = result["num_iter"].as(); - if (result.count("promptfile")) { - prompt = read_prompt(result["promptfile"].as()); + if (result.count("prompt_file")) { + prompt = utils::read_prompt(result["prompt_file"].as()); } ov::genai::GenerationConfig config; @@ -64,6 +51,15 @@ int main(int argc, char* argv[]) try { ov::genai::LLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); + auto input_data = pipe.get_tokenizer().encode(prompt); + size_t prompt_token_size; + if (input_data.input_ids.get_shape().size() > 1) { + prompt_token_size = input_data.input_ids.get_shape()[1]; + } else { + prompt_token_size = input_data.input_ids.get_size(); + } + std::cout << "Prompt token size:" << prompt_token_size << std::endl; + for (size_t i = 0; i < num_warmup; i++) pipe.generate(prompt, config); diff --git a/samples/cpp/utils/read_prompt_from_file.cpp b/samples/cpp/utils/read_prompt_from_file.cpp new file mode 100644 index 0000000000..50d5087c74 --- /dev/null +++ b/samples/cpp/utils/read_prompt_from_file.cpp @@ -0,0 +1,17 @@ +// Copyright (C) 2023-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include +#include "read_prompt_from_file.h" + +std::string utils::read_prompt(const std::string& file_path) { + std::string prompt; + std::ifstream file(file_path); + if (file.is_open()) { + std::stringstream buffer; + buffer << file.rdbuf(); + prompt = buffer.str(); + file.close(); + } + return prompt; +} \ No newline at end of file diff --git a/samples/cpp/utils/read_prompt_from_file.h b/samples/cpp/utils/read_prompt_from_file.h new file mode 100644 index 0000000000..b47cd08d92 --- /dev/null +++ b/samples/cpp/utils/read_prompt_from_file.h @@ -0,0 +1,11 @@ + +// Copyright (C) 2023-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +namespace utils { +std::string read_prompt(const std::string& file_path); +} \ No newline at end of file diff --git a/samples/cpp/visual_language_chat/CMakeLists.txt b/samples/cpp/visual_language_chat/CMakeLists.txt index 59c0d1f698..32d4b1e60f 100644 --- 
a/samples/cpp/visual_language_chat/CMakeLists.txt +++ b/samples/cpp/visual_language_chat/CMakeLists.txt @@ -45,7 +45,7 @@ install(TARGETS encrypted_model_vlm # create benchmark executable -add_executable(benchmark_vlm benchmark_vlm.cpp load_image.cpp) +add_executable(benchmark_vlm benchmark_vlm.cpp load_image.cpp ../utils/read_prompt_from_file.cpp) target_include_directories(benchmark_vlm PRIVATE "${CMAKE_CURRENT_SOUCE_DIR}" "${CMAKE_BINARY_DIR}") target_link_libraries(benchmark_vlm PRIVATE openvino::genai cxxopts::cxxopts) set_target_properties(benchmark_vlm PROPERTIES diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index 56021374a1..ef38b14602 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -8,18 +8,7 @@ #include "load_image.hpp" #include - -std::vector parse_all_images(const std::string &input) { - std::vector images; - std::stringstream ss(input); - std::string image_path; - while (std::getline(ss, image_path, ';')) { - ov::Tensor image = utils::load_image(image_path); - images.push_back(image); - std::cout << "input image:" << image_path << std::endl; - } - return images; -} +#include "../utils/read_prompt_from_file.h" int main(int argc, char* argv[]) try { cxxopts::Options options("benchmark_vlm", "Help command"); @@ -27,6 +16,7 @@ int main(int argc, char* argv[]) try { options.add_options() ("m,model", "Path to model and tokenizers base directory", cxxopts::value()->default_value(".")) ("p,prompt", "Prompt", cxxopts::value()->default_value("What is on the image?")) + ("pf,prompt_file", "Read prompt from file", cxxopts::value()) ("i,image", "Image", cxxopts::value()->default_value("image.jpg")) ("nw,num_warmup", "Number of warmup iterations", cxxopts::value()->default_value(std::to_string(1))) ("n,num_iter", "Number of iterations", cxxopts::value()->default_value(std::to_string(3))) @@ -49,14 +39,17 @@ int main(int argc, char* argv[]) try { } std::string prompt = result["prompt"].as(); + if (result.count("prompt_file")) { + prompt = utils::read_prompt(result["prompt_file"].as()); + } + const std::string models_path = result["model"].as(); const std::string image_path = result["image"].as(); std::string device = result["device"].as(); size_t num_warmup = result["num_warmup"].as(); size_t num_iter = result["num_iter"].as(); - std::vector images; - images = parse_all_images(image_path); - + std::vector images = utils::load_images(image_path); + ov::genai::GenerationConfig config; config.max_new_tokens = result["max_new_tokens"].as(); config.ignore_eos = true; @@ -65,7 +58,16 @@ int main(int argc, char* argv[]) try { scheduler_config.enable_prefix_caching = false; scheduler_config.max_num_batched_tokens = 2147483647; ov::genai::VLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); - + + auto input_data = pipe.get_tokenizer().encode(prompt); + size_t prompt_token_size; + if (input_data.input_ids.get_shape().size() > 1) { + prompt_token_size = input_data.input_ids.get_shape()[1]; + } else { + prompt_token_size = input_data.input_ids.get_size(); + } + std::cout << "Number of images:" << images.size() << ", prompt token size:" << prompt_token_size << std::endl; + for (size_t i = 0; i < num_warmup; i++) pipe.generate(prompt, ov::genai::images(images), ov::genai::generation_config(config)); @@ -77,6 +79,7 @@ int main(int argc, char* argv[]) try { } std::cout << std::fixed << std::setprecision(2); + std::cout << "Output 
token size:" << res.perf_metrics.get_num_generated_tokens() << std::endl; std::cout << "Load time: " << metrics.get_load_time() << " ms" << std::endl; std::cout << "Generate time: " << metrics.get_generate_duration().mean << " ± " << metrics.get_generate_duration().std << " ms" << std::endl; std::cout << "Tokenization time: " << metrics.get_tokenization_duration().mean << " ± " << metrics.get_tokenization_duration().std << " ms" << std::endl; diff --git a/tools/llm_bench/llm_bench_utils/model_utils.py b/tools/llm_bench/llm_bench_utils/model_utils.py index 4b559b2d33..e4171e5b05 100644 --- a/tools/llm_bench/llm_bench_utils/model_utils.py +++ b/tools/llm_bench/llm_bench_utils/model_utils.py @@ -338,19 +338,11 @@ def init_timestamp(num_iters, prompt_list, prompt_idx_list): def resolve_media_file_path(file_path, prompt_file_path): - file_path = None - if file_path is not None: - paths_ori = file_path.split(';') - paths_new = [] - for path in paths_ori: - if not path: - continue - if not (path.startswith("http://") or path.startswith("https://")): - paths_new.append(os.path.join(os.path.dirname(prompt_file_path), path.replace("./", ""))) - new_file_path = ";".join(paths_new) - return new_file_path - else: + if not file_path: return file_path + if not (file_path.startswith("http://") or file_path.startswith("https://")): + return os.path.join(os.path.dirname(prompt_file_path), file_path.replace("./", "")) + return file_path def get_version_in_format_to_pars(version): diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py index 56e0ca9eb2..f718937bde 100644 --- a/tools/llm_bench/task/visual_language_generation.py +++ b/tools/llm_bench/task/visual_language_generation.py @@ -12,13 +12,13 @@ import openvino as ov import hashlib import llm_bench_utils.metrics_print as metrics_print -import llm_bench_utils.output_csv from transformers import set_seed from transformers.image_utils import load_image -import llm_bench_utils.output_json import llm_bench_utils.output_file import llm_bench_utils.gen_output_data as gen_output_data import llm_bench_utils.parse_json_data as parse_json_data +from pathlib import Path + FW_UTILS = {'pt': llm_bench_utils.pt_utils, 'ov': llm_bench_utils.ov_utils} @@ -39,7 +39,12 @@ def run_visual_language_generation_optimum( for input_data in inputs: if "media" in input_data: if input_data["media"] is not None: - images.append(load_image(input_data["media"])) + entry = Path(input_data["media"]) + if entry.is_dir(): + for file in sorted(entry.iterdir()): + images.append(load_image_genai(str(file))) + else: + images.append(load_image_genai(input_data["media"])) prompts.append(input_data["prompt"]) if args["output_dir"] is not None and num == 0: @@ -203,9 +208,12 @@ def run_visual_language_generation_genai( for input_data in inputs: if "media" in input_data: if input_data["media"] is not None: - image_paths = input_data["media"].split(';') - for path in image_paths: - images.append(load_image_genai(path)) + entry = Path(input_data["media"]) + if entry.is_dir(): + for file in sorted(entry.iterdir()): + images.append(load_image_genai(str(file))) + else: + images.append(load_image_genai(input_data["media"])) prompts.append(input_data["prompt"]) if args["output_dir"] is not None and num == 0: for bs_index, in_text in enumerate(prompts): From 8e627544646c4f8cbbd7ef2ad1a81ec6ca594e11 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Thu, 15 May 2025 11:32:46 +0800 Subject: [PATCH 07/27] fix cmake_list.txt --- 
samples/cpp/text_generation/CMakeLists.txt | 2 +- samples/cpp/text_generation/benchmark_genai.cpp | 2 +- .../cpp/{utils => text_generation}/read_prompt_from_file.cpp | 0 samples/cpp/{utils => text_generation}/read_prompt_from_file.h | 0 samples/cpp/visual_language_chat/CMakeLists.txt | 3 +-- samples/cpp/visual_language_chat/benchmark_vlm.cpp | 2 +- 6 files changed, 4 insertions(+), 5 deletions(-) rename samples/cpp/{utils => text_generation}/read_prompt_from_file.cpp (100%) rename samples/cpp/{utils => text_generation}/read_prompt_from_file.h (100%) diff --git a/samples/cpp/text_generation/CMakeLists.txt b/samples/cpp/text_generation/CMakeLists.txt index b4928dc2ea..5824e132bd 100644 --- a/samples/cpp/text_generation/CMakeLists.txt +++ b/samples/cpp/text_generation/CMakeLists.txt @@ -46,7 +46,7 @@ FetchContent_Declare(cxxopts URL_HASH SHA256=523175f792eb0ff04f9e653c90746c12655f10cb70f1d5e6d6d9491420298a08) FetchContent_MakeAvailable(cxxopts) -add_executable(benchmark_genai benchmark_genai.cpp ../utils/read_prompt_from_file.cpp) +add_executable(benchmark_genai benchmark_genai.cpp read_prompt_from_file.cpp) target_link_libraries(benchmark_genai PRIVATE openvino::genai cxxopts::cxxopts) set_target_properties(benchmark_genai PROPERTIES # Ensure out of box LC_RPATH on macOS with SIP diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp index 2f4ce1dc31..571132f164 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -3,7 +3,7 @@ #include "openvino/genai/llm_pipeline.hpp" #include -#include "../utils/read_prompt_from_file.h" +#include "read_prompt_from_file.h" int main(int argc, char* argv[]) try { cxxopts::Options options("benchmark_vanilla_genai", "Help command"); diff --git a/samples/cpp/utils/read_prompt_from_file.cpp b/samples/cpp/text_generation/read_prompt_from_file.cpp similarity index 100% rename from samples/cpp/utils/read_prompt_from_file.cpp rename to samples/cpp/text_generation/read_prompt_from_file.cpp diff --git a/samples/cpp/utils/read_prompt_from_file.h b/samples/cpp/text_generation/read_prompt_from_file.h similarity index 100% rename from samples/cpp/utils/read_prompt_from_file.h rename to samples/cpp/text_generation/read_prompt_from_file.h diff --git a/samples/cpp/visual_language_chat/CMakeLists.txt b/samples/cpp/visual_language_chat/CMakeLists.txt index 32d4b1e60f..3093f3e0aa 100644 --- a/samples/cpp/visual_language_chat/CMakeLists.txt +++ b/samples/cpp/visual_language_chat/CMakeLists.txt @@ -44,8 +44,7 @@ install(TARGETS encrypted_model_vlm EXCLUDE_FROM_ALL) # create benchmark executable - -add_executable(benchmark_vlm benchmark_vlm.cpp load_image.cpp ../utils/read_prompt_from_file.cpp) +add_executable(benchmark_vlm benchmark_vlm.cpp load_image.cpp ../text_generation/read_prompt_from_file.cpp) target_include_directories(benchmark_vlm PRIVATE "${CMAKE_CURRENT_SOUCE_DIR}" "${CMAKE_BINARY_DIR}") target_link_libraries(benchmark_vlm PRIVATE openvino::genai cxxopts::cxxopts) set_target_properties(benchmark_vlm PROPERTIES diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index ef38b14602..7232a1c152 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -8,7 +8,7 @@ #include "load_image.hpp" #include -#include "../utils/read_prompt_from_file.h" +#include "../text_generation/read_prompt_from_file.h" int main(int argc, char* 
argv[]) try { cxxopts::Options options("benchmark_vlm", "Help command"); From 5585335ea1e49eaca3bcd4a339022b18b040ae5f Mon Sep 17 00:00:00 2001 From: wgzintel Date: Thu, 15 May 2025 16:08:38 +0800 Subject: [PATCH 08/27] Output token size in benchmark_genai.cpp --- samples/cpp/text_generation/benchmark_genai.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp index 571132f164..fe6507d988 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -71,6 +71,7 @@ int main(int argc, char* argv[]) try { } std::cout << std::fixed << std::setprecision(2); + std::cout << "Output token size:" << res.perf_metrics.get_num_generated_tokens() << std::endl; std::cout << "Load time: " << metrics.get_load_time() << " ms" << std::endl; std::cout << "Generate time: " << metrics.get_generate_duration().mean << " ± " << metrics.get_generate_duration().std << " ms" << std::endl; std::cout << "Tokenization time: " << metrics.get_tokenization_duration().mean << " ± " << metrics.get_tokenization_duration().std << " ms" << std::endl; From f70ab0d945a0834347e31d6cf73aeaec01dfb867 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Wed, 21 May 2025 11:16:18 +0800 Subject: [PATCH 09/27] print ov version --- samples/cpp/text_generation/benchmark_genai.cpp | 2 ++ samples/cpp/visual_language_chat/benchmark_vlm.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp index fe6507d988..763fc5a88e 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -42,6 +42,8 @@ int main(int argc, char* argv[]) try { prompt = utils::read_prompt(result["prompt_file"].as()); } + std::cout << ov::get_openvino_version() << std::endl; + ov::genai::GenerationConfig config; config.max_new_tokens = result["max_new_tokens"].as(); diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index 7232a1c152..b0bfefae55 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -43,6 +43,8 @@ int main(int argc, char* argv[]) try { prompt = utils::read_prompt(result["prompt_file"].as()); } + std::cout << ov::get_openvino_version() << std::endl; + const std::string models_path = result["model"].as(); const std::string image_path = result["image"].as(); std::string device = result["device"].as(); From b6240fdf500bbc40f321edcc0896767a8318f66d Mon Sep 17 00:00:00 2001 From: wgzintel Date: Wed, 21 May 2025 18:50:04 +0800 Subject: [PATCH 10/27] using load_image() in optimum pipeline --- tools/llm_bench/task/visual_language_generation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py index 71c6a229ef..5e1a034bef 100644 --- a/tools/llm_bench/task/visual_language_generation.py +++ b/tools/llm_bench/task/visual_language_generation.py @@ -42,9 +42,9 @@ def run_visual_language_generation_optimum( entry = Path(input_data["media"]) if entry.is_dir(): for file in sorted(entry.iterdir()): - images.append(load_image_genai(str(file))) + images.append(load_image(str(file))) else: - images.append(load_image_genai(input_data["media"])) + images.append(load_image(input_data["media"])) 
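# load_image here is transformers.image_utils.load_image (imported at the top of this file), which returns a
# PIL image as expected by the optimum-intel pipeline; the GenAI pipeline below keeps using load_image_genai,
# which prepares the image as an OpenVINO tensor instead.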
prompts.append(input_data["prompt"]) if args["output_dir"] is not None and num == 0: From e004ceae5ba7a1bcdaf8c62088293779d598f0d8 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Thu, 22 May 2025 15:13:50 +0800 Subject: [PATCH 11/27] Make it an error if prompt_file and prompt are given at the same time --- samples/cpp/text_generation/README.md | 1 + .../cpp/text_generation/benchmark_genai.cpp | 28 +++++++++++++------ samples/cpp/visual_language_chat/README.md | 1 + .../visual_language_chat/benchmark_vlm.cpp | 25 +++++++++++++---- 4 files changed, 41 insertions(+), 14 deletions(-) diff --git a/samples/cpp/text_generation/README.md b/samples/cpp/text_generation/README.md index ab4fc030b5..3663b9aacd 100644 --- a/samples/cpp/text_generation/README.md +++ b/samples/cpp/text_generation/README.md @@ -162,6 +162,7 @@ For more information how performance metrics are calculated please follow [perfo #### Options - `-m, --model`: Path to the model and tokenizers base directory. - `-p, --prompt` (default: `"The Sky is blue because"`): The prompt to generate text. +- `--pf, --prompt_file` Read prompt from file. - `--nw, --num_warmup` (default: `1`): Number of warmup iterations. - `--mt, --max_new_tokens` (default: `20`): Maximal number of new tokens. - `-n, --num_iter` (default: `3`): Number of iterations. diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp index 763fc5a88e..4c71f34f92 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -10,7 +10,7 @@ int main(int argc, char* argv[]) try { options.add_options() ("m,model", "Path to model and tokenizers base directory", cxxopts::value()) - ("p,prompt", "Prompt", cxxopts::value()->default_value("The Sky is blue because")) + ("p,prompt", "One prompt", cxxopts::value()) ("pf,prompt_file", "Read prompt from file", cxxopts::value()) ("nw,num_warmup", "Number of warmup iterations", cxxopts::value()->default_value(std::to_string(1))) ("n,num_iter", "Number of iterations", cxxopts::value()->default_value(std::to_string(3))) @@ -32,18 +32,28 @@ int main(int argc, char* argv[]) try { return EXIT_SUCCESS; } - std::string prompt = result["prompt"].as(); + std::string prompt; + if (result.count("prompt") && result.count("prompt_file")) { + std::cout << "Prompt and prompt file should not exist together!" 
<< std::endl; + return EXIT_FAILURE; + } else { + if (result.count("prompt")) { + prompt = result["prompt"].as(); + } + else if (result.count("prompt_file")) { + prompt = utils::read_prompt(result["prompt_file"].as()); + } + else { + prompt = "The Sky is blue because"; + std::cout << "Run with default prompt:" << prompt << std::endl; + } + } + const std::string models_path = result["model"].as(); std::string device = result["device"].as(); size_t num_warmup = result["num_warmup"].as(); size_t num_iter = result["num_iter"].as(); - if (result.count("prompt_file")) { - prompt = utils::read_prompt(result["prompt_file"].as()); - } - - std::cout << ov::get_openvino_version() << std::endl; - ov::genai::GenerationConfig config; config.max_new_tokens = result["max_new_tokens"].as(); @@ -51,6 +61,8 @@ int main(int argc, char* argv[]) try { scheduler_config.enable_prefix_caching = false; scheduler_config.max_num_batched_tokens = 2147483647; + std::cout << ov::get_openvino_version() << std::endl; + ov::genai::LLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); auto input_data = pipe.get_tokenizer().encode(prompt); diff --git a/samples/cpp/visual_language_chat/README.md b/samples/cpp/visual_language_chat/README.md index 18424be9be..d96a7e67ae 100644 --- a/samples/cpp/visual_language_chat/README.md +++ b/samples/cpp/visual_language_chat/README.md @@ -41,6 +41,7 @@ benchmark_vlm [OPTIONS] - `-m, --model`(default: `.`): Path to the model and tokenizers base directory. - `-p, --prompt` (default: `What is on the image?`): The prompt to generate text. +- `--pf, --prompt_file` Read prompt from file. - `-i, --image` (default: `image.jpg`): Path to the image. - `-nw, --num_warmup` (default: `1`): Number of warmup iterations. - `-mt, --max_new_tokens` (default: `20`): Maximal number of new tokens. diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index b0bfefae55..9d4a7b1dd4 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -15,7 +15,7 @@ int main(int argc, char* argv[]) try { options.add_options() ("m,model", "Path to model and tokenizers base directory", cxxopts::value()->default_value(".")) - ("p,prompt", "Prompt", cxxopts::value()->default_value("What is on the image?")) + ("p,prompt", "One prompt", cxxopts::value()) ("pf,prompt_file", "Read prompt from file", cxxopts::value()) ("i,image", "Image", cxxopts::value()->default_value("image.jpg")) ("nw,num_warmup", "Number of warmup iterations", cxxopts::value()->default_value(std::to_string(1))) @@ -38,13 +38,23 @@ int main(int argc, char* argv[]) try { return EXIT_SUCCESS; } - std::string prompt = result["prompt"].as(); - if (result.count("prompt_file")) { - prompt = utils::read_prompt(result["prompt_file"].as()); + std::string prompt; + if (result.count("prompt") && result.count("prompt_file")) { + std::cout << "Prompt and prompt file should not exist together!" 
<< std::endl; + return EXIT_FAILURE; + } else { + if (result.count("prompt")) { + prompt = result["prompt"].as(); + } + else if (result.count("prompt_file")) { + prompt = utils::read_prompt(result["prompt_file"].as()); + } + else { + prompt = "What is on the image?"; + std::cout << "Run with default prompt:" << prompt << std::endl; + } } - std::cout << ov::get_openvino_version() << std::endl; - const std::string models_path = result["model"].as(); const std::string image_path = result["image"].as(); std::string device = result["device"].as(); @@ -59,6 +69,9 @@ int main(int argc, char* argv[]) try { ov::genai::SchedulerConfig scheduler_config; scheduler_config.enable_prefix_caching = false; scheduler_config.max_num_batched_tokens = 2147483647; + + std::cout << ov::get_openvino_version() << std::endl; + ov::genai::VLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); auto input_data = pipe.get_tokenizer().encode(prompt); From 10c99404e1b5ec601670e2d28fdc46c6b6b61bae Mon Sep 17 00:00:00 2001 From: wgzintel Date: Thu, 22 May 2025 17:03:33 +0800 Subject: [PATCH 12/27] revert get prompt from default args --- samples/cpp/text_generation/benchmark_genai.cpp | 13 ++++--------- samples/cpp/visual_language_chat/benchmark_vlm.cpp | 13 ++++--------- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp index 4c71f34f92..12b69840da 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -10,7 +10,7 @@ int main(int argc, char* argv[]) try { options.add_options() ("m,model", "Path to model and tokenizers base directory", cxxopts::value()) - ("p,prompt", "One prompt", cxxopts::value()) + ("p,prompt", "Prompt", cxxopts::value()->default_value("The Sky is blue because")) ("pf,prompt_file", "Read prompt from file", cxxopts::value()) ("nw,num_warmup", "Number of warmup iterations", cxxopts::value()->default_value(std::to_string(1))) ("n,num_iter", "Number of iterations", cxxopts::value()->default_value(std::to_string(3))) @@ -37,15 +37,10 @@ int main(int argc, char* argv[]) try { std::cout << "Prompt and prompt file should not exist together!" 
<< std::endl; return EXIT_FAILURE; } else { - if (result.count("prompt")) { - prompt = result["prompt"].as(); - } - else if (result.count("prompt_file")) { + if (result.count("prompt_file")) { prompt = utils::read_prompt(result["prompt_file"].as()); - } - else { - prompt = "The Sky is blue because"; - std::cout << "Run with default prompt:" << prompt << std::endl; + } else { + prompt = result["prompt"].as(); } } diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index 9d4a7b1dd4..a243dda075 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -15,7 +15,7 @@ int main(int argc, char* argv[]) try { options.add_options() ("m,model", "Path to model and tokenizers base directory", cxxopts::value()->default_value(".")) - ("p,prompt", "One prompt", cxxopts::value()) + ("p,prompt", "Prompt", cxxopts::value()->default_value("What is on the image?")) ("pf,prompt_file", "Read prompt from file", cxxopts::value()) ("i,image", "Image", cxxopts::value()->default_value("image.jpg")) ("nw,num_warmup", "Number of warmup iterations", cxxopts::value()->default_value(std::to_string(1))) @@ -43,15 +43,10 @@ int main(int argc, char* argv[]) try { std::cout << "Prompt and prompt file should not exist together!" << std::endl; return EXIT_FAILURE; } else { - if (result.count("prompt")) { - prompt = result["prompt"].as(); - } - else if (result.count("prompt_file")) { + if (result.count("prompt_file")) { prompt = utils::read_prompt(result["prompt_file"].as()); - } - else { - prompt = "What is on the image?"; - std::cout << "Run with default prompt:" << prompt << std::endl; + } else { + prompt = result["prompt"].as(); } } From 9bb9081f3d0fd126709188a6ae35162a363df639 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Thu, 22 May 2025 17:18:14 +0800 Subject: [PATCH 13/27] Remove redundant code --- samples/cpp/visual_language_chat/benchmark_vlm.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index a243dda075..f08fea100e 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -3,8 +3,6 @@ #include #include -#include -#include #include "load_image.hpp" #include From e1e5326bc3613b51b797c55692f614f0d21b85f8 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Thu, 22 May 2025 22:31:25 +0800 Subject: [PATCH 14/27] get prompt token size from shape[1] --- samples/cpp/text_generation/benchmark_genai.cpp | 4 +--- samples/cpp/visual_language_chat/benchmark_vlm.cpp | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp index 12b69840da..38f45e02ac 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -61,11 +61,9 @@ int main(int argc, char* argv[]) try { ov::genai::LLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); auto input_data = pipe.get_tokenizer().encode(prompt); - size_t prompt_token_size; + size_t prompt_token_size = 0; if (input_data.input_ids.get_shape().size() > 1) { prompt_token_size = input_data.input_ids.get_shape()[1]; - } else { - prompt_token_size = input_data.input_ids.get_size(); } std::cout << "Prompt token size:" << prompt_token_size << std::endl; diff --git 
a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index f08fea100e..118c8dec39 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -68,11 +68,9 @@ int main(int argc, char* argv[]) try { ov::genai::VLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); auto input_data = pipe.get_tokenizer().encode(prompt); - size_t prompt_token_size; + size_t prompt_token_size = 0; if (input_data.input_ids.get_shape().size() > 1) { prompt_token_size = input_data.input_ids.get_shape()[1]; - } else { - prompt_token_size = input_data.input_ids.get_size(); } std::cout << "Number of images:" << images.size() << ", prompt token size:" << prompt_token_size << std::endl; From 6688a094e0fa90679980531cee2a75b7ccdb6c58 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Fri, 23 May 2025 11:04:27 +0800 Subject: [PATCH 15/27] remove if --- samples/cpp/text_generation/benchmark_genai.cpp | 9 +++++---- samples/cpp/text_generation/read_prompt_from_file.cpp | 6 +++++- samples/cpp/visual_language_chat/benchmark_vlm.cpp | 9 +++++---- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp index 38f45e02ac..d7f0a87ccc 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -43,6 +43,10 @@ int main(int argc, char* argv[]) try { prompt = result["prompt"].as(); } } + if (prompt == "") { + std::cout << "Prompt is empty!" << std::endl; + return EXIT_FAILURE; + } const std::string models_path = result["model"].as(); std::string device = result["device"].as(); @@ -61,10 +65,7 @@ int main(int argc, char* argv[]) try { ov::genai::LLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); auto input_data = pipe.get_tokenizer().encode(prompt); - size_t prompt_token_size = 0; - if (input_data.input_ids.get_shape().size() > 1) { - prompt_token_size = input_data.input_ids.get_shape()[1]; - } + size_t prompt_token_size = input_data.input_ids.get_shape()[1]; std::cout << "Prompt token size:" << prompt_token_size << std::endl; for (size_t i = 0; i < num_warmup; i++) diff --git a/samples/cpp/text_generation/read_prompt_from_file.cpp b/samples/cpp/text_generation/read_prompt_from_file.cpp index 50d5087c74..2b41846ef6 100644 --- a/samples/cpp/text_generation/read_prompt_from_file.cpp +++ b/samples/cpp/text_generation/read_prompt_from_file.cpp @@ -1,17 +1,21 @@ // Copyright (C) 2023-2025 Intel Corporation // SPDX-License-Identifier: Apache-2.0 +#include #include #include "read_prompt_from_file.h" std::string utils::read_prompt(const std::string& file_path) { - std::string prompt; + std::string prompt = ""; std::ifstream file(file_path); if (file.is_open()) { std::stringstream buffer; buffer << file.rdbuf(); prompt = buffer.str(); file.close(); + } else { + // show message: + std::cout << "Error opening prompt file: " << file_path << std::endl; } return prompt; } \ No newline at end of file diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index 118c8dec39..7552233c37 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -47,6 +47,10 @@ int main(int argc, char* argv[]) try { prompt = result["prompt"].as(); } } + if (prompt == "") { + std::cout << "Prompt is empty!" 
<< std::endl; + return EXIT_FAILURE; + } const std::string models_path = result["model"].as(); const std::string image_path = result["image"].as(); @@ -68,10 +72,7 @@ int main(int argc, char* argv[]) try { ov::genai::VLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); auto input_data = pipe.get_tokenizer().encode(prompt); - size_t prompt_token_size = 0; - if (input_data.input_ids.get_shape().size() > 1) { - prompt_token_size = input_data.input_ids.get_shape()[1]; - } + size_t prompt_token_size = input_data.input_ids.get_shape()[1]; std::cout << "Number of images:" << images.size() << ", prompt token size:" << prompt_token_size << std::endl; for (size_t i = 0; i < num_warmup; i++) From 16faddc61e1ef4d54e5a6adde68d3c86677d1bee Mon Sep 17 00:00:00 2001 From: guozhong wang Date: Fri, 23 May 2025 21:58:55 +0800 Subject: [PATCH 16/27] Update samples/cpp/text_generation/benchmark_genai.cpp Co-authored-by: Vladimir Zlobin --- samples/cpp/text_generation/benchmark_genai.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp index d7f0a87ccc..586bbdabb0 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -58,7 +58,7 @@ int main(int argc, char* argv[]) try { ov::genai::SchedulerConfig scheduler_config; scheduler_config.enable_prefix_caching = false; - scheduler_config.max_num_batched_tokens = 2147483647; + scheduler_config.max_num_batched_tokens = std::numeric_limits::max(); std::cout << ov::get_openvino_version() << std::endl; From f48ae43ac8afca733eb67840d7c80159ac2e84f4 Mon Sep 17 00:00:00 2001 From: guozhong wang Date: Fri, 23 May 2025 21:59:09 +0800 Subject: [PATCH 17/27] Update samples/cpp/visual_language_chat/benchmark_vlm.cpp Co-authored-by: Vladimir Zlobin --- samples/cpp/visual_language_chat/benchmark_vlm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index 7552233c37..0609d68f58 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -65,7 +65,7 @@ int main(int argc, char* argv[]) try { ov::genai::SchedulerConfig scheduler_config; scheduler_config.enable_prefix_caching = false; - scheduler_config.max_num_batched_tokens = 2147483647; + scheduler_config.max_num_batched_tokens = std::numeric_limits::max(); std::cout << ov::get_openvino_version() << std::endl; From 0936b5043e20b8d269cf072772de2676c3ce6626 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Tue, 27 May 2025 11:02:07 +0800 Subject: [PATCH 18/27] Update benchmark_genai.py, benchmark_vlm.py and readme --- samples/python/text_generation/README.md | 1 + .../python/text_generation/benchmark_genai.py | 31 +++++++++++-- samples/python/visual_language_chat/README.md | 1 + .../visual_language_chat/benchmark_vlm.py | 46 ++++++++++++++++--- 4 files changed, 69 insertions(+), 10 deletions(-) diff --git a/samples/python/text_generation/README.md b/samples/python/text_generation/README.md index 7d334df29e..91ec12e10e 100644 --- a/samples/python/text_generation/README.md +++ b/samples/python/text_generation/README.md @@ -154,6 +154,7 @@ For more information how performance metrics are calculated please follow [perfo #### Options - `-m, --model`: Path to the model and tokenizers base directory. 
- `-p, --prompt` (default: `"The Sky is blue because"`): The prompt to generate text. +- `-pf, --prompt_file` Read prompt from file. - `-nw, --num_warmup` (default: `1`): Number of warmup iterations. - `-mt, --max_new_tokens` (default: `20`): Maximal number of new tokens. - `-n, --num_iter` (default: `3`): Number of iterations. diff --git a/samples/python/text_generation/benchmark_genai.py b/samples/python/text_generation/benchmark_genai.py index d279ab95fc..f680b7adba 100755 --- a/samples/python/text_generation/benchmark_genai.py +++ b/samples/python/text_generation/benchmark_genai.py @@ -1,13 +1,17 @@ # Copyright (C) 2023-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import sys import argparse import openvino_genai as ov_genai +from openvino import get_version def main(): + default_prompt = "The Sky is blue because" parser = argparse.ArgumentParser(description="Help command") parser.add_argument("-m", "--model", type=str, required=True, help="Path to model and tokenizers base directory") - parser.add_argument("-p", "--prompt", type=str, default="The Sky is blue because", help="Prompt") + parser.add_argument("-p", "--prompt", type=str, default=default_prompt, help="Prompt") + parser.add_argument("-pf", "--prompt_file", type=str, help="Read prompt from file") parser.add_argument("-nw", "--num_warmup", type=int, default=1, help="Number of warmup iterations") parser.add_argument("-n", "--num_iter", type=int, default=2, help="Number of iterations") parser.add_argument("-mt", "--max_new_tokens", type=int, default=20, help="Maximal number of new tokens") @@ -15,9 +19,21 @@ def main(): args = parser.parse_args() + if args.prompt != default_prompt and args.prompt_file is not None: + raise RuntimeError(f'Prompt and prompt file should not exist together!') + else: + if args.prompt_file is not None: + with open(args.prompt_file, 'r', encoding='utf-8') as f: + prompt = [f.read()] + else: + prompt = [args.prompt] + if prompt == "": + raise RuntimeError(f'Prompt is empty!') + + print(f'openvino runtime version: {get_version()}') + # Perf metrics is stored in DecodedResults. # In order to get DecodedResults instead of a string input should be a list. 
- prompt = [args.prompt] models_path = args.model device = args.device num_warmup = args.num_warmup @@ -26,8 +42,16 @@ def main(): config = ov_genai.GenerationConfig() config.max_new_tokens = args.max_new_tokens - pipe = ov_genai.LLMPipeline(models_path, device) + scheduler_config = ov_genai.SchedulerConfig() + setattr(scheduler_config, 'enable_prefix_caching', False) + setattr(scheduler_config, 'max_num_batched_tokens', sys.maxsize) + + pipe = ov_genai.LLMPipeline(models_path, device, scheduler_config=scheduler_config) + input_data = pipe.get_tokenizer().encode(prompt) + prompt_token_size = input_data.input_ids.get_shape()[1] + print(f"Prompt token size: {prompt_token_size}") + for _ in range(num_warmup): pipe.generate(prompt, config) @@ -37,6 +61,7 @@ def main(): res = pipe.generate(prompt, config) perf_metrics += res.perf_metrics + print(f"Output token size: {res.perf_metrics.get_num_generated_tokens()}") print(f"Load time: {perf_metrics.get_load_time():.2f} ms") print(f"Generate time: {perf_metrics.get_generate_duration().mean:.2f} ± {perf_metrics.get_generate_duration().std:.2f} ms") print(f"Tokenization time: {perf_metrics.get_tokenization_duration().mean:.2f} ± {perf_metrics.get_tokenization_duration().std:.2f} ms") diff --git a/samples/python/visual_language_chat/README.md b/samples/python/visual_language_chat/README.md index 098cbac630..d459bf93e7 100644 --- a/samples/python/visual_language_chat/README.md +++ b/samples/python/visual_language_chat/README.md @@ -41,6 +41,7 @@ python benchmark_vlm.py [OPTIONS] - `-m, --model`(default: `.`): Path to the model and tokenizers base directory. - `-p, --prompt` (default: `What is on the image?`): The prompt to generate text. +- `-pf, --prompt_file` Read prompt from file. - `-i, --image` (default: `image.jpg`): Path to the image. - `-nw, --num_warmup` (default: `1`): Number of warmup iterations. - `-mt, --max_new_tokens` (default: `20`): Maximal number of new tokens. diff --git a/samples/python/visual_language_chat/benchmark_vlm.py b/samples/python/visual_language_chat/benchmark_vlm.py index cbce4197dd..e07943c736 100755 --- a/samples/python/visual_language_chat/benchmark_vlm.py +++ b/samples/python/visual_language_chat/benchmark_vlm.py @@ -2,11 +2,14 @@ # Copyright (C) 2023-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import sys import argparse import openvino_genai as ov_genai from PIL import Image from openvino import Tensor +from pathlib import Path import numpy as np +from openvino import get_version def read_image(path: str) -> Tensor: @@ -22,11 +25,19 @@ def read_image(path: str) -> Tensor: image_data = np.array(pic) return Tensor(image_data) +def read_images(path: str) -> list[Tensor]: + entry = Path(path) + if entry.is_dir(): + return [read_image(str(file)) for file in sorted(entry.iterdir())] + return [read_image(path)] + def main(): + default_prompt = "What is on the image?" 
parser = argparse.ArgumentParser(description="Help command") parser.add_argument("-m", "--model", type=str, help="Path to model and tokenizers base directory") - parser.add_argument("-p", "--prompt", type=str, default="The Sky is blue because", help="Prompt") + parser.add_argument("-p", "--prompt", type=str, default=default_prompt, help="Prompt") + parser.add_argument("-pf", "--prompt_file", type=str, help="Read prompt from file") parser.add_argument("-i", "--image", type=str, default="image.jpg", help="Image") parser.add_argument("-nw", "--num_warmup", type=int, default=1, help="Number of warmup iterations") parser.add_argument("-n", "--num_iter", type=int, default=2, help="Number of iterations") @@ -35,11 +46,23 @@ def main(): args = parser.parse_args() + if args.prompt != default_prompt and args.prompt_file is not None: + raise RuntimeError(f'Prompt and prompt file should not exist together!') + else: + if args.prompt_file is not None: + with open(args.prompt_file, 'r', encoding='utf-8') as f: + prompt = f.read() + else: + prompt = args.prompt + if prompt == "": + raise RuntimeError(f'Prompt is empty!') + + print(f'openvino runtime version: {get_version()}') + # Perf metrics is stored in VLMDecodedResults. # In order to get VLMDecodedResults instead of a string input should be a list. - prompt = args.prompt models_path = args.model - image = read_image(args.image) + images = read_images(args.image) device = args.device num_warmup = args.num_warmup num_iter = args.num_iter @@ -47,17 +70,26 @@ def main(): config = ov_genai.GenerationConfig() config.max_new_tokens = args.max_new_tokens - pipe = ov_genai.VLMPipeline(models_path, device) + scheduler_config = ov_genai.SchedulerConfig() + setattr(scheduler_config, 'enable_prefix_caching', False) + setattr(scheduler_config, 'max_num_batched_tokens', sys.maxsize) + + pipe = ov_genai.VLMPipeline(models_path, device, scheduler_config=scheduler_config) + + input_data = pipe.get_tokenizer().encode(prompt) + prompt_token_size = input_data.input_ids.get_shape()[1] + print(f"Number of images:{len(images)}, Prompt token size: {prompt_token_size}") for _ in range(num_warmup): - pipe.generate(prompt, images=image, generation_config=config) + pipe.generate(prompt, images=images, generation_config=config) - res = pipe.generate(prompt, images=image, generation_config=config) + res = pipe.generate(prompt, images=images, generation_config=config) perf_metrics = res.perf_metrics for _ in range(num_iter - 1): - res = pipe.generate(prompt, images=image, generation_config=config) + res = pipe.generate(prompt, images=images, generation_config=config) perf_metrics += res.perf_metrics + print(f"Output token size: {res.perf_metrics.get_num_generated_tokens()}") print(f"Load time: {perf_metrics.get_load_time():.2f} ms") print( f"Generate time: {perf_metrics.get_generate_duration().mean:.2f} ± {perf_metrics.get_generate_duration().std:.2f} ms") From f48424435c1a9c4d7bc82f06e604bdd5640d62fb Mon Sep 17 00:00:00 2001 From: guozhong wang Date: Fri, 13 Jun 2025 20:43:16 +0800 Subject: [PATCH 19/27] Update samples/cpp/text_generation/read_prompt_from_file.cpp Co-authored-by: Vladimir Zlobin --- samples/cpp/text_generation/read_prompt_from_file.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/samples/cpp/text_generation/read_prompt_from_file.cpp b/samples/cpp/text_generation/read_prompt_from_file.cpp index 2b41846ef6..ac7caa5f74 100644 --- a/samples/cpp/text_generation/read_prompt_from_file.cpp +++ b/samples/cpp/text_generation/read_prompt_from_file.cpp 
@@ -11,8 +11,7 @@ std::string utils::read_prompt(const std::string& file_path) { if (file.is_open()) { std::stringstream buffer; buffer << file.rdbuf(); - prompt = buffer.str(); - file.close(); + return buffer.str(); } else { // show message: std::cout << "Error opening prompt file: " << file_path << std::endl; From 624e8fc39de4c078d3e837182a6348fc47dc489c Mon Sep 17 00:00:00 2001 From: wgzintel Date: Tue, 17 Jun 2025 11:20:38 +0800 Subject: [PATCH 20/27] default values --- samples/cpp/text_generation/benchmark_genai.cpp | 6 +++--- samples/cpp/text_generation/read_prompt_from_file.cpp | 5 +++-- samples/cpp/visual_language_chat/benchmark_vlm.cpp | 8 ++++---- samples/python/text_generation/benchmark_genai.py | 9 ++++----- samples/python/visual_language_chat/benchmark_vlm.py | 9 ++++----- 5 files changed, 18 insertions(+), 19 deletions(-) diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp index 586bbdabb0..078dd88969 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -10,7 +10,7 @@ int main(int argc, char* argv[]) try { options.add_options() ("m,model", "Path to model and tokenizers base directory", cxxopts::value()) - ("p,prompt", "Prompt", cxxopts::value()->default_value("The Sky is blue because")) + ("p,prompt", "Prompt", cxxopts::value()->default_value("")) ("pf,prompt_file", "Read prompt from file", cxxopts::value()) ("nw,num_warmup", "Number of warmup iterations", cxxopts::value()->default_value(std::to_string(1))) ("n,num_iter", "Number of iterations", cxxopts::value()->default_value(std::to_string(3))) @@ -40,10 +40,10 @@ int main(int argc, char* argv[]) try { if (result.count("prompt_file")) { prompt = utils::read_prompt(result["prompt_file"].as()); } else { - prompt = result["prompt"].as(); + prompt = result["prompt"].as().empty() ? "The Sky is blue because" : result["prompt"].as(); } } - if (prompt == "") { + if (prompt.empty()) { std::cout << "Prompt is empty!" 
<< std::endl; return EXIT_FAILURE; } diff --git a/samples/cpp/text_generation/read_prompt_from_file.cpp b/samples/cpp/text_generation/read_prompt_from_file.cpp index ac7caa5f74..1940994e3c 100644 --- a/samples/cpp/text_generation/read_prompt_from_file.cpp +++ b/samples/cpp/text_generation/read_prompt_from_file.cpp @@ -13,8 +13,9 @@ std::string utils::read_prompt(const std::string& file_path) { buffer << file.rdbuf(); return buffer.str(); } else { - // show message: - std::cout << "Error opening prompt file: " << file_path << std::endl; + std::stringstream error_message; + error_message << "Error opening prompt file: '" << file_path << "'"; + throw std::runtime_error{error_message.str()}; } return prompt; } \ No newline at end of file diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index 0609d68f58..822f71348b 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -13,7 +13,7 @@ int main(int argc, char* argv[]) try { options.add_options() ("m,model", "Path to model and tokenizers base directory", cxxopts::value()->default_value(".")) - ("p,prompt", "Prompt", cxxopts::value()->default_value("What is on the image?")) + ("p,prompt", "Prompt", cxxopts::value()->default_value("")) ("pf,prompt_file", "Read prompt from file", cxxopts::value()) ("i,image", "Image", cxxopts::value()->default_value("image.jpg")) ("nw,num_warmup", "Number of warmup iterations", cxxopts::value()->default_value(std::to_string(1))) @@ -44,13 +44,13 @@ int main(int argc, char* argv[]) try { if (result.count("prompt_file")) { prompt = utils::read_prompt(result["prompt_file"].as()); } else { - prompt = result["prompt"].as(); + prompt = result["prompt"].as().empty() ? "What is on the image?" : result["prompt"].as(); } } - if (prompt == "") { + if (prompt.empty()) { std::cout << "Prompt is empty!" 
<< std::endl; return EXIT_FAILURE; - } + } const std::string models_path = result["model"].as(); const std::string image_path = result["image"].as(); diff --git a/samples/python/text_generation/benchmark_genai.py b/samples/python/text_generation/benchmark_genai.py index f680b7adba..a3bcac3870 100755 --- a/samples/python/text_generation/benchmark_genai.py +++ b/samples/python/text_generation/benchmark_genai.py @@ -7,10 +7,9 @@ from openvino import get_version def main(): - default_prompt = "The Sky is blue because" parser = argparse.ArgumentParser(description="Help command") parser.add_argument("-m", "--model", type=str, required=True, help="Path to model and tokenizers base directory") - parser.add_argument("-p", "--prompt", type=str, default=default_prompt, help="Prompt") + parser.add_argument("-p", "--prompt", type=str, default=None, help="Prompt") parser.add_argument("-pf", "--prompt_file", type=str, help="Read prompt from file") parser.add_argument("-nw", "--num_warmup", type=int, default=1, help="Number of warmup iterations") parser.add_argument("-n", "--num_iter", type=int, default=2, help="Number of iterations") @@ -19,15 +18,15 @@ def main(): args = parser.parse_args() - if args.prompt != default_prompt and args.prompt_file is not None: + if args.prompt is not None and args.prompt_file is not None: raise RuntimeError(f'Prompt and prompt file should not exist together!') else: if args.prompt_file is not None: with open(args.prompt_file, 'r', encoding='utf-8') as f: prompt = [f.read()] else: - prompt = [args.prompt] - if prompt == "": + prompt = ['The Sky is blue because'] if args.prompt is None else [args.prompt] + if len(prompt) == 0: raise RuntimeError(f'Prompt is empty!') print(f'openvino runtime version: {get_version()}') diff --git a/samples/python/visual_language_chat/benchmark_vlm.py b/samples/python/visual_language_chat/benchmark_vlm.py index e07943c736..c9ce84c340 100755 --- a/samples/python/visual_language_chat/benchmark_vlm.py +++ b/samples/python/visual_language_chat/benchmark_vlm.py @@ -33,10 +33,9 @@ def read_images(path: str) -> list[Tensor]: def main(): - default_prompt = "What is on the image?" parser = argparse.ArgumentParser(description="Help command") parser.add_argument("-m", "--model", type=str, help="Path to model and tokenizers base directory") - parser.add_argument("-p", "--prompt", type=str, default=default_prompt, help="Prompt") + parser.add_argument("-p", "--prompt", type=str, default=None, help="Prompt") parser.add_argument("-pf", "--prompt_file", type=str, help="Read prompt from file") parser.add_argument("-i", "--image", type=str, default="image.jpg", help="Image") parser.add_argument("-nw", "--num_warmup", type=int, default=1, help="Number of warmup iterations") @@ -46,15 +45,15 @@ def main(): args = parser.parse_args() - if args.prompt != default_prompt and args.prompt_file is not None: + if args.prompt is not None and args.prompt_file is not None: raise RuntimeError(f'Prompt and prompt file should not exist together!') else: if args.prompt_file is not None: with open(args.prompt_file, 'r', encoding='utf-8') as f: prompt = f.read() else: - prompt = args.prompt - if prompt == "": + prompt = 'What is on the image?' 
if args.prompt is None else args.prompt + if len(prompt) == 0: raise RuntimeError(f'Prompt is empty!') print(f'openvino runtime version: {get_version()}') From 5121bfb968b16f5c69961ecad28106bfc71e7b0e Mon Sep 17 00:00:00 2001 From: wgzintel Date: Tue, 17 Jun 2025 15:50:16 +0800 Subject: [PATCH 21/27] Use the regular assignment for scheduler_config --- samples/cpp/text_generation/README.md | 2 +- samples/cpp/visual_language_chat/README.md | 2 +- samples/python/text_generation/README.md | 2 +- samples/python/text_generation/benchmark_genai.py | 4 ++-- samples/python/visual_language_chat/README.md | 2 +- samples/python/visual_language_chat/benchmark_vlm.py | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/samples/cpp/text_generation/README.md b/samples/cpp/text_generation/README.md index 3663b9aacd..91a0fd328f 100644 --- a/samples/cpp/text_generation/README.md +++ b/samples/cpp/text_generation/README.md @@ -161,7 +161,7 @@ For more information how performance metrics are calculated please follow [perfo ``` #### Options - `-m, --model`: Path to the model and tokenizers base directory. -- `-p, --prompt` (default: `"The Sky is blue because"`): The prompt to generate text. +- `-p, --prompt` (default: `''`): The prompt to generate text. If neither `-p` nor `--pf` is given, the default prompt `"The Sky is blue because"` is used. - `--pf, --prompt_file` Read prompt from file. - `--nw, --num_warmup` (default: `1`): Number of warmup iterations. - `--mt, --max_new_tokens` (default: `20`): Maximal number of new tokens. diff --git a/samples/cpp/visual_language_chat/README.md b/samples/cpp/visual_language_chat/README.md index d96a7e67ae..58065bc070 100644 --- a/samples/cpp/visual_language_chat/README.md +++ b/samples/cpp/visual_language_chat/README.md @@ -40,7 +40,7 @@ benchmark_vlm [OPTIONS] ### Options - `-m, --model`(default: `.`): Path to the model and tokenizers base directory. -- `-p, --prompt` (default: `What is on the image?`): The prompt to generate text. +- `-p, --prompt` (default: `''`): The prompt to generate text. If neither `-p` nor `--pf` is given, the default prompt `"What is on the image?"` is used. - `--pf, --prompt_file` Read prompt from file. - `-i, --image` (default: `image.jpg`): Path to the image. - `-nw, --num_warmup` (default: `1`): Number of warmup iterations. diff --git a/samples/python/text_generation/README.md b/samples/python/text_generation/README.md index 91ec12e10e..cb0ef68f70 100644 --- a/samples/python/text_generation/README.md +++ b/samples/python/text_generation/README.md @@ -153,7 +153,7 @@ For more information how performance metrics are calculated please follow [perfo ``` #### Options - `-m, --model`: Path to the model and tokenizers base directory. -- `-p, --prompt` (default: `"The Sky is blue because"`): The prompt to generate text. +- `-p, --prompt` (default: `None`): The prompt to generate text. If neither `-p` nor `-pf` is given, the default prompt `"The Sky is blue because"` is used. - `-pf, --prompt_file` Read prompt from file. - `-nw, --num_warmup` (default: `1`): Number of warmup iterations. - `-mt, --max_new_tokens` (default: `20`): Maximal number of new tokens.
diff --git a/samples/python/text_generation/benchmark_genai.py b/samples/python/text_generation/benchmark_genai.py index a3bcac3870..c3a55a9d2e 100755 --- a/samples/python/text_generation/benchmark_genai.py +++ b/samples/python/text_generation/benchmark_genai.py @@ -42,8 +42,8 @@ def main(): config.max_new_tokens = args.max_new_tokens scheduler_config = ov_genai.SchedulerConfig() - setattr(scheduler_config, 'enable_prefix_caching', False) - setattr(scheduler_config, 'max_num_batched_tokens', sys.maxsize) + scheduler_config.enable_prefix_caching = False + scheduler_config.max_num_batched_tokens = sys.maxsize pipe = ov_genai.LLMPipeline(models_path, device, scheduler_config=scheduler_config) diff --git a/samples/python/visual_language_chat/README.md b/samples/python/visual_language_chat/README.md index d459bf93e7..3457f51318 100644 --- a/samples/python/visual_language_chat/README.md +++ b/samples/python/visual_language_chat/README.md @@ -40,7 +40,7 @@ python benchmark_vlm.py [OPTIONS] ### Options - `-m, --model`(default: `.`): Path to the model and tokenizers base directory. -- `-p, --prompt` (default: `What is on the image?`): The prompt to generate text. +- `-p, --prompt` (default: `None`): The prompt to generate text. If neither `-p` nor `-pf` is given, the default prompt `"What is on the image?"` is used. - `-pf, --prompt_file` Read prompt from file. - `-i, --image` (default: `image.jpg`): Path to the image. - `-nw, --num_warmup` (default: `1`): Number of warmup iterations. diff --git a/samples/python/visual_language_chat/benchmark_vlm.py b/samples/python/visual_language_chat/benchmark_vlm.py index c9ce84c340..9499e947bd 100755 --- a/samples/python/visual_language_chat/benchmark_vlm.py +++ b/samples/python/visual_language_chat/benchmark_vlm.py @@ -70,8 +70,8 @@ def main(): config.max_new_tokens = args.max_new_tokens scheduler_config = ov_genai.SchedulerConfig() - setattr(scheduler_config, 'enable_prefix_caching', False) - setattr(scheduler_config, 'max_num_batched_tokens', sys.maxsize) + scheduler_config.enable_prefix_caching = False + scheduler_config.max_num_batched_tokens = sys.maxsize pipe = ov_genai.VLMPipeline(models_path, device, scheduler_config=scheduler_config) From d4da4b6eb245d002f25157e1ce8e0b7bbb999a40 Mon Sep 17 00:00:00 2001 From: guozhong wang Date: Wed, 18 Jun 2025 09:06:35 +0800 Subject: [PATCH 22/27] Update samples/cpp/text_generation/read_prompt_from_file.cpp Co-authored-by: Vladimir Zlobin --- samples/cpp/text_generation/read_prompt_from_file.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/samples/cpp/text_generation/read_prompt_from_file.cpp b/samples/cpp/text_generation/read_prompt_from_file.cpp index 1940994e3c..87e76e90b3 100644 --- a/samples/cpp/text_generation/read_prompt_from_file.cpp +++ b/samples/cpp/text_generation/read_prompt_from_file.cpp @@ -6,7 +6,6 @@ #include "read_prompt_from_file.h" std::string utils::read_prompt(const std::string& file_path) { - std::string prompt = ""; std::ifstream file(file_path); if (file.is_open()) { std::stringstream buffer; From 1b8ecba4ec157f625a54f24203d9f3268aa895e1 Mon Sep 17 00:00:00 2001 From: guozhong wang Date: Wed, 18 Jun 2025 09:07:07 +0800 Subject: [PATCH 23/27] Update tools/llm_bench/task/visual_language_generation.py Co-authored-by: Sofya Balandina --- tools/llm_bench/task/visual_language_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py index 7df3026325..60ee0566c7 100644 ---
a/tools/llm_bench/task/visual_language_generation.py +++ b/tools/llm_bench/task/visual_language_generation.py @@ -198,7 +198,7 @@ def run_visual_language_generation_genai( prompts = [] inputs = [inputs] if not isinstance(inputs, (list, tuple)) else inputs for input_data in inputs: - if "media" in input_data: + if input_data.get("media", None): if input_data["media"] is not None: entry = Path(input_data["media"]) if entry.is_dir(): From 9cc975bd745a8d3e5e26985d2763ee25e3bf5f26 Mon Sep 17 00:00:00 2001 From: guozhong wang Date: Wed, 18 Jun 2025 09:07:40 +0800 Subject: [PATCH 24/27] Update tools/llm_bench/task/visual_language_generation.py Co-authored-by: Sofya Balandina --- tools/llm_bench/task/visual_language_generation.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py index 60ee0566c7..4b095712bd 100644 --- a/tools/llm_bench/task/visual_language_generation.py +++ b/tools/llm_bench/task/visual_language_generation.py @@ -223,11 +223,8 @@ def run_visual_language_generation_genai( gen_config.do_sample = False gen_config.ignore_eos = True kwargs = {} - if len(images) > 1: - # multi images + if len(images) >= 1: kwargs["images"] = images - elif len(images) == 1: - kwargs["images"] = images[0] start = time.perf_counter() generation_result = model.generate(prompts[0], generation_config=gen_config, **kwargs) end = time.perf_counter() From 3e3a3216c1c7508820a24ea8a9e9c261ee10cd60 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Wed, 18 Jun 2025 10:43:35 +0800 Subject: [PATCH 25/27] print input image nums for vlm --- tools/llm_bench/task/visual_language_generation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py index 4b095712bd..3f638e5807 100644 --- a/tools/llm_bench/task/visual_language_generation.py +++ b/tools/llm_bench/task/visual_language_generation.py @@ -46,7 +46,8 @@ def run_visual_language_generation_optimum( else: images.append(load_image(input_data["media"])) prompts.append(input_data["prompt"]) - + prefix = '[warm-up]' if num == 0 else '[{}]'.format(num) + log.info(f'{prefix}[P{prompt_index}] Input image nums:{len(images)}') if args["output_dir"] is not None and num == 0: for bs_index, in_text in enumerate(prompts): llm_bench_utils.output_file.output_input_text(in_text, args, model_precision, prompt_index, bs_index, proc_id) @@ -225,6 +226,8 @@ def run_visual_language_generation_genai( kwargs = {} if len(images) >= 1: kwargs["images"] = images + prefix = '[warm-up]' if num == 0 else '[{}]'.format(num) + log.info(f'{prefix}[P{prompt_index}] Input image nums:{len(images)}') start = time.perf_counter() generation_result = model.generate(prompts[0], generation_config=gen_config, **kwargs) end = time.perf_counter() From 2c6872fc070b856bf88bbc3be2d87e1d02bc5659 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Wed, 18 Jun 2025 10:47:25 +0800 Subject: [PATCH 26/27] Remove the corresponding return --- samples/cpp/text_generation/read_prompt_from_file.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/samples/cpp/text_generation/read_prompt_from_file.cpp b/samples/cpp/text_generation/read_prompt_from_file.cpp index 87e76e90b3..7559c2d1db 100644 --- a/samples/cpp/text_generation/read_prompt_from_file.cpp +++ b/samples/cpp/text_generation/read_prompt_from_file.cpp @@ -16,5 +16,4 @@ std::string utils::read_prompt(const std::string& file_path) { 
error_message << "Error opening prompt file: '" << file_path << "'"; throw std::runtime_error{error_message.str()}; } - return prompt; } \ No newline at end of file From 1a13411854f6b48d5559921ecb45df55d88acccf Mon Sep 17 00:00:00 2001 From: wgzintel Date: Wed, 18 Jun 2025 23:36:26 +0800 Subject: [PATCH 27/27] remove if input_data.get("media", None) --- .../task/visual_language_generation.py | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py index 3f638e5807..674f691d4a 100644 --- a/tools/llm_bench/task/visual_language_generation.py +++ b/tools/llm_bench/task/visual_language_generation.py @@ -37,14 +37,13 @@ def run_visual_language_generation_optimum( prompts = [] inputs = [inputs] if not isinstance(inputs, (list, tuple)) else inputs for input_data in inputs: - if "media" in input_data: - if input_data["media"] is not None: - entry = Path(input_data["media"]) - if entry.is_dir(): - for file in sorted(entry.iterdir()): - images.append(load_image(str(file))) - else: - images.append(load_image(input_data["media"])) + if input_data.get("media", None): + entry = Path(input_data["media"]) + if entry.is_dir(): + for file in sorted(entry.iterdir()): + images.append(load_image(str(file))) + else: + images.append(load_image(input_data["media"])) prompts.append(input_data["prompt"]) prefix = '[warm-up]' if num == 0 else '[{}]'.format(num) log.info(f'{prefix}[P{prompt_index}] Input image nums:{len(images)}') @@ -200,13 +199,12 @@ def run_visual_language_generation_genai( inputs = [inputs] if not isinstance(inputs, (list, tuple)) else inputs for input_data in inputs: if input_data.get("media", None): - if input_data["media"] is not None: - entry = Path(input_data["media"]) - if entry.is_dir(): - for file in sorted(entry.iterdir()): - images.append(load_image_genai(str(file))) - else: - images.append(load_image_genai(input_data["media"])) + entry = Path(input_data["media"]) + if entry.is_dir(): + for file in sorted(entry.iterdir()): + images.append(load_image_genai(str(file))) + else: + images.append(load_image_genai(input_data["media"])) prompts.append(input_data["prompt"]) if args["output_dir"] is not None and num == 0: for bs_index, in_text in enumerate(prompts):
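Taken together, the series lets both benchmarks read the prompt from a file, feed several images per request, and run with prefix caching disabled and an effectively unlimited batched-token budget. The sketch below is a minimal illustration of that flow, modeled on the Python VLM sample as patched; the model directory, device, image file names, and the Pillow-based image loader are assumptions for illustration only and are not part of the patches.

```python
# Minimal sketch, assuming openvino-genai, numpy and Pillow are installed, a converted
# VLM lives in "model_dir", and cat.jpg / dog.jpg exist next to the script.
import sys

import numpy as np
import openvino_genai as ov_genai
from openvino import Tensor
from PIL import Image


def load_image(path: str) -> Tensor:
    # Hypothetical helper; the samples ship their own read_image()/read_images() utilities.
    return Tensor(np.array(Image.open(path).convert("RGB")))


images = [load_image(p) for p in ["cat.jpg", "dog.jpg"]]  # several images per request
prompt = "What is on the image?"                          # or read from a file, as -pf does

# Benchmark-oriented scheduler settings applied by the patches.
scheduler_config = ov_genai.SchedulerConfig()
scheduler_config.enable_prefix_caching = False
scheduler_config.max_num_batched_tokens = sys.maxsize

pipe = ov_genai.VLMPipeline("model_dir", "CPU", scheduler_config=scheduler_config)

config = ov_genai.GenerationConfig()
config.max_new_tokens = 20

# Report the input sizes the same way the patched sample does.
prompt_token_size = pipe.get_tokenizer().encode(prompt).input_ids.get_shape()[1]
print(f"Number of images: {len(images)}, prompt token size: {prompt_token_size}")

res = pipe.generate(prompt, images=images, generation_config=config)
print(f"Load time: {res.perf_metrics.get_load_time():.2f} ms")
print(f"Output token size: {res.perf_metrics.get_num_generated_tokens()}")
```

Disabling prefix caching and lifting the batched-token limit keeps successive benchmark iterations comparable, presumably so that repeated runs over the same prompt are not skewed by cached prefixes.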