From bffaa940b694536880c672a55d1e57e4e59daa87 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Mon, 12 May 2025 17:39:15 +0800 Subject: [PATCH 01/27] support multi images for vlm test --- .../cpp/text_generation/benchmark_genai.cpp | 25 +++++++++++++++- .../visual_language_chat/benchmark_vlm.cpp | 29 +++++++++++++++---- .../llm_bench/llm_bench_utils/model_utils.py | 14 +++++---- .../task/visual_language_generation.py | 9 ++++-- 4 files changed, 64 insertions(+), 13 deletions(-) diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp index 4a8c8d0723..fb199d3abc 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -3,6 +3,20 @@ #include "openvino/genai/llm_pipeline.hpp" #include +#include +#include + +std::string read_prompt(const std::string& file_path) { + std::string prompt; + std::ifstream file(file_path); + if (file.is_open()) { + std::stringstream buffer; + buffer << file.rdbuf(); + prompt = buffer.str(); + file.close(); + } + return prompt; +} int main(int argc, char* argv[]) try { cxxopts::Options options("benchmark_vanilla_genai", "Help command"); @@ -10,6 +24,7 @@ int main(int argc, char* argv[]) try { options.add_options() ("m,model", "Path to model and tokenizers base directory", cxxopts::value()) ("p,prompt", "Prompt", cxxopts::value()->default_value("The Sky is blue because")) + ("pf,promptfile", "Prompt from file") ("nw,num_warmup", "Number of warmup iterations", cxxopts::value()->default_value(std::to_string(1))) ("n,num_iter", "Number of iterations", cxxopts::value()->default_value(std::to_string(3))) ("mt,max_new_tokens", "Maximal number of new tokens", cxxopts::value()->default_value(std::to_string(20))) @@ -36,10 +51,18 @@ int main(int argc, char* argv[]) try { size_t num_warmup = result["num_warmup"].as(); size_t num_iter = result["num_iter"].as(); + if (result.count("promptfile")) { + prompt = read_prompt(result["promptfile"].as()); + } + ov::genai::GenerationConfig config; config.max_new_tokens = result["max_new_tokens"].as(); - ov::genai::LLMPipeline pipe(models_path, device); + ov::genai::SchedulerConfig scheduler_config; + scheduler_config.enable_prefix_caching = false; + scheduler_config.max_num_batched_tokens = 2147483647; + + ov::genai::LLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); for (size_t i = 0; i < num_warmup; i++) pipe.generate(prompt, config); diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index 8467738307..ccb5dfc23f 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -3,10 +3,23 @@ #include #include +#include +#include #include "load_image.hpp" #include +std::vector parse_all_images(const std::string &input) { + std::vector images; + std::stringstream ss(input); + std::string image_path; + while (std::getline(ss, image_path, ';')) { + ov::Tensor image = utils::load_image(image_path); + images.push_back(image); + std::cout << "input image:" << image_path << std::endl; + } + return images; +} int main(int argc, char* argv[]) try { cxxopts::Options options("benchmark_vlm", "Help command"); @@ -41,20 +54,26 @@ int main(int argc, char* argv[]) try { std::string device = result["device"].as(); size_t num_warmup = result["num_warmup"].as(); size_t num_iter = result["num_iter"].as(); - ov::Tensor image = utils::load_image(image_path); + std::vector images; + images = 
parse_all_images(image_path); ov::genai::GenerationConfig config; config.max_new_tokens = result["max_new_tokens"].as(); + config.ignore_eos = true; + + ov::genai::SchedulerConfig scheduler_config; + scheduler_config.enable_prefix_caching = false; + scheduler_config.max_num_batched_tokens = 2147483647; - ov::genai::VLMPipeline pipe(models_path, device); + ov::genai::VLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); for (size_t i = 0; i < num_warmup; i++) - pipe.generate(prompt, ov::genai::image(image), ov::genai::generation_config(config)); + pipe.generate(prompt, ov::genai::image(images), ov::genai::generation_config(config)); - auto res = pipe.generate(prompt, ov::genai::image(image), ov::genai::generation_config(config)); + auto res = pipe.generate(prompt, ov::genai::image(images), ov::genai::generation_config(config)); auto metrics = res.perf_metrics; for (size_t i = 0; i < num_iter - 1; i++) { - res = pipe.generate(prompt, ov::genai::image(image), ov::genai::generation_config(config)); + res = pipe.generate(prompt, ov::genai::image(images), ov::genai::generation_config(config)); metrics = metrics + res.perf_metrics; } diff --git a/tools/llm_bench/llm_bench_utils/model_utils.py b/tools/llm_bench/llm_bench_utils/model_utils.py index 8ca59fe2eb..8a5e12a04f 100644 --- a/tools/llm_bench/llm_bench_utils/model_utils.py +++ b/tools/llm_bench/llm_bench_utils/model_utils.py @@ -321,11 +321,15 @@ def init_timestamp(num_iters, prompt_list, prompt_idx_list): def resolve_media_file_path(file_path, prompt_file_path): - if not file_path: - return file_path - if not (file_path.startswith("http://") or file_path.startswith("https://")): - return os.path.join(os.path.dirname(prompt_file_path), file_path.replace("./", "")) - return file_path + paths_ori = file_path.split(';') + paths_new = [] + for path in paths_ori: + if not path: + continue + if not (path.startswith("http://") or path.startswith("https://")): + paths_new.append(os.path.join(os.path.dirname(prompt_file_path), path.replace("./", ""))) + new_file_path = ";".join(paths_new) + return new_file_path def get_version_in_format_to_pars(version): diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py index d48239cfa3..8d51a57409 100644 --- a/tools/llm_bench/task/visual_language_generation.py +++ b/tools/llm_bench/task/visual_language_generation.py @@ -201,7 +201,9 @@ def run_visual_language_generation_genai( inputs = [inputs] if not isinstance(inputs, (list, tuple)) else inputs for input_data in inputs: if "media" in input_data: - images.append(load_image_genai(input_data["media"])) + image_paths = input_data["media"].split(';') + for path in image_paths: + images.append(load_image_genai(path)) prompts.append(input_data["prompt"]) if args["output_dir"] is not None and num == 0: for bs_index, in_text in enumerate(prompts): @@ -221,7 +223,10 @@ def run_visual_language_generation_genai( if hasattr(gen_config, 'apply_chat_template'): gen_config.apply_chat_template = False kwargs = {} - if len(images) >= 1: + if len(images) > 1: + # multi images + kwargs["images"] = images + elif len(images) == 1: kwargs["images"] = images[0] start = time.perf_counter() generation_result = model.generate(prompts[0], generation_config=gen_config, **kwargs) From 9edc62845d3847b563db4e6ec5545685180ff747 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Tue, 13 May 2025 09:54:00 +0800 Subject: [PATCH 02/27] code format --- tools/llm_bench/task/visual_language_generation.py | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py index 8d51a57409..1dcf3532ed 100644 --- a/tools/llm_bench/task/visual_language_generation.py +++ b/tools/llm_bench/task/visual_language_generation.py @@ -224,7 +224,7 @@ def run_visual_language_generation_genai( gen_config.apply_chat_template = False kwargs = {} if len(images) > 1: - # multi images + # multi images kwargs["images"] = images elif len(images) == 1: kwargs["images"] = images[0] From e512c3105feebbda6515dd52e1a8c27ddbf00098 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Tue, 13 May 2025 14:58:27 +0800 Subject: [PATCH 03/27] using ov::genai::images to convert images --- samples/cpp/visual_language_chat/benchmark_vlm.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index ccb5dfc23f..56021374a1 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -64,16 +64,15 @@ int main(int argc, char* argv[]) try { ov::genai::SchedulerConfig scheduler_config; scheduler_config.enable_prefix_caching = false; scheduler_config.max_num_batched_tokens = 2147483647; - ov::genai::VLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); for (size_t i = 0; i < num_warmup; i++) - pipe.generate(prompt, ov::genai::image(images), ov::genai::generation_config(config)); + pipe.generate(prompt, ov::genai::images(images), ov::genai::generation_config(config)); - auto res = pipe.generate(prompt, ov::genai::image(images), ov::genai::generation_config(config)); + auto res = pipe.generate(prompt, ov::genai::images(images), ov::genai::generation_config(config)); auto metrics = res.perf_metrics; for (size_t i = 0; i < num_iter - 1; i++) { - res = pipe.generate(prompt, ov::genai::image(images), ov::genai::generation_config(config)); + res = pipe.generate(prompt, ov::genai::images(images), ov::genai::generation_config(config)); metrics = metrics + res.perf_metrics; } From eed1dd702ccdf87f5ca8edcd9c32a21a04dd586f Mon Sep 17 00:00:00 2001 From: wgzintel Date: Tue, 13 May 2025 16:14:25 +0800 Subject: [PATCH 04/27] fix none Type --- .../llm_bench/llm_bench_utils/model_utils.py | 22 +++++++++++-------- .../task/visual_language_generation.py | 7 +++--- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/tools/llm_bench/llm_bench_utils/model_utils.py b/tools/llm_bench/llm_bench_utils/model_utils.py index 63c5277772..4b559b2d33 100644 --- a/tools/llm_bench/llm_bench_utils/model_utils.py +++ b/tools/llm_bench/llm_bench_utils/model_utils.py @@ -338,15 +338,19 @@ def init_timestamp(num_iters, prompt_list, prompt_idx_list): def resolve_media_file_path(file_path, prompt_file_path): - paths_ori = file_path.split(';') - paths_new = [] - for path in paths_ori: - if not path: - continue - if not (path.startswith("http://") or path.startswith("https://")): - paths_new.append(os.path.join(os.path.dirname(prompt_file_path), path.replace("./", ""))) - new_file_path = ";".join(paths_new) - return new_file_path + file_path = None + if file_path is not None: + paths_ori = file_path.split(';') + paths_new = [] + for path in paths_ori: + if not path: + continue + if not (path.startswith("http://") or path.startswith("https://")): + paths_new.append(os.path.join(os.path.dirname(prompt_file_path), path.replace("./", ""))) + new_file_path = 
";".join(paths_new) + return new_file_path + else: + return file_path def get_version_in_format_to_pars(version): diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py index 1dcf3532ed..58ad815158 100644 --- a/tools/llm_bench/task/visual_language_generation.py +++ b/tools/llm_bench/task/visual_language_generation.py @@ -201,9 +201,10 @@ def run_visual_language_generation_genai( inputs = [inputs] if not isinstance(inputs, (list, tuple)) else inputs for input_data in inputs: if "media" in input_data: - image_paths = input_data["media"].split(';') - for path in image_paths: - images.append(load_image_genai(path)) + if input_data["media"] is not None: + image_paths = input_data["media"].split(';') + for path in image_paths: + images.append(load_image_genai(path)) prompts.append(input_data["prompt"]) if args["output_dir"] is not None and num == 0: for bs_index, in_text in enumerate(prompts): From 503b74e3d64051d8fb6cc580bd1cd856ddc870c0 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Tue, 13 May 2025 23:28:49 +0800 Subject: [PATCH 05/27] fix NoneTyPE in optimim-intel pipeline --- tools/llm_bench/task/visual_language_generation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py index 58ad815158..56e0ca9eb2 100644 --- a/tools/llm_bench/task/visual_language_generation.py +++ b/tools/llm_bench/task/visual_language_generation.py @@ -38,7 +38,8 @@ def run_visual_language_generation_optimum( inputs = [inputs] if not isinstance(inputs, (list, tuple)) else inputs for input_data in inputs: if "media" in input_data: - images.append(load_image(input_data["media"])) + if input_data["media"] is not None: + images.append(load_image(input_data["media"])) prompts.append(input_data["prompt"]) if args["output_dir"] is not None and num == 0: From fa68faa22d8901ca3e5b601f50b50b1881ec0ad6 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Wed, 14 May 2025 23:41:55 +0800 Subject: [PATCH 06/27] Support read images from dir --- samples/cpp/text_generation/CMakeLists.txt | 2 +- .../cpp/text_generation/benchmark_genai.cpp | 30 +++++++--------- samples/cpp/utils/read_prompt_from_file.cpp | 17 +++++++++ samples/cpp/utils/read_prompt_from_file.h | 11 ++++++ .../cpp/visual_language_chat/CMakeLists.txt | 2 +- .../visual_language_chat/benchmark_vlm.cpp | 35 ++++++++++--------- .../llm_bench/llm_bench_utils/model_utils.py | 16 +++------ .../task/visual_language_generation.py | 20 +++++++---- 8 files changed, 80 insertions(+), 53 deletions(-) create mode 100644 samples/cpp/utils/read_prompt_from_file.cpp create mode 100644 samples/cpp/utils/read_prompt_from_file.h diff --git a/samples/cpp/text_generation/CMakeLists.txt b/samples/cpp/text_generation/CMakeLists.txt index 4ed269d737..b4928dc2ea 100644 --- a/samples/cpp/text_generation/CMakeLists.txt +++ b/samples/cpp/text_generation/CMakeLists.txt @@ -46,7 +46,7 @@ FetchContent_Declare(cxxopts URL_HASH SHA256=523175f792eb0ff04f9e653c90746c12655f10cb70f1d5e6d6d9491420298a08) FetchContent_MakeAvailable(cxxopts) -add_executable(benchmark_genai benchmark_genai.cpp) +add_executable(benchmark_genai benchmark_genai.cpp ../utils/read_prompt_from_file.cpp) target_link_libraries(benchmark_genai PRIVATE openvino::genai cxxopts::cxxopts) set_target_properties(benchmark_genai PROPERTIES # Ensure out of box LC_RPATH on macOS with SIP diff --git a/samples/cpp/text_generation/benchmark_genai.cpp 
b/samples/cpp/text_generation/benchmark_genai.cpp index fb199d3abc..2f4ce1dc31 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -3,20 +3,7 @@ #include "openvino/genai/llm_pipeline.hpp" #include -#include -#include - -std::string read_prompt(const std::string& file_path) { - std::string prompt; - std::ifstream file(file_path); - if (file.is_open()) { - std::stringstream buffer; - buffer << file.rdbuf(); - prompt = buffer.str(); - file.close(); - } - return prompt; -} +#include "../utils/read_prompt_from_file.h" int main(int argc, char* argv[]) try { cxxopts::Options options("benchmark_vanilla_genai", "Help command"); @@ -24,7 +11,7 @@ int main(int argc, char* argv[]) try { options.add_options() ("m,model", "Path to model and tokenizers base directory", cxxopts::value()) ("p,prompt", "Prompt", cxxopts::value()->default_value("The Sky is blue because")) - ("pf,promptfile", "Prompt from file") + ("pf,prompt_file", "Read prompt from file", cxxopts::value()) ("nw,num_warmup", "Number of warmup iterations", cxxopts::value()->default_value(std::to_string(1))) ("n,num_iter", "Number of iterations", cxxopts::value()->default_value(std::to_string(3))) ("mt,max_new_tokens", "Maximal number of new tokens", cxxopts::value()->default_value(std::to_string(20))) @@ -51,8 +38,8 @@ int main(int argc, char* argv[]) try { size_t num_warmup = result["num_warmup"].as(); size_t num_iter = result["num_iter"].as(); - if (result.count("promptfile")) { - prompt = read_prompt(result["promptfile"].as()); + if (result.count("prompt_file")) { + prompt = utils::read_prompt(result["prompt_file"].as()); } ov::genai::GenerationConfig config; @@ -64,6 +51,15 @@ int main(int argc, char* argv[]) try { ov::genai::LLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); + auto input_data = pipe.get_tokenizer().encode(prompt); + size_t prompt_token_size; + if (input_data.input_ids.get_shape().size() > 1) { + prompt_token_size = input_data.input_ids.get_shape()[1]; + } else { + prompt_token_size = input_data.input_ids.get_size(); + } + std::cout << "Prompt token size:" << prompt_token_size << std::endl; + for (size_t i = 0; i < num_warmup; i++) pipe.generate(prompt, config); diff --git a/samples/cpp/utils/read_prompt_from_file.cpp b/samples/cpp/utils/read_prompt_from_file.cpp new file mode 100644 index 0000000000..50d5087c74 --- /dev/null +++ b/samples/cpp/utils/read_prompt_from_file.cpp @@ -0,0 +1,17 @@ +// Copyright (C) 2023-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include +#include "read_prompt_from_file.h" + +std::string utils::read_prompt(const std::string& file_path) { + std::string prompt; + std::ifstream file(file_path); + if (file.is_open()) { + std::stringstream buffer; + buffer << file.rdbuf(); + prompt = buffer.str(); + file.close(); + } + return prompt; +} \ No newline at end of file diff --git a/samples/cpp/utils/read_prompt_from_file.h b/samples/cpp/utils/read_prompt_from_file.h new file mode 100644 index 0000000000..b47cd08d92 --- /dev/null +++ b/samples/cpp/utils/read_prompt_from_file.h @@ -0,0 +1,11 @@ + +// Copyright (C) 2023-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include + +namespace utils { +std::string read_prompt(const std::string& file_path); +} \ No newline at end of file diff --git a/samples/cpp/visual_language_chat/CMakeLists.txt b/samples/cpp/visual_language_chat/CMakeLists.txt index 59c0d1f698..32d4b1e60f 100644 --- 
a/samples/cpp/visual_language_chat/CMakeLists.txt +++ b/samples/cpp/visual_language_chat/CMakeLists.txt @@ -45,7 +45,7 @@ install(TARGETS encrypted_model_vlm # create benchmark executable -add_executable(benchmark_vlm benchmark_vlm.cpp load_image.cpp) +add_executable(benchmark_vlm benchmark_vlm.cpp load_image.cpp ../utils/read_prompt_from_file.cpp) target_include_directories(benchmark_vlm PRIVATE "${CMAKE_CURRENT_SOUCE_DIR}" "${CMAKE_BINARY_DIR}") target_link_libraries(benchmark_vlm PRIVATE openvino::genai cxxopts::cxxopts) set_target_properties(benchmark_vlm PROPERTIES diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index 56021374a1..ef38b14602 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -8,18 +8,7 @@ #include "load_image.hpp" #include - -std::vector parse_all_images(const std::string &input) { - std::vector images; - std::stringstream ss(input); - std::string image_path; - while (std::getline(ss, image_path, ';')) { - ov::Tensor image = utils::load_image(image_path); - images.push_back(image); - std::cout << "input image:" << image_path << std::endl; - } - return images; -} +#include "../utils/read_prompt_from_file.h" int main(int argc, char* argv[]) try { cxxopts::Options options("benchmark_vlm", "Help command"); @@ -27,6 +16,7 @@ int main(int argc, char* argv[]) try { options.add_options() ("m,model", "Path to model and tokenizers base directory", cxxopts::value()->default_value(".")) ("p,prompt", "Prompt", cxxopts::value()->default_value("What is on the image?")) + ("pf,prompt_file", "Read prompt from file", cxxopts::value()) ("i,image", "Image", cxxopts::value()->default_value("image.jpg")) ("nw,num_warmup", "Number of warmup iterations", cxxopts::value()->default_value(std::to_string(1))) ("n,num_iter", "Number of iterations", cxxopts::value()->default_value(std::to_string(3))) @@ -49,14 +39,17 @@ int main(int argc, char* argv[]) try { } std::string prompt = result["prompt"].as(); + if (result.count("prompt_file")) { + prompt = utils::read_prompt(result["prompt_file"].as()); + } + const std::string models_path = result["model"].as(); const std::string image_path = result["image"].as(); std::string device = result["device"].as(); size_t num_warmup = result["num_warmup"].as(); size_t num_iter = result["num_iter"].as(); - std::vector images; - images = parse_all_images(image_path); - + std::vector images = utils::load_images(image_path); + ov::genai::GenerationConfig config; config.max_new_tokens = result["max_new_tokens"].as(); config.ignore_eos = true; @@ -65,7 +58,16 @@ int main(int argc, char* argv[]) try { scheduler_config.enable_prefix_caching = false; scheduler_config.max_num_batched_tokens = 2147483647; ov::genai::VLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); - + + auto input_data = pipe.get_tokenizer().encode(prompt); + size_t prompt_token_size; + if (input_data.input_ids.get_shape().size() > 1) { + prompt_token_size = input_data.input_ids.get_shape()[1]; + } else { + prompt_token_size = input_data.input_ids.get_size(); + } + std::cout << "Number of images:" << images.size() << ", prompt token size:" << prompt_token_size << std::endl; + for (size_t i = 0; i < num_warmup; i++) pipe.generate(prompt, ov::genai::images(images), ov::genai::generation_config(config)); @@ -77,6 +79,7 @@ int main(int argc, char* argv[]) try { } std::cout << std::fixed << std::setprecision(2); + std::cout << "Output 
token size:" << res.perf_metrics.get_num_generated_tokens() << std::endl; std::cout << "Load time: " << metrics.get_load_time() << " ms" << std::endl; std::cout << "Generate time: " << metrics.get_generate_duration().mean << " ± " << metrics.get_generate_duration().std << " ms" << std::endl; std::cout << "Tokenization time: " << metrics.get_tokenization_duration().mean << " ± " << metrics.get_tokenization_duration().std << " ms" << std::endl; diff --git a/tools/llm_bench/llm_bench_utils/model_utils.py b/tools/llm_bench/llm_bench_utils/model_utils.py index 4b559b2d33..e4171e5b05 100644 --- a/tools/llm_bench/llm_bench_utils/model_utils.py +++ b/tools/llm_bench/llm_bench_utils/model_utils.py @@ -338,19 +338,11 @@ def init_timestamp(num_iters, prompt_list, prompt_idx_list): def resolve_media_file_path(file_path, prompt_file_path): - file_path = None - if file_path is not None: - paths_ori = file_path.split(';') - paths_new = [] - for path in paths_ori: - if not path: - continue - if not (path.startswith("http://") or path.startswith("https://")): - paths_new.append(os.path.join(os.path.dirname(prompt_file_path), path.replace("./", ""))) - new_file_path = ";".join(paths_new) - return new_file_path - else: + if not file_path: return file_path + if not (file_path.startswith("http://") or file_path.startswith("https://")): + return os.path.join(os.path.dirname(prompt_file_path), file_path.replace("./", "")) + return file_path def get_version_in_format_to_pars(version): diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py index 56e0ca9eb2..f718937bde 100644 --- a/tools/llm_bench/task/visual_language_generation.py +++ b/tools/llm_bench/task/visual_language_generation.py @@ -12,13 +12,13 @@ import openvino as ov import hashlib import llm_bench_utils.metrics_print as metrics_print -import llm_bench_utils.output_csv from transformers import set_seed from transformers.image_utils import load_image -import llm_bench_utils.output_json import llm_bench_utils.output_file import llm_bench_utils.gen_output_data as gen_output_data import llm_bench_utils.parse_json_data as parse_json_data +from pathlib import Path + FW_UTILS = {'pt': llm_bench_utils.pt_utils, 'ov': llm_bench_utils.ov_utils} @@ -39,7 +39,12 @@ def run_visual_language_generation_optimum( for input_data in inputs: if "media" in input_data: if input_data["media"] is not None: - images.append(load_image(input_data["media"])) + entry = Path(input_data["media"]) + if entry.is_dir(): + for file in sorted(entry.iterdir()): + images.append(load_image_genai(str(file))) + else: + images.append(load_image_genai(input_data["media"])) prompts.append(input_data["prompt"]) if args["output_dir"] is not None and num == 0: @@ -203,9 +208,12 @@ def run_visual_language_generation_genai( for input_data in inputs: if "media" in input_data: if input_data["media"] is not None: - image_paths = input_data["media"].split(';') - for path in image_paths: - images.append(load_image_genai(path)) + entry = Path(input_data["media"]) + if entry.is_dir(): + for file in sorted(entry.iterdir()): + images.append(load_image_genai(str(file))) + else: + images.append(load_image_genai(input_data["media"])) prompts.append(input_data["prompt"]) if args["output_dir"] is not None and num == 0: for bs_index, in_text in enumerate(prompts): From 8e627544646c4f8cbbd7ef2ad1a81ec6ca594e11 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Thu, 15 May 2025 11:32:46 +0800 Subject: [PATCH 07/27] fix cmake_list.txt --- 
samples/cpp/text_generation/CMakeLists.txt | 2 +- samples/cpp/text_generation/benchmark_genai.cpp | 2 +- .../cpp/{utils => text_generation}/read_prompt_from_file.cpp | 0 samples/cpp/{utils => text_generation}/read_prompt_from_file.h | 0 samples/cpp/visual_language_chat/CMakeLists.txt | 3 +-- samples/cpp/visual_language_chat/benchmark_vlm.cpp | 2 +- 6 files changed, 4 insertions(+), 5 deletions(-) rename samples/cpp/{utils => text_generation}/read_prompt_from_file.cpp (100%) rename samples/cpp/{utils => text_generation}/read_prompt_from_file.h (100%) diff --git a/samples/cpp/text_generation/CMakeLists.txt b/samples/cpp/text_generation/CMakeLists.txt index b4928dc2ea..5824e132bd 100644 --- a/samples/cpp/text_generation/CMakeLists.txt +++ b/samples/cpp/text_generation/CMakeLists.txt @@ -46,7 +46,7 @@ FetchContent_Declare(cxxopts URL_HASH SHA256=523175f792eb0ff04f9e653c90746c12655f10cb70f1d5e6d6d9491420298a08) FetchContent_MakeAvailable(cxxopts) -add_executable(benchmark_genai benchmark_genai.cpp ../utils/read_prompt_from_file.cpp) +add_executable(benchmark_genai benchmark_genai.cpp read_prompt_from_file.cpp) target_link_libraries(benchmark_genai PRIVATE openvino::genai cxxopts::cxxopts) set_target_properties(benchmark_genai PROPERTIES # Ensure out of box LC_RPATH on macOS with SIP diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp index 2f4ce1dc31..571132f164 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -3,7 +3,7 @@ #include "openvino/genai/llm_pipeline.hpp" #include -#include "../utils/read_prompt_from_file.h" +#include "read_prompt_from_file.h" int main(int argc, char* argv[]) try { cxxopts::Options options("benchmark_vanilla_genai", "Help command"); diff --git a/samples/cpp/utils/read_prompt_from_file.cpp b/samples/cpp/text_generation/read_prompt_from_file.cpp similarity index 100% rename from samples/cpp/utils/read_prompt_from_file.cpp rename to samples/cpp/text_generation/read_prompt_from_file.cpp diff --git a/samples/cpp/utils/read_prompt_from_file.h b/samples/cpp/text_generation/read_prompt_from_file.h similarity index 100% rename from samples/cpp/utils/read_prompt_from_file.h rename to samples/cpp/text_generation/read_prompt_from_file.h diff --git a/samples/cpp/visual_language_chat/CMakeLists.txt b/samples/cpp/visual_language_chat/CMakeLists.txt index 32d4b1e60f..3093f3e0aa 100644 --- a/samples/cpp/visual_language_chat/CMakeLists.txt +++ b/samples/cpp/visual_language_chat/CMakeLists.txt @@ -44,8 +44,7 @@ install(TARGETS encrypted_model_vlm EXCLUDE_FROM_ALL) # create benchmark executable - -add_executable(benchmark_vlm benchmark_vlm.cpp load_image.cpp ../utils/read_prompt_from_file.cpp) +add_executable(benchmark_vlm benchmark_vlm.cpp load_image.cpp ../text_generation/read_prompt_from_file.cpp) target_include_directories(benchmark_vlm PRIVATE "${CMAKE_CURRENT_SOUCE_DIR}" "${CMAKE_BINARY_DIR}") target_link_libraries(benchmark_vlm PRIVATE openvino::genai cxxopts::cxxopts) set_target_properties(benchmark_vlm PROPERTIES diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index ef38b14602..7232a1c152 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -8,7 +8,7 @@ #include "load_image.hpp" #include -#include "../utils/read_prompt_from_file.h" +#include "../text_generation/read_prompt_from_file.h" int main(int argc, char* 
argv[]) try { cxxopts::Options options("benchmark_vlm", "Help command"); From 5585335ea1e49eaca3bcd4a339022b18b040ae5f Mon Sep 17 00:00:00 2001 From: wgzintel Date: Thu, 15 May 2025 16:08:38 +0800 Subject: [PATCH 08/27] Output token size in benchmark_genai.cpp --- samples/cpp/text_generation/benchmark_genai.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp index 571132f164..fe6507d988 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -71,6 +71,7 @@ int main(int argc, char* argv[]) try { } std::cout << std::fixed << std::setprecision(2); + std::cout << "Output token size:" << res.perf_metrics.get_num_generated_tokens() << std::endl; std::cout << "Load time: " << metrics.get_load_time() << " ms" << std::endl; std::cout << "Generate time: " << metrics.get_generate_duration().mean << " ± " << metrics.get_generate_duration().std << " ms" << std::endl; std::cout << "Tokenization time: " << metrics.get_tokenization_duration().mean << " ± " << metrics.get_tokenization_duration().std << " ms" << std::endl; From f70ab0d945a0834347e31d6cf73aeaec01dfb867 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Wed, 21 May 2025 11:16:18 +0800 Subject: [PATCH 09/27] print ov version --- samples/cpp/text_generation/benchmark_genai.cpp | 2 ++ samples/cpp/visual_language_chat/benchmark_vlm.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp index fe6507d988..763fc5a88e 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -42,6 +42,8 @@ int main(int argc, char* argv[]) try { prompt = utils::read_prompt(result["prompt_file"].as()); } + std::cout << ov::get_openvino_version() << std::endl; + ov::genai::GenerationConfig config; config.max_new_tokens = result["max_new_tokens"].as(); diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index 7232a1c152..b0bfefae55 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -43,6 +43,8 @@ int main(int argc, char* argv[]) try { prompt = utils::read_prompt(result["prompt_file"].as()); } + std::cout << ov::get_openvino_version() << std::endl; + const std::string models_path = result["model"].as(); const std::string image_path = result["image"].as(); std::string device = result["device"].as(); From b6240fdf500bbc40f321edcc0896767a8318f66d Mon Sep 17 00:00:00 2001 From: wgzintel Date: Wed, 21 May 2025 18:50:04 +0800 Subject: [PATCH 10/27] using load_image() in optimum pipeline --- tools/llm_bench/task/visual_language_generation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py index 71c6a229ef..5e1a034bef 100644 --- a/tools/llm_bench/task/visual_language_generation.py +++ b/tools/llm_bench/task/visual_language_generation.py @@ -42,9 +42,9 @@ def run_visual_language_generation_optimum( entry = Path(input_data["media"]) if entry.is_dir(): for file in sorted(entry.iterdir()): - images.append(load_image_genai(str(file))) + images.append(load_image(str(file))) else: - images.append(load_image_genai(input_data["media"])) + images.append(load_image(input_data["media"])) 
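# load_image here is transformers.image_utils.load_image (imported at the top of this file), which returns a
# PIL image as expected by the optimum-intel pipeline; the GenAI pipeline below keeps using load_image_genai,
# which prepares the image as an OpenVINO tensor instead.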
prompts.append(input_data["prompt"]) if args["output_dir"] is not None and num == 0: From e004ceae5ba7a1bcdaf8c62088293779d598f0d8 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Thu, 22 May 2025 15:13:50 +0800 Subject: [PATCH 11/27] Make it an error if prompt_file and prompt are given at the same time --- samples/cpp/text_generation/README.md | 1 + .../cpp/text_generation/benchmark_genai.cpp | 28 +++++++++++++------ samples/cpp/visual_language_chat/README.md | 1 + .../visual_language_chat/benchmark_vlm.cpp | 25 +++++++++++++---- 4 files changed, 41 insertions(+), 14 deletions(-) diff --git a/samples/cpp/text_generation/README.md b/samples/cpp/text_generation/README.md index ab4fc030b5..3663b9aacd 100644 --- a/samples/cpp/text_generation/README.md +++ b/samples/cpp/text_generation/README.md @@ -162,6 +162,7 @@ For more information how performance metrics are calculated please follow [perfo #### Options - `-m, --model`: Path to the model and tokenizers base directory. - `-p, --prompt` (default: `"The Sky is blue because"`): The prompt to generate text. +- `--pf, --prompt_file` Read prompt from file. - `--nw, --num_warmup` (default: `1`): Number of warmup iterations. - `--mt, --max_new_tokens` (default: `20`): Maximal number of new tokens. - `-n, --num_iter` (default: `3`): Number of iterations. diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp index 763fc5a88e..4c71f34f92 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -10,7 +10,7 @@ int main(int argc, char* argv[]) try { options.add_options() ("m,model", "Path to model and tokenizers base directory", cxxopts::value()) - ("p,prompt", "Prompt", cxxopts::value()->default_value("The Sky is blue because")) + ("p,prompt", "One prompt", cxxopts::value()) ("pf,prompt_file", "Read prompt from file", cxxopts::value()) ("nw,num_warmup", "Number of warmup iterations", cxxopts::value()->default_value(std::to_string(1))) ("n,num_iter", "Number of iterations", cxxopts::value()->default_value(std::to_string(3))) @@ -32,18 +32,28 @@ int main(int argc, char* argv[]) try { return EXIT_SUCCESS; } - std::string prompt = result["prompt"].as(); + std::string prompt; + if (result.count("prompt") && result.count("prompt_file")) { + std::cout << "Prompt and prompt file should not exist together!" 
<< std::endl; + return EXIT_FAILURE; + } else { + if (result.count("prompt")) { + prompt = result["prompt"].as(); + } + else if (result.count("prompt_file")) { + prompt = utils::read_prompt(result["prompt_file"].as()); + } + else { + prompt = "The Sky is blue because"; + std::cout << "Run with default prompt:" << prompt << std::endl; + } + } + const std::string models_path = result["model"].as(); std::string device = result["device"].as(); size_t num_warmup = result["num_warmup"].as(); size_t num_iter = result["num_iter"].as(); - if (result.count("prompt_file")) { - prompt = utils::read_prompt(result["prompt_file"].as()); - } - - std::cout << ov::get_openvino_version() << std::endl; - ov::genai::GenerationConfig config; config.max_new_tokens = result["max_new_tokens"].as(); @@ -51,6 +61,8 @@ int main(int argc, char* argv[]) try { scheduler_config.enable_prefix_caching = false; scheduler_config.max_num_batched_tokens = 2147483647; + std::cout << ov::get_openvino_version() << std::endl; + ov::genai::LLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); auto input_data = pipe.get_tokenizer().encode(prompt); diff --git a/samples/cpp/visual_language_chat/README.md b/samples/cpp/visual_language_chat/README.md index 18424be9be..d96a7e67ae 100644 --- a/samples/cpp/visual_language_chat/README.md +++ b/samples/cpp/visual_language_chat/README.md @@ -41,6 +41,7 @@ benchmark_vlm [OPTIONS] - `-m, --model`(default: `.`): Path to the model and tokenizers base directory. - `-p, --prompt` (default: `What is on the image?`): The prompt to generate text. +- `--pf, --prompt_file` Read prompt from file. - `-i, --image` (default: `image.jpg`): Path to the image. - `-nw, --num_warmup` (default: `1`): Number of warmup iterations. - `-mt, --max_new_tokens` (default: `20`): Maximal number of new tokens. diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index b0bfefae55..9d4a7b1dd4 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -15,7 +15,7 @@ int main(int argc, char* argv[]) try { options.add_options() ("m,model", "Path to model and tokenizers base directory", cxxopts::value()->default_value(".")) - ("p,prompt", "Prompt", cxxopts::value()->default_value("What is on the image?")) + ("p,prompt", "One prompt", cxxopts::value()) ("pf,prompt_file", "Read prompt from file", cxxopts::value()) ("i,image", "Image", cxxopts::value()->default_value("image.jpg")) ("nw,num_warmup", "Number of warmup iterations", cxxopts::value()->default_value(std::to_string(1))) @@ -38,13 +38,23 @@ int main(int argc, char* argv[]) try { return EXIT_SUCCESS; } - std::string prompt = result["prompt"].as(); - if (result.count("prompt_file")) { - prompt = utils::read_prompt(result["prompt_file"].as()); + std::string prompt; + if (result.count("prompt") && result.count("prompt_file")) { + std::cout << "Prompt and prompt file should not exist together!" 
<< std::endl; + return EXIT_FAILURE; + } else { + if (result.count("prompt")) { + prompt = result["prompt"].as(); + } + else if (result.count("prompt_file")) { + prompt = utils::read_prompt(result["prompt_file"].as()); + } + else { + prompt = "What is on the image?"; + std::cout << "Run with default prompt:" << prompt << std::endl; + } } - std::cout << ov::get_openvino_version() << std::endl; - const std::string models_path = result["model"].as(); const std::string image_path = result["image"].as(); std::string device = result["device"].as(); @@ -59,6 +69,9 @@ int main(int argc, char* argv[]) try { ov::genai::SchedulerConfig scheduler_config; scheduler_config.enable_prefix_caching = false; scheduler_config.max_num_batched_tokens = 2147483647; + + std::cout << ov::get_openvino_version() << std::endl; + ov::genai::VLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); auto input_data = pipe.get_tokenizer().encode(prompt); From 10c99404e1b5ec601670e2d28fdc46c6b6b61bae Mon Sep 17 00:00:00 2001 From: wgzintel Date: Thu, 22 May 2025 17:03:33 +0800 Subject: [PATCH 12/27] revert get prompt from default args --- samples/cpp/text_generation/benchmark_genai.cpp | 13 ++++--------- samples/cpp/visual_language_chat/benchmark_vlm.cpp | 13 ++++--------- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp index 4c71f34f92..12b69840da 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -10,7 +10,7 @@ int main(int argc, char* argv[]) try { options.add_options() ("m,model", "Path to model and tokenizers base directory", cxxopts::value()) - ("p,prompt", "One prompt", cxxopts::value()) + ("p,prompt", "Prompt", cxxopts::value()->default_value("The Sky is blue because")) ("pf,prompt_file", "Read prompt from file", cxxopts::value()) ("nw,num_warmup", "Number of warmup iterations", cxxopts::value()->default_value(std::to_string(1))) ("n,num_iter", "Number of iterations", cxxopts::value()->default_value(std::to_string(3))) @@ -37,15 +37,10 @@ int main(int argc, char* argv[]) try { std::cout << "Prompt and prompt file should not exist together!" 
<< std::endl; return EXIT_FAILURE; } else { - if (result.count("prompt")) { - prompt = result["prompt"].as(); - } - else if (result.count("prompt_file")) { + if (result.count("prompt_file")) { prompt = utils::read_prompt(result["prompt_file"].as()); - } - else { - prompt = "The Sky is blue because"; - std::cout << "Run with default prompt:" << prompt << std::endl; + } else { + prompt = result["prompt"].as(); } } diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index 9d4a7b1dd4..a243dda075 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -15,7 +15,7 @@ int main(int argc, char* argv[]) try { options.add_options() ("m,model", "Path to model and tokenizers base directory", cxxopts::value()->default_value(".")) - ("p,prompt", "One prompt", cxxopts::value()) + ("p,prompt", "Prompt", cxxopts::value()->default_value("What is on the image?")) ("pf,prompt_file", "Read prompt from file", cxxopts::value()) ("i,image", "Image", cxxopts::value()->default_value("image.jpg")) ("nw,num_warmup", "Number of warmup iterations", cxxopts::value()->default_value(std::to_string(1))) @@ -43,15 +43,10 @@ int main(int argc, char* argv[]) try { std::cout << "Prompt and prompt file should not exist together!" << std::endl; return EXIT_FAILURE; } else { - if (result.count("prompt")) { - prompt = result["prompt"].as(); - } - else if (result.count("prompt_file")) { + if (result.count("prompt_file")) { prompt = utils::read_prompt(result["prompt_file"].as()); - } - else { - prompt = "What is on the image?"; - std::cout << "Run with default prompt:" << prompt << std::endl; + } else { + prompt = result["prompt"].as(); } } From 9bb9081f3d0fd126709188a6ae35162a363df639 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Thu, 22 May 2025 17:18:14 +0800 Subject: [PATCH 13/27] Remove redundant code --- samples/cpp/visual_language_chat/benchmark_vlm.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index a243dda075..f08fea100e 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -3,8 +3,6 @@ #include #include -#include -#include #include "load_image.hpp" #include From e1e5326bc3613b51b797c55692f614f0d21b85f8 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Thu, 22 May 2025 22:31:25 +0800 Subject: [PATCH 14/27] get prompt token size from shape[1] --- samples/cpp/text_generation/benchmark_genai.cpp | 4 +--- samples/cpp/visual_language_chat/benchmark_vlm.cpp | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp index 12b69840da..38f45e02ac 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -61,11 +61,9 @@ int main(int argc, char* argv[]) try { ov::genai::LLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); auto input_data = pipe.get_tokenizer().encode(prompt); - size_t prompt_token_size; + size_t prompt_token_size = 0; if (input_data.input_ids.get_shape().size() > 1) { prompt_token_size = input_data.input_ids.get_shape()[1]; - } else { - prompt_token_size = input_data.input_ids.get_size(); } std::cout << "Prompt token size:" << prompt_token_size << std::endl; diff --git 
a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index f08fea100e..118c8dec39 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -68,11 +68,9 @@ int main(int argc, char* argv[]) try { ov::genai::VLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); auto input_data = pipe.get_tokenizer().encode(prompt); - size_t prompt_token_size; + size_t prompt_token_size = 0; if (input_data.input_ids.get_shape().size() > 1) { prompt_token_size = input_data.input_ids.get_shape()[1]; - } else { - prompt_token_size = input_data.input_ids.get_size(); } std::cout << "Number of images:" << images.size() << ", prompt token size:" << prompt_token_size << std::endl; From 6688a094e0fa90679980531cee2a75b7ccdb6c58 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Fri, 23 May 2025 11:04:27 +0800 Subject: [PATCH 15/27] remove if --- samples/cpp/text_generation/benchmark_genai.cpp | 9 +++++---- samples/cpp/text_generation/read_prompt_from_file.cpp | 6 +++++- samples/cpp/visual_language_chat/benchmark_vlm.cpp | 9 +++++---- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp index 38f45e02ac..d7f0a87ccc 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -43,6 +43,10 @@ int main(int argc, char* argv[]) try { prompt = result["prompt"].as(); } } + if (prompt == "") { + std::cout << "Prompt is empty!" << std::endl; + return EXIT_FAILURE; + } const std::string models_path = result["model"].as(); std::string device = result["device"].as(); @@ -61,10 +65,7 @@ int main(int argc, char* argv[]) try { ov::genai::LLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); auto input_data = pipe.get_tokenizer().encode(prompt); - size_t prompt_token_size = 0; - if (input_data.input_ids.get_shape().size() > 1) { - prompt_token_size = input_data.input_ids.get_shape()[1]; - } + size_t prompt_token_size = input_data.input_ids.get_shape()[1]; std::cout << "Prompt token size:" << prompt_token_size << std::endl; for (size_t i = 0; i < num_warmup; i++) diff --git a/samples/cpp/text_generation/read_prompt_from_file.cpp b/samples/cpp/text_generation/read_prompt_from_file.cpp index 50d5087c74..2b41846ef6 100644 --- a/samples/cpp/text_generation/read_prompt_from_file.cpp +++ b/samples/cpp/text_generation/read_prompt_from_file.cpp @@ -1,17 +1,21 @@ // Copyright (C) 2023-2025 Intel Corporation // SPDX-License-Identifier: Apache-2.0 +#include #include #include "read_prompt_from_file.h" std::string utils::read_prompt(const std::string& file_path) { - std::string prompt; + std::string prompt = ""; std::ifstream file(file_path); if (file.is_open()) { std::stringstream buffer; buffer << file.rdbuf(); prompt = buffer.str(); file.close(); + } else { + // show message: + std::cout << "Error opening prompt file: " << file_path << std::endl; } return prompt; } \ No newline at end of file diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index 118c8dec39..7552233c37 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -47,6 +47,10 @@ int main(int argc, char* argv[]) try { prompt = result["prompt"].as(); } } + if (prompt == "") { + std::cout << "Prompt is empty!" 
<< std::endl; + return EXIT_FAILURE; + } const std::string models_path = result["model"].as(); const std::string image_path = result["image"].as(); @@ -68,10 +72,7 @@ int main(int argc, char* argv[]) try { ov::genai::VLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config)); auto input_data = pipe.get_tokenizer().encode(prompt); - size_t prompt_token_size = 0; - if (input_data.input_ids.get_shape().size() > 1) { - prompt_token_size = input_data.input_ids.get_shape()[1]; - } + size_t prompt_token_size = input_data.input_ids.get_shape()[1]; std::cout << "Number of images:" << images.size() << ", prompt token size:" << prompt_token_size << std::endl; for (size_t i = 0; i < num_warmup; i++) From 16faddc61e1ef4d54e5a6adde68d3c86677d1bee Mon Sep 17 00:00:00 2001 From: guozhong wang Date: Fri, 23 May 2025 21:58:55 +0800 Subject: [PATCH 16/27] Update samples/cpp/text_generation/benchmark_genai.cpp Co-authored-by: Vladimir Zlobin --- samples/cpp/text_generation/benchmark_genai.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp index d7f0a87ccc..586bbdabb0 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -58,7 +58,7 @@ int main(int argc, char* argv[]) try { ov::genai::SchedulerConfig scheduler_config; scheduler_config.enable_prefix_caching = false; - scheduler_config.max_num_batched_tokens = 2147483647; + scheduler_config.max_num_batched_tokens = std::numeric_limits::max(); std::cout << ov::get_openvino_version() << std::endl; From f48ae43ac8afca733eb67840d7c80159ac2e84f4 Mon Sep 17 00:00:00 2001 From: guozhong wang Date: Fri, 23 May 2025 21:59:09 +0800 Subject: [PATCH 17/27] Update samples/cpp/visual_language_chat/benchmark_vlm.cpp Co-authored-by: Vladimir Zlobin --- samples/cpp/visual_language_chat/benchmark_vlm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index 7552233c37..0609d68f58 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -65,7 +65,7 @@ int main(int argc, char* argv[]) try { ov::genai::SchedulerConfig scheduler_config; scheduler_config.enable_prefix_caching = false; - scheduler_config.max_num_batched_tokens = 2147483647; + scheduler_config.max_num_batched_tokens = std::numeric_limits::max(); std::cout << ov::get_openvino_version() << std::endl; From 0936b5043e20b8d269cf072772de2676c3ce6626 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Tue, 27 May 2025 11:02:07 +0800 Subject: [PATCH 18/27] Update benchmark_genai.py, benchmark_vlm.py and readme --- samples/python/text_generation/README.md | 1 + .../python/text_generation/benchmark_genai.py | 31 +++++++++++-- samples/python/visual_language_chat/README.md | 1 + .../visual_language_chat/benchmark_vlm.py | 46 ++++++++++++++++--- 4 files changed, 69 insertions(+), 10 deletions(-) diff --git a/samples/python/text_generation/README.md b/samples/python/text_generation/README.md index 7d334df29e..91ec12e10e 100644 --- a/samples/python/text_generation/README.md +++ b/samples/python/text_generation/README.md @@ -154,6 +154,7 @@ For more information how performance metrics are calculated please follow [perfo #### Options - `-m, --model`: Path to the model and tokenizers base directory. 
- `-p, --prompt` (default: `"The Sky is blue because"`): The prompt to generate text. +- `-pf, --prompt_file` Read prompt from file. - `-nw, --num_warmup` (default: `1`): Number of warmup iterations. - `-mt, --max_new_tokens` (default: `20`): Maximal number of new tokens. - `-n, --num_iter` (default: `3`): Number of iterations. diff --git a/samples/python/text_generation/benchmark_genai.py b/samples/python/text_generation/benchmark_genai.py index d279ab95fc..f680b7adba 100755 --- a/samples/python/text_generation/benchmark_genai.py +++ b/samples/python/text_generation/benchmark_genai.py @@ -1,13 +1,17 @@ # Copyright (C) 2023-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import sys import argparse import openvino_genai as ov_genai +from openvino import get_version def main(): + default_prompt = "The Sky is blue because" parser = argparse.ArgumentParser(description="Help command") parser.add_argument("-m", "--model", type=str, required=True, help="Path to model and tokenizers base directory") - parser.add_argument("-p", "--prompt", type=str, default="The Sky is blue because", help="Prompt") + parser.add_argument("-p", "--prompt", type=str, default=default_prompt, help="Prompt") + parser.add_argument("-pf", "--prompt_file", type=str, help="Read prompt from file") parser.add_argument("-nw", "--num_warmup", type=int, default=1, help="Number of warmup iterations") parser.add_argument("-n", "--num_iter", type=int, default=2, help="Number of iterations") parser.add_argument("-mt", "--max_new_tokens", type=int, default=20, help="Maximal number of new tokens") @@ -15,9 +19,21 @@ def main(): args = parser.parse_args() + if args.prompt != default_prompt and args.prompt_file is not None: + raise RuntimeError(f'Prompt and prompt file should not exist together!') + else: + if args.prompt_file is not None: + with open(args.prompt_file, 'r', encoding='utf-8') as f: + prompt = [f.read()] + else: + prompt = [args.prompt] + if prompt == "": + raise RuntimeError(f'Prompt is empty!') + + print(f'openvino runtime version: {get_version()}') + # Perf metrics is stored in DecodedResults. # In order to get DecodedResults instead of a string input should be a list. 
- prompt = [args.prompt] models_path = args.model device = args.device num_warmup = args.num_warmup @@ -26,8 +42,16 @@ def main(): config = ov_genai.GenerationConfig() config.max_new_tokens = args.max_new_tokens - pipe = ov_genai.LLMPipeline(models_path, device) + scheduler_config = ov_genai.SchedulerConfig() + setattr(scheduler_config, 'enable_prefix_caching', False) + setattr(scheduler_config, 'max_num_batched_tokens', sys.maxsize) + + pipe = ov_genai.LLMPipeline(models_path, device, scheduler_config=scheduler_config) + input_data = pipe.get_tokenizer().encode(prompt) + prompt_token_size = input_data.input_ids.get_shape()[1] + print(f"Prompt token size: {prompt_token_size}") + for _ in range(num_warmup): pipe.generate(prompt, config) @@ -37,6 +61,7 @@ def main(): res = pipe.generate(prompt, config) perf_metrics += res.perf_metrics + print(f"Output token size: {res.perf_metrics.get_num_generated_tokens()}") print(f"Load time: {perf_metrics.get_load_time():.2f} ms") print(f"Generate time: {perf_metrics.get_generate_duration().mean:.2f} ± {perf_metrics.get_generate_duration().std:.2f} ms") print(f"Tokenization time: {perf_metrics.get_tokenization_duration().mean:.2f} ± {perf_metrics.get_tokenization_duration().std:.2f} ms") diff --git a/samples/python/visual_language_chat/README.md b/samples/python/visual_language_chat/README.md index 098cbac630..d459bf93e7 100644 --- a/samples/python/visual_language_chat/README.md +++ b/samples/python/visual_language_chat/README.md @@ -41,6 +41,7 @@ python benchmark_vlm.py [OPTIONS] - `-m, --model`(default: `.`): Path to the model and tokenizers base directory. - `-p, --prompt` (default: `What is on the image?`): The prompt to generate text. +- `-pf, --prompt_file` Read prompt from file. - `-i, --image` (default: `image.jpg`): Path to the image. - `-nw, --num_warmup` (default: `1`): Number of warmup iterations. - `-mt, --max_new_tokens` (default: `20`): Maximal number of new tokens. diff --git a/samples/python/visual_language_chat/benchmark_vlm.py b/samples/python/visual_language_chat/benchmark_vlm.py index cbce4197dd..e07943c736 100755 --- a/samples/python/visual_language_chat/benchmark_vlm.py +++ b/samples/python/visual_language_chat/benchmark_vlm.py @@ -2,11 +2,14 @@ # Copyright (C) 2023-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import sys import argparse import openvino_genai as ov_genai from PIL import Image from openvino import Tensor +from pathlib import Path import numpy as np +from openvino import get_version def read_image(path: str) -> Tensor: @@ -22,11 +25,19 @@ def read_image(path: str) -> Tensor: image_data = np.array(pic) return Tensor(image_data) +def read_images(path: str) -> list[Tensor]: + entry = Path(path) + if entry.is_dir(): + return [read_image(str(file)) for file in sorted(entry.iterdir())] + return [read_image(path)] + def main(): + default_prompt = "What is on the image?" 
parser = argparse.ArgumentParser(description="Help command") parser.add_argument("-m", "--model", type=str, help="Path to model and tokenizers base directory") - parser.add_argument("-p", "--prompt", type=str, default="The Sky is blue because", help="Prompt") + parser.add_argument("-p", "--prompt", type=str, default=default_prompt, help="Prompt") + parser.add_argument("-pf", "--prompt_file", type=str, help="Read prompt from file") parser.add_argument("-i", "--image", type=str, default="image.jpg", help="Image") parser.add_argument("-nw", "--num_warmup", type=int, default=1, help="Number of warmup iterations") parser.add_argument("-n", "--num_iter", type=int, default=2, help="Number of iterations") @@ -35,11 +46,23 @@ def main(): args = parser.parse_args() + if args.prompt != default_prompt and args.prompt_file is not None: + raise RuntimeError(f'Prompt and prompt file should not exist together!') + else: + if args.prompt_file is not None: + with open(args.prompt_file, 'r', encoding='utf-8') as f: + prompt = f.read() + else: + prompt = args.prompt + if prompt == "": + raise RuntimeError(f'Prompt is empty!') + + print(f'openvino runtime version: {get_version()}') + # Perf metrics is stored in VLMDecodedResults. # In order to get VLMDecodedResults instead of a string input should be a list. - prompt = args.prompt models_path = args.model - image = read_image(args.image) + images = read_images(args.image) device = args.device num_warmup = args.num_warmup num_iter = args.num_iter @@ -47,17 +70,26 @@ def main(): config = ov_genai.GenerationConfig() config.max_new_tokens = args.max_new_tokens - pipe = ov_genai.VLMPipeline(models_path, device) + scheduler_config = ov_genai.SchedulerConfig() + setattr(scheduler_config, 'enable_prefix_caching', False) + setattr(scheduler_config, 'max_num_batched_tokens', sys.maxsize) + + pipe = ov_genai.VLMPipeline(models_path, device, scheduler_config=scheduler_config) + + input_data = pipe.get_tokenizer().encode(prompt) + prompt_token_size = input_data.input_ids.get_shape()[1] + print(f"Number of images:{len(images)}, Prompt token size: {prompt_token_size}") for _ in range(num_warmup): - pipe.generate(prompt, images=image, generation_config=config) + pipe.generate(prompt, images=images, generation_config=config) - res = pipe.generate(prompt, images=image, generation_config=config) + res = pipe.generate(prompt, images=images, generation_config=config) perf_metrics = res.perf_metrics for _ in range(num_iter - 1): - res = pipe.generate(prompt, images=image, generation_config=config) + res = pipe.generate(prompt, images=images, generation_config=config) perf_metrics += res.perf_metrics + print(f"Output token size: {res.perf_metrics.get_num_generated_tokens()}") print(f"Load time: {perf_metrics.get_load_time():.2f} ms") print( f"Generate time: {perf_metrics.get_generate_duration().mean:.2f} ± {perf_metrics.get_generate_duration().std:.2f} ms") From f48424435c1a9c4d7bc82f06e604bdd5640d62fb Mon Sep 17 00:00:00 2001 From: guozhong wang Date: Fri, 13 Jun 2025 20:43:16 +0800 Subject: [PATCH 19/27] Update samples/cpp/text_generation/read_prompt_from_file.cpp Co-authored-by: Vladimir Zlobin --- samples/cpp/text_generation/read_prompt_from_file.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/samples/cpp/text_generation/read_prompt_from_file.cpp b/samples/cpp/text_generation/read_prompt_from_file.cpp index 2b41846ef6..ac7caa5f74 100644 --- a/samples/cpp/text_generation/read_prompt_from_file.cpp +++ b/samples/cpp/text_generation/read_prompt_from_file.cpp 
@@ -11,8 +11,7 @@ std::string utils::read_prompt(const std::string& file_path) { if (file.is_open()) { std::stringstream buffer; buffer << file.rdbuf(); - prompt = buffer.str(); - file.close(); + return buffer.str(); } else { // show message: std::cout << "Error opening prompt file: " << file_path << std::endl; From 624e8fc39de4c078d3e837182a6348fc47dc489c Mon Sep 17 00:00:00 2001 From: wgzintel Date: Tue, 17 Jun 2025 11:20:38 +0800 Subject: [PATCH 20/27] default values --- samples/cpp/text_generation/benchmark_genai.cpp | 6 +++--- samples/cpp/text_generation/read_prompt_from_file.cpp | 5 +++-- samples/cpp/visual_language_chat/benchmark_vlm.cpp | 8 ++++---- samples/python/text_generation/benchmark_genai.py | 9 ++++----- samples/python/visual_language_chat/benchmark_vlm.py | 9 ++++----- 5 files changed, 18 insertions(+), 19 deletions(-) diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp index 586bbdabb0..078dd88969 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -10,7 +10,7 @@ int main(int argc, char* argv[]) try { options.add_options() ("m,model", "Path to model and tokenizers base directory", cxxopts::value()) - ("p,prompt", "Prompt", cxxopts::value()->default_value("The Sky is blue because")) + ("p,prompt", "Prompt", cxxopts::value()->default_value("")) ("pf,prompt_file", "Read prompt from file", cxxopts::value()) ("nw,num_warmup", "Number of warmup iterations", cxxopts::value()->default_value(std::to_string(1))) ("n,num_iter", "Number of iterations", cxxopts::value()->default_value(std::to_string(3))) @@ -40,10 +40,10 @@ int main(int argc, char* argv[]) try { if (result.count("prompt_file")) { prompt = utils::read_prompt(result["prompt_file"].as()); } else { - prompt = result["prompt"].as(); + prompt = result["prompt"].as().empty() ? "The Sky is blue because" : result["prompt"].as(); } } - if (prompt == "") { + if (prompt.empty()) { std::cout << "Prompt is empty!" 
<< std::endl; return EXIT_FAILURE; } diff --git a/samples/cpp/text_generation/read_prompt_from_file.cpp b/samples/cpp/text_generation/read_prompt_from_file.cpp index ac7caa5f74..1940994e3c 100644 --- a/samples/cpp/text_generation/read_prompt_from_file.cpp +++ b/samples/cpp/text_generation/read_prompt_from_file.cpp @@ -13,8 +13,9 @@ std::string utils::read_prompt(const std::string& file_path) { buffer << file.rdbuf(); return buffer.str(); } else { - // show message: - std::cout << "Error opening prompt file: " << file_path << std::endl; + std::stringstream error_message; + error_message << "Error opening prompt file: '" << file_path << "'"; + throw std::runtime_error{error_message.str()}; } return prompt; } \ No newline at end of file diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp index 0609d68f58..822f71348b 100644 --- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp +++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp @@ -13,7 +13,7 @@ int main(int argc, char* argv[]) try { options.add_options() ("m,model", "Path to model and tokenizers base directory", cxxopts::value()->default_value(".")) - ("p,prompt", "Prompt", cxxopts::value()->default_value("What is on the image?")) + ("p,prompt", "Prompt", cxxopts::value()->default_value("")) ("pf,prompt_file", "Read prompt from file", cxxopts::value()) ("i,image", "Image", cxxopts::value()->default_value("image.jpg")) ("nw,num_warmup", "Number of warmup iterations", cxxopts::value()->default_value(std::to_string(1))) @@ -44,13 +44,13 @@ int main(int argc, char* argv[]) try { if (result.count("prompt_file")) { prompt = utils::read_prompt(result["prompt_file"].as()); } else { - prompt = result["prompt"].as(); + prompt = result["prompt"].as().empty() ? "What is on the image?" : result["prompt"].as(); } } - if (prompt == "") { + if (prompt.empty()) { std::cout << "Prompt is empty!" 
<< std::endl; return EXIT_FAILURE; - } + } const std::string models_path = result["model"].as(); const std::string image_path = result["image"].as(); diff --git a/samples/python/text_generation/benchmark_genai.py b/samples/python/text_generation/benchmark_genai.py index f680b7adba..a3bcac3870 100755 --- a/samples/python/text_generation/benchmark_genai.py +++ b/samples/python/text_generation/benchmark_genai.py @@ -7,10 +7,9 @@ from openvino import get_version def main(): - default_prompt = "The Sky is blue because" parser = argparse.ArgumentParser(description="Help command") parser.add_argument("-m", "--model", type=str, required=True, help="Path to model and tokenizers base directory") - parser.add_argument("-p", "--prompt", type=str, default=default_prompt, help="Prompt") + parser.add_argument("-p", "--prompt", type=str, default=None, help="Prompt") parser.add_argument("-pf", "--prompt_file", type=str, help="Read prompt from file") parser.add_argument("-nw", "--num_warmup", type=int, default=1, help="Number of warmup iterations") parser.add_argument("-n", "--num_iter", type=int, default=2, help="Number of iterations") @@ -19,15 +18,15 @@ def main(): args = parser.parse_args() - if args.prompt != default_prompt and args.prompt_file is not None: + if args.prompt is not None and args.prompt_file is not None: raise RuntimeError(f'Prompt and prompt file should not exist together!') else: if args.prompt_file is not None: with open(args.prompt_file, 'r', encoding='utf-8') as f: prompt = [f.read()] else: - prompt = [args.prompt] - if prompt == "": + prompt = ['The Sky is blue because'] if args.prompt is None else [args.prompt] + if len(prompt) == 0: raise RuntimeError(f'Prompt is empty!') print(f'openvino runtime version: {get_version()}') diff --git a/samples/python/visual_language_chat/benchmark_vlm.py b/samples/python/visual_language_chat/benchmark_vlm.py index e07943c736..c9ce84c340 100755 --- a/samples/python/visual_language_chat/benchmark_vlm.py +++ b/samples/python/visual_language_chat/benchmark_vlm.py @@ -33,10 +33,9 @@ def read_images(path: str) -> list[Tensor]: def main(): - default_prompt = "What is on the image?" parser = argparse.ArgumentParser(description="Help command") parser.add_argument("-m", "--model", type=str, help="Path to model and tokenizers base directory") - parser.add_argument("-p", "--prompt", type=str, default=default_prompt, help="Prompt") + parser.add_argument("-p", "--prompt", type=str, default=None, help="Prompt") parser.add_argument("-pf", "--prompt_file", type=str, help="Read prompt from file") parser.add_argument("-i", "--image", type=str, default="image.jpg", help="Image") parser.add_argument("-nw", "--num_warmup", type=int, default=1, help="Number of warmup iterations") @@ -46,15 +45,15 @@ def main(): args = parser.parse_args() - if args.prompt != default_prompt and args.prompt_file is not None: + if args.prompt is not None and args.prompt_file is not None: raise RuntimeError(f'Prompt and prompt file should not exist together!') else: if args.prompt_file is not None: with open(args.prompt_file, 'r', encoding='utf-8') as f: prompt = f.read() else: - prompt = args.prompt - if prompt == "": + prompt = 'What is on the image?' 
if args.prompt is None else args.prompt + if len(prompt) == 0: raise RuntimeError(f'Prompt is empty!') print(f'openvino runtime version: {get_version()}') From 5121bfb968b16f5c69961ecad28106bfc71e7b0e Mon Sep 17 00:00:00 2001 From: wgzintel Date: Tue, 17 Jun 2025 15:50:16 +0800 Subject: [PATCH 21/27] Use the regular assignment for scheduler_config --- samples/cpp/text_generation/README.md | 2 +- samples/cpp/visual_language_chat/README.md | 2 +- samples/python/text_generation/README.md | 2 +- samples/python/text_generation/benchmark_genai.py | 4 ++-- samples/python/visual_language_chat/README.md | 2 +- samples/python/visual_language_chat/benchmark_vlm.py | 4 ++-- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/samples/cpp/text_generation/README.md b/samples/cpp/text_generation/README.md index 3663b9aacd..91a0fd328f 100644 --- a/samples/cpp/text_generation/README.md +++ b/samples/cpp/text_generation/README.md @@ -161,7 +161,7 @@ For more information how performance metrics are calculated please follow [perfo ``` #### Options - `-m, --model`: Path to the model and tokenizers base directory. -- `-p, --prompt` (default: `"The Sky is blue because"`): The prompt to generate text. +- `-p, --prompt` (default: `''`): The prompt to generate text. If neither `-p` nor `--pf` is given, the default prompt `"The Sky is blue because"` is used. - `--pf, --prompt_file` Read prompt from file. - `--nw, --num_warmup` (default: `1`): Number of warmup iterations. - `--mt, --max_new_tokens` (default: `20`): Maximal number of new tokens. diff --git a/samples/cpp/visual_language_chat/README.md b/samples/cpp/visual_language_chat/README.md index d96a7e67ae..58065bc070 100644 --- a/samples/cpp/visual_language_chat/README.md +++ b/samples/cpp/visual_language_chat/README.md @@ -40,7 +40,7 @@ benchmark_vlm [OPTIONS] ### Options - `-m, --model`(default: `.`): Path to the model and tokenizers base directory. -- `-p, --prompt` (default: `What is on the image?`): The prompt to generate text. +- `-p, --prompt` (default: `''`): The prompt to generate text. If neither `-p` nor `--pf` is given, the default prompt `"What is on the image?"` is used. - `--pf, --prompt_file` Read prompt from file. - `-i, --image` (default: `image.jpg`): Path to the image. - `-nw, --num_warmup` (default: `1`): Number of warmup iterations. diff --git a/samples/python/text_generation/README.md b/samples/python/text_generation/README.md index 91ec12e10e..cb0ef68f70 100644 --- a/samples/python/text_generation/README.md +++ b/samples/python/text_generation/README.md @@ -153,7 +153,7 @@ For more information how performance metrics are calculated please follow [perfo ``` #### Options - `-m, --model`: Path to the model and tokenizers base directory. -- `-p, --prompt` (default: `"The Sky is blue because"`): The prompt to generate text. +- `-p, --prompt` (default: `None`): The prompt to generate text. If neither `-p` nor `-pf` is given, the default prompt `"The Sky is blue because"` is used. - `-pf, --prompt_file` Read prompt from file. - `-nw, --num_warmup` (default: `1`): Number of warmup iterations. - `-mt, --max_new_tokens` (default: `20`): Maximal number of new tokens.
diff --git a/samples/python/text_generation/benchmark_genai.py b/samples/python/text_generation/benchmark_genai.py index a3bcac3870..c3a55a9d2e 100755 --- a/samples/python/text_generation/benchmark_genai.py +++ b/samples/python/text_generation/benchmark_genai.py @@ -42,8 +42,8 @@ def main(): config.max_new_tokens = args.max_new_tokens scheduler_config = ov_genai.SchedulerConfig() - setattr(scheduler_config, 'enable_prefix_caching', False) - setattr(scheduler_config, 'max_num_batched_tokens', sys.maxsize) + scheduler_config.enable_prefix_caching = False + scheduler_config.max_num_batched_tokens = sys.maxsize pipe = ov_genai.LLMPipeline(models_path, device, scheduler_config=scheduler_config) diff --git a/samples/python/visual_language_chat/README.md b/samples/python/visual_language_chat/README.md index d459bf93e7..3457f51318 100644 --- a/samples/python/visual_language_chat/README.md +++ b/samples/python/visual_language_chat/README.md @@ -40,7 +40,7 @@ python benchmark_vlm.py [OPTIONS] ### Options - `-m, --model`(default: `.`): Path to the model and tokenizers base directory. -- `-p, --prompt` (default: `What is on the image?`): The prompt to generate text. +- `-p, --prompt` (default: `None`): The prompt to generate text. If neither `-p` nor `-pf` is given, the default prompt `"What is on the image?"` is used. - `-pf, --prompt_file` Read prompt from file. - `-i, --image` (default: `image.jpg`): Path to the image. - `-nw, --num_warmup` (default: `1`): Number of warmup iterations. diff --git a/samples/python/visual_language_chat/benchmark_vlm.py b/samples/python/visual_language_chat/benchmark_vlm.py index c9ce84c340..9499e947bd 100755 --- a/samples/python/visual_language_chat/benchmark_vlm.py +++ b/samples/python/visual_language_chat/benchmark_vlm.py @@ -70,8 +70,8 @@ def main(): config.max_new_tokens = args.max_new_tokens scheduler_config = ov_genai.SchedulerConfig() - setattr(scheduler_config, 'enable_prefix_caching', False) - setattr(scheduler_config, 'max_num_batched_tokens', sys.maxsize) + scheduler_config.enable_prefix_caching = False + scheduler_config.max_num_batched_tokens = sys.maxsize pipe = ov_genai.VLMPipeline(models_path, device, scheduler_config=scheduler_config) From d4da4b6eb245d002f25157e1ce8e0b7bbb999a40 Mon Sep 17 00:00:00 2001 From: guozhong wang Date: Wed, 18 Jun 2025 09:06:35 +0800 Subject: [PATCH 22/27] Update samples/cpp/text_generation/read_prompt_from_file.cpp Co-authored-by: Vladimir Zlobin --- samples/cpp/text_generation/read_prompt_from_file.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/samples/cpp/text_generation/read_prompt_from_file.cpp b/samples/cpp/text_generation/read_prompt_from_file.cpp index 1940994e3c..87e76e90b3 100644 --- a/samples/cpp/text_generation/read_prompt_from_file.cpp +++ b/samples/cpp/text_generation/read_prompt_from_file.cpp @@ -6,7 +6,6 @@ #include "read_prompt_from_file.h" std::string utils::read_prompt(const std::string& file_path) { - std::string prompt = ""; std::ifstream file(file_path); if (file.is_open()) { std::stringstream buffer; From 1b8ecba4ec157f625a54f24203d9f3268aa895e1 Mon Sep 17 00:00:00 2001 From: guozhong wang Date: Wed, 18 Jun 2025 09:07:07 +0800 Subject: [PATCH 23/27] Update tools/llm_bench/task/visual_language_generation.py Co-authored-by: Sofya Balandina --- tools/llm_bench/task/visual_language_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py index 7df3026325..60ee0566c7 100644 ---
a/tools/llm_bench/task/visual_language_generation.py +++ b/tools/llm_bench/task/visual_language_generation.py @@ -198,7 +198,7 @@ def run_visual_language_generation_genai( prompts = [] inputs = [inputs] if not isinstance(inputs, (list, tuple)) else inputs for input_data in inputs: - if "media" in input_data: + if input_data.get("media", None): if input_data["media"] is not None: entry = Path(input_data["media"]) if entry.is_dir(): From 9cc975bd745a8d3e5e26985d2763ee25e3bf5f26 Mon Sep 17 00:00:00 2001 From: guozhong wang Date: Wed, 18 Jun 2025 09:07:40 +0800 Subject: [PATCH 24/27] Update tools/llm_bench/task/visual_language_generation.py Co-authored-by: Sofya Balandina --- tools/llm_bench/task/visual_language_generation.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py index 60ee0566c7..4b095712bd 100644 --- a/tools/llm_bench/task/visual_language_generation.py +++ b/tools/llm_bench/task/visual_language_generation.py @@ -223,11 +223,8 @@ def run_visual_language_generation_genai( gen_config.do_sample = False gen_config.ignore_eos = True kwargs = {} - if len(images) > 1: - # multi images + if len(images) >= 1: kwargs["images"] = images - elif len(images) == 1: - kwargs["images"] = images[0] start = time.perf_counter() generation_result = model.generate(prompts[0], generation_config=gen_config, **kwargs) end = time.perf_counter() From 3e3a3216c1c7508820a24ea8a9e9c261ee10cd60 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Wed, 18 Jun 2025 10:43:35 +0800 Subject: [PATCH 25/27] print input image nums for vlm --- tools/llm_bench/task/visual_language_generation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py index 4b095712bd..3f638e5807 100644 --- a/tools/llm_bench/task/visual_language_generation.py +++ b/tools/llm_bench/task/visual_language_generation.py @@ -46,7 +46,8 @@ def run_visual_language_generation_optimum( else: images.append(load_image(input_data["media"])) prompts.append(input_data["prompt"]) - + prefix = '[warm-up]' if num == 0 else '[{}]'.format(num) + log.info(f'{prefix}[P{prompt_index}] Input image nums:{len(images)}') if args["output_dir"] is not None and num == 0: for bs_index, in_text in enumerate(prompts): llm_bench_utils.output_file.output_input_text(in_text, args, model_precision, prompt_index, bs_index, proc_id) @@ -225,6 +226,8 @@ def run_visual_language_generation_genai( kwargs = {} if len(images) >= 1: kwargs["images"] = images + prefix = '[warm-up]' if num == 0 else '[{}]'.format(num) + log.info(f'{prefix}[P{prompt_index}] Input image nums:{len(images)}') start = time.perf_counter() generation_result = model.generate(prompts[0], generation_config=gen_config, **kwargs) end = time.perf_counter() From 2c6872fc070b856bf88bbc3be2d87e1d02bc5659 Mon Sep 17 00:00:00 2001 From: wgzintel Date: Wed, 18 Jun 2025 10:47:25 +0800 Subject: [PATCH 26/27] Remove the corresponding return --- samples/cpp/text_generation/read_prompt_from_file.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/samples/cpp/text_generation/read_prompt_from_file.cpp b/samples/cpp/text_generation/read_prompt_from_file.cpp index 87e76e90b3..7559c2d1db 100644 --- a/samples/cpp/text_generation/read_prompt_from_file.cpp +++ b/samples/cpp/text_generation/read_prompt_from_file.cpp @@ -16,5 +16,4 @@ std::string utils::read_prompt(const std::string& file_path) { 
error_message << "Error opening prompt file: '" << file_path << "'"; throw std::runtime_error{error_message.str()}; } - return prompt; } \ No newline at end of file From 1a13411854f6b48d5559921ecb45df55d88acccf Mon Sep 17 00:00:00 2001 From: wgzintel Date: Wed, 18 Jun 2025 23:36:26 +0800 Subject: [PATCH 27/27] remove if input_data.get("media", None) --- .../task/visual_language_generation.py | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py index 3f638e5807..674f691d4a 100644 --- a/tools/llm_bench/task/visual_language_generation.py +++ b/tools/llm_bench/task/visual_language_generation.py @@ -37,14 +37,13 @@ def run_visual_language_generation_optimum( prompts = [] inputs = [inputs] if not isinstance(inputs, (list, tuple)) else inputs for input_data in inputs: - if "media" in input_data: - if input_data["media"] is not None: - entry = Path(input_data["media"]) - if entry.is_dir(): - for file in sorted(entry.iterdir()): - images.append(load_image(str(file))) - else: - images.append(load_image(input_data["media"])) + if input_data.get("media", None): + entry = Path(input_data["media"]) + if entry.is_dir(): + for file in sorted(entry.iterdir()): + images.append(load_image(str(file))) + else: + images.append(load_image(input_data["media"])) prompts.append(input_data["prompt"]) prefix = '[warm-up]' if num == 0 else '[{}]'.format(num) log.info(f'{prefix}[P{prompt_index}] Input image nums:{len(images)}') @@ -200,13 +199,12 @@ def run_visual_language_generation_genai( inputs = [inputs] if not isinstance(inputs, (list, tuple)) else inputs for input_data in inputs: if input_data.get("media", None): - if input_data["media"] is not None: - entry = Path(input_data["media"]) - if entry.is_dir(): - for file in sorted(entry.iterdir()): - images.append(load_image_genai(str(file))) - else: - images.append(load_image_genai(input_data["media"])) + entry = Path(input_data["media"]) + if entry.is_dir(): + for file in sorted(entry.iterdir()): + images.append(load_image_genai(str(file))) + else: + images.append(load_image_genai(input_data["media"])) prompts.append(input_data["prompt"]) if args["output_dir"] is not None and num == 0: for bs_index, in_text in enumerate(prompts):
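Taken together, the series lets both benchmarks read the prompt from a file, feed several images per request, and run with prefix caching disabled and an effectively unlimited batched-token budget. The sketch below is a minimal illustration of that flow, modeled on the Python VLM sample as patched; the model directory, device, image file names, and the Pillow-based image loader are assumptions for illustration only and are not part of the patches.

```python
# Minimal sketch, assuming openvino-genai, numpy and Pillow are installed, a converted
# VLM lives in "model_dir", and cat.jpg / dog.jpg exist next to the script.
import sys

import numpy as np
import openvino_genai as ov_genai
from openvino import Tensor
from PIL import Image


def load_image(path: str) -> Tensor:
    # Hypothetical helper; the samples ship their own read_image()/read_images() utilities.
    return Tensor(np.array(Image.open(path).convert("RGB")))


images = [load_image(p) for p in ["cat.jpg", "dog.jpg"]]  # several images per request
prompt = "What is on the image?"                          # or read from a file, as -pf does

# Benchmark-oriented scheduler settings applied by the patches.
scheduler_config = ov_genai.SchedulerConfig()
scheduler_config.enable_prefix_caching = False
scheduler_config.max_num_batched_tokens = sys.maxsize

pipe = ov_genai.VLMPipeline("model_dir", "CPU", scheduler_config=scheduler_config)

config = ov_genai.GenerationConfig()
config.max_new_tokens = 20

# Report the input sizes the same way the patched sample does.
prompt_token_size = pipe.get_tokenizer().encode(prompt).input_ids.get_shape()[1]
print(f"Number of images: {len(images)}, prompt token size: {prompt_token_size}")

res = pipe.generate(prompt, images=images, generation_config=config)
print(f"Load time: {res.perf_metrics.get_load_time():.2f} ms")
print(f"Output token size: {res.perf_metrics.get_num_generated_tokens()}")
```

Disabling prefix caching and lifting the batched-token limit keeps successive benchmark iterations comparable, presumably so that repeated runs over the same prompt are not skewed by cached prefixes.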