From fb0ab2b843d913f4c674f51fbd64646b2227372f Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 25 Jul 2025 10:42:26 +0200 Subject: [PATCH 01/10] WIP --- src/cpp/include/openvino/genai/tokenizer.hpp | 3 +++ .../src/tokenizer/make_tokenizer_stateful.cpp | 20 ++++++++++++++++--- .../src/tokenizer/make_tokenizer_stateful.hpp | 1 + src/cpp/src/tokenizer/tokenizer.cpp | 6 ++++++ 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/cpp/include/openvino/genai/tokenizer.hpp b/src/cpp/include/openvino/genai/tokenizer.hpp index 47c57352c4..5e5f225ee0 100644 --- a/src/cpp/include/openvino/genai/tokenizer.hpp +++ b/src/cpp/include/openvino/genai/tokenizer.hpp @@ -165,6 +165,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer { * @param add_special_tokens whether to add special tokens * @param max_length optional maximum length to which output will be truncated and/or padded. If not defined, taken from IR. * @param pad_to_max_length either pad to max_length, or pad to the longest sequence in the batch. Default is false. + * @param padding_side side to pad, either "left" or "right". Default is "right". * @return pair of [input_ids, attention_mask] */ template @@ -178,6 +179,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer { * @param add_special_tokens whether to add special tokens * @param max_length optional maximum length to which output will be truncated and/or padded. If not defined, taken from IR. * @param pad_to_max_length either pad to max_length, or pad to the longest sequence in the batch. Default is false. + * @param padding_side side to pad, either "left" or "right". Default is "right". * @return pair of [input_ids, attention_mask] */ template @@ -313,6 +315,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer { static constexpr ov::Property add_special_tokens{"add_special_tokens"}; static constexpr ov::Property skip_special_tokens{"skip_special_tokens"}; static constexpr ov::Property pad_to_max_length{"pad_to_max_length"}; +static constexpr ov::Property padding_side{"padding_side"}; } // namespace genai } // namespace ov diff --git a/src/cpp/src/tokenizer/make_tokenizer_stateful.cpp b/src/cpp/src/tokenizer/make_tokenizer_stateful.cpp index 0b7bbcdd15..c43dea190a 100644 --- a/src/cpp/src/tokenizer/make_tokenizer_stateful.cpp +++ b/src/cpp/src/tokenizer/make_tokenizer_stateful.cpp @@ -14,6 +14,7 @@ #include "openvino/op/assign.hpp" #include "openvino/op/constant.hpp" #include +#include using namespace ov; using namespace ov::op; @@ -246,15 +247,28 @@ bool ov::genai::MakePaddingSatateful::run_on_model(const std::shared_ptradd_sinks({std::make_shared(pad_to_max_length_rv, pad_to_max_length_var)}); model->add_variables({pad_to_max_length_var}); + // Add padding side variable. + auto pad_right_var = std::make_shared(op::util::VariableInfo{ov::Shape{1}, ov::element::boolean, ov::genai::PAD_RIGHT_VAR_ID}); + auto pad_right_const = std::make_shared(ov::element::boolean, ov::Shape{1}, std::vector{true}); + auto pad_right_rv = std::make_shared(pad_right_const, pad_right_var); + model->add_sinks({std::make_shared(pad_right_rv, pad_right_var)}); + model->add_variables({pad_right_var}); + auto select_node = std::make_shared(pad_to_max_length_rv, max_length_rv, zero_constant); for (auto ragged_to_dense_node : ragged_to_dense_nodes) { if (!ragged_to_dense_node) { - return true; // true since at this point we already have modified the graph.s + return true; // true since at this point we already have modified the graph. 
} + + auto new_inputs = ragged_to_dense_node->input_values(); + new_inputs.emplace_back(pad_right_rv->output(0)); + auto new_ragged_to_dense = ragged_to_dense_node->clone_with_new_inputs(new_inputs); - auto max_op = std::make_shared(ragged_to_dense_node->input_value(3), select_node); - ragged_to_dense_node->input(3).replace_source_output(max_op->output(0)); + auto max_op = std::make_shared(new_ragged_to_dense->input_value(3), select_node); + new_ragged_to_dense->input(3).replace_source_output(max_op->output(0)); + + ov::replace_node(ragged_to_dense_node, new_ragged_to_dense); } return true; diff --git a/src/cpp/src/tokenizer/make_tokenizer_stateful.hpp b/src/cpp/src/tokenizer/make_tokenizer_stateful.hpp index fba9cf07fb..f77f761fef 100644 --- a/src/cpp/src/tokenizer/make_tokenizer_stateful.hpp +++ b/src/cpp/src/tokenizer/make_tokenizer_stateful.hpp @@ -90,6 +90,7 @@ inline const std::string SKIP_SPECIAL_TOKENS_VAR_ID = "skip_special_tokens"; inline const std::string MAX_LENGTH_VAR_ID = "max_length"; inline const std::string IS_MAX_LENGTH_SET = "is_max_length_set"; inline const std::string PAD_TO_MAX_LENGTH_VAR_ID = "pad_to_max_length"; +inline const std::string PAD_RIGHT_VAR_ID = "pad_right"; } // namespace genai } // namespace ov diff --git a/src/cpp/src/tokenizer/tokenizer.cpp b/src/cpp/src/tokenizer/tokenizer.cpp index ec651b4356..92efda640c 100644 --- a/src/cpp/src/tokenizer/tokenizer.cpp +++ b/src/cpp/src/tokenizer/tokenizer.cpp @@ -260,11 +260,15 @@ class Tokenizer::TokenizerImpl { std::optional skip_special_tokens_flag = true; std::optional max_length_val; std::optional pad_to_max_length_val = false; + std::optional padding_side_val = "right"; ov::genai::utils::read_anymap_param(params, add_special_tokens.name(), add_special_tokens_flag); ov::genai::utils::read_anymap_param(params, skip_special_tokens.name(), skip_special_tokens_flag); ov::genai::utils::read_anymap_param(params, pad_to_max_length.name(), pad_to_max_length_val); ov::genai::utils::read_anymap_param(params, max_length.name(), max_length_val); + ov::genai::utils::read_anymap_param(params, padding_side.name(), padding_side_val); + std::optional pad_right = (padding_side_val.has_value() && *padding_side_val == "right") ? true : false; + std::optional is_max_length_set_val = max_length_val.has_value(); ov::AnyMap& state_flags = m_request_to_state_flags[&infer_request_guard.get()]; @@ -282,6 +286,8 @@ class Tokenizer::TokenizerImpl { set_state_value(state, pad_to_max_length_val, state_flags); } else if (name == IS_MAX_LENGTH_SET) { set_state_value(state, is_max_length_set_val, state_flags); + } else if (name == PAD_RIGHT_VAR_ID) { + set_state_value(state, pad_right, state_flags); } } } From 0a9114d4f7d6ffd2d34ee94deaae23ce03e4140a Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Wed, 30 Jul 2025 14:02:22 +0200 Subject: [PATCH 02/10] add padding side to genai --- .../src/tokenizer/make_tokenizer_stateful.cpp | 4 +-- src/cpp/src/tokenizer/tokenizer.cpp | 25 +++++++++++++++---- src/python/py_tokenizer.cpp | 24 +++++++++++++++--- 3 files changed, 42 insertions(+), 11 deletions(-) diff --git a/src/cpp/src/tokenizer/make_tokenizer_stateful.cpp b/src/cpp/src/tokenizer/make_tokenizer_stateful.cpp index c43dea190a..c03afcfeca 100644 --- a/src/cpp/src/tokenizer/make_tokenizer_stateful.cpp +++ b/src/cpp/src/tokenizer/make_tokenizer_stateful.cpp @@ -248,8 +248,8 @@ bool ov::genai::MakePaddingSatateful::run_on_model(const std::shared_ptradd_variables({pad_to_max_length_var}); // Add padding side variable. 
- auto pad_right_var = std::make_shared(op::util::VariableInfo{ov::Shape{1}, ov::element::boolean, ov::genai::PAD_RIGHT_VAR_ID}); - auto pad_right_const = std::make_shared(ov::element::boolean, ov::Shape{1}, std::vector{true}); + auto pad_right_var = std::make_shared(op::util::VariableInfo{ov::Shape{}, ov::element::boolean, ov::genai::PAD_RIGHT_VAR_ID}); + auto pad_right_const = std::make_shared(ov::element::boolean, ov::Shape{}, std::vector{true}); auto pad_right_rv = std::make_shared(pad_right_const, pad_right_var); model->add_sinks({std::make_shared(pad_right_rv, pad_right_var)}); model->add_variables({pad_right_var}); diff --git a/src/cpp/src/tokenizer/tokenizer.cpp b/src/cpp/src/tokenizer/tokenizer.cpp index 92efda640c..19a864714e 100644 --- a/src/cpp/src/tokenizer/tokenizer.cpp +++ b/src/cpp/src/tokenizer/tokenizer.cpp @@ -851,27 +851,42 @@ Tokenizer::Tokenizer(const std::string& model_str, ov::Tensor& weights_tensor, c } TokenizedInputs Tokenizer::encode(const std::string& prompt, const ov::AnyMap& tokenization_params) { - check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), ov::genai::max_length.name(), ov::genai::pad_to_max_length.name()}); + check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), + ov::genai::max_length.name(), + ov::genai::pad_to_max_length.name(), + ov::genai::padding_side.name()}); return m_pimpl->encode(std::move(prompt), tokenization_params); } TokenizedInputs Tokenizer::encode(const std::vector>& prompts, const ov::AnyMap& tokenization_params) { - check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), ov::genai::max_length.name(), ov::genai::pad_to_max_length.name()}); + check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), + ov::genai::max_length.name(), + ov::genai::pad_to_max_length.name(), + ov::genai::padding_side.name()}); return m_pimpl->encode(prompts, tokenization_params); } TokenizedInputs Tokenizer::encode(const std::vector& prompts_1, const std::vector& prompts_2, const ov::AnyMap& tokenization_params) { - check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), ov::genai::max_length.name(), ov::genai::pad_to_max_length.name()}); + check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), + ov::genai::max_length.name(), + ov::genai::pad_to_max_length.name(), + ov::genai::padding_side.name()}); return m_pimpl->encode(prompts_1, prompts_2, tokenization_params); } TokenizedInputs Tokenizer::encode(const std::vector& prompts, const ov::AnyMap& tokenization_params) { - check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), ov::genai::max_length.name(), ov::genai::pad_to_max_length.name()}); + check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), + ov::genai::max_length.name(), + ov::genai::pad_to_max_length.name(), + ov::genai::padding_side.name()}); return m_pimpl->encode(prompts, tokenization_params); } TokenizedInputs Tokenizer::encode(const std::initializer_list& text, const ov::AnyMap& tokenization_params) { - check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), ov::genai::max_length.name(), ov::genai::pad_to_max_length.name()}); + check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), + ov::genai::max_length.name(), + ov::genai::pad_to_max_length.name(), + ov::genai::padding_side.name()}); return encode(std::vector(text.begin(), text.end()), tokenization_params); } diff --git a/src/python/py_tokenizer.cpp b/src/python/py_tokenizer.cpp 
index d98ead7e20..b5f34cbd75 100644 --- a/src/python/py_tokenizer.cpp +++ b/src/python/py_tokenizer.cpp @@ -75,10 +75,13 @@ void init_tokenizer(py::module_& m) { .def("encode", [](Tokenizer& tok, std::vector& prompts, bool add_special_tokens, bool pad_to_max_length, - std::optional max_length) { + std::optional max_length, + std::optional padding_side) { ov::AnyMap tokenization_params; tokenization_params[ov::genai::add_special_tokens.name()] = add_special_tokens; tokenization_params[ov::genai::pad_to_max_length.name()] = pad_to_max_length; + tokenization_params[ov::genai::padding_side.name()] = padding_side.value_or("right"); + if (max_length.has_value()) { tokenization_params[ov::genai::max_length.name()] = *max_length; } @@ -88,15 +91,19 @@ void init_tokenizer(py::module_& m) { py::arg("add_special_tokens") = true, py::arg("pad_to_max_length") = false, py::arg("max_length") = std::nullopt, + py::arg("padding_side") = "right", R"(Encodes a list of prompts into tokenized inputs.)") .def("encode", [](Tokenizer& tok, const std::string prompt, bool add_special_tokens, bool pad_to_max_length, - std::optional max_length) { + std::optional max_length, + std::optional padding_side + ) { ov::AnyMap tokenization_params; tokenization_params[ov::genai::add_special_tokens.name()] = add_special_tokens; tokenization_params[ov::genai::pad_to_max_length.name()] = pad_to_max_length; + tokenization_params[ov::genai::padding_side.name()] = padding_side.value_or("right"); if (max_length.has_value()) { tokenization_params[ov::genai::max_length.name()] = *max_length; } @@ -106,6 +113,7 @@ void init_tokenizer(py::module_& m) { py::arg("add_special_tokens") = true, py::arg("pad_to_max_length") = false, py::arg("max_length") = std::nullopt, + py::arg("padding_side") = "right", R"(Encodes a single prompt into tokenized input.)") .def("encode", [](Tokenizer& tok, @@ -113,10 +121,13 @@ void init_tokenizer(py::module_& m) { std::vector& prompts_2, bool add_special_tokens, bool pad_to_max_length, - std::optional max_length) { + std::optional max_length, + std::optional padding_side) { ov::AnyMap tokenization_params; tokenization_params[ov::genai::add_special_tokens.name()] = add_special_tokens; tokenization_params[ov::genai::pad_to_max_length.name()] = pad_to_max_length; + tokenization_params[ov::genai::padding_side.name()] = padding_side.value_or("right"); + if (max_length.has_value()) { tokenization_params[ov::genai::max_length.name()] = *max_length; } @@ -127,16 +138,20 @@ void init_tokenizer(py::module_& m) { py::arg("add_special_tokens") = true, py::arg("pad_to_max_length") = false, py::arg("max_length") = std::nullopt, + py::arg("padding_side") = "right", R"(Encodes a list of prompts into tokenized inputs. The number of strings must be the same, or one of the inputs can contain one string. 
In the latter case, the single-string input will be broadcast into the shape of the other input, which is more efficient than repeating the string in pairs.)") .def("encode", [](Tokenizer& tok, py::list& prompts, bool add_special_tokens, bool pad_to_max_length, - std::optional max_length) { + std::optional max_length, + std::optional padding_side) { ov::AnyMap tokenization_params; tokenization_params[ov::genai::add_special_tokens.name()] = add_special_tokens; tokenization_params[ov::genai::pad_to_max_length.name()] = pad_to_max_length; + tokenization_params[ov::genai::padding_side.name()] = padding_side.value_or("right"); + if (max_length.has_value()) { tokenization_params[ov::genai::max_length.name()] = *max_length; } @@ -156,6 +171,7 @@ void init_tokenizer(py::module_& m) { py::arg("add_special_tokens") = true, py::arg("pad_to_max_length") = false, py::arg("max_length") = std::nullopt, + py::arg("padding_side") = "right", R"(Encodes a list of paired prompts into tokenized inputs. Input format is same as for HF paired input [[prompt_1, prompt_2], ...].)") .def( From 658d83b2d47df9d31aa034ea58119e3c2410c221 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 8 Aug 2025 13:47:15 +0200 Subject: [PATCH 03/10] add tests & make them green --- .../src/tokenizer/make_tokenizer_stateful.cpp | 8 ++++-- src/cpp/src/tokenizer/tokenizer.cpp | 18 ++++++++++--- src/python/py_tokenizer.cpp | 26 ++++++++++++------- tests/python_tests/test_tokenizer.py | 8 ++++++ 4 files changed, 45 insertions(+), 15 deletions(-) diff --git a/src/cpp/src/tokenizer/make_tokenizer_stateful.cpp b/src/cpp/src/tokenizer/make_tokenizer_stateful.cpp index c03afcfeca..32760bd466 100644 --- a/src/cpp/src/tokenizer/make_tokenizer_stateful.cpp +++ b/src/cpp/src/tokenizer/make_tokenizer_stateful.cpp @@ -248,8 +248,12 @@ bool ov::genai::MakePaddingSatateful::run_on_model(const std::shared_ptradd_variables({pad_to_max_length_var}); // Add padding side variable. - auto pad_right_var = std::make_shared(op::util::VariableInfo{ov::Shape{}, ov::element::boolean, ov::genai::PAD_RIGHT_VAR_ID}); - auto pad_right_const = std::make_shared(ov::element::boolean, ov::Shape{}, std::vector{true}); + auto pad_right_var = std::make_shared(op::util::VariableInfo{ov::Shape{}, ov::element::i32, ov::genai::PAD_RIGHT_VAR_ID}); + // If user called encode without explicitly stating padding side, then we should pad it to the default side + // which was defined during model conversion, but we don't know default side value during this transformation, + // therefore we should indicate that padding side should be taken from the operation attribute. + // We decided that to be number 2. 
+    auto pad_right_const = std::make_shared(ov::element::i32, ov::Shape{}, std::vector{2});
     auto pad_right_rv = std::make_shared(pad_right_const, pad_right_var);
     model->add_sinks({std::make_shared(pad_right_rv, pad_right_var)});
     model->add_variables({pad_right_var});
diff --git a/src/cpp/src/tokenizer/tokenizer.cpp b/src/cpp/src/tokenizer/tokenizer.cpp
index 19a864714e..1764fb806e 100644
--- a/src/cpp/src/tokenizer/tokenizer.cpp
+++ b/src/cpp/src/tokenizer/tokenizer.cpp
@@ -260,14 +260,24 @@ class Tokenizer::TokenizerImpl {
     std::optional skip_special_tokens_flag = true;
     std::optional max_length_val;
     std::optional pad_to_max_length_val = false;
-    std::optional padding_side_val = "right";
-
+    std::optional padding_side_val = std::nullopt;
+
     ov::genai::utils::read_anymap_param(params, add_special_tokens.name(), add_special_tokens_flag);
     ov::genai::utils::read_anymap_param(params, skip_special_tokens.name(), skip_special_tokens_flag);
     ov::genai::utils::read_anymap_param(params, pad_to_max_length.name(), pad_to_max_length_val);
     ov::genai::utils::read_anymap_param(params, max_length.name(), max_length_val);
     ov::genai::utils::read_anymap_param(params, padding_side.name(), padding_side_val);
-    std::optional pad_right = (padding_side_val.has_value() && *padding_side_val == "right") ? true : false;
+    std::optional pad_right;
+
+    // If padding_side is not set, we leave pad_right as nullopt; this indicates that the default value from the RaggedToDense attribute will be used.
+    if (padding_side_val.has_value()) {
+        OPENVINO_ASSERT(
+            padding_side_val == "left" || padding_side_val == "right",
+            "padding_side should be either 'left' or 'right', but got: ",
+            *padding_side_val
+        );
+        pad_right = (*padding_side_val == "right");
+    }
 
     std::optional is_max_length_set_val = max_length_val.has_value();
 
     ov::AnyMap& state_flags = m_request_to_state_flags[&infer_request_guard.get()];
@@ -286,7 +296,7 @@ class Tokenizer::TokenizerImpl {
             set_state_value(state, pad_to_max_length_val, state_flags);
         } else if (name == IS_MAX_LENGTH_SET) {
             set_state_value(state, is_max_length_set_val, state_flags);
-        } else if (name == PAD_RIGHT_VAR_ID) { 
+        } else if (name == PAD_RIGHT_VAR_ID) {
            set_state_value(state, pad_right, state_flags);
         }
     }
diff --git a/src/python/py_tokenizer.cpp b/src/python/py_tokenizer.cpp
index b5f34cbd75..4f90e33622 100644
--- a/src/python/py_tokenizer.cpp
+++ b/src/python/py_tokenizer.cpp
@@ -80,18 +80,20 @@ void init_tokenizer(py::module_& m) {
         ov::AnyMap tokenization_params;
         tokenization_params[ov::genai::add_special_tokens.name()] = add_special_tokens;
         tokenization_params[ov::genai::pad_to_max_length.name()] = pad_to_max_length;
-
tokenization_params[ov::genai::padding_side.name()] = padding_side.value_or("right"); if (max_length.has_value()) { tokenization_params[ov::genai::max_length.name()] = *max_length; } + if (padding_side.has_value()) { + tokenization_params[ov::genai::padding_side.name()] = *padding_side; + } return tok.encode(prompt, tokenization_params); }, py::arg("prompt"), py::arg("add_special_tokens") = true, py::arg("pad_to_max_length") = false, py::arg("max_length") = std::nullopt, - py::arg("padding_side") = "right", + py::arg("padding_side") = std::nullopt, R"(Encodes a single prompt into tokenized input.)") .def("encode", [](Tokenizer& tok, @@ -126,11 +130,13 @@ void init_tokenizer(py::module_& m) { ov::AnyMap tokenization_params; tokenization_params[ov::genai::add_special_tokens.name()] = add_special_tokens; tokenization_params[ov::genai::pad_to_max_length.name()] = pad_to_max_length; - tokenization_params[ov::genai::padding_side.name()] = padding_side.value_or("right"); if (max_length.has_value()) { tokenization_params[ov::genai::max_length.name()] = *max_length; } + if (padding_side.has_value()) { + tokenization_params[ov::genai::padding_side.name()] = *padding_side; + } return tok.encode(prompts_1, prompts_2, tokenization_params); }, py::arg("prompts_1"), @@ -138,7 +144,7 @@ void init_tokenizer(py::module_& m) { py::arg("add_special_tokens") = true, py::arg("pad_to_max_length") = false, py::arg("max_length") = std::nullopt, - py::arg("padding_side") = "right", + py::arg("padding_side") = std::nullopt, R"(Encodes a list of prompts into tokenized inputs. The number of strings must be the same, or one of the inputs can contain one string. In the latter case, the single-string input will be broadcast into the shape of the other input, which is more efficient than repeating the string in pairs.)") @@ -150,11 +156,13 @@ void init_tokenizer(py::module_& m) { ov::AnyMap tokenization_params; tokenization_params[ov::genai::add_special_tokens.name()] = add_special_tokens; tokenization_params[ov::genai::pad_to_max_length.name()] = pad_to_max_length; - tokenization_params[ov::genai::padding_side.name()] = padding_side.value_or("right"); - + if (max_length.has_value()) { tokenization_params[ov::genai::max_length.name()] = *max_length; } + if (padding_side.has_value()) { + tokenization_params[ov::genai::padding_side.name()] = *padding_side; + } // Convert py::list to std::vector std::vector> prompts_vector; @@ -171,7 +179,7 @@ void init_tokenizer(py::module_& m) { py::arg("add_special_tokens") = true, py::arg("pad_to_max_length") = false, py::arg("max_length") = std::nullopt, - py::arg("padding_side") = "right", + py::arg("padding_side") = std::nullopt, R"(Encodes a list of paired prompts into tokenized inputs. 
Input format is same as for HF paired input [[prompt_1, prompt_2], ...].)") .def( diff --git a/tests/python_tests/test_tokenizer.py b/tests/python_tests/test_tokenizer.py index f48af9bada..43a7f2090f 100644 --- a/tests/python_tests/test_tokenizer.py +++ b/tests/python_tests/test_tokenizer.py @@ -365,6 +365,8 @@ def hf_ov_genai_models(request, tmp_path_factory): @pytest.mark.parametrize("add_special_tokens", [True, False]) @pytest.mark.parametrize("max_length", [None, 16, 103, 512, 1024]) @pytest.mark.parametrize("pad_to_max_length", [None, True, False]) +# regardless of what side was set during conversion we should be able to set it at runtime +@pytest.mark.parametrize("padding_side", [None, "right", "left"]) @pytest.mark.parametrize("prompt", prompts) @pytest.mark.parametrize( "hf_ov_genai_models", @@ -386,6 +388,7 @@ def test_padding( add_special_tokens, max_length, pad_to_max_length, + padding_side, prompt, ): hf_tokenizer, genai_tokenzier = hf_ov_genai_models @@ -418,8 +421,13 @@ def test_padding( hf_params.pop("max_length") ov_params.pop("max_length") + if padding_side is not None: + hf_params["padding_side"] = padding_side + ov_params["padding_side"] = padding_side + ov_res = genai_tokenzier.encode(prompt, **ov_params) hf_res = hf_tokenizer(prompt, return_tensors="np", **hf_params) + assert np.all(ov_res.input_ids.data == hf_res["input_ids"]) assert np.all(ov_res.attention_mask.data == hf_res["attention_mask"]) From 8b53509f7619eec5521f1fd4b7a4d80c2cade561 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 8 Aug 2025 14:01:31 +0200 Subject: [PATCH 04/10] update pyi --- src/python/openvino_genai/py_openvino_genai.pyi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index b8d448600b..cdb1b380c7 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -2971,23 +2971,23 @@ class Tokenizer: Decode a batch of tokens into a list of string prompt. """ @typing.overload - def encode(self, prompts: collections.abc.Sequence[str], add_special_tokens: bool = True, pad_to_max_length: bool = False, max_length: typing.SupportsInt | None = None) -> TokenizedInputs: + def encode(self, prompts: collections.abc.Sequence[str], add_special_tokens: bool = True, pad_to_max_length: bool = False, max_length: typing.SupportsInt | None = None, padding_side: str | None = None) -> TokenizedInputs: """ Encodes a list of prompts into tokenized inputs. """ @typing.overload - def encode(self, prompt: str, add_special_tokens: bool = True, pad_to_max_length: bool = False, max_length: typing.SupportsInt | None = None) -> TokenizedInputs: + def encode(self, prompt: str, add_special_tokens: bool = True, pad_to_max_length: bool = False, max_length: typing.SupportsInt | None = None, padding_side: str | None = None) -> TokenizedInputs: """ Encodes a single prompt into tokenized input. 
""" @typing.overload - def encode(self, prompts_1: collections.abc.Sequence[str], prompts_2: collections.abc.Sequence[str], add_special_tokens: bool = True, pad_to_max_length: bool = False, max_length: typing.SupportsInt | None = None) -> TokenizedInputs: + def encode(self, prompts_1: collections.abc.Sequence[str], prompts_2: collections.abc.Sequence[str], add_special_tokens: bool = True, pad_to_max_length: bool = False, max_length: typing.SupportsInt | None = None, padding_side: str | None = None) -> TokenizedInputs: """ Encodes a list of prompts into tokenized inputs. The number of strings must be the same, or one of the inputs can contain one string. In the latter case, the single-string input will be broadcast into the shape of the other input, which is more efficient than repeating the string in pairs. """ @typing.overload - def encode(self, prompts: list, add_special_tokens: bool = True, pad_to_max_length: bool = False, max_length: typing.SupportsInt | None = None) -> TokenizedInputs: + def encode(self, prompts: list, add_special_tokens: bool = True, pad_to_max_length: bool = False, max_length: typing.SupportsInt | None = None, padding_side: str | None = None) -> TokenizedInputs: """ Encodes a list of paired prompts into tokenized inputs. Input format is same as for HF paired input [[prompt_1, prompt_2], ...]. """ From 9117082379598078f20b900ec0db7363e17a3719 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Fri, 8 Aug 2025 14:05:11 +0200 Subject: [PATCH 05/10] update docstring --- src/cpp/include/openvino/genai/tokenizer.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cpp/include/openvino/genai/tokenizer.hpp b/src/cpp/include/openvino/genai/tokenizer.hpp index 5e5f225ee0..69e5fb4707 100644 --- a/src/cpp/include/openvino/genai/tokenizer.hpp +++ b/src/cpp/include/openvino/genai/tokenizer.hpp @@ -165,7 +165,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer { * @param add_special_tokens whether to add special tokens * @param max_length optional maximum length to which output will be truncated and/or padded. If not defined, taken from IR. * @param pad_to_max_length either pad to max_length, or pad to the longest sequence in the batch. Default is false. - * @param padding_side side to pad, either "left" or "right". Default is "right". + * @param padding_side side to pad, either "left" or "right". If not defined value is taken from IR. * @return pair of [input_ids, attention_mask] */ template @@ -179,7 +179,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer { * @param add_special_tokens whether to add special tokens * @param max_length optional maximum length to which output will be truncated and/or padded. If not defined, taken from IR. * @param pad_to_max_length either pad to max_length, or pad to the longest sequence in the batch. Default is false. - * @param padding_side side to pad, either "left" or "right". Default is "right". + * @param padding_side side to pad, either "left" or "right". If not defined value is taken from IR. 
 * @return pair of [input_ids, attention_mask]
  */
 template

From 6bc01682683f7986fe2088760e1f1b13b20a7947 Mon Sep 17 00:00:00 2001
From: Pavel Esir 
Date: Mon, 11 Aug 2025 13:35:31 +0200
Subject: [PATCH 06/10] update python docstring for 'encode'

---
 src/python/py_tokenizer.cpp | 58 +++++++++++++++++++++++++++++++------
 1 file changed, 49 insertions(+), 9 deletions(-)

diff --git a/src/python/py_tokenizer.cpp b/src/python/py_tokenizer.cpp
index 4f90e33622..aacc063873 100644
--- a/src/python/py_tokenizer.cpp
+++ b/src/python/py_tokenizer.cpp
@@ -32,6 +32,46 @@ constexpr char class_docstring[] = R"(
     7. Replace not supported instructions with equivalents.
 )";
 
+constexpr char common_encode_docstring[] = R"(
+    'add_special_tokens' - whether to add special tokens like BOS, EOS, PAD. Default is True.
+    'pad_to_max_length' - whether to pad the sequence to the maximum length. Default is False.
+    'max_length' - maximum length of the sequence. If None (default), the value will be taken from the model configuration.
+    'padding_side' - side to pad the sequence, can be 'left' or 'right'. If None (default), the value will be taken from the model configuration.
+Returns:
+    TokenizedInputs object containing input_ids and attention_mask tensors.
+)";
+
+auto encode_list_docstring = (
+R"(Encodes a list of prompts into tokenized inputs.
+Args:
+    'prompts' - list of prompts to encode)"
++ std::string(common_encode_docstring)
+);
+
+auto encode_single_prompt_docstring = (
+R"(Encodes a single prompt into tokenized input.
+Args:
+    'prompt' - prompt to encode)"
++ std::string(common_encode_docstring)
+);
+
+auto encode_list_of_pairs_docstring = (
+R"(Encodes a list of prompts into tokenized inputs. The number of strings must be the same, or one of the inputs can contain one string.
+In the latter case, the single-string input will be broadcast into the shape of the other input, which is more efficient than repeating the string in pairs.)
+Args:
+    'prompts_1' - list of prompts to encode
+    'prompts_2' - list of prompts to encode)"
++ std::string(common_encode_docstring)
+);
+
+auto encode_list_of_lists_docstring =
+(
+R"(Encodes a list of paired prompts into tokenized inputs. Input format is same as for HF paired input [[prompt_1, prompt_2], ...].
+Args:
+    'prompts' - list of prompts to encode)"
++ std::string(common_encode_docstring)
+);
+
 } // namespace
 
 namespace py = pybind11;
@@ -94,7 +134,7 @@ void init_tokenizer(py::module_& m) {
           py::arg("add_special_tokens") = true,
           py::arg("pad_to_max_length") = false,
           py::arg("max_length") = std::nullopt,
          py::arg("padding_side") = std::nullopt,
-          R"(Encodes a list of prompts into tokenized inputs.)")
+          encode_list_docstring.c_str())
     .def("encode", [](Tokenizer& tok, const std::string prompt,
                       bool add_special_tokens,
                       bool pad_to_max_length,
                       std::optional max_length,
                       std::optional padding_side
                       ) {
@@ -118,8 +158,8 @@ void init_tokenizer(py::module_& m) {
           py::arg("add_special_tokens") = true,
           py::arg("pad_to_max_length") = false,
           py::arg("max_length") = std::nullopt,
           py::arg("padding_side") = std::nullopt,
-          R"(Encodes a single prompt into tokenized input.)")
-
+          encode_single_prompt_docstring.c_str())
+
     .def("encode", [](Tokenizer& tok,
                       std::vector& prompts_1,
                       std::vector& prompts_2,
                       bool add_special_tokens,
                       bool pad_to_max_length,
                       std::optional max_length,
                       std::optional padding_side) {
@@ -145,9 +185,8 @@ void init_tokenizer(py::module_& m) {
           py::arg("add_special_tokens") = true,
           py::arg("pad_to_max_length") = false,
           py::arg("max_length") = std::nullopt,
           py::arg("padding_side") = std::nullopt,
-          R"(Encodes a list of prompts into tokenized inputs. The number of strings must be the same, or one of the inputs can contain one string.
-          In the latter case, the single-string input will be broadcast into the shape of the other input, which is more efficient than repeating the string in pairs.)")
-
+          encode_list_of_pairs_docstring.c_str())
+
     .def("encode", [](Tokenizer& tok, py::list& prompts,
                       bool add_special_tokens,
                       bool pad_to_max_length,
                       std::optional max_length,
                       std::optional padding_side) {
@@ -171,7 +219,10 @@ void init_tokenizer(py::module_& m) {
           py::arg("add_special_tokens") = true,
           py::arg("pad_to_max_length") = false,
           py::arg("max_length") = std::nullopt,
           py::arg("padding_side") = std::nullopt,
-          R"(Encodes a list of paired prompts into tokenized inputs. Input format is same as for HF paired input [[prompt_1, prompt_2], ...].)")
-
-    .def(
+          encode_list_of_lists_docstring.c_str()
+    )
+
+    .def(
         "decode",
         [](Tokenizer& tok, std::vector& tokens, bool skip_special_tokens) -> py::str {
             ov::AnyMap detokenization_params;

From bea238d92aeb54d3f9dbe4783f25069e9b70e2a2 Mon Sep 17 00:00:00 2001
From: Pavel Esir 
Date: Mon, 11 Aug 2025 13:35:41 +0200
Subject: [PATCH 07/10] get default value of pad_right from AttributeVisitor

---
 .../src/tokenizer/make_tokenizer_stateful.cpp | 62 ++++++++++++++-----
 src/python/py_tokenizer.cpp                   |  4 ++--
 2 files changed, 50 insertions(+), 16 deletions(-)

diff --git a/src/cpp/src/tokenizer/make_tokenizer_stateful.cpp b/src/cpp/src/tokenizer/make_tokenizer_stateful.cpp
index 32760bd466..e6e89389e3 100644
--- a/src/cpp/src/tokenizer/make_tokenizer_stateful.cpp
+++ b/src/cpp/src/tokenizer/make_tokenizer_stateful.cpp
@@ -109,6 +109,23 @@ bool ov::genai::MakeVocabDecoderSatateful::run_on_model(const std::shared_ptr& model) {
 
+class ReadPadRightAttributes : public ov::AttributeVisitor {
+    bool m_pad_right = true;
+
+public:
+    void on_adapter(const std::string& name, ov::ValueAccessor<void>& adapter) override {
+        if (name != "pad_right") {
+            return;
+        }
+        if (auto a = ov::as_type<ov::AttributeAdapter<bool>>(&adapter)) {
+            m_pad_right = a->get();
+        }
+    }
+
+    bool get_pad_right() const {
+        return m_pad_right;
+    }
+};
 
 bool ov::genai::MakePaddingSatateful::run_on_model(const std::shared_ptr& model) {
     std::shared_ptr combine_seg_node;
@@ -244,22 +261,34 @@ bool ov::genai::MakePaddingSatateful::run_on_model(const std::shared_ptr& model) {
     auto pad_to_max_length_rv = std::make_shared(default_false_const, pad_to_max_length_var);
-    model->add_sinks({std::make_shared(pad_to_max_length_rv, pad_to_max_length_var)});
-    model->add_variables({pad_to_max_length_var});
+    auto select_node = std::make_shared(pad_to_max_length_rv, max_length_rv, zero_constant);
+
+    // If user called encode without explicitly stating padding side, then we should pad it to the default side.
+    // Here we get that side from the RaggedToDense nodes' attribute.
+    auto pad_right_attr_visitor = ReadPadRightAttributes();
+    bool first_iter = false;
+    bool default_pad_right = true;
+    for (auto ragged_to_dense_node : ragged_to_dense_nodes) {
+        if (!ragged_to_dense_node) {
+            return true; // true since at this point we already have modified the graph.
+        }
+        ragged_to_dense_node->visit_attributes(pad_right_attr_visitor);
+        if (first_iter) {
+            default_pad_right = pad_right_attr_visitor.get_pad_right();
+        } else if (pad_right_attr_visitor.get_pad_right() != default_pad_right) {
+            return true; // true since at this point we already have modified the graph.
+        }
+    }
+
     // Add padding side variable.
-    auto pad_right_var = std::make_shared(op::util::VariableInfo{ov::Shape{}, ov::element::i32, ov::genai::PAD_RIGHT_VAR_ID});
-    // If user called encode without explicitly stating padding side, then we should pad it to the default side
-    // which was defined during model conversion, but we don't know default side value during this transformation,
-    // therefore we should indicate that padding side should be taken from the operation attribute.
-    // We decided that to be number 2.
-    auto pad_right_const = std::make_shared(ov::element::i32, ov::Shape{}, std::vector{2});
+    auto pad_right_var = std::make_shared(op::util::VariableInfo{ov::Shape{}, ov::element::boolean, ov::genai::PAD_RIGHT_VAR_ID});
+    auto pad_right_const = std::make_shared(ov::element::boolean, ov::Shape{}, std::vector{default_pad_right});
     auto pad_right_rv = std::make_shared(pad_right_const, pad_right_var);
-    model->add_sinks({std::make_shared(pad_right_rv, pad_right_var)});
-    model->add_variables({pad_right_var});
-
-    auto select_node = std::make_shared(pad_to_max_length_rv, max_length_rv, zero_constant);
-
+
+    // This loop cannot be merged with the loop above: first we need to ensure that all RaggedToDense nodes have the same padding side,
+    // and only then start modifying them. Therefore we iterate over the RaggedToDense nodes twice. In 99% of cases there is only one RaggedToDense node;
+    // in the rest, there are two RaggedToDense nodes with the same padding side when they are created by openvino_tokenizers.
     for (auto ragged_to_dense_node : ragged_to_dense_nodes) {
         if (!ragged_to_dense_node) {
             return true; // true since at this point we already have modified the graph.
@@ -271,9 +300,14 @@ bool ov::genai::MakePaddingSatateful::run_on_model(const std::shared_ptr& model) {
         auto max_op = std::make_shared(new_ragged_to_dense->input_value(3), select_node);
         new_ragged_to_dense->input(3).replace_source_output(max_op->output(0));
-
+
         ov::replace_node(ragged_to_dense_node, new_ragged_to_dense);
     }
+    model->add_sinks({std::make_shared(pad_right_rv, pad_right_var)});
+    model->add_variables({pad_right_var});
+    model->add_sinks({std::make_shared(pad_to_max_length_rv, pad_to_max_length_var)});
+    model->add_variables({pad_to_max_length_var});
+
     return true;
 }
diff --git a/src/python/py_tokenizer.cpp b/src/python/py_tokenizer.cpp
index aacc063873..e542e5239d 100644
--- a/src/python/py_tokenizer.cpp
+++ b/src/python/py_tokenizer.cpp
@@ -35,8 +35,8 @@ constexpr char class_docstring[] = R"(
 constexpr char common_encode_docstring[] = R"(
     'add_special_tokens' - whether to add special tokens like BOS, EOS, PAD. Default is True.
     'pad_to_max_length' - whether to pad the sequence to the maximum length. Default is False.
-    'max_length' - maximum length of the sequence. If None (default), the value will be taken from the model configuration.
-    'padding_side' - side to pad the sequence, can be 'left' or 'right'. If None (default), the value will be taken from the model configuration.
+    'max_length' - maximum length of the sequence. If None (default), the value will be taken from the IR (where default value from original HF/GGUF model is stored).
+    'padding_side' - side to pad the sequence, can be 'left' or 'right'. If None (default), the value will be taken from the IR (where default value from original HF/GGUF model is stored).
 Returns:
     TokenizedInputs object containing input_ids and attention_mask tensors.
)"; From 5a6d1bb5a088aa33166ed7844594145f5ed9cf46 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Mon, 11 Aug 2025 13:48:34 +0200 Subject: [PATCH 08/10] update cpp docstring --- src/cpp/include/openvino/genai/tokenizer.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cpp/include/openvino/genai/tokenizer.hpp b/src/cpp/include/openvino/genai/tokenizer.hpp index 69e5fb4707..7d436db8d2 100644 --- a/src/cpp/include/openvino/genai/tokenizer.hpp +++ b/src/cpp/include/openvino/genai/tokenizer.hpp @@ -163,9 +163,9 @@ class OPENVINO_GENAI_EXPORTS Tokenizer { * @brief encode a single prompt * @param prompt std::string with input prompt * @param add_special_tokens whether to add special tokens - * @param max_length optional maximum length to which output will be truncated and/or padded. If not defined, taken from IR. + * @param max_length optional maximum length to which output will be truncated and/or padded. If not defined, taken from IR (where default value from original HF/GGUF model is stored). * @param pad_to_max_length either pad to max_length, or pad to the longest sequence in the batch. Default is false. - * @param padding_side side to pad, either "left" or "right". If not defined value is taken from IR. + * @param padding_side side to pad, either "left" or "right". If not defined value is taken from IR (where default value from original HF/GGUF model is stored). * @return pair of [input_ids, attention_mask] */ template @@ -177,9 +177,9 @@ class OPENVINO_GENAI_EXPORTS Tokenizer { * @brief encode batch of prompts. * @param prompts vector storing batch of prompts * @param add_special_tokens whether to add special tokens - * @param max_length optional maximum length to which output will be truncated and/or padded. If not defined, taken from IR. + * @param max_length optional maximum length to which output will be truncated and/or padded. If not defined, taken from IR (where default value from original HF/GGUF model is stored). * @param pad_to_max_length either pad to max_length, or pad to the longest sequence in the batch. Default is false. - * @param padding_side side to pad, either "left" or "right". If not defined value is taken from IR. + * @param padding_side side to pad, either "left" or "right". If not defined value is taken from IR (where default value from original HF/GGUF model is stored). * @return pair of [input_ids, attention_mask] */ template From b65ada50c6c5e27a4a8b89db6c89aa24eab276d7 Mon Sep 17 00:00:00 2001 From: Pavel Esir Date: Mon, 11 Aug 2025 14:08:39 +0200 Subject: [PATCH 09/10] some corrections --- src/cpp/src/tokenizer/make_tokenizer_stateful.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cpp/src/tokenizer/make_tokenizer_stateful.cpp b/src/cpp/src/tokenizer/make_tokenizer_stateful.cpp index e6e89389e3..030c473eb2 100644 --- a/src/cpp/src/tokenizer/make_tokenizer_stateful.cpp +++ b/src/cpp/src/tokenizer/make_tokenizer_stateful.cpp @@ -263,11 +263,10 @@ bool ov::genai::MakePaddingSatateful::run_on_model(const std::shared_ptr(default_false_const, pad_to_max_length_var); auto select_node = std::make_shared(pad_to_max_length_rv, max_length_rv, zero_constant); - // If user called encode without explicitly stating padding side, then we should pad it to the default side. // Here we get that side from the RaggedToDense nodes attribute. 
auto pad_right_attr_visitor = ReadPadRightAttributes(); - bool first_iter = false; + bool first_iter = true; bool default_pad_right = true; for (auto ragged_to_dense_node : ragged_to_dense_nodes) { if (!ragged_to_dense_node) { @@ -279,6 +278,7 @@ bool ov::genai::MakePaddingSatateful::run_on_model(const std::shared_ptr Date: Mon, 11 Aug 2025 14:44:32 +0200 Subject: [PATCH 10/10] update pyi --- .../openvino_genai/py_openvino_genai.pyi | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index cdb1b380c7..991aa63313 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -2974,22 +2974,55 @@ class Tokenizer: def encode(self, prompts: collections.abc.Sequence[str], add_special_tokens: bool = True, pad_to_max_length: bool = False, max_length: typing.SupportsInt | None = None, padding_side: str | None = None) -> TokenizedInputs: """ Encodes a list of prompts into tokenized inputs. + Args: + 'prompts' - list of prompts to encode + 'add_special_tokens' - whether to add special tokens like BOS, EOS, PAD. Default is True. + 'pad_to_max_length' - whether to pad the sequence to the maximum length. Default is False. + 'max_length' - maximum length of the sequence. If None (default), the value will be taken from the IR (where default value from original HF/GGUF model is stored). + 'padding_side' - side to pad the sequence, can be 'left' or 'right'. If None (default), the value will be taken from the IR (where default value from original HF/GGUF model is stored). + Returns: + TokenizedInputs object containing input_ids and attention_mask tensors. """ @typing.overload def encode(self, prompt: str, add_special_tokens: bool = True, pad_to_max_length: bool = False, max_length: typing.SupportsInt | None = None, padding_side: str | None = None) -> TokenizedInputs: """ Encodes a single prompt into tokenized input. + Args: + 'prompt' - prompt to encode + 'add_special_tokens' - whether to add special tokens like BOS, EOS, PAD. Default is True. + 'pad_to_max_length' - whether to pad the sequence to the maximum length. Default is False. + 'max_length' - maximum length of the sequence. If None (default), the value will be taken from the IR (where default value from original HF/GGUF model is stored). + 'padding_side' - side to pad the sequence, can be 'left' or 'right'. If None (default), the value will be taken from the IR (where default value from original HF/GGUF model is stored). + Returns: + TokenizedInputs object containing input_ids and attention_mask tensors. """ @typing.overload def encode(self, prompts_1: collections.abc.Sequence[str], prompts_2: collections.abc.Sequence[str], add_special_tokens: bool = True, pad_to_max_length: bool = False, max_length: typing.SupportsInt | None = None, padding_side: str | None = None) -> TokenizedInputs: """ Encodes a list of prompts into tokenized inputs. The number of strings must be the same, or one of the inputs can contain one string. - In the latter case, the single-string input will be broadcast into the shape of the other input, which is more efficient than repeating the string in pairs. + In the latter case, the single-string input will be broadcast into the shape of the other input, which is more efficient than repeating the string in pairs.) 
+        Args:
+            'prompts_1' - list of prompts to encode
+            'prompts_2' - list of prompts to encode
+            'add_special_tokens' - whether to add special tokens like BOS, EOS, PAD. Default is True.
+            'pad_to_max_length' - whether to pad the sequence to the maximum length. Default is False.
+            'max_length' - maximum length of the sequence. If None (default), the value will be taken from the IR (where default value from original HF/GGUF model is stored).
+            'padding_side' - side to pad the sequence, can be 'left' or 'right'. If None (default), the value will be taken from the IR (where default value from original HF/GGUF model is stored).
+        Returns:
+            TokenizedInputs object containing input_ids and attention_mask tensors.
         """
    @typing.overload
    def encode(self, prompts: list, add_special_tokens: bool = True, pad_to_max_length: bool = False, max_length: typing.SupportsInt | None = None, padding_side: str | None = None) -> TokenizedInputs:
        """
        Encodes a list of paired prompts into tokenized inputs. Input format is same as for HF paired input [[prompt_1, prompt_2], ...].
+        Args:
+            'prompts' - list of prompts to encode
+            'add_special_tokens' - whether to add special tokens like BOS, EOS, PAD. Default is True.
+            'pad_to_max_length' - whether to pad the sequence to the maximum length. Default is False.
+            'max_length' - maximum length of the sequence. If None (default), the value will be taken from the IR (where default value from original HF/GGUF model is stored).
+            'padding_side' - side to pad the sequence, can be 'left' or 'right'. If None (default), the value will be taken from the IR (where default value from original HF/GGUF model is stored).
+        Returns:
+            TokenizedInputs object containing input_ids and attention_mask tensors.
         """
    def get_bos_token(self) -> str:
        ...
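
Example: selecting the padding side at runtime with the API added by this series. This is a minimal sketch under stated assumptions, not part of the patches themselves: the tokenizer directory and prompts are placeholder values, and it assumes a tokenizer IR exported by openvino_tokenizers, which stores the default padding side that is used whenever padding_side is omitted.

import numpy as np
import openvino_genai

# Placeholder path: a directory containing an exported openvino_tokenizer.xml.
tokenizer = openvino_genai.Tokenizer("./tokenizer_dir")

prompts = ["hi", "a noticeably longer prompt that forces padding"]

# Explicit left padding, mirroring HF's tokenizer.padding_side = "left".
left = tokenizer.encode(prompts, pad_to_max_length=True, max_length=16, padding_side="left")

# Explicit right padding.
right = tokenizer.encode(prompts, pad_to_max_length=True, max_length=16, padding_side="right")

# padding_side omitted (None): the default stored in the tokenizer IR is used,
# so existing callers keep the behavior chosen at conversion time.
default = tokenizer.encode(prompts, pad_to_max_length=True, max_length=16)

# For the short prompt, left padding places the zeros of the attention mask first,
# while right padding places them last.
print(np.array(left.attention_mask.data)[0])
print(np.array(right.attention_mask.data)[0])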