diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 4fca4037301fb..71292f5c72ecb 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -21,19 +21,18 @@ namespace onnxruntime { namespace openvino_ep { -GlobalContext& BackendManager::GetGlobalContext() { +GlobalContext* BackendManager::GetGlobalContext() { return global_context_; } -BackendManager::BackendManager(const GlobalContext& global_context, +BackendManager::BackendManager(GlobalContext* global_context, const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, const logging::Logger& logger, EPCtxHandler& ep_ctx_handle_) { global_context_ = global_context; - - openvino_sdk_version_ = std::to_string(global_context_.OpenVINO_Version.at(0)) + "." + - std::to_string(global_context_.OpenVINO_Version.at(1)); + openvino_sdk_version_ = std::to_string(global_context_->OpenVINO_Version.at(0)) + "." + + std::to_string(global_context_->OpenVINO_Version.at(1)); if (ep_ctx_handle_.CheckForOVEPCtxNode(subgraph, openvino_sdk_version_)) { if (ep_ctx_handle_.ImportBlobFromEPCtxModel(subgraph) != Status::OK()) ORT_THROW("Import blob from model failed"); @@ -66,17 +65,17 @@ BackendManager::BackendManager(const GlobalContext& global_context, } subgraph_context_.subgraph_name = fused_node.Name(); auto model_proto = GetModelProtoFromFusedNode(fused_node, subgraph, logger); - std::string device_type = openvino_ep::BackendManager::GetGlobalContext().device_type; + std::string device_type = openvino_ep::BackendManager::GetGlobalContext()->device_type; if (ModelHasSymbolicInputDims(subgraph)) { subgraph_context_.has_dynamic_input_shape = true; LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims"; - ORT_ENFORCE(!global_context_.enable_qdq_optimizer, + ORT_ENFORCE(!global_context_->enable_qdq_optimizer, "QDQ stripping should not be enabled for models with dynamic input shapes. " "Set enable_qdq_optimizer to False"); - if ((GetGlobalContext().device_type.find("CPU") != std::string::npos || - GetGlobalContext().device_type.find("GPU") != std::string::npos) && - !GetGlobalContext().disable_dynamic_shapes) { + if ((GetGlobalContext()->device_type.find("CPU") != std::string::npos || + GetGlobalContext()->device_type.find("GPU") != std::string::npos) && + !GetGlobalContext()->disable_dynamic_shapes) { LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. " << "Creating backend Dynamic Shapes"; try { @@ -110,7 +109,7 @@ BackendManager::BackendManager(const GlobalContext& global_context, } catch (const OnnxRuntimeException& ex) { std::string exception_str = ex.what(); bool eligible_for_cpu_fallback = device_type.find("NPU") != std::string::npos && - !GetGlobalContext().disable_cpu_fallback && + !GetGlobalContext()->disable_cpu_fallback && !ep_ctx_handle_.IsValidOVEPCtxGraph(); #if defined(OPENVINO_DISABLE_NPU_FALLBACK) eligible_for_cpu_fallback = false; @@ -119,8 +118,8 @@ BackendManager::BackendManager(const GlobalContext& global_context, LOGS_DEFAULT(VERBOSE) << exception_str; LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU." 
<< "Falling back to OV CPU for execution"; - GetGlobalContext().device_type = "CPU"; - GetGlobalContext().precision_str = "FP32"; + GetGlobalContext()->device_type = "CPU"; + GetGlobalContext()->precision_str = "FP32"; try { concrete_backend_ = BackendFactory::MakeBackend(model_proto, GetGlobalContext(), @@ -157,7 +156,7 @@ BackendManager::BackendManager(const GlobalContext& global_context, } } } - if (global_context_.export_ep_ctx_blob && !ep_ctx_handle_.IsValidOVEPCtxGraph()) { + if (global_context_->export_ep_ctx_blob && !ep_ctx_handle_.IsValidOVEPCtxGraph()) { auto status = onnxruntime::openvino_ep::BackendManager::ExportCompiledBlobAsEPCtxNode(subgraph, logger); if ((!status.IsOK())) { @@ -172,7 +171,7 @@ BackendManager::BackendManager(const GlobalContext& global_context, // the EPContext node. Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& graph_body_viewer, const logging::Logger& logger) { - if (GetGlobalContext().disable_dynamic_shapes && subgraph_context_.has_dynamic_input_shape) { + if (GetGlobalContext()->disable_dynamic_shapes && subgraph_context_.has_dynamic_input_shape) { std::string exception_str = "Exporting dynamically compiled models at runtime is not supported. " "Cannot export blobs of dynamic models that request static shape inference. " @@ -184,19 +183,19 @@ Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphVie auto compiled_model = concrete_backend_->GetOVCompiledModel(); std::string graph_name = ""; // Epctx file path from SO is mapped to cache_dir variable for OVEP for readability - if (!global_context_.cache_dir.empty()) { - graph_name = global_context_.cache_dir; + if (!global_context_->cache_dir.empty()) { + graph_name = global_context_->cache_dir; } else { - graph_name = global_context_.onnx_model_path_name; + graph_name = global_context_->onnx_model_path_name; // Remove extension so we can append suffix to form the complete name of output graph - size_t dot = global_context_.onnx_model_path_name.find_last_of("."); + size_t dot = global_context_->onnx_model_path_name.find_last_of("."); graph_name = graph_name.substr(0, dot); if (dot != std::string::npos) graph_name += "_ctx.onnx"; } // If embed_mode, then pass on the serialized blob // If not embed_mode, dump the blob here and only pass on the path to the blob - if (global_context_.ep_context_embed_mode) { + if (global_context_->ep_context_embed_mode) { std::ostringstream model_blob_stream; compiled_model.export_model(model_blob_stream); model_blob_str = std::move(model_blob_stream).str(); @@ -218,7 +217,7 @@ Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphVie ORT_RETURN_IF_ERROR(ep_ctx_handle_.ExportEPCtxModel(graph_body_viewer, graph_name, logger, - global_context_.ep_context_embed_mode, + global_context_->ep_context_embed_mode, std::move(model_blob_str), openvino_sdk_version_)); @@ -337,8 +336,8 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, }; // QDQ stripping enabled only for the NPU - if (global_context_.device_type.find("NPU") != std::string::npos && - global_context_.enable_qdq_optimizer && + if (global_context_->device_type.find("NPU") != std::string::npos && + global_context_->enable_qdq_optimizer && IsQDQGraph(subgraph)) { LOGS_DEFAULT(INFO) << "[OpenVINO-EP] QDQ optimization pass status: 1"; std::unique_ptr model; @@ -346,7 +345,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, auto model_proto = model->ToProto(); 
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION); print_model_proto_duration(); - DumpOpenVINOEPModel(global_context_.onnx_model_path_name, model_proto.get(), fused_node); + DumpOpenVINOEPModel(global_context_->onnx_model_path_name, model_proto.get(), fused_node); ORT_ENFORCE(status.IsOK(), status.ErrorMessage()); return model_proto; } else { @@ -356,7 +355,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION); subgraph.ToProto(*model_proto->mutable_graph(), true, true); print_model_proto_duration(); - DumpOpenVINOEPModel(global_context_.onnx_model_path_name, model_proto.get(), fused_node); + DumpOpenVINOEPModel(global_context_->onnx_model_path_name, model_proto.get(), fused_node); return model_proto; } } @@ -448,13 +447,13 @@ void BackendManager::Compute(OrtKernelContext* context) { // by rewriting the model to static shaped model at runtime based on input shape. // disable_dynamic_shapes is always set to true for OV NPU plugin. if (subgraph_context_.has_dynamic_input_shape && - !GetGlobalContext().disable_dynamic_shapes && - (GetGlobalContext().device_type.find("CPU") != std::string::npos || - GetGlobalContext().device_type.find("GPU") != std::string::npos)) { + !GetGlobalContext()->disable_dynamic_shapes && + (GetGlobalContext()->device_type.find("CPU") != std::string::npos || + GetGlobalContext()->device_type.find("GPU") != std::string::npos)) { concrete_backend_->Infer(context); } else if (subgraph_context_.has_dynamic_input_shape) { std::vector> tensor_shapes = GetInputTensorShapes(ctx); - auto key = MakeMapKeyString(tensor_shapes, GetGlobalContext().device_type); + auto key = MakeMapKeyString(tensor_shapes, GetGlobalContext()->device_type); std::shared_ptr dynamic_backend; auto search = backend_map_.find(key); if (search == backend_map_.end()) { @@ -474,14 +473,14 @@ void BackendManager::Compute(OrtKernelContext* context) { LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU."; ORT_THROW(ex.what()); #else - if (GetGlobalContext().device_type.find("NPU") != std::string::npos && - !GetGlobalContext().disable_cpu_fallback) { + if (GetGlobalContext()->device_type.find("NPU") != std::string::npos && + !GetGlobalContext()->disable_cpu_fallback) { LOGS_DEFAULT(WARNING) << ex.what(); LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU." 
<< "Falling back to OV CPU for execution"; - GetGlobalContext().device_type = "CPU"; - GetGlobalContext().precision_str = "FP32"; - key = MakeMapKeyString(tensor_shapes, GetGlobalContext().device_type); + GetGlobalContext()->device_type = "CPU"; + GetGlobalContext()->precision_str = "FP32"; + key = MakeMapKeyString(tensor_shapes, GetGlobalContext()->device_type); try { dynamic_backend = BackendFactory::MakeBackend(modelproto_with_concrete_shapes, GetGlobalContext(), diff --git a/onnxruntime/core/providers/openvino/backend_manager.h b/onnxruntime/core/providers/openvino/backend_manager.h index b9ff7a72372b3..578c1c199f832 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.h +++ b/onnxruntime/core/providers/openvino/backend_manager.h @@ -19,7 +19,7 @@ namespace openvino_ep { // Singleton class that manages all the backends class BackendManager { public: - BackendManager(const GlobalContext& global_context, + BackendManager(GlobalContext* global_context, const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, const logging::Logger& logger, @@ -27,7 +27,7 @@ class BackendManager { void Compute(OrtKernelContext* context); void ShutdownBackendManager(); void SetGlobalCotext(const GlobalContext& global_context); - GlobalContext& GetGlobalContext(); + GlobalContext* GetGlobalContext(); Status ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& subgraph, const logging::Logger& logger); @@ -51,7 +51,7 @@ class BackendManager { std::shared_ptr concrete_backend_; std::map> backend_map_; SubGraphContext subgraph_context_; - GlobalContext global_context_; + GlobalContext* global_context_; EPCtxHandler ep_ctx_handle_{}; std::string openvino_sdk_version_{}; }; diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc index 4d9fbe09f118d..51828938824c4 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.cc +++ b/onnxruntime/core/providers/openvino/backend_utils.cc @@ -41,17 +41,17 @@ struct static_cast_int64 { }; std::shared_ptr -CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, +CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext* global_context, std::map>& const_outputs_map) { if (IsCILogEnabled()) { std::cout << "CreateNgraphFunc" << std::endl; } const std::string model = model_proto.SerializeAsString(); try { - auto cnn_network = global_context.ie_core.ReadModel(model, global_context.onnx_model_path_name); + auto cnn_network = global_context->ie_core.ReadModel(model, global_context->onnx_model_path_name); // Check for Constant Folding - if (!global_context.is_wholly_supported_graph) { + if (!global_context->is_wholly_supported_graph) { ov::pass::ConstantFolding pass_const_obj; pass_const_obj.run_on_model(cnn_network); auto& results = const_cast(cnn_network.get()->get_results()); @@ -130,13 +130,13 @@ GetOutputTensor(Ort::KernelContext& context, return context.GetOutput(index, output_shape.get(), num_dims); } -int GetFirstAvailableDevice(GlobalContext& global_context) { +int GetFirstAvailableDevice(GlobalContext* global_context) { int i = 0; // Get the first available VAD-M device and set the device to busy while (i < 8) { - bool device = global_context.deviceAvailableList[i]; + bool device = global_context->deviceAvailableList[i]; if (device) { - global_context.deviceAvailableList[i] = false; + global_context->deviceAvailableList[i] = false; break; } i++; @@ -145,9 +145,9 @@ int 
GetFirstAvailableDevice(GlobalContext& global_context) { // make all remaining devices free if (i == 8) { i = 0; - global_context.deviceAvailableList[i] = false; + global_context->deviceAvailableList[i] = false; for (int j = 1; j < 8; j++) { - global_context.deviceAvailableList[j] = true; + global_context->deviceAvailableList[j] = true; } } return i; diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h index a105e6b08aade..915fb75a5eb5d 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.h +++ b/onnxruntime/core/providers/openvino/backend_utils.h @@ -34,7 +34,7 @@ bool IsDebugEnabled(); // Internal diagnostic function. bool IsCILogEnabled(); -int GetFirstAvailableDevice(GlobalContext& global_context); +int GetFirstAvailableDevice(GlobalContext* global_context); void FillOutputsWithConstantData(std::shared_ptr node, Ort::UnownedValue& out_tensor); @@ -62,7 +62,7 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor, std::shared_ptr CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, - const GlobalContext& global_context, + const GlobalContext* global_context, std::map>& const_outputs_map); void printPerformanceCounts(const std::vector& performanceMap, diff --git a/onnxruntime/core/providers/openvino/backends/backend_factory.cc b/onnxruntime/core/providers/openvino/backends/backend_factory.cc index b7e4aed6e7e18..88410a8f75e88 100644 --- a/onnxruntime/core/providers/openvino/backends/backend_factory.cc +++ b/onnxruntime/core/providers/openvino/backends/backend_factory.cc @@ -12,10 +12,10 @@ namespace openvino_ep { std::shared_ptr BackendFactory::MakeBackend(std::unique_ptr& model_proto, - GlobalContext& global_context, + GlobalContext* global_context, const SubGraphContext& subgraph_context, EPCtxHandler& ep_ctx_handle) { - std::string type = global_context.device_type; + std::string type = global_context->device_type; if (type == "CPU" || type.find("GPU") != std::string::npos || type.find("NPU") != std::string::npos || type.find("HETERO") != std::string::npos || diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index bfd79bb960dcd..54448067337d7 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -21,11 +21,11 @@ namespace openvino_ep { using namespace backend_utils; BasicBackend::BasicBackend(std::unique_ptr& model_proto, - GlobalContext& global_context, + GlobalContext* global_context, const SubGraphContext& subgraph_context, EPCtxHandler& ep_ctx_handle) : global_context_(global_context), subgraph_context_(subgraph_context) { - std::string& hw_target = global_context_.device_type; + std::string& hw_target = global_context_->device_type; is_ep_ctx_graph_ = ep_ctx_handle.IsValidOVEPCtxGraph(); @@ -48,41 +48,43 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr // Set the inference_num_threads property of the CPU SetNumThreads(device_config); + // set workload type to decide on the performance mode + SetWorkLoadType(device_config); + try { - std::string dev_prec = global_context.device_type + "_" + global_context_.precision_str; + std::string dev_prec = global_context_->device_type + "_" + global_context_->precision_str; - if (global_context.is_wholly_supported_graph) { // Full graph is supported + if (global_context_->is_wholly_supported_graph) { // Full graph is supported #if 
defined(IO_BUFFER_ENABLED) if (is_ep_ctx_graph_) { std::istringstream model_stream(ep_ctx_handle.GetModelBlobString()); - exe_network_ = global_context_.ie_core.ImportModel(model_stream, - remote_context_, - subgraph_context_.subgraph_name); - } else if ((global_context.device_type.find("GPU") != std::string::npos) && - (global_context_.context != nullptr)) { + exe_network_ = global_context_->ie_core.ImportModel(model_stream, + remote_context_, + subgraph_context_.subgraph_name); + } else if ((global_context_->device_type.find("GPU") != std::string::npos) && + (global_context_->context != nullptr)) { LOGS_DEFAULT(INFO) << log_tag << "IO Buffering Enabled"; - cl_context ctx = static_cast(global_context_.context); - remote_context_ = new ov::intel_gpu::ocl::ClContext(global_context_.ie_core.Get(), ctx); + cl_context ctx = static_cast(global_context_->context); + remote_context_ = new ov::intel_gpu::ocl::ClContext(global_context_->ie_core.Get(), ctx); ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_); - exe_network_ = global_context_.ie_core.CompileModel( + exe_network_ = global_context_->ie_core.CompileModel( ie_cnn_network_, remote_context_, subgraph_context_.subgraph_name); } else { ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_); - exe_network_ = global_context_.ie_core.CompileModel( + exe_network_ = global_context_->ie_core.CompileModel( ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name); } #else // !IO_BUFFER_ENABLED - std::string prec_str = (global_context_.precision_str != "ACCURACY") ? global_context_.precision_str : global_context_.model_precision; + std::string prec_str = (global_context_->precision_str != "ACCURACY") ? 
global_context_->precision_str : global_context_->model_precision; if (is_ep_ctx_graph_) { // If the blob is held in an EPContext node, then skip FE+Compile // and directly move on to creating a backend with the executable blob - exe_network_ = global_context_.ie_core.ImportModel(ep_ctx_handle.GetModelBlobStream(), - hw_target, - device_config, - global_context_.ep_context_embed_mode, - subgraph_context_.subgraph_name); - ie_cnn_network_ = exe_network_.Get().get_runtime_model(); - } else if (global_context_.export_ep_ctx_blob && + exe_network_ = global_context_->ie_core.ImportModel(ep_ctx_handle.GetModelBlobStream(), + hw_target, + device_config, + global_context_->ep_context_embed_mode, + subgraph_context_.subgraph_name); + } else if (global_context_->export_ep_ctx_blob && hw_target.find("NPU") != std::string::npos) { std::shared_ptr ov_model; { @@ -90,28 +92,28 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr if (!subgraph_context.has_dynamic_input_shape) { delete model_proto.release(); } - ov_model = global_context_.ie_core.Get().read_model(model, ov::Tensor()); + ov_model = global_context_->ie_core.Get().read_model(model, ov::Tensor()); } - exe_network_ = OVExeNetwork(global_context_.ie_core.Get().compile_model(ov_model, hw_target, device_config)); + exe_network_ = OVExeNetwork(global_context_->ie_core.Get().compile_model(ov_model, hw_target, device_config)); } else if ((!subgraph_context_.has_dynamic_input_shape) && ((hw_target.find("AUTO") == std::string::npos) || - (global_context_.OpenVINO_Version.at(0) >= 2024 && global_context_.OpenVINO_Version.at(1) > 2))) { + (global_context_->OpenVINO_Version.at(0) >= 2024 && global_context_->OpenVINO_Version.at(1) > 2))) { // Optimized OV compile_model API is supported with AUTO from version 2024.3 and above // Inputs with static dimenstions const std::string model = model_proto->SerializeAsString(); - exe_network_ = global_context_.ie_core.CompileModel(model, - hw_target, - device_config, - subgraph_context_.subgraph_name); + exe_network_ = global_context_->ie_core.CompileModel(model, + hw_target, + device_config, + subgraph_context_.subgraph_name); } else { // For all other types use ov::Model Type ie_cnn_network_ = CreateOVModel(*model_proto, global_context_, const_outputs_map_); - exe_network_ = global_context_.ie_core.CompileModel( + exe_network_ = global_context_->ie_core.CompileModel( ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name); } #endif } else { // Full graph is not supported ie_cnn_network_ = CreateOVModel(*model_proto, global_context_, const_outputs_map_); - exe_network_ = global_context_.ie_core.CompileModel( + exe_network_ = global_context_->ie_core.CompileModel( ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name); } LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin"; @@ -135,21 +137,21 @@ bool BasicBackend::ValidateSubgraph(std::mapprecision_str.find("FP16") != std::string::npos && + global_context_->device_type == "GPU") { device_config.emplace(ov::hint::inference_precision("f16")); } - if (global_context_.precision_str.find("FP32") != std::string::npos) { + if (global_context_->precision_str.find("FP32") != std::string::npos) { device_config.emplace(ov::hint::inference_precision("f32")); } - if (global_context_.precision_str.find("ACCURACY") != std::string::npos && - global_context_.device_type == "GPU") { - if (global_context_.OpenVINO_Version.at(0) >= 2024 && global_context_.OpenVINO_Version.at(1) >= 1) { + if 
(global_context_->precision_str.find("ACCURACY") != std::string::npos && + global_context_->device_type == "GPU") { + if (global_context_->OpenVINO_Version.at(0) >= 2024 && global_context_->OpenVINO_Version.at(1) >= 1) { device_config.emplace(ov::hint::inference_precision(ov::element::undefined)); device_config.emplace(ov::hint::execution_mode(ov::hint::ExecutionMode::ACCURACY)); } else { - if (global_context_.model_precision != "") - device_config.emplace(ov::hint::inference_precision(global_context_.model_precision)); + if (global_context_->model_precision != "") + device_config.emplace(ov::hint::inference_precision(global_context_->model_precision)); } } #ifndef NDEBUG @@ -160,10 +162,10 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) { // Set a priority level for the current workload for preemption; default priority is "DEFAULT" // CPU Plugin doesn't support workload priority - if (global_context_.device_type.find("CPU") == std::string::npos) - device_config.emplace(ov::hint::model_priority(global_context_.model_priority)); + if (global_context_->device_type.find("CPU") == std::string::npos) + device_config.emplace(ov::hint::model_priority(global_context_->model_priority)); - if (global_context_.device_type.find("NPU") != std::string::npos) { + if (global_context_->device_type.find("NPU") != std::string::npos) { std::pair device_property; device_property = std::make_pair("NPU_COMPILER_TYPE", "DRIVER"); @@ -173,15 +175,15 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) { } device_config.emplace(ov::device::properties("NPU", device_property)); #if (OPENVINO_VERSION_MAJOR >= 2024) && (OPENVINO_VERSION_MINOR > 3) - if (global_context_.export_ep_ctx_blob) { - global_context_.ie_core.Get().set_property("NPU", ov::intel_npu::bypass_umd_caching(true)); + if (global_context_->export_ep_ctx_blob) { + global_context_->ie_core.Get().set_property("NPU", ov::intel_npu::bypass_umd_caching(true)); } #endif } - if (!global_context_.load_config.empty()) { + if (!global_context_->load_config.empty()) { std::map target_config; - LoadConfig(global_context_.load_config, target_config); + LoadConfig(global_context_->load_config, target_config); // Parse device types like "AUTO:CPU,GPU" and extract individual devices auto parse_individual_devices = [&](const std::string& device_type) -> std::vector { @@ -213,7 +215,7 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) { const std::vector& supported_properties) { for (const auto& [key, value] : config_options) { if (is_supported_and_mutable(key, supported_properties)) { - global_context_.ie_core.Get().set_property(device, ov::AnyMap{{key, value}}); + global_context_->ie_core.Get().set_property(device, ov::AnyMap{{key, value}}); } else { LOGS_DEFAULT(WARNING) << "WARNING: Property \"" << key << "\" is either unsupported in current OpenVINO version" @@ -224,26 +226,26 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) { }; // Check if the device type is AUTO, HETERO, or MULTI - if (global_context_.device_type.find("AUTO") == 0 || - global_context_.device_type.find("HETERO") == 0 || - global_context_.device_type.find("MULTI") == 0) { + if (global_context_->device_type.find("AUTO") == 0 || + global_context_->device_type.find("HETERO") == 0 || + global_context_->device_type.find("MULTI") == 0) { // Parse individual devices (e.g., "AUTO:CPU,GPU" -> ["CPU", "GPU"]) - auto individual_devices = parse_individual_devices(global_context_.device_type); + auto individual_devices = 
parse_individual_devices(global_context_->device_type); // Set properties only for individual devices (e.g., "CPU", "GPU") for (const std::string& device : individual_devices) { if (target_config.count(device)) { // Get supported properties for each individual device - auto device_properties = global_context_.ie_core.Get().get_property(device, ov::supported_properties); + auto device_properties = global_context_->ie_core.Get().get_property(device, ov::supported_properties); // Set properties for the device set_target_properties(device, target_config.at(device), device_properties); } } } else { - if (target_config.count(global_context_.device_type)) { - auto supported_properties = global_context_.ie_core.Get().get_property(global_context_.device_type, + if (target_config.count(global_context_->device_type)) { + auto supported_properties = global_context_->ie_core.Get().get_property(global_context_->device_type, ov::supported_properties); - set_target_properties(global_context_.device_type, - target_config.at(global_context_.device_type), supported_properties); + set_target_properties(global_context_->device_type, + target_config.at(global_context_->device_type), supported_properties); } } } @@ -253,21 +255,21 @@ void BasicBackend::EnableCaching(ov::AnyMap& device_config) { // cache_dir argument has no effect when working with an embed-mode EPContext Graph if (is_ep_ctx_graph_) return; - if (!global_context_.cache_dir.empty() && !global_context_.export_ep_ctx_blob) { + if (!global_context_->cache_dir.empty() && !global_context_->export_ep_ctx_blob) { LOGS_DEFAULT(INFO) << log_tag << "Enables Caching"; - if (global_context_.device_type.find("AUTO:GPU") != std::string::npos) { + if (global_context_->device_type.find("AUTO:GPU") != std::string::npos) { std::pair device_property; - device_property = std::make_pair("CACHE_DIR", global_context_.cache_dir); + device_property = std::make_pair("CACHE_DIR", global_context_->cache_dir); device_config.emplace(ov::device::properties("GPU", device_property)); } else { - global_context_.ie_core.SetCache(global_context_.cache_dir); + global_context_->ie_core.SetCache(global_context_->cache_dir); } } } void BasicBackend::EnableGPUThrottling(ov::AnyMap& device_config) { - if (global_context_.enable_opencl_throttling == true && - global_context_.device_type.find("GPU") != std::string::npos) { + if (global_context_->enable_opencl_throttling == true && + global_context_->device_type.find("GPU") != std::string::npos) { LOGS_DEFAULT(INFO) << log_tag << "Enabled OpenCL queue throttling for GPU device"; std::pair device_property; device_property = std::make_pair("PLUGIN_THROTTLE", "1"); @@ -278,28 +280,38 @@ void BasicBackend::EnableGPUThrottling(ov::AnyMap& device_config) { void BasicBackend::EnableStreams() { // Return silently for NPU as it's currently treated as a read-only flag by the NPU plugin // and throws an exception for the same - if (global_context_.device_type.find("NPU") != std::string::npos) + if (global_context_->device_type.find("NPU") != std::string::npos) return; // Streams can be set only if the device is not one of AUTO, MULTI, or HETERO // Throw an exception if the user tries to set num_streams for these devices - if ((global_context_.device_type.find("MULTI") != std::string::npos) || - (global_context_.device_type.find("HETERO") != std::string::npos) || - (global_context_.device_type.find("AUTO") != std::string::npos)) { - if (global_context_.num_streams != 1) { + if ((global_context_->device_type.find("MULTI") != std::string::npos) || + 
(global_context_->device_type.find("HETERO") != std::string::npos) || + (global_context_->device_type.find("AUTO") != std::string::npos)) { + if (global_context_->num_streams != 1) { ORT_THROW(log_tag + "Cannot set NUM_STREAMS to " + - std::to_string(global_context_.num_streams) + " for device " + global_context_.device_type); + std::to_string(global_context_->num_streams) + " for device " + global_context_->device_type); } // Do nothing } else { - global_context_.ie_core.SetStreams(global_context_.device_type, global_context_.num_streams); + global_context_->ie_core.SetStreams(global_context_->device_type, global_context_->num_streams); } } void BasicBackend::SetNumThreads(ov::AnyMap& device_config) { // inference_num_threads is applicable only for the CPU device - if (global_context_.device_type.find("CPU") != std::string::npos) - device_config.emplace(ov::inference_num_threads(global_context_.num_of_threads)); + if (global_context_->device_type.find("CPU") != std::string::npos) + device_config.emplace(ov::inference_num_threads(global_context_->num_of_threads)); +} + +void BasicBackend::SetWorkLoadType(ov::AnyMap& device_config) { + if ((global_context_->OpenVINO_Version.at(0) >= 2024 && + global_context_->OpenVINO_Version.at(1) >= 3)) { + std::pair device_property; + device_property = std::make_pair("WORKLOAD_TYPE", global_context_->workload_type); + device_config.emplace(ov::device::properties("NPU", device_property)); + LOGS_DEFAULT(INFO) << log_tag << "Set compile time workloadtype as " << global_context_->workload_type; + } } // Starts an asynchronous inference request for data in slice indexed by batch_slice_idx on @@ -330,9 +342,9 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque } size_t batch_slice_idx = 0; if (subgraph_context_.has_dynamic_input_shape && - !global_context_.disable_dynamic_shapes && - (global_context_.device_type.find("CPU") != std::string::npos || - global_context_.device_type.find("GPU") != std::string::npos)) { + !global_context_->disable_dynamic_shapes && + (global_context_->device_type.find("CPU") != std::string::npos || + global_context_->device_type.find("GPU") != std::string::npos)) { auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name)); auto tensor_info = tensor.GetTensorTypeAndShapeInfo(); auto tensor_shape = tensor_info.GetShape(); @@ -347,7 +359,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque auto input = graph_input_info.at(input_idx); OVTensorPtr tensor_ptr; // avoid input copies on the CPU device - if (global_context_.device_type.find("CPU") != std::string::npos) { + if (global_context_->device_type.find("CPU") != std::string::npos) { tensor_ptr = std::make_shared(input.get_element_type(), input_tensor_shape, (void*)tensor_data); } else { @@ -361,8 +373,8 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque ORT_THROW(msg); } } else { - if ((global_context_.device_type.find("CPU") != std::string::npos || - global_context_.device_type.find("GPU") != std::string::npos)) { + if ((global_context_->device_type.find("CPU") != std::string::npos || + global_context_->device_type.find("GPU") != std::string::npos)) { OVTensorPtr graph_input_blob; try { graph_input_blob = infer_request->GetTensor(input_name); @@ -394,7 +406,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque } input_idx++; } - if (global_context_.device_type.find("NPU") != std::string::npos) { + if 
(global_context_->device_type.find("NPU") != std::string::npos) { // Set the output blob as remote blob auto graph_output_info = exe_network_.Get().outputs(); auto output_idx = 0; @@ -440,6 +452,11 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque } // Start Async inference + std::string runtime_workload_type = global_context_->runtime_workload_type; + if (runtime_workload_type == "DEFAULT" || runtime_workload_type == "EFFICIENT") { + LOGS_DEFAULT(VERBOSE) << "[OpenVINO-EP]" << global_context_->runtime_workload_type << " mode is set for OV inference"; + exe_network_.Get().set_property(ov::workload_type(runtime_workload_type)); + } infer_request->StartAsync(); } catch (const char* msg) { ORT_THROW(msg); @@ -548,6 +565,11 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe } // Start Async inference + std::string runtime_workload_type = global_context_->runtime_workload_type; + if (runtime_workload_type == "DEFAULT" || runtime_workload_type == "EFFICIENT") { + LOGS_DEFAULT(VERBOSE) << "[OpenVINO-EP]" << global_context_->runtime_workload_type << " mode is set for OV inference"; + exe_network_.Get().set_property(ov::workload_type(runtime_workload_type)); + } infer_request->StartAsync(); } catch (const char* msg) { ORT_THROW(msg); @@ -588,8 +610,8 @@ void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRe " doesn't exist in the " "list of OpenVINO output tensor names"); } - if ((global_context_.device_type.find("CPU") != std::string::npos || - global_context_.device_type.find("GPU") != std::string::npos)) { + if ((global_context_->device_type.find("CPU") != std::string::npos || + global_context_->device_type.find("GPU") != std::string::npos)) { try { graph_output_blob = infer_request->GetTensor(output_name); } catch (const char* msg) { @@ -664,8 +686,8 @@ void BasicBackend::Infer(OrtKernelContext* ctx) { infer_request = inferRequestsQueue_->getIdleRequest(); #ifdef IO_BUFFER_ENABLED - if ((global_context_.device_type.find("GPU") != std::string::npos) && - (global_context_.context != nullptr) && global_context_.is_wholly_supported_graph) { + if ((global_context_->device_type.find("GPU") != std::string::npos) && + (global_context_->context != nullptr) && global_context_->is_wholly_supported_graph) { try { StartRemoteAsyncInference(context, infer_request); } catch (std::string const& msg) { @@ -709,7 +731,7 @@ void BasicBackend::Infer(OrtKernelContext* ctx) { #ifndef IO_BUFFER_ENABLED // Printing performance counts is disabled when IO_BUFFER_ENABLED if (openvino_ep::backend_utils::IsDebugEnabled()) { inferRequestsQueue_->printstatus(); // Printing the elements of infer_requests_ vector pool only in debug mode - std::string& hw_target = global_context_.device_type; + std::string& hw_target = global_context_->device_type; printPerformanceCounts(std::move(infer_request_), std::cout, hw_target); } #endif diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h index 12502a1d83c5d..5143c10227377 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.h +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h @@ -30,7 +30,7 @@ class InferRequestsQueue; class BasicBackend : public IBackend { public: BasicBackend(std::unique_ptr& model_proto, - GlobalContext& global_context, + GlobalContext* global_context, const SubGraphContext& subgraph_context, EPCtxHandler& ep_ctx_handle); @@ -47,6 +47,7 @@ class 
BasicBackend : public IBackend { void EnableGPUThrottling(ov::AnyMap& device_config); void EnableStreams(); void SetNumThreads(ov::AnyMap& device_config); + void SetWorkLoadType(ov::AnyMap& device_config); void StartAsyncInference(Ort::KernelContext& context, std::shared_ptr infer_request); #ifdef IO_BUFFER_ENABLED @@ -55,7 +56,7 @@ class BasicBackend : public IBackend { void CompleteAsyncInference(Ort::KernelContext& context, std::shared_ptr infer_request); - GlobalContext& global_context_; + GlobalContext* global_context_; SubGraphContext subgraph_context_; mutable std::mutex compute_lock_; std::shared_ptr ie_cnn_network_; diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h index 2d238917eb8ed..36e449c4b7e44 100644 --- a/onnxruntime/core/providers/openvino/contexts.h +++ b/onnxruntime/core/providers/openvino/contexts.h @@ -33,6 +33,8 @@ struct GlobalContext { std::vector deviceAvailableList = {true, true, true, true, true, true, true, true}; std::string onnx_model_name; std::string onnx_model_path_name; + std::string workload_type; + std::string runtime_workload_type = ""; int onnx_opset_version; void* context = 0; bool use_api_2; diff --git a/onnxruntime/core/providers/openvino/ibackend.h b/onnxruntime/core/providers/openvino/ibackend.h index 7a2d6f4e8cd69..d6a836073f3eb 100644 --- a/onnxruntime/core/providers/openvino/ibackend.h +++ b/onnxruntime/core/providers/openvino/ibackend.h @@ -21,7 +21,7 @@ class BackendFactory { public: static std::shared_ptr MakeBackend(std::unique_ptr& model_proto, - GlobalContext& global_context, + GlobalContext* global_context, const SubGraphContext& subgraph_context, EPCtxHandler& ctx_handle); }; diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index c55e7a607e496..1a01d6788ff21 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -13,6 +13,7 @@ #ifdef USE_OVEP_NPU_MEMORY #include "core/providers/openvino/ov_allocator.h" #endif +#include "core/session/onnxruntime_run_options_config_keys.h" #define MEMCPY_S(dest, src, destsz, srcsz) memcpy(dest, src, std::min(destsz, srcsz)) @@ -39,6 +40,7 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv global_context_->enable_qdq_optimizer = info.enable_qdq_optimizer_; global_context_->disable_cpu_fallback = info.disable_cpu_fallback_; global_context_->ep_context_embed_mode = info.so_epctx_embed_mode_; + global_context_->workload_type = info.so_workload_type_; // to check if target device is available // using ie_core capability GetAvailableDevices to fetch list of devices plugged in @@ -143,14 +145,12 @@ common::Status OpenVINOExecutionProvider::Compile( // During backend creation, we check if user wants to use precompiled blob onnx model or the original model // For precompiled blob, directly load the model instead of compiling the model // For original model, check if the user wants to export a model with pre-compiled blob - std::shared_ptr backend_manager = - std::make_shared(*global_context_, + std::make_shared(global_context_.get(), fused_node, graph_body_viewer, *GetLogger(), ep_ctx_handle_); - compute_info.create_state_func = [backend_manager](ComputeContext* context, FunctionState* state) { OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState(); @@ -180,10 +180,27 @@ common::Status OpenVINOExecutionProvider::Compile( 
}; node_compute_funcs.push_back(compute_info); } - + return Status::OK(); +} +common::Status OpenVINOExecutionProvider::OnRunStart(const onnxruntime::RunOptions& run_options) { + auto workload_type_opt = run_options.GetConfigOptions().GetConfigEntry(kOrtRunOptionsWorkloadType); + if (workload_type_opt.has_value()) { + std::string workload_type = workload_type_opt.value(); + LOGS_DEFAULT(INFO) << "[OpenVINO-EP]" << "Workload type from ORT RunOption = " << workload_type; + std::transform(workload_type.begin(), workload_type.end(), workload_type.begin(), ::tolower); + if (workload_type == "default") { + global_context_->runtime_workload_type = "DEFAULT"; + } else if (workload_type == "efficient") { + global_context_->runtime_workload_type = "EFFICIENT"; + } + } return Status::OK(); } +common::Status OpenVINOExecutionProvider::OnRunEnd(bool /*sync_stream*/, const onnxruntime::RunOptions& run_options) { + global_context_->runtime_workload_type = global_context_->workload_type; + return Status::OK(); +} #ifdef USE_OVEP_NPU_MEMORY std::vector OpenVINOExecutionProvider::CreatePreferredAllocators() { AllocatorCreationInfo npu_allocator_info{ diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h index cc09a4c6878b0..8171712687fb2 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h @@ -92,6 +92,7 @@ struct OpenVINOExecutionProviderInfo { bool enable_qdq_optimizer_{false}; bool disable_cpu_fallback_{false}; bool so_epctx_embed_mode_{true}; + std::string so_workload_type_{""}; OpenVINOExecutionProviderInfo() = delete; @@ -102,7 +103,7 @@ struct OpenVINOExecutionProviderInfo { void* context, bool enable_opencl_throttling, bool disable_dynamic_shapes, bool export_ep_ctx_blob, bool enable_qdq_optimizer, bool disable_cpu_fallback, - bool so_epctx_embed_mode) + bool so_epctx_embed_mode, std::string so_workload_type) : precision_(std::move(precision)), enable_npu_fast_compile_(enable_npu_fast_compile), num_of_threads_(num_of_threads), @@ -116,7 +117,8 @@ struct OpenVINOExecutionProviderInfo { export_ep_ctx_blob_(export_ep_ctx_blob), enable_qdq_optimizer_(enable_qdq_optimizer), disable_cpu_fallback_(disable_cpu_fallback), - so_epctx_embed_mode_{so_epctx_embed_mode} { + so_epctx_embed_mode_{so_epctx_embed_mode}, + so_workload_type_(so_workload_type) { std::set ov_supported_device_types = {"CPU", "GPU", "GPU.0", "GPU.1", "NPU"}; @@ -188,6 +190,9 @@ class OpenVINOExecutionProvider : public IExecutionProvider { Status Compile(const std::vector& fused_nodes, std::vector& node_compute_funcs) override; + Status OnRunStart(const onnxruntime::RunOptions& run_options) override; + + Status OnRunEnd(bool sync_stream, const onnxruntime::RunOptions& run_options) override; const void* GetExecutionHandle() const noexcept override { return nullptr; diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc index c69d53638ae90..5eba00ba6c419 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc +++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc @@ -53,7 +53,7 @@ std::unique_ptr OpenVINOProviderFactory::CreateProvider() { bool so_export_ep_ctx_blob = config_options_.GetConfigOrDefault("ep.context_enable", "0") == "1"; bool so_epctx_embed_mode = config_options_.GetConfigOrDefault("ep.context_embed_mode", "1") == 
"1"; std::string so_cache_path = config_options_.GetConfigOrDefault("ep.context_file_path", "").c_str(); - + std::string so_workload_type_ = config_options_.GetConfigOrDefault("session.workload_type", "").c_str(); if (so_export_ep_ctx_blob && !so_cache_path.empty()) { cache_dir_ = so_cache_path; auto file_path = std::filesystem::path(cache_dir_); @@ -70,11 +70,21 @@ std::unique_ptr OpenVINOProviderFactory::CreateProvider() { ORT_THROW("[ERROR] [OpenVINO] Invalid ep_ctx_file_path" + cache_dir_ + " \n"); } } + if (!so_workload_type_.empty()) { + std::transform(so_workload_type_.begin(), so_workload_type_.end(), so_workload_type_.begin(), ::tolower); + if (so_workload_type_ == "default") { + so_workload_type_ = "DEFAULT"; + } else if (so_workload_type_ == "efficient") { + so_workload_type_ = "EFFICIENT"; + } else { + ORT_THROW("[ERROR] [OpenVINO] Invalid workload_type - Supported modes are Default and Efficient \n"); + } + } OpenVINOExecutionProviderInfo info(device_type_, precision_, enable_npu_fast_compile_, num_of_threads_, load_config_, cache_dir_, model_priority_, num_streams_, context_, enable_opencl_throttling_, disable_dynamic_shapes_, so_export_ep_ctx_blob, enable_qdq_optimizer_, - so_disable_cpu_fallback, so_epctx_embed_mode); + so_disable_cpu_fallback, so_epctx_embed_mode, so_workload_type_); return std::make_unique(info); }