Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 33 additions & 34 deletions onnxruntime/core/providers/openvino/backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,18 @@
namespace onnxruntime {
namespace openvino_ep {

GlobalContext& BackendManager::GetGlobalContext() {
// Accessor for the GlobalContext shared across the OpenVINO EP.
// Returns the non-owning pointer stored in global_context_ (set in the
// constructor); callers must not delete it. NOTE(review): lifetime is
// presumably owned by the execution provider that created this
// BackendManager — confirm against the caller.
GlobalContext* BackendManager::GetGlobalContext() {
return global_context_;
}

BackendManager::BackendManager(const GlobalContext& global_context,
BackendManager::BackendManager(GlobalContext* global_context,
const onnxruntime::Node& fused_node,
const onnxruntime::GraphViewer& subgraph,
const logging::Logger& logger,
EPCtxHandler& ep_ctx_handle_) {
global_context_ = global_context;

openvino_sdk_version_ = std::to_string(global_context_.OpenVINO_Version.at(0)) + "." +
std::to_string(global_context_.OpenVINO_Version.at(1));
openvino_sdk_version_ = std::to_string(global_context_->OpenVINO_Version.at(0)) + "." +
std::to_string(global_context_->OpenVINO_Version.at(1));
if (ep_ctx_handle_.CheckForOVEPCtxNode(subgraph, openvino_sdk_version_)) {
if (ep_ctx_handle_.ImportBlobFromEPCtxModel(subgraph) != Status::OK())
ORT_THROW("Import blob from model failed");
Expand Down Expand Up @@ -66,17 +65,17 @@ BackendManager::BackendManager(const GlobalContext& global_context,
}
subgraph_context_.subgraph_name = fused_node.Name();
auto model_proto = GetModelProtoFromFusedNode(fused_node, subgraph, logger);
std::string device_type = openvino_ep::BackendManager::GetGlobalContext().device_type;
std::string device_type = openvino_ep::BackendManager::GetGlobalContext()->device_type;

if (ModelHasSymbolicInputDims(subgraph)) {
subgraph_context_.has_dynamic_input_shape = true;
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims";
ORT_ENFORCE(!global_context_.enable_qdq_optimizer,
ORT_ENFORCE(!global_context_->enable_qdq_optimizer,
"QDQ stripping should not be enabled for models with dynamic input shapes. "
"Set enable_qdq_optimizer to False");
if ((GetGlobalContext().device_type.find("CPU") != std::string::npos ||
GetGlobalContext().device_type.find("GPU") != std::string::npos) &&
!GetGlobalContext().disable_dynamic_shapes) {
if ((GetGlobalContext()->device_type.find("CPU") != std::string::npos ||
GetGlobalContext()->device_type.find("GPU") != std::string::npos) &&
!GetGlobalContext()->disable_dynamic_shapes) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. "
<< "Creating backend Dynamic Shapes";
try {
Expand Down Expand Up @@ -110,7 +109,7 @@ BackendManager::BackendManager(const GlobalContext& global_context,
} catch (const OnnxRuntimeException& ex) {
std::string exception_str = ex.what();
bool eligible_for_cpu_fallback = device_type.find("NPU") != std::string::npos &&
!GetGlobalContext().disable_cpu_fallback &&
!GetGlobalContext()->disable_cpu_fallback &&
!ep_ctx_handle_.IsValidOVEPCtxGraph();
#if defined(OPENVINO_DISABLE_NPU_FALLBACK)
eligible_for_cpu_fallback = false;
Expand All @@ -119,8 +118,8 @@ BackendManager::BackendManager(const GlobalContext& global_context,
LOGS_DEFAULT(VERBOSE) << exception_str;
LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU."
<< "Falling back to OV CPU for execution";
GetGlobalContext().device_type = "CPU";
GetGlobalContext().precision_str = "FP32";
GetGlobalContext()->device_type = "CPU";
GetGlobalContext()->precision_str = "FP32";
try {
concrete_backend_ = BackendFactory::MakeBackend(model_proto,
GetGlobalContext(),
Expand Down Expand Up @@ -157,7 +156,7 @@ BackendManager::BackendManager(const GlobalContext& global_context,
}
}
}
if (global_context_.export_ep_ctx_blob && !ep_ctx_handle_.IsValidOVEPCtxGraph()) {
if (global_context_->export_ep_ctx_blob && !ep_ctx_handle_.IsValidOVEPCtxGraph()) {
auto status = onnxruntime::openvino_ep::BackendManager::ExportCompiledBlobAsEPCtxNode(subgraph,
logger);
if ((!status.IsOK())) {
Expand All @@ -172,7 +171,7 @@ BackendManager::BackendManager(const GlobalContext& global_context,
// the EPContext node.
Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& graph_body_viewer,
const logging::Logger& logger) {
if (GetGlobalContext().disable_dynamic_shapes && subgraph_context_.has_dynamic_input_shape) {
if (GetGlobalContext()->disable_dynamic_shapes && subgraph_context_.has_dynamic_input_shape) {
std::string exception_str =
"Exporting dynamically compiled models at runtime is not supported. "
"Cannot export blobs of dynamic models that request static shape inference. "
Expand All @@ -184,19 +183,19 @@ Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphVie
auto compiled_model = concrete_backend_->GetOVCompiledModel();
std::string graph_name = "";
// Epctx file path from SO is mapped to cache_dir variable for OVEP for readability
if (!global_context_.cache_dir.empty()) {
graph_name = global_context_.cache_dir;
if (!global_context_->cache_dir.empty()) {
graph_name = global_context_->cache_dir;
} else {
graph_name = global_context_.onnx_model_path_name;
graph_name = global_context_->onnx_model_path_name;
// Remove extension so we can append suffix to form the complete name of output graph
size_t dot = global_context_.onnx_model_path_name.find_last_of(".");
size_t dot = global_context_->onnx_model_path_name.find_last_of(".");
graph_name = graph_name.substr(0, dot);
if (dot != std::string::npos) graph_name += "_ctx.onnx";
}

// If embed_mode, then pass on the serialized blob
// If not embed_mode, dump the blob here and only pass on the path to the blob
if (global_context_.ep_context_embed_mode) {
if (global_context_->ep_context_embed_mode) {
std::ostringstream model_blob_stream;
compiled_model.export_model(model_blob_stream);
model_blob_str = std::move(model_blob_stream).str();
Expand All @@ -218,7 +217,7 @@ Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphVie
ORT_RETURN_IF_ERROR(ep_ctx_handle_.ExportEPCtxModel(graph_body_viewer,
graph_name,
logger,
global_context_.ep_context_embed_mode,
global_context_->ep_context_embed_mode,
std::move(model_blob_str),
openvino_sdk_version_));

Expand Down Expand Up @@ -337,16 +336,16 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
};

// QDQ stripping enabled only for the NPU
if (global_context_.device_type.find("NPU") != std::string::npos &&
global_context_.enable_qdq_optimizer &&
if (global_context_->device_type.find("NPU") != std::string::npos &&
global_context_->enable_qdq_optimizer &&
IsQDQGraph(subgraph)) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] QDQ optimization pass status: 1";
std::unique_ptr<onnxruntime::Model> model;
Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, model);
auto model_proto = model->ToProto();
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
print_model_proto_duration();
DumpOpenVINOEPModel(global_context_.onnx_model_path_name, model_proto.get(), fused_node);
DumpOpenVINOEPModel(global_context_->onnx_model_path_name, model_proto.get(), fused_node);
ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
return model_proto;
} else {
Expand All @@ -356,7 +355,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
subgraph.ToProto(*model_proto->mutable_graph(), true, true);
print_model_proto_duration();
DumpOpenVINOEPModel(global_context_.onnx_model_path_name, model_proto.get(), fused_node);
DumpOpenVINOEPModel(global_context_->onnx_model_path_name, model_proto.get(), fused_node);
return model_proto;
}
}
Expand Down Expand Up @@ -448,13 +447,13 @@ void BackendManager::Compute(OrtKernelContext* context) {
// by rewriting the model to static shaped model at runtime based on input shape.
// disable_dynamic_shapes is always set to true for OV NPU plugin.
if (subgraph_context_.has_dynamic_input_shape &&
!GetGlobalContext().disable_dynamic_shapes &&
(GetGlobalContext().device_type.find("CPU") != std::string::npos ||
GetGlobalContext().device_type.find("GPU") != std::string::npos)) {
!GetGlobalContext()->disable_dynamic_shapes &&
(GetGlobalContext()->device_type.find("CPU") != std::string::npos ||
GetGlobalContext()->device_type.find("GPU") != std::string::npos)) {
concrete_backend_->Infer(context);
} else if (subgraph_context_.has_dynamic_input_shape) {
std::vector<std::vector<int64_t>> tensor_shapes = GetInputTensorShapes(ctx);
auto key = MakeMapKeyString(tensor_shapes, GetGlobalContext().device_type);
auto key = MakeMapKeyString(tensor_shapes, GetGlobalContext()->device_type);
std::shared_ptr<IBackend> dynamic_backend;
auto search = backend_map_.find(key);
if (search == backend_map_.end()) {
Expand All @@ -474,14 +473,14 @@ void BackendManager::Compute(OrtKernelContext* context) {
LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU.";
ORT_THROW(ex.what());
#else
if (GetGlobalContext().device_type.find("NPU") != std::string::npos &&
!GetGlobalContext().disable_cpu_fallback) {
if (GetGlobalContext()->device_type.find("NPU") != std::string::npos &&
!GetGlobalContext()->disable_cpu_fallback) {
LOGS_DEFAULT(WARNING) << ex.what();
LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU."
<< "Falling back to OV CPU for execution";
GetGlobalContext().device_type = "CPU";
GetGlobalContext().precision_str = "FP32";
key = MakeMapKeyString(tensor_shapes, GetGlobalContext().device_type);
GetGlobalContext()->device_type = "CPU";
GetGlobalContext()->precision_str = "FP32";
key = MakeMapKeyString(tensor_shapes, GetGlobalContext()->device_type);
try {
dynamic_backend = BackendFactory::MakeBackend(modelproto_with_concrete_shapes,
GetGlobalContext(),
Expand Down
6 changes: 3 additions & 3 deletions onnxruntime/core/providers/openvino/backend_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@ namespace openvino_ep {
// Singleton class that manages all the backends
class BackendManager {
public:
BackendManager(const GlobalContext& global_context,
BackendManager(GlobalContext* global_context,
const onnxruntime::Node& fused_node,
const onnxruntime::GraphViewer& subgraph,
const logging::Logger& logger,
EPCtxHandler& ctx_handle);
void Compute(OrtKernelContext* context);
void ShutdownBackendManager();
void SetGlobalCotext(const GlobalContext& global_context);
GlobalContext& GetGlobalContext();
GlobalContext* GetGlobalContext();
Status ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& subgraph,
const logging::Logger& logger);

Expand All @@ -51,7 +51,7 @@ class BackendManager {
std::shared_ptr<IBackend> concrete_backend_;
std::map<std::string, std::shared_ptr<IBackend>> backend_map_;
SubGraphContext subgraph_context_;
GlobalContext global_context_;
GlobalContext* global_context_;
EPCtxHandler ep_ctx_handle_{};
std::string openvino_sdk_version_{};
};
Expand Down
16 changes: 8 additions & 8 deletions onnxruntime/core/providers/openvino/backend_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,17 @@ struct static_cast_int64 {
};

std::shared_ptr<OVNetwork>
CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context,
CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext* global_context,
std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
if (IsCILogEnabled()) {
std::cout << "CreateNgraphFunc" << std::endl;
}
const std::string model = model_proto.SerializeAsString();
try {
auto cnn_network = global_context.ie_core.ReadModel(model, global_context.onnx_model_path_name);
auto cnn_network = global_context->ie_core.ReadModel(model, global_context->onnx_model_path_name);

// Check for Constant Folding
if (!global_context.is_wholly_supported_graph) {
if (!global_context->is_wholly_supported_graph) {
ov::pass::ConstantFolding pass_const_obj;
pass_const_obj.run_on_model(cnn_network);
auto& results = const_cast<ov::ResultVector&>(cnn_network.get()->get_results());
Expand Down Expand Up @@ -130,13 +130,13 @@ GetOutputTensor(Ort::KernelContext& context,
return context.GetOutput(index, output_shape.get(), num_dims);
}

int GetFirstAvailableDevice(GlobalContext& global_context) {
int GetFirstAvailableDevice(GlobalContext* global_context) {
int i = 0;
// Get the first available VAD-M device and set the device to busy
while (i < 8) {
bool device = global_context.deviceAvailableList[i];
bool device = global_context->deviceAvailableList[i];
if (device) {
global_context.deviceAvailableList[i] = false;
global_context->deviceAvailableList[i] = false;
break;
}
i++;
Expand All @@ -145,9 +145,9 @@ int GetFirstAvailableDevice(GlobalContext& global_context) {
// make all remaining devices free
if (i == 8) {
i = 0;
global_context.deviceAvailableList[i] = false;
global_context->deviceAvailableList[i] = false;
for (int j = 1; j < 8; j++) {
global_context.deviceAvailableList[j] = true;
global_context->deviceAvailableList[j] = true;
}
}
return i;
Expand Down
4 changes: 2 additions & 2 deletions onnxruntime/core/providers/openvino/backend_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ bool IsDebugEnabled();
// Internal diagnostic function.
bool IsCILogEnabled();

int GetFirstAvailableDevice(GlobalContext& global_context);
int GetFirstAvailableDevice(GlobalContext* global_context);

void FillOutputsWithConstantData(std::shared_ptr<ov::Node> node, Ort::UnownedValue& out_tensor);

Expand Down Expand Up @@ -62,7 +62,7 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,

std::shared_ptr<OVNetwork>
CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto,
const GlobalContext& global_context,
const GlobalContext* global_context,
std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);

void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ namespace openvino_ep {

std::shared_ptr<IBackend>
BackendFactory::MakeBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_proto,
GlobalContext& global_context,
GlobalContext* global_context,
const SubGraphContext& subgraph_context,
EPCtxHandler& ep_ctx_handle) {
std::string type = global_context.device_type;
std::string type = global_context->device_type;
if (type == "CPU" || type.find("GPU") != std::string::npos ||
type.find("NPU") != std::string::npos ||
type.find("HETERO") != std::string::npos ||
Expand Down
Loading
Loading