Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 33 additions & 34 deletions onnxruntime/core/providers/openvino/backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,18 @@
namespace onnxruntime {
namespace openvino_ep {

GlobalContext& BackendManager::GetGlobalContext() {
// Accessor for the GlobalContext shared across the OpenVINO EP.
// Returns the non-owning pointer stored in global_context_ (set in the
// constructor); callers must not delete it. NOTE(review): lifetime is
// presumably owned by the execution provider that created this
// BackendManager — confirm against the caller.
GlobalContext* BackendManager::GetGlobalContext() {
return global_context_;
}

BackendManager::BackendManager(const GlobalContext& global_context,
BackendManager::BackendManager(GlobalContext* global_context,
const onnxruntime::Node& fused_node,
const onnxruntime::GraphViewer& subgraph,
const logging::Logger& logger,
EPCtxHandler& ep_ctx_handle_) {
global_context_ = global_context;

openvino_sdk_version_ = std::to_string(global_context_.OpenVINO_Version.at(0)) + "." +
std::to_string(global_context_.OpenVINO_Version.at(1));
openvino_sdk_version_ = std::to_string(global_context_->OpenVINO_Version.at(0)) + "." +
std::to_string(global_context_->OpenVINO_Version.at(1));
if (ep_ctx_handle_.CheckForOVEPCtxNode(subgraph, openvino_sdk_version_)) {
if (ep_ctx_handle_.ImportBlobFromEPCtxModel(subgraph) != Status::OK())
ORT_THROW("Import blob from model failed");
Expand Down Expand Up @@ -66,17 +65,17 @@ BackendManager::BackendManager(const GlobalContext& global_context,
}
subgraph_context_.subgraph_name = fused_node.Name();
auto model_proto = GetModelProtoFromFusedNode(fused_node, subgraph, logger);
std::string device_type = openvino_ep::BackendManager::GetGlobalContext().device_type;
std::string device_type = openvino_ep::BackendManager::GetGlobalContext()->device_type;

if (ModelHasSymbolicInputDims(subgraph)) {
subgraph_context_.has_dynamic_input_shape = true;
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims";
ORT_ENFORCE(!global_context_.enable_qdq_optimizer,
ORT_ENFORCE(!global_context_->enable_qdq_optimizer,
"QDQ stripping should not be enabled for models with dynamic input shapes. "
"Set enable_qdq_optimizer to False");
if ((GetGlobalContext().device_type.find("CPU") != std::string::npos ||
GetGlobalContext().device_type.find("GPU") != std::string::npos) &&
!GetGlobalContext().disable_dynamic_shapes) {
if ((GetGlobalContext()->device_type.find("CPU") != std::string::npos ||
GetGlobalContext()->device_type.find("GPU") != std::string::npos) &&
!GetGlobalContext()->disable_dynamic_shapes) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. "
<< "Creating backend Dynamic Shapes";
try {
Expand Down Expand Up @@ -110,7 +109,7 @@ BackendManager::BackendManager(const GlobalContext& global_context,
} catch (const OnnxRuntimeException& ex) {
std::string exception_str = ex.what();
bool eligible_for_cpu_fallback = device_type.find("NPU") != std::string::npos &&
!GetGlobalContext().disable_cpu_fallback &&
!GetGlobalContext()->disable_cpu_fallback &&
!ep_ctx_handle_.IsValidOVEPCtxGraph();
#if defined(OPENVINO_DISABLE_NPU_FALLBACK)
eligible_for_cpu_fallback = false;
Expand All @@ -119,8 +118,8 @@ BackendManager::BackendManager(const GlobalContext& global_context,
LOGS_DEFAULT(VERBOSE) << exception_str;
LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU."
<< "Falling back to OV CPU for execution";
GetGlobalContext().device_type = "CPU";
GetGlobalContext().precision_str = "FP32";
GetGlobalContext()->device_type = "CPU";
GetGlobalContext()->precision_str = "FP32";
try {
concrete_backend_ = BackendFactory::MakeBackend(model_proto,
GetGlobalContext(),
Expand Down Expand Up @@ -157,7 +156,7 @@ BackendManager::BackendManager(const GlobalContext& global_context,
}
}
}
if (global_context_.export_ep_ctx_blob && !ep_ctx_handle_.IsValidOVEPCtxGraph()) {
if (global_context_->export_ep_ctx_blob && !ep_ctx_handle_.IsValidOVEPCtxGraph()) {
auto status = onnxruntime::openvino_ep::BackendManager::ExportCompiledBlobAsEPCtxNode(subgraph,
logger);
if ((!status.IsOK())) {
Expand All @@ -172,7 +171,7 @@ BackendManager::BackendManager(const GlobalContext& global_context,
// the EPContext node.
Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& graph_body_viewer,
const logging::Logger& logger) {
if (GetGlobalContext().disable_dynamic_shapes && subgraph_context_.has_dynamic_input_shape) {
if (GetGlobalContext()->disable_dynamic_shapes && subgraph_context_.has_dynamic_input_shape) {
std::string exception_str =
"Exporting dynamically compiled models at runtime is not supported. "
"Cannot export blobs of dynamic models that request static shape inference. "
Expand All @@ -184,19 +183,19 @@ Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphVie
auto compiled_model = concrete_backend_->GetOVCompiledModel();
std::string graph_name = "";
// Epctx file path from SO is mapped to cache_dir variable for OVEP for readability
if (!global_context_.cache_dir.empty()) {
graph_name = global_context_.cache_dir;
if (!global_context_->cache_dir.empty()) {
graph_name = global_context_->cache_dir;
} else {
graph_name = global_context_.onnx_model_path_name;
graph_name = global_context_->onnx_model_path_name;
// Remove extension so we can append suffix to form the complete name of output graph
size_t dot = global_context_.onnx_model_path_name.find_last_of(".");
size_t dot = global_context_->onnx_model_path_name.find_last_of(".");
graph_name = graph_name.substr(0, dot);
if (dot != std::string::npos) graph_name += "_ctx.onnx";
}

// If embed_mode, then pass on the serialized blob
// If not embed_mode, dump the blob here and only pass on the path to the blob
if (global_context_.ep_context_embed_mode) {
if (global_context_->ep_context_embed_mode) {
std::ostringstream model_blob_stream;
compiled_model.export_model(model_blob_stream);
model_blob_str = std::move(model_blob_stream).str();
Expand All @@ -218,7 +217,7 @@ Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphVie
ORT_RETURN_IF_ERROR(ep_ctx_handle_.ExportEPCtxModel(graph_body_viewer,
graph_name,
logger,
global_context_.ep_context_embed_mode,
global_context_->ep_context_embed_mode,
std::move(model_blob_str),
openvino_sdk_version_));

Expand Down Expand Up @@ -337,16 +336,16 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
};

// QDQ stripping enabled only for the NPU
if (global_context_.device_type.find("NPU") != std::string::npos &&
global_context_.enable_qdq_optimizer &&
if (global_context_->device_type.find("NPU") != std::string::npos &&
global_context_->enable_qdq_optimizer &&
IsQDQGraph(subgraph)) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] QDQ optimization pass status: 1";
std::unique_ptr<onnxruntime::Model> model;
Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, model);
auto model_proto = model->ToProto();
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
print_model_proto_duration();
DumpOpenVINOEPModel(global_context_.onnx_model_path_name, model_proto.get(), fused_node);
DumpOpenVINOEPModel(global_context_->onnx_model_path_name, model_proto.get(), fused_node);
ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
return model_proto;
} else {
Expand All @@ -356,7 +355,7 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
subgraph.ToProto(*model_proto->mutable_graph(), true, true);
print_model_proto_duration();
DumpOpenVINOEPModel(global_context_.onnx_model_path_name, model_proto.get(), fused_node);
DumpOpenVINOEPModel(global_context_->onnx_model_path_name, model_proto.get(), fused_node);
return model_proto;
}
}
Expand Down Expand Up @@ -448,13 +447,13 @@ void BackendManager::Compute(OrtKernelContext* context) {
// by rewriting the model to static shaped model at runtime based on input shape.
// disable_dynamic_shapes is always set to true for OV NPU plugin.
if (subgraph_context_.has_dynamic_input_shape &&
!GetGlobalContext().disable_dynamic_shapes &&
(GetGlobalContext().device_type.find("CPU") != std::string::npos ||
GetGlobalContext().device_type.find("GPU") != std::string::npos)) {
!GetGlobalContext()->disable_dynamic_shapes &&
(GetGlobalContext()->device_type.find("CPU") != std::string::npos ||
GetGlobalContext()->device_type.find("GPU") != std::string::npos)) {
concrete_backend_->Infer(context);
} else if (subgraph_context_.has_dynamic_input_shape) {
std::vector<std::vector<int64_t>> tensor_shapes = GetInputTensorShapes(ctx);
auto key = MakeMapKeyString(tensor_shapes, GetGlobalContext().device_type);
auto key = MakeMapKeyString(tensor_shapes, GetGlobalContext()->device_type);
std::shared_ptr<IBackend> dynamic_backend;
auto search = backend_map_.find(key);
if (search == backend_map_.end()) {
Expand All @@ -474,14 +473,14 @@ void BackendManager::Compute(OrtKernelContext* context) {
LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU.";
ORT_THROW(ex.what());
#else
if (GetGlobalContext().device_type.find("NPU") != std::string::npos &&
!GetGlobalContext().disable_cpu_fallback) {
if (GetGlobalContext()->device_type.find("NPU") != std::string::npos &&
!GetGlobalContext()->disable_cpu_fallback) {
LOGS_DEFAULT(WARNING) << ex.what();
LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU."
<< "Falling back to OV CPU for execution";
GetGlobalContext().device_type = "CPU";
GetGlobalContext().precision_str = "FP32";
key = MakeMapKeyString(tensor_shapes, GetGlobalContext().device_type);
GetGlobalContext()->device_type = "CPU";
GetGlobalContext()->precision_str = "FP32";
key = MakeMapKeyString(tensor_shapes, GetGlobalContext()->device_type);
try {
dynamic_backend = BackendFactory::MakeBackend(modelproto_with_concrete_shapes,
GetGlobalContext(),
Expand Down
6 changes: 3 additions & 3 deletions onnxruntime/core/providers/openvino/backend_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@ namespace openvino_ep {
// Singleton class that manages all the backends
class BackendManager {
public:
BackendManager(const GlobalContext& global_context,
BackendManager(GlobalContext* global_context,
const onnxruntime::Node& fused_node,
const onnxruntime::GraphViewer& subgraph,
const logging::Logger& logger,
EPCtxHandler& ctx_handle);
void Compute(OrtKernelContext* context);
void ShutdownBackendManager();
void SetGlobalCotext(const GlobalContext& global_context);
GlobalContext& GetGlobalContext();
GlobalContext* GetGlobalContext();
Status ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& subgraph,
const logging::Logger& logger);

Expand All @@ -51,7 +51,7 @@ class BackendManager {
std::shared_ptr<IBackend> concrete_backend_;
std::map<std::string, std::shared_ptr<IBackend>> backend_map_;
SubGraphContext subgraph_context_;
GlobalContext global_context_;
GlobalContext* global_context_;
EPCtxHandler ep_ctx_handle_{};
std::string openvino_sdk_version_{};
};
Expand Down
16 changes: 8 additions & 8 deletions onnxruntime/core/providers/openvino/backend_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,17 @@ struct static_cast_int64 {
};

std::shared_ptr<OVNetwork>
CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context,
CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext* global_context,
std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
if (IsCILogEnabled()) {
std::cout << "CreateNgraphFunc" << std::endl;
}
const std::string model = model_proto.SerializeAsString();
try {
auto cnn_network = global_context.ie_core.ReadModel(model, global_context.onnx_model_path_name);
auto cnn_network = global_context->ie_core.ReadModel(model, global_context->onnx_model_path_name);

// Check for Constant Folding
if (!global_context.is_wholly_supported_graph) {
if (!global_context->is_wholly_supported_graph) {
ov::pass::ConstantFolding pass_const_obj;
pass_const_obj.run_on_model(cnn_network);
auto& results = const_cast<ov::ResultVector&>(cnn_network.get()->get_results());
Expand Down Expand Up @@ -130,13 +130,13 @@ GetOutputTensor(Ort::KernelContext& context,
return context.GetOutput(index, output_shape.get(), num_dims);
}

int GetFirstAvailableDevice(GlobalContext& global_context) {
int GetFirstAvailableDevice(GlobalContext* global_context) {
int i = 0;
// Get the first available VAD-M device and set the device to busy
while (i < 8) {
bool device = global_context.deviceAvailableList[i];
bool device = global_context->deviceAvailableList[i];
if (device) {
global_context.deviceAvailableList[i] = false;
global_context->deviceAvailableList[i] = false;
break;
}
i++;
Expand All @@ -145,9 +145,9 @@ int GetFirstAvailableDevice(GlobalContext& global_context) {
// make all remaining devices free
if (i == 8) {
i = 0;
global_context.deviceAvailableList[i] = false;
global_context->deviceAvailableList[i] = false;
for (int j = 1; j < 8; j++) {
global_context.deviceAvailableList[j] = true;
global_context->deviceAvailableList[j] = true;
}
}
return i;
Expand Down
4 changes: 2 additions & 2 deletions onnxruntime/core/providers/openvino/backend_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ bool IsDebugEnabled();
// Internal diagnostic function.
bool IsCILogEnabled();

int GetFirstAvailableDevice(GlobalContext& global_context);
int GetFirstAvailableDevice(GlobalContext* global_context);

void FillOutputsWithConstantData(std::shared_ptr<ov::Node> node, Ort::UnownedValue& out_tensor);

Expand Down Expand Up @@ -62,7 +62,7 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,

std::shared_ptr<OVNetwork>
CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto,
const GlobalContext& global_context,
const GlobalContext* global_context,
std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);

void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ namespace openvino_ep {

std::shared_ptr<IBackend>
BackendFactory::MakeBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_proto,
GlobalContext& global_context,
GlobalContext* global_context,
const SubGraphContext& subgraph_context,
EPCtxHandler& ep_ctx_handle) {
std::string type = global_context.device_type;
std::string type = global_context->device_type;
if (type == "CPU" || type.find("GPU") != std::string::npos ||
type.find("NPU") != std::string::npos ||
type.find("HETERO") != std::string::npos ||
Expand Down
Loading
Loading