Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion onnxruntime/core/providers/openvino/backend_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ GlobalContext& BackendManager::GetGlobalContext() {
return global_context_;
}

// Hands out the OpenVINO compiled model owned by the concrete backend.
// The reference is forwarded unchanged from concrete_backend_->GetOVCompiledModel(),
// so callers get mutable access to that same object.
ov::CompiledModel& BackendManager::GetOVCompiledModel() {
  return concrete_backend_->GetOVCompiledModel();
}

BackendManager::BackendManager(const GlobalContext& global_context,
const onnxruntime::Node& fused_node,
const onnxruntime::GraphViewer& subgraph,
Expand All @@ -35,7 +40,7 @@ BackendManager::BackendManager(const GlobalContext& global_context,
openvino_sdk_version_ = std::to_string(global_context_.OpenVINO_Version.at(0)) + "." +
std::to_string(global_context_.OpenVINO_Version.at(1));
if (ep_ctx_handle_.CheckForOVEPCtxNode(subgraph, openvino_sdk_version_)) {
if (ep_ctx_handle_.ImportBlobFromEPCtxModel(subgraph) != Status::OK())
if (ep_ctx_handle_.ImportBlobFromEPCtxModel(subgraph, global_context_.ep_context_embed_mode) != Status::OK())
ORT_THROW("Import blob from model failed");
}

Expand Down
1 change: 1 addition & 0 deletions onnxruntime/core/providers/openvino/backend_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ class BackendManager {
GlobalContext& GetGlobalContext();
Status ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphViewer& subgraph,
const logging::Logger& logger);
ov::CompiledModel& GetOVCompiledModel();

private:
std::unique_ptr<ONNX_NAMESPACE::ModelProto> GetModelProtoFromFusedNode(
Expand Down
3 changes: 2 additions & 1 deletion onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,11 +94,12 @@ Status EPCtxHandler::ExportEPCtxModel(const GraphViewer& graph_viewer,
return Status::OK();
}

Status EPCtxHandler::ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer) {
Status EPCtxHandler::ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer, bool& ep_context_embed_mode) {
auto node = graph_viewer.GetNode(0);
auto& attrs = node->GetAttributes();
ORT_ENFORCE(attrs.count(EP_CACHE_CONTEXT) > 0);
model_stream_ = std::make_shared<std::istringstream>(attrs.at(EP_CACHE_CONTEXT).s());
ep_context_embed_mode = static_cast<bool>(attrs.at(EMBED_MODE).i());
LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from EPContext Node";

is_valid_ep_ctx_graph_ = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class EPCtxHandler {
const bool& ep_context_embed_mode,
std::string&& model_blob_str,
const std::string& openvino_sdk_version) const;
Status ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer);
Status ImportBlobFromEPCtxModel(const GraphViewer& graph_viewer, bool& ep_context_embed_mode);
bool CheckForOVEPCtxNode(const GraphViewer& graph_viewer, std::string openvino_sdk_version) const;
bool IsValidOVEPCtxGraph() const { return is_valid_ep_ctx_graph_; }
[[nodiscard]] const std::shared_ptr<std::istringstream> GetModelBlobStream() const { return model_stream_; }
Expand Down
38 changes: 36 additions & 2 deletions onnxruntime/core/providers/openvino/openvino_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
// Licensed under the MIT License
#include <filesystem>
#include <utility>

#include <string>
#include "core/providers/shared_library/provider_api.h"
#include "core/providers/openvino/openvino_execution_provider.h"
#include "core/providers/openvino/contexts.h"
#include "core/providers/openvino/backend_manager.h"
#include "core/providers/openvino/onnx_ctx_model_helper.h"
#include "core/providers/openvino/ov_versions/capability.h"
#include "core/session/onnxruntime_session_options_config_keys.h"
#include "openvino/core/version.hpp"
#ifdef USE_OVEP_NPU_MEMORY
#include "core/providers/openvino/ov_allocator.h"
Expand Down Expand Up @@ -150,7 +151,7 @@ common::Status OpenVINOExecutionProvider::Compile(
graph_body_viewer,
*GetLogger(),
ep_ctx_handle_);

backend_manager_ = backend_manager;
compute_info.create_state_func =
[backend_manager](ComputeContext* context, FunctionState* state) {
OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState();
Expand Down Expand Up @@ -198,4 +199,37 @@ std::vector<AllocatorPtr> OpenVINOExecutionProvider::CreatePreferredAllocators()
}
#endif

// Applies options that may be changed while the session is alive.
//
// `keys` and `values` are parallel spans: values[i] is the setting for keys[i].
// Only kOrtEpDynamicOptionsWorkloadType is acted upon. Its value "Efficient" or "Default" is
// mapped to "EFFICIENT" / "DEFAULT" and set on the compiled model via ov::workload_type.
// Unknown keys and unsupported workload values are logged as warnings and skipped.
//
// Returns:
//   INVALID_ARGUMENT - the spans differ in length, or an entry is a null pointer.
//   FAIL             - a workload type was requested but no backend manager has been stored yet
//                      (backend_manager_ is assigned in Compile()).
//   OK               - otherwise.
common::Status OpenVINOExecutionProvider::SetEpDynamicOptions(gsl::span<const char* const> keys,
                                                              gsl::span<const char* const> values) {
  // Ensure the number of keys and values match
  if (keys.size() != values.size()) {
    return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Mismatched keys and values sizes.");
  }

  for (size_t i = 0; i < keys.size(); ++i) {
    // Building a std::string from a null pointer is undefined behaviour, so reject it up front.
    if (keys[i] == nullptr || values[i] == nullptr) {
      return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "Null key or value in dynamic options.");
    }
    const std::string key = keys[i];
    const std::string value = values[i];

    if (key != kOrtEpDynamicOptionsWorkloadType) {
      // Handle unknown options
      LOGS_DEFAULT(WARNING) << "Unknown key/value pair - ignoring " << key << "/" << value;
      continue;
    }

    // Scoped to this entry: a rejected value must not re-apply the type chosen by an earlier entry.
    std::string workload_type;
    if (value == "Efficient") {
      workload_type = "EFFICIENT";
    } else if (value == "Default") {
      workload_type = "DEFAULT";
    } else {
      LOGS_DEFAULT(WARNING) << "Unknown workload_type - ignoring " << key << "/" << value;
      LOGS_DEFAULT(WARNING) << "Supported types are 'Efficient' and 'Default' \n";
      continue;
    }

    // Without a stored backend manager there is no compiled model to update.
    if (backend_manager_ == nullptr) {
      return Status(common::ONNXRUNTIME, common::FAIL,
                    "SetEpDynamicOptions called before a model was compiled.");
    }

    LOGS_DEFAULT(INFO) << "SetEpDynamicOptions - modifying: " << key << "/" << value;
    ov::CompiledModel& ov_compiled_model = backend_manager_->GetOVCompiledModel();
    ov_compiled_model.set_property(ov::workload_type(workload_type));
  }
  return Status::OK();
}
} // namespace onnxruntime
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,9 @@ class OpenVINOExecutionProvider : public IExecutionProvider {
Status Compile(const std::vector<FusedNodeAndGraph>& fused_nodes,
std::vector<NodeComputeInfo>& node_compute_funcs) override;

Status SetEpDynamicOptions(gsl::span<const char* const> /*keys*/,
gsl::span<const char* const> /*values*/) override;

const void* GetExecutionHandle() const noexcept override {
return nullptr;
}
Expand All @@ -197,6 +200,7 @@ class OpenVINOExecutionProvider : public IExecutionProvider {
private:
std::unique_ptr<openvino_ep::GlobalContext> global_context_;
openvino_ep::EPCtxHandler ep_ctx_handle_{};
std::shared_ptr<openvino_ep::BackendManager> backend_manager_;
};

} // namespace onnxruntime
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "core/providers/openvino/openvino_provider_factory.h"
#include "core/providers/openvino/openvino_execution_provider.h"
#include "core/providers/openvino/openvino_provider_factory_creator.h"
#include "core/session/onnxruntime_session_options_config_keys.h"
#include "nlohmann/json.hpp"

namespace onnxruntime {
Expand Down Expand Up @@ -50,10 +51,10 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
};

std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
bool so_disable_cpu_fallback = config_options_.GetConfigOrDefault("session.disable_cpu_ep_fallback", "0") == "1";
bool so_export_ep_ctx_blob = config_options_.GetConfigOrDefault("ep.context_enable", "0") == "1";
bool so_epctx_embed_mode = config_options_.GetConfigOrDefault("ep.context_embed_mode", "1") == "1";
std::string so_cache_path = config_options_.GetConfigOrDefault("ep.context_file_path", "").c_str();
bool so_disable_cpu_fallback = config_options_.GetConfigOrDefault(kOrtSessionOptionsDisableCPUEPFallback, "0") == "1";
bool so_export_ep_ctx_blob = config_options_.GetConfigOrDefault(kOrtSessionOptionEpContextEnable, "0") == "1";
bool so_epctx_embed_mode = config_options_.GetConfigOrDefault(kOrtSessionOptionEpContextEmbedMode, "1") == "1";
std::string so_cache_path = config_options_.GetConfigOrDefault(kOrtSessionOptionEpContextFilePath, "").c_str();

if (so_export_ep_ctx_blob && !so_cache_path.empty()) {
cache_dir_ = so_cache_path;
Expand Down
Loading