Skip to content

Commit 56078fe

Browse files
[QNN_EP] Implement Efficient Mode API (#25146)
### Description
- Set the context priority to low when the workload type is Efficient
- Set the context priority to the command-line configured value when the workload type is Default
- Error out otherwise (invalid argument)
1 parent cee25ba commit 56078fe

File tree

5 files changed

+126
-2
lines changed

5 files changed

+126
-2
lines changed

onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -839,6 +839,23 @@ Status QnnBackendManager::CreateContextVtcmBackupBufferSharingEnabled(std::unord
839839
return Status::OK();
840840
}
841841

842+
// Applies `context_priority` to every context handle currently stored in contexts_.
//
// Propagates any failure returned by SetQnnContextConfig while building the QNN config, and
// returns an error as soon as contextSetConfig reports anything other than QNN_CONTEXT_NO_ERROR.
// Contexts that were updated before such a failure keep the new priority.
Status QnnBackendManager::SetContextPriority(ContextPriority context_priority) {
  QnnContext_Config_t context_priority_config = QNN_CONTEXT_CONFIG_INIT;
  ORT_RETURN_IF_ERROR(SetQnnContextConfig(context_priority, context_priority_config));

  // nullptr-terminated config list. Declared with pointer-to-const elements so it can be handed
  // to contextSetConfig directly instead of going through a C-style cast.
  const QnnContext_Config_t* configs[] = {&context_priority_config, nullptr};
  for (const auto& context_handle : contexts_) {
    const auto result = qnn_interface_.contextSetConfig(context_handle, configs);
    ORT_RETURN_IF(QNN_CONTEXT_NO_ERROR != result,
                  "Failed to set context priority for context handle: ", context_handle);
  }

  return Status::OK();
}
854+
855+
// Restores the priority stored in context_priority_ by delegating to SetContextPriority;
// the returned Status is whatever SetContextPriority reports.
Status QnnBackendManager::ResetContextPriority() {
  const auto session_priority = context_priority_;
  return SetContextPriority(session_priority);
}
858+
842859
Status QnnBackendManager::CreateContext(bool enable_htp_weight_sharing) {
843860
if (true == context_created_) {
844861
LOGS_DEFAULT(INFO) << "Context created already.";

onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,11 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
220220
// For each node name, a mapping to the context handle will be created
221221
void ProcessContextFromBinListAsync(Qnn_ContextHandle_t handle, void* notifyParam);
222222

223+
// Applies the given priority to every context in contexts_; returns a failure Status if SetQnnContextConfig or contextSetConfig fails
224+
Status SetContextPriority(ContextPriority context_priority);
225+
// Resets the context priority to the session default as defined by context_priority_
226+
Status ResetContextPriority();
227+
223228
private:
224229
Status LoadBackend();
225230

onnxruntime/core/providers/qnn/qnn_execution_provider.cc

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1554,4 +1554,38 @@ OrtDevice QNNExecutionProvider::GetOrtDeviceByMemType(OrtMemType /* em_type */)
15541554
return default_device_;
15551555
}
15561556

1557+
// Applies runtime ("dynamic") EP options given as parallel arrays of keys and values.
//
// Supported keys:
//   kOrtEpDynamicOptionsWorkloadType
//     "Default"   -> calls ResetContextPriority() on the backend manager
//     "Efficient" -> calls SetContextPriority(qnn::ContextPriority::LOW) on the backend manager
//
// Returns INVALID_ARGUMENT when the key and value counts differ, when a key or value pointer is
// null, when the key is not supported, or when the workload type value is not recognized.
// Failures from the backend manager are propagated unchanged.
//
// Options are applied in order and processing stops at the first failure, so options that
// precede a failing entry remain applied.
Status QNNExecutionProvider::SetEpDynamicOptions(gsl::span<const char* const> keys,
                                                 gsl::span<const char* const> values) {
  if (keys.size() != values.size()) {
    LOGS_DEFAULT(ERROR) << "SetEpDynamicOptions: number of keys (" << keys.size()
                        << ") does not equal number of values (" << values.size() << ").";
    // Reject the call instead of silently processing a truncated set of pairs.
    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                           "SetEpDynamicOptions: number of keys does not equal number of values.");
  }

  auto key_it = keys.begin();
  auto value_it = values.begin();

  while (key_it != keys.end() && value_it != values.end()) {
    // Constructing std::string from a null pointer is undefined behavior, so check first.
    if (*key_it == nullptr || *value_it == nullptr) {
      LOGS_DEFAULT(ERROR) << "SetEpDynamicOptions: keys and values must not be null.";
      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
                             "SetEpDynamicOptions: keys and values must not be null.");
    }

    const std::string key(*key_it);
    const std::string value(*value_it);

    if (key == kOrtEpDynamicOptionsWorkloadType) {
      if (value == "Default") {
        ORT_RETURN_IF_ERROR(qnn_backend_manager_->ResetContextPriority());
      } else if (value == "Efficient") {
        ORT_RETURN_IF_ERROR(qnn_backend_manager_->SetContextPriority(qnn::ContextPriority::LOW));
      } else {
        LOGS_DEFAULT(ERROR) << "Invalid EP Workload Type: " << value;
        return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Invalid EP Workload Type.");
      }
    } else {
      LOGS_DEFAULT(ERROR) << "EP Dynamic Option \"" << key << "\" is not currently supported.";
      return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Unsupported EP Dynamic Option");
    }

    ++key_it;
    ++value_it;
  }

  return Status::OK();
}
1590+
15571591
} // namespace onnxruntime

onnxruntime/core/providers/qnn/qnn_execution_provider.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ class QNNExecutionProvider : public IExecutionProvider {
5757

5858
OrtDevice GetOrtDeviceByMemType(OrtMemType mem_type) const override;
5959

60+
// Applies runtime EP options given as parallel arrays: keys[i] is the option name and
// values[i] is the value to set for it.
Status SetEpDynamicOptions(gsl::span<const char* const> keys,
                           gsl::span<const char* const> values) override;
62+
6063
private:
6164
std::unordered_set<const Node*> GetSupportedNodes(const GraphViewer& graph_viewer,
6265
const std::unordered_map<const Node*, const NodeUnit*>& node_unit_map,

onnxruntime/test/providers/qnn/qnn_ep_context_test.cc

Lines changed: 67 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1649,7 +1649,6 @@ static void DumpModelWithSharedCtx(ProviderOptions provider_options,
16491649
Ort::Session session2(*ort_env, ToPathString(onnx_model_path2).c_str(), so);
16501650
}
16511651

1652-
#if defined(__aarch64__) || defined(_M_ARM64)
16531652
static void GetModelInputNames(const std::string& model_path,
16541653
std::vector<std::string>& input_names,
16551654
std::vector<std::string>& output_names,
@@ -1669,7 +1668,6 @@ static void GetModelInputNames(const std::string& model_path,
16691668
output_names.push_back(output->Name());
16701669
}
16711670
}
1672-
#endif
16731671

16741672
// 1. Create 2 QDQ models
16751673
// 2. Initialize 2 Ort sessions which share the same QNN EP from these 2 QDQ models
@@ -1994,6 +1992,73 @@ TEST_F(QnnHTPBackendTests, LoadFromArrayWithQnnEpContextGenPathValidation) {
19941992
});
19951993
}
19961994
}
1995+
1996+
// Exercises the "ep.dynamic.workload_type" dynamic option on a QNN HTP session: runs inference
// once up front, again after switching to "Efficient", and again after switching back to
// "Default"; then checks that an unknown workload type and an unknown option key are each
// rejected with the expected error message.
TEST_F(QnnHTPBackendTests, QnnEpDynamicOptions) {
  ProviderOptions qnn_options;
  qnn_options["backend_type"] = "htp";
  qnn_options["offload_graph_io_quantization"] = "0";

  Ort::SessionOptions session_options;
  session_options.AppendExecutionProvider("QNN", qnn_options);
  session_options.SetLogSeverityLevel(ORT_LOGGING_LEVEL_VERBOSE);

  Ort::Session session(*ort_env, ORT_TSTR("testdata/qnn_ctx/qnn_multi_ctx_embed.onnx"), session_options);

  std::vector<std::string> input_names;
  std::vector<std::string> output_names;
  GetModelInputNames("testdata/qnn_ctx/qnn_multi_ctx_embed.onnx", input_names, output_names,
                     DefaultLoggingManager().DefaultLogger());

  // Every model input is fed a zero-filled 3x4 float tensor that views the same backing buffer.
  std::vector<int64_t> tensor_shape{3, 4};
  std::vector<float> tensor_data(3 * 4, 0.0f);
  Ort::MemoryInfo cpu_memory("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);

  std::vector<Ort::Value> ort_inputs;
  std::vector<const char*> input_names_c;
  for (const std::string& name : input_names) {
    ort_inputs.push_back(Ort::Value::CreateTensor(cpu_memory, tensor_data.data(), tensor_data.size(),
                                                  tensor_shape.data(), tensor_shape.size()));
    input_names_c.push_back(name.c_str());
  }

  std::vector<const char*> output_names_c;
  for (const std::string& name : output_names) {
    output_names_c.push_back(name.c_str());
  }

  // One inference pass requesting a single output; reused after each option change.
  const auto run_once = [&]() {
    return session.Run(Ort::RunOptions{}, input_names_c.data(), ort_inputs.data(), ort_inputs.size(),
                       output_names_c.data(), 1);
  };

  auto ort_output = run_once();

  const char* const workload_type[] = {"ep.dynamic.workload_type"};
  const char* const efficient_type[] = {"Efficient"};
  const char* const default_type[] = {"Default"};

  // Both supported workload types must be accepted and leave the session runnable.
  session.SetEpDynamicOptions(workload_type, efficient_type, 1);
  ort_output = run_once();

  session.SetEpDynamicOptions(workload_type, default_type, 1);
  ort_output = run_once();

  // "DNE" is used both as an unknown workload type value and as an unknown option key.
  const char* const dne[] = {"DNE"};

  // Unknown workload type value for a supported key.
  try {
    session.SetEpDynamicOptions(workload_type, dne, 1);
    FAIL() << "Expected exception to be thrown for workload type DNE but was set successfully";
  } catch (const std::exception& e) {
    EXPECT_STREQ("Invalid EP Workload Type.", e.what());
  }

  // Unknown option key.
  try {
    session.SetEpDynamicOptions(dne, efficient_type, 1);
    FAIL() << "Expected exception to be thrown for dynamic option DNE but was set successfully";
  } catch (const std::exception& e) {
    EXPECT_STREQ("Unsupported EP Dynamic Option", e.what());
  }
}
19972062
#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
19982063

19992064
} // namespace test

0 commit comments

Comments
 (0)