Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,8 @@
"-build/include_subdir",
"-runtime/references"
],
"C_Cpp.autoAddFileAssociations": false
"C_Cpp.autoAddFileAssociations": false,
"githubPullRequests.ignoredPullRequestBranches": [
"main"
]
}
9 changes: 9 additions & 0 deletions include/onnxruntime/core/framework/execution_provider.h
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,15 @@ class IExecutionProvider {
return Status::OK();
}

/**
   Hook invoked when InferenceSession::SetEpDynamicOptions is called.
   The two spans carry option keys and option values as C strings.
   This base implementation ignores both arguments and reports success;
   a provider overrides it to react to the options.
   TODO: what is the right way of passing parameters?
*/
virtual common::Status SetEpDynamicOptions(gsl::span<const char*> /*keys*/,
                                           gsl::span<const char*> /*values*/) {
  // Nothing to apply by default.
  return common::Status::OK();
}

/**
Indicate whether the graph capturing mode (e.g., cuda graph) is enabled for
the provider.
Expand Down
18 changes: 18 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -4722,6 +4722,24 @@ struct OrtApi {
* \param[in] adapter OrtLoraAdapter instance
*/
ORT_API2_STATUS(RunOptionsAddActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter);

/// @}
/// \name OrtEpDynamicOptions
/// @{

/** \brief Set DynamicOptions for EPs
 *
 * Keys and values are passed as two parallel arrays: the option at index i is
 * the pair (keys[i], values[i]).
 *
 * \param[in] sess Session whose execution providers receive the options
 * \param[in] keys Array of kv_len option keys, each a null-terminated string
 * \param[in] values Array of kv_len option values, each a null-terminated string
 * \param[in] kv_len Number of key-value pairs, i.e. the length of both keys and values
 *
 * \since Version xxx
 * @TODO: update version number
 * @TODO: should it be SetExecutionProvider... instead of SetEp...?
 */
ORT_API2_STATUS(SetEpDynamicOptions, _In_ OrtSession* sess, _In_ const char** keys, _In_ const char** values,
_In_ size_t kv_len);
};

/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,3 @@ static const char* const kOrtRunOptionsConfigQnnRpcControlLatency = "qnn.rpc_con
// If the value is set to -1, cuda graph capture/replay is disabled in that run.
// Users are not expected to set the value to 0 as it is reserved for internal use.
static const char* const kOrtRunOptionsConfigCudaGraphAnnotation = "gpu_graph_id";

// Specify the type of workload for this run.
// “Default”: OS determines the scheduling priority and processor performance to service this workload. [Default]
// “Efficient”: OS treats this workload as efficiency-oriented, with low scheduling priority and efficient processor performance.
static const char* const kOrtRunOptionsWorkloadType = "run.workload_type";
Original file line number Diff line number Diff line change
Expand Up @@ -282,8 +282,3 @@ static const char* const kOrtSessionOptionsMlasGemmFastMathArm64Bfloat16 = "mlas
// Refer to MatMulNBits op schema for more details.
// If not provided, default is 4.
static const char* const kOrtSessionOptionsQDQMatMulNBitsAccuracyLevel = "session.qdq_matmulnbits_accuracy_level";

// Specify the type of workload for this session.
// “Default”: OS determines the scheduling priority and processor performance to service this workload. [Default]
// “Efficient”: OS treats this workload as efficiency-oriented, with low scheduling priority and efficient processor performance.
static const char* const kOrtSessionOptionsWorkloadType = "session.workload_type";
32 changes: 32 additions & 0 deletions onnxruntime/core/session/inference_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2475,6 +2475,38 @@
};
} // namespace

// TODO: are we going to do all the tracing, logging, and telemetry for this?

Check warning on line 2478 in onnxruntime/core/session/inference_session.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Missing username in TODO; it should look like "// TODO(my_username): Stuff." [readability/todo] [2] Raw Output: onnxruntime/core/session/inference_session.cc:2478: Missing username in TODO; it should look like "// TODO(my_username): Stuff." [readability/todo] [2]
// TODO: is this the right type? How do we convert to string?

Check warning on line 2479 in onnxruntime/core/session/inference_session.cc

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Missing username in TODO; it should look like "// TODO(my_username): Stuff." [readability/todo] [2] Raw Output: onnxruntime/core/session/inference_session.cc:2479: Missing username in TODO; it should look like "// TODO(my_username): Stuff." [readability/todo] [2]
// Forwards a set of dynamic options to the session's execution providers.
//
// keys/values are parallel sequences of C strings: entry i of `keys` names the
// option whose value is entry i of `values`.
//
// Returns:
//   - FAIL if the session has not been initialized.
//   - INVALID_ARGUMENT if `keys` and `values` differ in length.
//   - FAIL / RUNTIME_EXCEPTION if an exception escapes a provider.
//   - otherwise the status accumulated by ORT_CHECK_AND_SET_RETVAL over all providers.
Status InferenceSession::SetEpDynamicOptions(gsl::span<const char*> keys,
                                             gsl::span<const char*> values) {
  Status retval = Status::OK();

  ORT_TRY {
    if (!is_inited_) {
      LOGS(*session_logger_, ERROR) << "Session was not initialized";
      return Status(common::ONNXRUNTIME, common::FAIL, "Session not initialized.");
    }

    // Options are positional pairs, so a length mismatch can only be a caller error.
    // Reject it here instead of letting a provider index past the shorter span.
    if (keys.size() != values.size()) {
      return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
                    "SetEpDynamicOptions: keys and values must have the same number of entries.");
    }

    // Hand the options to every registered execution provider.
    // TODO: only call SetEpDynamicOptions for all providers in-use
    for (auto& xp : execution_providers_) {
      // The call is deliberately made before the macro so every provider is
      // invoked regardless of how the macro treats its argument.
      // NOTE(review): ORT_CHECK_AND_SET_RETVAL is defined outside this excerpt;
      // presumably it keeps the first failure in retval - confirm.
      auto status = xp->SetEpDynamicOptions(keys, values);
      ORT_CHECK_AND_SET_RETVAL(status);
    }
  }
  ORT_CATCH(const std::exception& e) {
    ORT_HANDLE_EXCEPTION([&]() {
      retval = Status(common::ONNXRUNTIME, common::FAIL, e.what());
    });
  }
  ORT_CATCH(...) {
    retval = Status(common::ONNXRUNTIME, common::RUNTIME_EXCEPTION,
                    "Encountered unknown exception in SetEpDynamicOptions()");
  }

  return retval;
}

Status InferenceSession::Run(const RunOptions& run_options,
gsl::span<const std::string> feed_names, gsl::span<const OrtValue> feeds,
gsl::span<const std::string> output_names, std::vector<OrtValue>* p_fetches,
Expand Down
3 changes: 3 additions & 0 deletions onnxruntime/core/session/inference_session.h
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,9 @@ class InferenceSession {
*/
[[nodiscard]] common::Status Initialize();

/**
 * Passes dynamic options to the execution providers held by this session.
 * Each provider's SetEpDynamicOptions is called with the same keys and values.
 * @param keys option keys as C strings.
 * @param values option values as C strings; presumably values[i] belongs to keys[i] - confirm.
 * @return a failure status if the session has not been initialized or if an
 *         exception is caught while calling the providers; otherwise the status
 *         collected from the provider calls.
 */
[[nodiscard]] common::Status SetEpDynamicOptions(gsl::span<const char*> keys,
gsl::span<const char*> values);

[[nodiscard]] common::Status Run(const RunOptions& run_options, gsl::span<const std::string> feed_names,
gsl::span<const OrtValue> feeds, gsl::span<const std::string> output_names,
std::vector<OrtValue>* p_fetches,
Expand Down
24 changes: 24 additions & 0 deletions onnxruntime/core/session/onnxruntime_c_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -843,6 +843,28 @@

} // namespace

// C API entry point: forwards kv_len (key, value) C-string pairs to
// InferenceSession::SetEpDynamicOptions on the session behind `sess`.
//
// - kv_len == 0: nothing is forwarded and success is returned.
// - kv_len > 0 with a null `keys` or `values` array: ORT_INVALID_ARGUMENT.
// - otherwise: the status produced by the session call.
ORT_API_STATUS_IMPL(OrtApis::SetEpDynamicOptions, _In_ OrtSession* sess, _In_ const char** keys,
                    _In_ const char** values, _In_ size_t kv_len) {
  API_IMPL_BEGIN
  // NOTE(review): an empty request is currently reported as success; whether it
  // should instead be a distinct "no values passed" result is still undecided.
  if (kv_len == 0) {
    return ToOrtStatus(Status::OK());
  }

  // gsl::make_span requires a non-null pointer when the length is non-zero,
  // so reject null arrays here and report them as a normal API error.
  if (keys == nullptr || values == nullptr) {
    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT,
                                 "SetEpDynamicOptions: keys and values must not be null when kv_len > 0");
  }

  auto session = reinterpret_cast<::onnxruntime::InferenceSession*>(sess);

  // NOTE(review): strings are passed through as raw C-string views without
  // copying; confirm this is the intended way to hand strings to the session.
  const Status status = session->SetEpDynamicOptions(gsl::make_span(keys, kv_len),
                                                     gsl::make_span(values, kv_len));
  return ToOrtStatus(status);
  API_IMPL_END
}

ORT_API_STATUS_IMPL(OrtApis::Run, _Inout_ OrtSession* sess, _In_opt_ const OrtRunOptions* run_options,
_In_reads_(input_len) const char* const* input_names,
_In_reads_(input_len) const OrtValue* const* input, size_t input_len,
Expand Down Expand Up @@ -2785,6 +2807,8 @@
&OrtApis::CreateLoraAdapterFromArray,
&OrtApis::ReleaseLoraAdapter,
&OrtApis::RunOptionsAddActiveLoraAdapter,

&OrtApis::SetEpDynamicOptions,
};

// OrtApiBase can never change as there is no way to know what version of OrtApiBase is returned by OrtGetApiBase.
Expand Down
2 changes: 2 additions & 0 deletions onnxruntime/core/session/ort_apis.h
Original file line number Diff line number Diff line change
Expand Up @@ -531,4 +531,6 @@
ORT_API(void, ReleaseLoraAdapter, _Frees_ptr_opt_ OrtLoraAdapter*);
ORT_API_STATUS_IMPL(RunOptionsAddActiveLoraAdapter, _Inout_ OrtRunOptions* options, _In_ const OrtLoraAdapter* adapter);

// Implementation entry for OrtApi::SetEpDynamicOptions: keys/values are parallel
// arrays of kv_len C strings to be passed to the session's execution providers.
ORT_API_STATUS_IMPL(SetEpDynamicOptions, _In_ OrtSession* sess, _In_ const char** keys, _In_ const char** values,
_In_ size_t kv_len);

Check warning on line 535 in onnxruntime/core/session/ort_apis.h

View workflow job for this annotation

GitHub Actions / Optional Lint C++

[cpplint] reported by reviewdog 🐶 Do not indent within a namespace. [whitespace/indent_namespace] [4] Raw Output: onnxruntime/core/session/ort_apis.h:535: Do not indent within a namespace. [whitespace/indent_namespace] [4]
} // namespace OrtApis
Loading