diff --git a/c_cxx/plugin_EP/CMakeLists.txt b/c_cxx/plugin_EP/CMakeLists.txt
new file mode 100644
index 00000000..b6f0c657
--- /dev/null
+++ b/c_cxx/plugin_EP/CMakeLists.txt
@@ -0,0 +1,31 @@
+# usage:
+# cd build/
+# cmake -S ../ -B ./ -DCMAKE_BUILD_TYPE=Debug -DORT_HOME=/path/to/ort_package/onnxruntime-win-x64-gpu-1.23.0
+# cmake --build ./ --config Debug
+cmake_minimum_required(VERSION 3.26)
+project(plugin_ep_app VERSION 1.0)
+set(CMAKE_CXX_STANDARD 17)
+
+file(GLOB app_src "./*.cc")
+add_executable(app ${app_src})
+
+# Add dependencies
+include(FetchContent)
+
+# Add GSL
+FetchContent_Declare(
+  gsl
+  GIT_REPOSITORY https://github.com/microsoft/GSL.git
+  GIT_TAG v4.0.0 # Use a specific tag or commit
+)
+
+FetchContent_MakeAvailable(gsl)
+
+set(DEPS_PATH "${CMAKE_BINARY_DIR}/_deps")
+
+target_include_directories(app PUBLIC "${ORT_HOME}/include"
+  "${DEPS_PATH}/gsl-src/include" # GSL is header-only
+)
+
+# Link against the ONNX Runtime import library; the matching onnxruntime.dll must be on PATH at runtime.
+target_link_libraries(app PUBLIC "${ORT_HOME}/lib/onnxruntime.lib")
diff --git a/c_cxx/plugin_EP/README.md b/c_cxx/plugin_EP/README.md
new file mode 100644
index 00000000..cbdf7665
--- /dev/null
+++ b/c_cxx/plugin_EP/README.md
@@ -0,0 +1,89 @@
+# Running Inference with a Plugin EP using C++ API
+## Prerequisites
+- ONNX Runtime version >= 1.23.0
+- A dynamic/shared EP library that exports the functions `CreateEpFactories()` and `ReleaseEpFactory()`.
+- ONNX Runtime built as a shared library (e.g., `onnxruntime.dll` on Windows or `libonnxruntime.so` on Linux), since the EP library relies on the public ORT C API (which is ABI-stable) to interact with ONNX Runtime.
+- The `onnxruntime_providers_shared.dll` (Windows) or `libonnxruntime_providers_shared.so` (Linux) library is also required. When a plugin EP is registered, ONNX Runtime internally calls `LoadPluginOrProviderBridge`, which depends on this shared library to determine whether the EP library is a plugin EP or a provider bridge.
+- If you are using a pre-built ONNX Runtime package, all required libraries (e.g., `onnxruntime.dll`, `onnxruntime_providers_shared.dll`, etc.) are already included.
+
+## Run Inference with explicit OrtEpDevice(s)
+
+Please see `plugin_ep_inference.cc` for a full example.
+1. Register the plugin EP library with ONNX Runtime
+   ````c++
+   env.RegisterExecutionProviderLibrary(
+       "plugin_ep",              // Registration name can be anything the application chooses.
+       ORT_TSTR("plugin_ep.so")  // Path to the plugin EP library.
+   );
+   ````
+2. Find the OrtEpDevice for that plugin EP
+   ````c++
+   // Find the Ort::EpDevice for ep_name.
+   std::vector<Ort::ConstEpDevice> ep_devices = env.GetEpDevices();
+   std::vector<Ort::ConstEpDevice> selected_ep_devices;
+   for (Ort::ConstEpDevice ep_device : ep_devices) {
+     if (std::string(ep_device.EpName()) == ep_name) {
+       selected_ep_devices.push_back(ep_device);
+       break;
+     }
+   }
+   ````
+3. Append the EP to the ORT session options (EP-specific options can be passed here as well; see the sketch after this list)
+   ````c++
+   std::unordered_map<std::string, std::string> ep_options;  // Optional EP-specific options.
+   Ort::SessionOptions session_options;
+   session_options.AppendExecutionProvider_V2(env, selected_ep_devices, ep_options);
+   ````
+4. Create the ORT session with the EP
+   ````c++
+   Ort::Session session(env, ORT_TSTR("path/to/model"), session_options);
+   ````
+5. Run the ORT session
+   ````c++
+   auto output_tensors =
+       session.Run(Ort::RunOptions{nullptr}, input_names.data(), &input_tensor, 1, output_names.data(), 1);
+   ````
+6. Unregister the plugin EP library (only after all sessions that use it have been released)
+   ````c++
+   env.UnregisterExecutionProviderLibrary(lib_registration_name);
+   ````
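+
+The `ep_options` argument of `AppendExecutionProvider_V2` takes string key/value pairs that are forwarded to the EP. A minimal sketch, assuming a hypothetical option name `some_ep_option` (the valid keys are defined by each EP):
+````c++
+std::unordered_map<std::string, std::string> ep_options{
+    {"some_ep_option", "1"}  // Hypothetical key; consult the EP's documentation for real option names.
+};
+session_options.AppendExecutionProvider_V2(env, selected_ep_devices, ep_options);
+````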
+
+## Run Inference with automatic EP selection
+The workflow is the same as above, except for steps 2 and 3. Instead of selecting OrtEpDevice(s) explicitly, set a selection policy on the session options (a fuller sketch follows the policy list below):
+````c++
+session_options.SetEpSelectionPolicy(OrtExecutionProviderDevicePolicy_PREFER_GPU);
+````
+Available policies:
+- `OrtExecutionProviderDevicePolicy_DEFAULT`
+- `OrtExecutionProviderDevicePolicy_PREFER_CPU`
+- `OrtExecutionProviderDevicePolicy_PREFER_NPU`
+- `OrtExecutionProviderDevicePolicy_PREFER_GPU`
+- `OrtExecutionProviderDevicePolicy_MAX_PERFORMANCE`
+- `OrtExecutionProviderDevicePolicy_MAX_EFFICIENCY`
+- `OrtExecutionProviderDevicePolicy_MIN_OVERALL_POWER`
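+
+### Example: automatic EP selection (sketch)
+A minimal sketch that combines the steps above under automatic EP selection, assuming the same registration name and placeholder model path used elsewhere in this sample:
+````c++
+Ort::Env env;
+env.RegisterExecutionProviderLibrary("plugin_ep", ORT_TSTR("plugin_ep.so"));
+
+Ort::SessionOptions session_options;
+session_options.SetEpSelectionPolicy(OrtExecutionProviderDevicePolicy_PREFER_GPU);
+
+// ONNX Runtime picks the EP/device combination that best matches the policy.
+Ort::Session session(env, ORT_TSTR("path/to/model"), session_options);
+// ... run the session as in step 5, release it, and then unregister the library.
+````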
+
+## Note
+For additional APIs and details on plugin EP usage, see the official documentation:
+https://onnxruntime.ai/docs/execution-providers/plugin-ep-libraries.html#using-a-plugin-ep-library
diff --git a/c_cxx/plugin_EP/plugin_ep_inference.cc b/c_cxx/plugin_EP/plugin_ep_inference.cc
new file mode 100644
index 00000000..4713b56e
--- /dev/null
+++ b/c_cxx/plugin_EP/plugin_ep_inference.cc
@@ -0,0 +1,106 @@
+#include "onnxruntime_cxx_api.h"
+
+#include <gsl/util>
+
+#include <iostream>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+int RunInference() {
+  Ort::Env env;
+
+  // Registration name can be anything the application chooses.
+  const char* lib_registration_name = "TensorRTEp";
+
+  // Register the plugin EP library with ONNX Runtime.
+  env.RegisterExecutionProviderLibrary(
+      lib_registration_name,      // Registration name can be anything the application chooses.
+      ORT_TSTR("TensorRTEp.dll")  // Path to the plugin EP library.
+  );
+
+  // Unregister the library using the application-specified registration name.
+  // Must only unregister a library after all sessions that use the library have been released.
+  auto unregister_plugin_eps_at_scope_exit = gsl::finally([&]() {
+    env.UnregisterExecutionProviderLibrary(lib_registration_name);
+  });
+
+  {
+    std::vector<Ort::ConstEpDevice> ep_devices = env.GetEpDevices();
+    // The EP name should match the name assigned by the EP factory in the plugin EP library
+    // (i.e., the name the library's OrtEpFactory implementation reports).
+    std::string ep_name = lib_registration_name;
+
+    // Find the Ort::EpDevice for "TensorRTEp".
+    std::vector<Ort::ConstEpDevice> selected_ep_devices;
+    for (Ort::ConstEpDevice ep_device : ep_devices) {
+      if (std::string(ep_device.EpName()) == ep_name) {
+        selected_ep_devices.push_back(ep_device);
+        break;
+      }
+    }
+
+    if (selected_ep_devices.empty()) {
+      // Did not find the EP. Report an application error ...
+      std::cerr << "Did not find EP: " << ep_name << std::endl;
+      return -1;
+    }
+
+    std::unordered_map<std::string, std::string> ep_options;  // Optional EP options.
+    Ort::SessionOptions session_options;
+    session_options.AppendExecutionProvider_V2(env, selected_ep_devices, ep_options);
+
+    Ort::Session session(env, ORT_TSTR("mul_1.onnx"), session_options);
+
+    // Get the default ORT allocator.
+    Ort::AllocatorWithDefaultOptions allocator;
+
+    // Get the input name. Keep the smart pointer alive to avoid a dangling pointer.
+    Ort::AllocatedStringPtr input_name_ptr = session.GetInputNameAllocated(0, allocator);
+    const char* input_name = input_name_ptr.get();
+
+    // Input data.
+    std::vector<float> input_values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+
+    // Input shape: (3, 2).
+    std::vector<int64_t> input_shape{3, 2};
+
+    // Create the input tensor on CPU memory.
+    Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);
+
+    Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_values.data(), input_values.size(),
+                                                              input_shape.data(), input_shape.size());
+
+    // Get the output name. Keep the smart pointer alive to avoid a dangling pointer.
+    Ort::AllocatedStringPtr output_name_ptr = session.GetOutputNameAllocated(0, allocator);
+    const char* output_name = output_name_ptr.get();
+
+    // Run the session.
+    std::vector<const char*> input_names{input_name};
+    std::vector<const char*> output_names{output_name};
+
+    auto output_tensors =
+        session.Run(Ort::RunOptions{nullptr}, input_names.data(), &input_tensor, 1, output_names.data(), 1);
+
+    // Extract the output.
+    float* output_data = output_tensors.front().GetTensorMutableData<float>();
+
+    std::cout << "Output:" << std::endl;
+    for (int i = 0; i < 6; i++) {
+      std::cout << output_data[i] << " ";
+    }
+    std::cout << std::endl;
+
+    // Expected output: [[1, 4], [9, 16], [25, 36]]
+  }
+
+  return 0;
+}
+
+int main(int argc, char* argv[]) {
+  return RunInference();
+}
+
+// Note:
+// The mul_1.onnx model can be found here:
+// https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/test/testdata/mul_1.onnx
diff --git a/python/plugin_EP/README.md b/python/plugin_EP/README.md
new file mode 100644
index 00000000..d5ac2a3c
--- /dev/null
+++ b/python/plugin_EP/README.md
@@ -0,0 +1,64 @@
+# Running Inference with a Plugin EP using Python API
+## Prerequisites
+- ONNX Runtime version >= 1.23.0
+- A dynamic/shared EP library that exports the functions `CreateEpFactories()` and `ReleaseEpFactory()`.
+- The ONNX Runtime GPU Python wheel (`onnxruntime-gpu`) installed.
+
+## Run Inference with explicit OrtEpDevice(s)
+
+Please see `plugin_ep_inference.py` for a full example.
+1. Register the plugin EP library with ONNX Runtime
+   ````python
+   onnxruntime.register_execution_provider_library("plugin_ep", "plugin_ep.so")
+   ````
+2. Find the OrtEpDevice for that EP (see the sketch after this list for printing every discovered device)
+   ````python
+   ep_devices = onnxruntime.get_ep_devices()
+   for ep_device in ep_devices:
+       if ep_device.ep_name == ep_name:
+           target_ep_device = ep_device
+   ````
+3. Append the EP to the ORT session options
+   ````python
+   sess_options.add_provider_for_devices([target_ep_device], {})
+   ````
+4. Create the ORT session with the EP
+   ````python
+   sess = onnxruntime.InferenceSession("/path/to/model", sess_options=sess_options)
+   ````
+5. Run the ORT session
+   ````python
+   res = sess.run([], {input_name: x})
+   ````
+6. Unregister the plugin EP library (only after all sessions that use it have been released)
+   ````python
+   onnxruntime.unregister_execution_provider_library(ep_registration_name)
+   ````
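+
+If the expected EP does not show up in step 2, it can help to print what ONNX Runtime actually discovered. A minimal sketch, assuming the plugin library has already been registered as in step 1:
+````python
+import onnxruntime
+
+# List every execution provider device ONNX Runtime discovered, including
+# devices contributed by registered plugin EP libraries.
+for ep_device in onnxruntime.get_ep_devices():
+    print(ep_device.ep_name)
+````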
+
+## Run Inference with automatic EP selection
+The workflow is the same as above, except for steps 2 and 3. Instead of selecting OrtEpDevice(s) explicitly, set a selection policy on the session options (a commented sketch of this flow is also appended at the end of `plugin_ep_inference.py`):
+````python
+sess_options.set_provider_selection_policy(policy)
+````
+Available policies:
+- `onnxruntime.OrtExecutionProviderDevicePolicy_DEFAULT`
+- `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_CPU`
+- `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_NPU`
+- `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_GPU`
+- `onnxruntime.OrtExecutionProviderDevicePolicy_MAX_PERFORMANCE`
+- `onnxruntime.OrtExecutionProviderDevicePolicy_MAX_EFFICIENCY`
+- `onnxruntime.OrtExecutionProviderDevicePolicy_MIN_OVERALL_POWER`
+
+## Note
+For additional APIs and details on plugin EP usage, see the official documentation:
+https://onnxruntime.ai/docs/execution-providers/plugin-ep-libraries.html#using-a-plugin-ep-library
diff --git a/python/plugin_EP/plugin_ep_inference.py b/python/plugin_EP/plugin_ep_inference.py
new file mode 100644
index 00000000..92ae0f7d
--- /dev/null
+++ b/python/plugin_EP/plugin_ep_inference.py
@@ -0,0 +1,63 @@
+import onnxruntime as onnxrt
+import numpy as np
+
+# Path to the plugin EP library
+ep_lib_path = "C:\\path\\to\\plugin_trt_ep\\TensorRTEp.dll"
+# Registration name can be anything the application chooses
+ep_registration_name = "TensorRTEp"
+# The EP name should match the name assigned by the EP factory in the plugin EP library
+# (i.e., the name the library's OrtEpFactory implementation reports).
+ep_name = ep_registration_name
+
+# Register the plugin EP library with ONNX Runtime
+onnxrt.register_execution_provider_library(ep_registration_name, ep_lib_path)
+
+#
+# Create an ORT session with explicit OrtEpDevice(s)
+#
+
+# Find the OrtEpDevice for "TensorRTEp"
+ep_devices = onnxrt.get_ep_devices()
+trt_ep_device = None
+for ep_device in ep_devices:
+    if ep_device.ep_name == ep_name:
+        trt_ep_device = ep_device
+
+assert trt_ep_device is not None, f"Did not find EP: {ep_name}"
+
+sess_options = onnxrt.SessionOptions()
+
+# Equivalent to the C API's SessionOptionsAppendExecutionProvider_V2; appends "TensorRTEp" to the ORT session options
+sess_options.add_provider_for_devices([trt_ep_device], {'trt_engine_cache_enable': '1'})
+
+assert sess_options.has_providers()
+
+# Create an ORT session with the "TensorRTEp" plugin EP
+sess = onnxrt.InferenceSession("C:\\models\\mul_1.onnx", sess_options=sess_options)
+
+# Run the sample model and check the output
+x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
+input_name = sess.get_inputs()[0].name
+res = sess.run([], {input_name: x})
+output_expected = np.array([[1.0, 4.0], [9.0, 16.0], [25.0, 36.0]], dtype=np.float32)
+np.testing.assert_allclose(output_expected, res[0], rtol=1e-05, atol=1e-08)
+
+# Release the session before unregistering the library.
+del sess
+
+# Unregister the library using the application-specified registration name.
+# Must only unregister a library after all sessions that use the library have been released.
+onnxrt.unregister_execution_provider_library(ep_registration_name)
+
+# Note:
+# The mul_1.onnx model can be found here:
+# https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/test/testdata/mul_1.onnx
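+
+# Alternative (sketch): automatic EP selection instead of explicit OrtEpDevice selection.
+# This mirrors the "Run Inference with automatic EP selection" section of the README.
+# The policy constant name below follows the README and may need adjusting to the
+# installed onnxruntime version, so the lines are left commented out.
+#
+# sess_options = onnxrt.SessionOptions()
+# sess_options.set_provider_selection_policy(onnxrt.OrtExecutionProviderDevicePolicy_PREFER_GPU)
+# sess = onnxrt.InferenceSession("C:\\models\\mul_1.onnx", sess_options=sess_options)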