[Plugin EP] Add examples for how to run a plugin EP with C++ and Python #536
Open
chilo-ms wants to merge 15 commits into main from chi/plugin_ep_examples
Commits (15), all authored by chilo-ms:
- c03dcb3 Create new folder for plugin ep python example
- 5c444bc Add README for running inference with Plugin EP
- d7d8144 Add a python reference
- fe88992 Update TensorRTEp plugin EP inference example
- a98aa8b Update README with inference instructions for Plugin EP
- 6f27473 Add note for mul_1.onnx file location
- d8861c2 Enhance README with code examples for Plugin EP usage
- 5274e64 Update README for Python API and prerequisites
- 67fec78 Add plugin ep c++ example
- 184dc35 Add readme
- f488dd8 Change API reference from Python to C++ in README
- b0ab382 rename
- 23b99a1 Update README for plugin EP with C++ examples
- 5ca4284 Add usage instructions for CMake build
- 54fe49d Update
**CMakeLists.txt** (new file)
# usage:
# cd build/
# cmake -S ../ -B ./ -DCMAKE_BUILD_TYPE=Debug -DORT_HOME=/path/to/ort_package/onnxruntime-win-x64-gpu-1.23.0
# cmake --build ./ --config Debug
cmake_minimum_required(VERSION 3.26)
project(plugin_ep_app VERSION 1.0)
set(CMAKE_CXX_STANDARD 17)

file(GLOB app_src "./*.cc")
add_executable(app ${app_src})

# Add dependencies
include(FetchContent)

# Add GSL (header-only; used for gsl::finally)
FetchContent_Declare(
  gsl
  GIT_REPOSITORY https://github.com/microsoft/GSL.git
  GIT_TAG v4.0.0 # Use a specific tag or commit
)

FetchContent_MakeAvailable(gsl)

set(DEPS_PATH "${CMAKE_BINARY_DIR}/_deps")

target_include_directories(app PUBLIC "${ORT_HOME}/include"
                           "${DEPS_PATH}/gsl-src/include" # GSL is header-only
)

# Link against the import library from the prebuilt ORT package; linking the
# .dll directly does not work with MSVC. On Linux, use
# "${ORT_HOME}/lib/libonnxruntime.so" instead.
target_link_libraries(app PUBLIC "${ORT_HOME}/lib/onnxruntime.lib")
**README.md** (C++ example, new file)
# Running Inference with a Plugin EP using the C++ API
## Prerequisites
- ONNX Runtime version >= 1.23.0.
- A dynamic/shared EP library that exports the functions `CreateEpFactories()` and `ReleaseEpFactory()` (see the sketch after this list).
- ONNX Runtime built as a shared library (e.g., `onnxruntime.dll` on Windows or `libonnxruntime.so` on Linux), since the EP library relies on the public ORT C API (which is ABI-stable) to interact with ONNX Runtime.
- The `onnxruntime_providers_shared.dll` (Windows) or `libonnxruntime_providers_shared.so` (Linux) library is also required. When a plugin EP is registered, ONNX Runtime internally calls `LoadPluginOrProviderBridge`, which depends on this shared library to determine whether the EP library is a plugin or a provider bridge.
- If you are using a pre-built ONNX Runtime package, all required libraries (e.g., `onnxruntime.dll`, `onnxruntime_providers_shared.dll`, etc.) are already included.
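A minimal sketch of the two required exports, with signatures paraphrased from `onnxruntime_ep_c_api.h` in ORT 1.23 (treat the exact parameter lists as an assumption and check the header shipped with your ORT release):
````c++
// Sketch only: verify the exact declarations in onnxruntime_ep_c_api.h.
#include "onnxruntime_c_api.h"

extern "C" {
// Called by ORT after the library is registered; fills `factories` with up to
// `max_factories` OrtEpFactory instances and reports the count via `num_factories`.
OrtStatus* CreateEpFactories(const char* registration_name, const OrtApiBase* ort_api_base,
                             const OrtLogger* default_logger,
                             OrtEpFactory** factories, size_t max_factories, size_t* num_factories);

// Called by ORT when the library is unregistered, once per factory.
OrtStatus* ReleaseEpFactory(OrtEpFactory* factory);
}
````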

## Run Inference with explicit OrtEpDevice(s)

Please see `plugin_ep_inference.cc` for a full example.
1. Register the plugin EP library with ONNX Runtime
````c++
env.RegisterExecutionProviderLibrary(
    "plugin_ep",             // Registration name can be anything the application chooses.
    ORT_TSTR("plugin_ep.so") // Path to the plugin EP library.
);
````
2. Find the `OrtEpDevice` for that plugin EP
````c++
// EP name assigned by the EP factory when creating the EP.
std::string ep_name = "plugin_ep";

// Find the Ort::EpDevice for ep_name
std::vector<Ort::ConstEpDevice> ep_devices = env.GetEpDevices();
std::vector<Ort::ConstEpDevice> selected_ep_devices = {};
for (Ort::ConstEpDevice ep_device : ep_devices) {
  if (std::string(ep_device.EpName()) == ep_name) {
    selected_ep_devices.push_back(ep_device);
    break;
  }
}
````
3. Append the EP to the ORT session options
````c++
std::unordered_map<std::string, std::string> ep_options; // Optional EP options.
Ort::SessionOptions session_options;
session_options.AppendExecutionProvider_V2(env, selected_ep_devices, ep_options);
````
4. Create an ORT session with the EP
````c++
Ort::Session session(env, ORT_TSTR("path/to/model"), session_options);
````
5. Run the ORT session
````c++
auto output_tensors =
    session.Run(Ort::RunOptions{nullptr}, input_names.data(), &input_tensor, 1, output_names.data(), 1);
````
6. Unregister the plugin EP library
````c++
env.UnregisterExecutionProviderLibrary(lib_registration_name);
````

## Run Inference with automatic EP selection
The workflow is the same as above except for steps 2 and 3.
Instead, set the selection policy directly (see the sketch after the policy list):
````c++
session_options.SetEpSelectionPolicy(OrtExecutionProviderDevicePolicy_PREFER_GPU);
````
Available policies:
- `OrtExecutionProviderDevicePolicy_DEFAULT`
- `OrtExecutionProviderDevicePolicy_PREFER_CPU`
- `OrtExecutionProviderDevicePolicy_PREFER_NPU`
- `OrtExecutionProviderDevicePolicy_PREFER_GPU`
- `OrtExecutionProviderDevicePolicy_MAX_PERFORMANCE`
- `OrtExecutionProviderDevicePolicy_MAX_EFFICIENCY`
- `OrtExecutionProviderDevicePolicy_MIN_OVERALL_POWER`
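Putting it together, a minimal sketch (registration name, library path, and model path are placeholders):
````c++
Ort::Env env;
env.RegisterExecutionProviderLibrary("plugin_ep", ORT_TSTR("plugin_ep.so"));

Ort::SessionOptions session_options;
// Let ORT choose among the available OrtEpDevices, including the plugin EP's.
session_options.SetEpSelectionPolicy(OrtExecutionProviderDevicePolicy_PREFER_GPU);

Ort::Session session(env, ORT_TSTR("path/to/model"), session_options);
````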

## Note
For additional APIs and details on plugin EP usage, see the official documentation:
https://onnxruntime.ai/docs/execution-providers/plugin-ep-libraries.html#using-a-plugin-ep-library
**plugin_ep_inference.cc** (new file)
#include "onnxruntime_cxx_api.h" | ||
#include <iostream> | ||
#include <vector> | ||
#include <gsl/gsl> | ||
|
||
int RunInference() { | ||
const OrtApi* ort_api = OrtGetApiBase()->GetApi(ORT_API_VERSION); | ||
Ort::Env env; | ||
|
||
// Registration name can be anything the application chooses | ||
const char* lib_registration_name = "TensorRTEp"; | ||
|
||
// Register plugin EP library with ONNX Runtime. | ||
env.RegisterExecutionProviderLibrary( | ||
lib_registration_name, // Registration name can be anything the application chooses. | ||
ORT_TSTR("TensorRTEp.dll") // Path to the plugin EP library. | ||
); | ||
|
||
// Unregister the library using the application-specified registration name. | ||
// Must only unregister a library after all sessions that use the library have been released. | ||
auto unregister_plugin_eps_at_scope_exit = gsl::finally([&]() { | ||
env.UnregisterExecutionProviderLibrary(lib_registration_name); | ||
}); | ||
|
||
{ | ||
std::vector<Ort::ConstEpDevice> ep_devices = env.GetEpDevices(); | ||
// EP name should match the name assigned by the EP factory when creating the EP (i.e., in the implementation of OrtEP::CreateEp()) | ||
std::string ep_name = lib_registration_name; | ||
|
||
// Find the Ort::EpDevice for "TensorRTEp". | ||
std::vector<Ort::ConstEpDevice> selected_ep_devices = {}; | ||
for (Ort::ConstEpDevice ep_device : ep_devices) { | ||
if (std::string(ep_device.EpName()) == ep_name) { | ||
selected_ep_devices.push_back(ep_device); | ||
break; | ||
} | ||
} | ||
|
||
if (selected_ep_devices[0] == nullptr) { | ||
// Did not find EP. Report application error ... | ||
std::cerr << "Did not find EP: " << ep_name << std::endl; | ||
return -1; | ||
} | ||
|
||
std::unordered_map<std::string, std::string> ep_options; // Optional EP options. | ||
Ort::SessionOptions session_options; | ||
session_options.AppendExecutionProvider_V2(env, selected_ep_devices, ep_options); | ||
|
||
Ort::Session session(env, ORT_TSTR("mul_1.onnx"), session_options); | ||
|
||
// Get default ORT allocator | ||
Ort::AllocatorWithDefaultOptions allocator; | ||
|
||
// Get input name | ||
Ort::AllocatedStringPtr input_name_ptr = session.GetInputNameAllocated(0, allocator); // Keep the smart pointer alive to avoid dangling pointer | ||
const char* input_name = input_name_ptr.get(); | ||
|
||
// Input data | ||
std::vector<float> input_values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; | ||
|
||
// Input shape: (3, 2) | ||
std::vector<int64_t> input_shape{3, 2}; | ||
|
||
// Create tensor | ||
Ort::MemoryInfo memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); | ||
|
||
Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_values.data(), input_values.size(), | ||
input_shape.data(), input_shape.size()); | ||
|
||
// Get output name | ||
Ort::AllocatedStringPtr output_name_ptr = | ||
session.GetOutputNameAllocated(0, allocator); // Keep the smart pointer alive to avoid dangling pointer | ||
const char* output_name = output_name_ptr.get(); | ||
|
||
// Run session | ||
std::vector<const char*> input_names{input_name}; | ||
std::vector<const char*> output_names{output_name}; | ||
|
||
auto output_tensors = | ||
session.Run(Ort::RunOptions{nullptr}, input_names.data(), &input_tensor, 1, output_names.data(), 1); | ||
|
||
// Extract output | ||
float* output_data = output_tensors.front().GetTensorMutableData<float>(); | ||
|
||
std::cout << "Output:" << std::endl; | ||
for (int i = 0; i < 6; i++) { | ||
std::cout << output_data[i] << " "; | ||
} | ||
std::cout << std::endl; | ||
|
||
// Expected output: [[1,4],[9,16],[25,36]] | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
int main(int argc, char* argv[]) { | ||
return RunInference(); | ||
} | ||
|
||
// Note: | ||
// The mul_1.onnx can be found here: | ||
// https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/test/testdata/mul_1.onnx |
**README.md** (Python example, new file)
# Running Inference with a Plugin EP using the Python API
## Prerequisites
- ONNX Runtime version >= 1.23.0 (see the check after this list).
- A dynamic/shared EP library that exports the functions `CreateEpFactories()` and `ReleaseEpFactory()`.
- The ONNX Runtime GPU Python wheel installed.
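A quick way to confirm the installed wheel meets the version requirement (sketch; assumes the GPU wheel is installed and importable as the `onnxruntime` module):
````python
import onnxruntime

print(onnxruntime.__version__)  # should print 1.23.0 or later
````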

## Run Inference with explicit OrtEpDevice(s)

Please see `plugin_ep_inference.py` for a full example.
1. Register the plugin EP library with ONNX Runtime
````python
onnxruntime.register_execution_provider_library("plugin_ep", "plugin_ep.so")
````
2. Find the `OrtEpDevice` for that EP
````python
ep_name = "plugin_ep"  # EP name assigned by the EP factory when creating the EP
ep_devices = onnxruntime.get_ep_devices()
for ep_device in ep_devices:
    if ep_device.ep_name == ep_name:
        target_ep_device = ep_device
````
3. Append the EP to the ORT session options
````python
sess_options = onnxruntime.SessionOptions()
sess_options.add_provider_for_devices([target_ep_device], {})
````
4. Create an ORT session with the EP
````python
sess = onnxruntime.InferenceSession("/path/to/model", sess_options=sess_options)
````
5. Run the ORT session
````python
res = sess.run([], {input_name: x})
````
6. Unregister the plugin EP library
````python
onnxruntime.unregister_execution_provider_library(ep_registration_name)
````

## Run Inference with automatic EP selection
The workflow is the same as above except for steps 2 and 3.
Instead, set the selection policy directly (see the sketch after the policy list):
````python
sess_options.set_provider_selection_policy(policy)
````
Available policies:
- `onnxruntime.OrtExecutionProviderDevicePolicy_DEFAULT`
- `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_CPU`
- `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_NPU`
- `onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_GPU`
- `onnxruntime.OrtExecutionProviderDevicePolicy_MAX_PERFORMANCE`
- `onnxruntime.OrtExecutionProviderDevicePolicy_MAX_EFFICIENCY`
- `onnxruntime.OrtExecutionProviderDevicePolicy_MIN_OVERALL_POWER`
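Putting it together, a minimal sketch (library path and model path are placeholders; the policy constant follows the naming in the list above):
````python
import onnxruntime

onnxruntime.register_execution_provider_library("plugin_ep", "plugin_ep.so")

sess_options = onnxruntime.SessionOptions()
# Let ORT choose among the available OrtEpDevices, including the plugin EP's.
sess_options.set_provider_selection_policy(onnxruntime.OrtExecutionProviderDevicePolicy_PREFER_GPU)

sess = onnxruntime.InferenceSession("/path/to/model", sess_options=sess_options)
````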

## Note
For additional APIs and details on plugin EP usage, see the official documentation:
https://onnxruntime.ai/docs/execution-providers/plugin-ep-libraries.html#using-a-plugin-ep-library
**plugin_ep_inference.py** (new file)
import numpy as np
import onnxruntime as onnxrt

# Path to the plugin EP library
ep_lib_path = "C:\\path\\to\\plugin_trt_ep\\TensorRTEp.dll"
# Registration name can be anything the application chooses
ep_registration_name = "TensorRTEp"
# EP name should match the name assigned by the EP factory when creating the EP
# (i.e., in the implementation of OrtEp::CreateEp)
ep_name = ep_registration_name

# Register the plugin EP library with ONNX Runtime
onnxrt.register_execution_provider_library(ep_registration_name, ep_lib_path)

#
# Create an ORT session with explicit OrtEpDevice(s)
#

# Find the OrtEpDevice for "TensorRTEp"
ep_devices = onnxrt.get_ep_devices()
trt_ep_device = None
for ep_device in ep_devices:
    if ep_device.ep_name == ep_name:
        trt_ep_device = ep_device

assert trt_ep_device is not None

sess_options = onnxrt.SessionOptions()

# Equivalent to the C API's SessionOptionsAppendExecutionProvider_V2; appends "TensorRTEp" to the ORT session options
sess_options.add_provider_for_devices([trt_ep_device], {"trt_engine_cache_enable": "1"})

assert sess_options.has_providers()

# Create an ORT session with the "TensorRTEp" plugin EP
sess = onnxrt.InferenceSession("C:\\models\\mul_1.onnx", sess_options=sess_options)

# Run the sample model and check the output
x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
input_name = sess.get_inputs()[0].name
res = sess.run([], {input_name: x})
output_expected = np.array([[1.0, 4.0], [9.0, 16.0], [25.0, 36.0]], dtype=np.float32)
np.testing.assert_allclose(output_expected, res[0], rtol=1e-05, atol=1e-08)

# Unregister the library using the application-specified registration name.
# Must only unregister a library after all sessions that use the library have been released.
onnxrt.unregister_execution_provider_library(ep_registration_name)

# Note:
# The mul_1.onnx model can be found here:
# https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/test/testdata/mul_1.onnx
**Review comments**

Reviewer: The `onnxruntime_providers_shared.dll` is not required for new EPs that are only using the new binary-stable APIs. If this example is only meant for new EPs, perhaps we don't need to mention it?

Author: Hmm, I think this example is for how to run ORT inference with a plugin EP, so it's worth mentioning that `onnxruntime_providers_shared.dll` is also required. If users only have `onnxruntime.dll` but no `onnxruntime_providers_shared.dll` present, they will get the following error: