Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmake/onnxruntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ if(WIN32)
onnxruntime_add_shared_library(onnxruntime
${SYMBOL_FILE}
"${ONNXRUNTIME_ROOT}/core/dll/dllmain.cc"
"${ONNXRUNTIME_ROOT}/core/dll/delay_load_hook.cc"
"${ONNXRUNTIME_ROOT}/core/dll/onnxruntime.rc"
)
elseif(onnxruntime_BUILD_APPLE_FRAMEWORK)
Expand Down
20 changes: 17 additions & 3 deletions cmake/onnxruntime_nodejs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,26 @@ else()
endif()
endif()

# a list of DLLs that the Node.js binding depends on
set(NODEJS_DLL_DEPS)

# setup providers
# Each enabled execution provider contributes a command-line switch for the Node.js binding build.
# Providers that ship extra runtime DLLs also append those DLL paths to NODEJS_DLL_DEPS.

# CUDA: only the build switch is set here.
if (onnxruntime_USE_CUDA)
  set(NODEJS_BINDING_USE_CUDA "--use_cuda")
endif()

# DirectML: DirectML.dll is taken from the directory of the onnxruntime target file.
if (onnxruntime_USE_DML)
  set(NODEJS_BINDING_USE_DML "--use_dml")
  list(APPEND NODEJS_DLL_DEPS "$<TARGET_FILE_DIR:onnxruntime>/DirectML.dll")
endif()

# WebGPU: the D3D12 backend on Windows adds dxil.dll and dxcompiler.dll; a monolithic Dawn build adds
# the dawn::webgpu_dawn target file.
if (onnxruntime_USE_WEBGPU)
  set(NODEJS_BINDING_USE_WEBGPU "--use_webgpu")
  if (WIN32 AND onnxruntime_ENABLE_DAWN_BACKEND_D3D12)
    list(APPEND NODEJS_DLL_DEPS
      "$<TARGET_FILE_DIR:dxcompiler>/dxil.dll"
      "$<TARGET_FILE_DIR:dxcompiler>/dxcompiler.dll")
  endif()
  if (onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY)
    list(APPEND NODEJS_DLL_DEPS "$<TARGET_FILE:dawn::webgpu_dawn>")
  endif()
endif()
if (onnxruntime_USE_TENSORRT)
set(NODEJS_BINDING_USE_TENSORRT "--use_tensorrt")
Expand All @@ -94,9 +105,12 @@ add_custom_target(js_common_npm_ci ALL

add_custom_target(nodejs_binding_wrapper ALL
COMMAND ${NPM_CLI} ci
COMMAND ${NPM_CLI} run build -- --onnxruntime-build-dir=${CMAKE_CURRENT_BINARY_DIR} --config=${CMAKE_BUILD_TYPE} --onnxruntime-generator=${CMAKE_GENERATOR}
--arch=${NODEJS_BINDING_ARCH} ${NODEJS_BINDING_USE_CUDA} ${NODEJS_BINDING_USE_DML} ${NODEJS_BINDING_USE_WEBGPU} ${NODEJS_BINDING_USE_TENSORRT}
${NODEJS_BINDING_USE_COREML} ${NODEJS_BINDING_USE_QNN}
COMMAND ${NPM_CLI} run build -- "--onnxruntime-build-dir=${CMAKE_CURRENT_BINARY_DIR}"
--config=${CMAKE_BUILD_TYPE}
"--onnxruntime-generator=${CMAKE_GENERATOR}"
"--dll_deps=${NODEJS_DLL_DEPS}"
--arch=${NODEJS_BINDING_ARCH} ${NODEJS_BINDING_USE_CUDA} ${NODEJS_BINDING_USE_DML} ${NODEJS_BINDING_USE_WEBGPU}
${NODEJS_BINDING_USE_TENSORRT} ${NODEJS_BINDING_USE_COREML} ${NODEJS_BINDING_USE_QNN}
WORKING_DIRECTORY ${JS_NODE_ROOT}
COMMENT "Using cmake-js to build OnnxRuntime Node.js binding")

Expand Down
36 changes: 27 additions & 9 deletions cmake/onnxruntime_providers_webgpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -23,24 +23,42 @@
onnxruntime_add_include_to_target(onnxruntime_providers_webgpu
onnxruntime_common dawn::dawncpp_headers dawn::dawn_headers onnx onnx_proto flatbuffers::flatbuffers Boost::mp11 safeint_interface)

set(onnxruntime_providers_webgpu_dll_deps)

if (onnxruntime_BUILD_DAWN_MONOLITHIC_LIBRARY)
target_link_libraries(onnxruntime_providers_webgpu dawn::webgpu_dawn)

if (onnxruntime_ENABLE_DELAY_LOADING_WIN_DLLS)
list(APPEND onnxruntime_DELAYLOAD_FLAGS "/DELAYLOAD:webgpu_dawn.dll")
endif()
if (WIN32)
if (onnxruntime_ENABLE_DELAY_LOADING_WIN_DLLS)
list(APPEND onnxruntime_DELAYLOAD_FLAGS "/DELAYLOAD:webgpu_dawn.dll")
endif()

# Copy webgpu_dawn.dll to the output directory
add_custom_command(
TARGET onnxruntime_providers_webgpu
POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different "$<TARGET_FILE:dawn::webgpu_dawn>" "$<TARGET_FILE_DIR:onnxruntime_providers_webgpu>"
VERBATIM )
list(APPEND onnxruntime_providers_webgpu_dll_deps "$<TARGET_FILE:dawn::webgpu_dawn>")
endif()
else()
if (NOT onnxruntime_USE_EXTERNAL_DAWN)
target_link_libraries(onnxruntime_providers_webgpu dawn::dawn_native)
endif()
target_link_libraries(onnxruntime_providers_webgpu dawn::dawn_proc)
endif()

if (WIN32 AND onnxruntime_ENABLE_DAWN_BACKEND_D3D12)
  # Build copy_dxil_dll and dxcompiler first, so that dxil.dll and dxcompiler.dll exist in
  # $<TARGET_FILE_DIR:dxcompiler> before the WebGPU provider's post-build steps run.
  add_dependencies(onnxruntime_providers_webgpu copy_dxil_dll dxcompiler)

  # Record both DLLs as runtime dependencies of the WebGPU provider.
  list(APPEND onnxruntime_providers_webgpu_dll_deps
    "$<TARGET_FILE_DIR:dxcompiler>/dxil.dll"
    "$<TARGET_FILE_DIR:dxcompiler>/dxcompiler.dll")
endif()

# Post-build step for onnxruntime_providers_webgpu: copy every file collected in
# onnxruntime_providers_webgpu_dll_deps into the directory of the built target.
# The whole step is skipped when the list is empty.
#
# The list is passed to `cmake -E copy_if_different` as a single quoted argument;
# COMMAND_EXPAND_LISTS expands it so that each list entry becomes its own source argument,
# followed by the destination directory.
if (onnxruntime_providers_webgpu_dll_deps)
# Copy dependency DLLs to the output directory
add_custom_command(
TARGET onnxruntime_providers_webgpu
POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_if_different "${onnxruntime_providers_webgpu_dll_deps}" "$<TARGET_FILE_DIR:onnxruntime_providers_webgpu>"
COMMAND_EXPAND_LISTS
VERBATIM )
endif()

set_target_properties(onnxruntime_providers_webgpu PROPERTIES FOLDER "ONNXRuntime")
12 changes: 12 additions & 0 deletions cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,9 @@ set (onnxruntime_global_thread_pools_test_SRC
set (onnxruntime_webgpu_external_dawn_test_SRC
${TEST_SRC_DIR}/webgpu/external_dawn/main.cc)

set (onnxruntime_webgpu_delay_load_test_SRC
${TEST_SRC_DIR}/webgpu/delay_load/main.cc)

# tests from lowest level library up.
# the order of libraries should be maintained, with higher libraries being added first in the list

Expand Down Expand Up @@ -1864,4 +1867,13 @@ if (onnxruntime_USE_WEBGPU AND onnxruntime_USE_EXTERNAL_DAWN)
onnxruntime_add_include_to_target(onnxruntime_webgpu_external_dawn_test dawn::dawncpp_headers dawn::dawn_headers)
endif()

# Register the WebGPU delay-load test. It is only added for Windows shared-library builds with WebGPU
# enabled, and never for Emscripten or minimal builds.
# NOTE(review): AddTest is a project helper defined elsewhere; DYN presumably selects dynamic linking
# against the onnxruntime shared library - confirm against the AddTest definition.
if (onnxruntime_USE_WEBGPU AND WIN32 AND onnxruntime_BUILD_SHARED_LIB AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND NOT onnxruntime_MINIMAL_BUILD)
AddTest(DYN
TARGET onnxruntime_webgpu_delay_load_test
SOURCES ${onnxruntime_webgpu_delay_load_test_SRC}
LIBS ${SYS_PATH_LIB}
DEPENDS ${all_dependencies}
)
endif()

include(onnxruntime_fuzz_test.cmake)
10 changes: 6 additions & 4 deletions js/node/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,12 @@ endif()
if (WIN32)
file(COPY ${ONNXRUNTIME_WIN_BIN_DIR}/onnxruntime.dll
DESTINATION ${dist_folder})
if (USE_DML)
file(COPY ${ONNXRUNTIME_WIN_BIN_DIR}/DirectML.dll
DESTINATION ${dist_folder})
endif ()
if (ORT_NODEJS_DLL_DEPS)
foreach(dll ${ORT_NODEJS_DLL_DEPS})
file(COPY ${dll} DESTINATION ${dist_folder})
endforeach()
endif()

elseif (APPLE)
file(COPY ${ONNXRUNTIME_BUILD_DIR}/libonnxruntime.dylib
DESTINATION ${dist_folder} FOLLOW_SYMLINK_CHAIN)
Expand Down
5 changes: 5 additions & 0 deletions js/node/script/build.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ const USE_TENSORRT = !!buildArgs.use_tensorrt;
const USE_COREML = !!buildArgs.use_coreml;
// --use_qnn
const USE_QNN = !!buildArgs.use_qnn;
// --dll_deps=
const DLL_DEPS = buildArgs.dll_deps;

// build path
const ROOT_FOLDER = path.join(__dirname, '..');
Expand Down Expand Up @@ -82,6 +84,9 @@ if (USE_COREML) {
if (USE_QNN) {
args.push('--CDUSE_QNN=ON');
}
if (DLL_DEPS) {
args.push(`--CDORT_NODEJS_DLL_DEPS=${DLL_DEPS}`);
}

// set CMAKE_OSX_ARCHITECTURES for macOS build
if (os.platform() === 'darwin') {
Expand Down
37 changes: 0 additions & 37 deletions js/node/src/directml_load_helper.cc

This file was deleted.

6 changes: 0 additions & 6 deletions js/node/src/directml_load_helper.h

This file was deleted.

4 changes: 0 additions & 4 deletions js/node/src/inference_session_wrap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
#include "onnxruntime_cxx_api.h"

#include "common.h"
#include "directml_load_helper.h"
#include "inference_session_wrap.h"
#include "run_options_helper.h"
#include "session_options_helper.h"
Expand All @@ -19,9 +18,6 @@ Napi::FunctionReference& InferenceSessionWrap::GetTensorConstructor() {
}

Napi::Object InferenceSessionWrap::Init(Napi::Env env, Napi::Object exports) {
#if defined(USE_DML) && defined(_WIN32)
LoadDirectMLDll(env);
#endif
// create ONNX runtime env
Ort::InitApi();
ORT_NAPI_THROW_ERROR_IF(
Expand Down
83 changes: 83 additions & 0 deletions onnxruntime/core/dll/delay_load_hook.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

// == workaround for delay loading of dependencies of onnxruntime.dll ==
//
// Problem:
//
// When onnxruntime.dll uses delay loading for its dependencies, the dependencies are loaded using LoadLibraryEx,
// which searches the directory of the process (.exe) instead of the directory of this library (onnxruntime.dll).
// This is a problem for the Node.js binding and the Python binding, because Windows will try to find the
// dependencies in the directory of node.exe or python.exe, which is not the directory of onnxruntime.dll.
//
// Solution:
//
// By using the delay load hook `__pfnDliNotifyHook2`, we can intervene in the loading procedure and load from an
// absolute path. The absolute path is constructed by appending the name of the DLL to load to the directory of
// onnxruntime.dll. This way, we can ensure that the dependencies are loaded from the same directory as onnxruntime.dll.
//
// See also:
// - https://learn.microsoft.com/en-us/cpp/build/reference/understanding-the-helper-function?view=msvc-170#structure-and-constant-definitions
// - https://learn.microsoft.com/en-us/windows/win32/dlls/dynamic-link-library-search-order#alternate-search-order-for-unpackaged-apps
//
// The DLL DelayLoad hook is only enabled when the compiler is MSVC and at least one of the following is True:
// - both USE_WEBGPU and BUILD_DAWN_MONOLITHIC_LIBRARY are defined
// - USE_DML is defined
//
// Feature switches for the delay-load hook, each defined to exactly 0 or 1.
//
// The conditions are evaluated here rather than stored as `defined(...)` expressions in the macro bodies:
// when the token `defined` is produced by macro expansion inside an `#if`, the behavior is undefined by the
// C and C++ standards, so compilers are free to evaluate it differently (or reject it).
#if defined(USE_WEBGPU) && defined(BUILD_DAWN_MONOLITHIC_LIBRARY)
#define ORT_DELAY_LOAD_WEBGPU_DAWN_DLL 1
#else
#define ORT_DELAY_LOAD_WEBGPU_DAWN_DLL 0
#endif

#if defined(USE_DML)
#define ORT_DELAY_LOAD_DIRECTML_DLL 1
#else
#define ORT_DELAY_LOAD_DIRECTML_DLL 0
#endif
#if defined(_MSC_VER) && (ORT_DELAY_LOAD_WEBGPU_DAWN_DLL || ORT_DELAY_LOAD_DIRECTML_DLL)

#include <Windows.h>
#include <delayimp.h>
#include <stdlib.h>
#include <string>

#include "core/platform/env.h"

namespace {

// Expands to a {narrow, wide} pair of string literals spelling "<name>.dll".
//
// The wide literal is formed by adjacent string-literal concatenation (an empty wide literal, the stringized
// name, and L".dll"). Writing `L#name` instead relies on the traditional MSVC preprocessor gluing the `L`
// prefix onto the stringized token, which the conforming preprocessor does not do.
#define DEFINE_KNOWN_DLL(name) {#name ".dll", L"" #name L".dll"}

// Table of the delay-loaded DLLs that the hook below redirects. Which entries are present depends on the
// ORT_DELAY_LOAD_* switches.
constexpr struct {
  const char* str;      // narrow file name, compared against the name reported by the delay-load helper
  const wchar_t* wstr;  // wide file name, appended to the runtime path to form the absolute path to load
} known_dlls[] = {
#if ORT_DELAY_LOAD_WEBGPU_DAWN_DLL
    DEFINE_KNOWN_DLL(webgpu_dawn),
#endif
#if ORT_DELAY_LOAD_DIRECTML_DLL
    DEFINE_KNOWN_DLL(DirectML),
#endif
};
}  // namespace

// Delay-load notification hook.
//
// For a pre-load notification naming one of the DLLs in `known_dlls` (compared case-insensitively), the DLL is
// loaded from an absolute path built from the runtime path plus the DLL file name, and the resulting module
// handle is returned. In every other case NULL is returned, which leaves the load to the default procedure.
FARPROC WINAPI delay_load_hook(unsigned dliNotify, PDelayLoadInfo pdli) {
  // Only the notification sent before a library is loaded is of interest.
  if (dliNotify != dliNotePreLoadLibrary) {
    return NULL;
  }

  for (const auto& dll : known_dlls) {
    if (_stricmp(pdli->szDll, dll.str) != 0) {
      continue;
    }

    // Start from the path of onnxruntime.dll. If it cannot be determined, return NULL and let the system
    // search for the DLL in the default search order.
    auto module_path = Env::Default().GetRuntimePath();
    if (module_path.empty()) {
      return NULL;
    }

    // `module_path` becomes the absolute path of the DLL to load.
    module_path.append(dll.wstr);

    const DWORD search_flags = LOAD_LIBRARY_SEARCH_DEFAULT_DIRS | LOAD_LIBRARY_SEARCH_DLL_LOAD_DIR;
    return FARPROC(LoadLibraryExW(module_path.c_str(), NULL, search_flags));
  }

  // Not one of the known DLLs.
  return NULL;
}

// Install the hook: the delay-load helper reads this well-known symbol.
extern "C" const PfnDliHook __pfnDliNotifyHook2 = delay_load_hook;

#endif
2 changes: 1 addition & 1 deletion onnxruntime/core/dll/dllmain.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#pragma GCC diagnostic pop
#endif

// dllmain.cpp : Defines the entry point for the DLL application.
// dllmain.cc : Defines the entry point for the DLL application.
BOOL APIENTRY DllMain(HMODULE /*hModule*/,
DWORD ul_reason_for_call,
LPVOID /*lpReserved*/
Expand Down
26 changes: 26 additions & 0 deletions onnxruntime/core/providers/webgpu/webgpu_context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
#endif

#include "core/common/common.h"
#include "core/common/path_string.h"
#include "core/platform/env.h"

#include "core/providers/webgpu/compute_context.h"
#include "core/providers/webgpu/webgpu_context.h"
Expand Down Expand Up @@ -50,6 +52,30 @@ void WebGpuContext::Initialize(const WebGpuExecutionProviderInfo& webgpu_ep_info

// Initialization.Step.2 - Create wgpu::Adapter
if (adapter_ == nullptr) {
#if !defined(__EMSCRIPTEN__) && defined(_MSC_VER) && defined(DAWN_ENABLE_D3D12) && !defined(USE_EXTERNAL_DAWN)
// If we are using the D3D12 backend on Windows and the build does not use external Dawn, dxil.dll and dxcompiler.dll are required.
//
// Dawn will try to load them later, but if they are in a different directory from the executable, it may fail to find them.
// To avoid this issue, we try to load them from the same directory as the current module (usually onnxruntime.dll).
auto runtime_path = Env::Default().GetRuntimePath();
if (!runtime_path.empty()) {
Status status;
void* module_handle = nullptr;

PathString dxil_path = runtime_path + ToPathString(L"dxil.dll");
status = Env::Default().LoadDynamicLibrary(dxil_path, false, &module_handle);
if (status.IsOK() && module_handle != nullptr) {
modules_.Add(dxil_path, module_handle);
}

PathString dxcompiler_path = runtime_path + ToPathString(L"dxcompiler.dll");
status = Env::Default().LoadDynamicLibrary(dxcompiler_path, false, &module_handle);
if (status.IsOK() && module_handle != nullptr) {
modules_.Add(dxcompiler_path, module_handle);
}
}
#endif

wgpu::RequestAdapterOptions req_adapter_options = {};
wgpu::DawnTogglesDescriptor adapter_toggles_desc = {};
req_adapter_options.nextInChain = &adapter_toggles_desc;
Expand Down
3 changes: 3 additions & 0 deletions onnxruntime/core/providers/webgpu/webgpu_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <webgpu/webgpu_cpp.h>

#include "core/common/common.h"
#include "core/framework/library_handles.h"
#include "core/providers/webgpu/webgpu_execution_provider.h"
#include "core/providers/webgpu/buffer_manager.h"
#include "core/providers/webgpu/program_manager.h"
Expand Down Expand Up @@ -153,6 +154,8 @@ class WebGpuContext final {

std::once_flag init_flag_;

LibraryHandles modules_;

wgpu::Instance instance_;
wgpu::Adapter adapter_;
wgpu::Device device_;
Expand Down
Loading
Loading