diff --git a/onnxruntime/core/optimizer/layout_transformation/layout_transformation.cc b/onnxruntime/core/optimizer/layout_transformation/layout_transformation.cc index 2754eebf75421..a679acf42636c 100644 --- a/onnxruntime/core/optimizer/layout_transformation/layout_transformation.cc +++ b/onnxruntime/core/optimizer/layout_transformation/layout_transformation.cc @@ -101,6 +101,15 @@ bool ConvertNodeLayout(const api::NodeRef& node) { } #endif +#if defined(USE_QNN) + if (node.GetExecutionProviderType() == kQnnExecutionProvider) { + if (node.OpType() == "Upsample") { + // Upsample is translated to QNN's Resize, which requires the NHWC layout for processing. + return true; + } + } +#endif + return layout_sensitive_ops.count(node.OpType()) != 0; } } // namespace diff --git a/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc b/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc index 77579dfc793ee..731cb30b74429 100644 --- a/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc +++ b/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc @@ -133,6 +133,10 @@ OpBuilderRegistrations::OpBuilderRegistrations() { CreateResizeOpBuilder("Resize", *this); } + { + CreateUpsampleOpBuilder("Upsample", *this); + } + { CreateTopKOpBuilder("TopK", *this); } diff --git a/onnxruntime/core/providers/qnn/builder/op_builder_factory.h b/onnxruntime/core/providers/qnn/builder/op_builder_factory.h index e11eae84341fe..8366e4e57e9d4 100644 --- a/onnxruntime/core/providers/qnn/builder/op_builder_factory.h +++ b/onnxruntime/core/providers/qnn/builder/op_builder_factory.h @@ -75,6 +75,8 @@ void CreateSplitOpBuilder(const std::string& op_type, OpBuilderRegistrations& op void CreateResizeOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); +void CreateUpsampleOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); + void CreateTopKOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); void CreateTileOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations); diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h index 37060fcd9ba93..272d226cd743d 100644 --- a/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/base_op_builder.h @@ -192,6 +192,7 @@ class BaseOpBuilder : public IOpBuilder { {"Reshape", QNN_OP_RESHAPE}, {"Resize", QNN_OP_RESIZE}, + {"Upsample", QNN_OP_RESIZE}, {"Flatten", QNN_OP_RESHAPE}, {"Squeeze", QNN_OP_RESHAPE}, {"Unsqueeze", QNN_OP_RESHAPE}, diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/upsample_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/upsample_op_builder.cc new file mode 100644 index 0000000000000..48214f92b1a61 --- /dev/null +++ b/onnxruntime/core/providers/qnn/builder/opbuilder/upsample_op_builder.cc @@ -0,0 +1,263 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
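+
+// This op builder lowers the (deprecated) ONNX Upsample operator (opsets 7 and 9) to QNN's Resize op.
+// On the HTP (NPU) backend, rank-4 inputs with 'nearest' or 'linear' modes are lowered to QNN's
+// ResizeNearestNeighbor or ResizeBilinear instead, which perform better than the generic Resize.
+// A dynamic (non-constant) 'scales' input is not supported, since QNN requires static shapes.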
+
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "core/providers/qnn/builder/opbuilder/base_op_builder.h"
+#include "core/providers/qnn/builder/qnn_utils.h"
+#include "core/providers/qnn/builder/qnn_model_wrapper.h"
+#include "core/providers/qnn/builder/op_builder_factory.h"
+
+namespace onnxruntime {
+namespace qnn {
+
+class UpsampleOpBuilder : public BaseOpBuilder {
+ public:
+  UpsampleOpBuilder() : BaseOpBuilder("UpsampleOpBuilder") {}
+  ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(UpsampleOpBuilder);
+
+  Status IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
+                       const NodeUnit& node_unit,
+                       const logging::Logger& logger) const final ORT_MUST_USE_RESULT;
+
+ protected:
+  Status ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
+                       const NodeUnit& node_unit,
+                       const logging::Logger& logger,
+                       std::vector<std::string>& input_names,
+                       bool do_op_validation) const override ORT_MUST_USE_RESULT;
+
+  Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
+                                     const NodeUnit& node_unit,
+                                     std::vector<std::string>&& input_names,
+                                     const logging::Logger& logger,
+                                     bool do_op_validation) const override ORT_MUST_USE_RESULT;
+
+  Status OverrideOutputQuantParam(QnnModelWrapper& qnn_model_wrapper,
+                                  const NodeUnit& node_unit,
+                                  const logging::Logger& logger,
+                                  const std::vector<std::string>& input_names,
+                                  size_t output_index,
+                                  Qnn_DataType_t qnn_data_type,
+                                  QnnQuantParamsWrapper& quant_param) const override ORT_MUST_USE_RESULT;
+
+ private:
+  const std::unordered_map<std::string, uint32_t> supported_modes = {
+      {"nearest", QNN_OP_RESIZE_INTERPOLATION_MODE_NEAREST},
+      {"linear", QNN_OP_RESIZE_INTERPOLATION_MODE_LINEAR},
+      {"cubic", QNN_OP_RESIZE_INTERPOLATION_MODE_CUBIC}};
+
+  // Info for the ONNX Upsample attribute {<attribute_name>, <default_value>}
+  const OnnxAttrInfo<std::string> onnx_mode_attr = {"mode", "nearest"};
+};
+
+static Status AddQnnScalar(QnnModelWrapper& qnn_model_wrapper,
+                           const NodeUnit& node_unit,
+                           std::vector<std::string>& param_tensor_names,
+                           const Qnn_Scalar_t& qnn_scalar,
+                           const std::string& qnn_scalar_param_name) {
+  QnnParamWrapper qnn_param_wrapper(node_unit.Index(), node_unit.Name(), qnn_scalar_param_name, qnn_scalar);
+  param_tensor_names.push_back(qnn_param_wrapper.GetParamTensorName());
+  qnn_model_wrapper.AddParamWrapper(std::move(qnn_param_wrapper));
+
+  return Status::OK();
+}
+
+Status UpsampleOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
+                                        const NodeUnit& node_unit,
+                                        const logging::Logger& logger) const {
+  // Resize-family ops are sensitive to data layout, and there is no special validation for them so far.
+  // The nodes from the 1st call of GetCapability do not have the layout transformer applied yet (still NCHW).
+  // The nodes from the 2nd call of GetCapability have the layout transformer applied (NHWC).
+  // Op validation therefore needs to happen in the 1st call of GetCapability.
+  if (node_unit.Domain() == kMSInternalNHWCDomain) {
+    return AddToModelBuilder(qnn_model_wrapper, node_unit, logger, true);
+  }
+
+  const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
+  NodeAttrHelper node_helper(node_unit);
+
+  // Check mode
+  const std::string interp_mode = GetOnnxAttr(node_helper, onnx_mode_attr);
+  ORT_RETURN_IF_NOT(supported_modes.find(interp_mode) != supported_modes.end(),
+                    "QNN EP: Upsample does not support mode ", interp_mode.c_str());
+
+  const auto& input_0 = node_unit.Inputs()[0];
+  std::vector<uint32_t> input_shape;
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(input_0.node_arg, input_shape),
+                    "QNN EP: Cannot get input shape for Onnx Upsample ", input_0.node_arg.Name().c_str());
+  const size_t input_rank = input_shape.size();
+
+  ORT_RETURN_IF(is_npu_backend && (input_rank < 3 || input_rank > 5),
+                "QNN EP: The input rank for Resize must be at least 3 and no greater than 5 on the HTP.");
+
+  const auto& output_0 = node_unit.Outputs()[0];
+  std::vector<uint32_t> output_shape;
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(output_0.node_arg, output_shape),
+                    "QNN EP: Cannot get output shape for Onnx Upsample ", output_0.node_arg.Name().c_str(),
+                    ". Dynamic scales input is not supported in QNN EP.");
+
+  // Check that only the spatial dimensions (width, height) are resized. The batch_size (N) and channels (C) should
+  // be untouched. This code runs before layout transformation, so we know that the current layout is "channel first"
+  // (e.g., N, C, S1, S2, ..., SN).
+  ORT_RETURN_IF_NOT(input_shape[0] == output_shape[0] && input_shape[1] == output_shape[1],
+                    "QNN EP: Resize may only change the spatial dimensions.");
+
+  if (!is_npu_backend) {
+    ONNX_NAMESPACE::DataType input_data_type = input_0.node_arg.Type();
+    ORT_RETURN_IF(input_data_type != ONNX_NAMESPACE::Utils::DataTypeUtils::ToType("float"),
+                  "QNN EP: Data type ", input_data_type->c_str(),
+                  " is not supported for the Resize operator in the CPU backend.");
+  }
+
+  return Status::OK();
+}
+
+Status UpsampleOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
+                                        const NodeUnit& node_unit,
+                                        const logging::Logger& logger,
+                                        std::vector<std::string>& input_names,
+                                        bool do_op_validation) const {
+  const int opset_version = node_unit.SinceVersion();
+  const auto& inputs = node_unit.Inputs();
+
+  if (opset_version > 7 && do_op_validation) {
+    const std::string& scales_input_name = inputs[1].node_arg.Name();
+    ORT_RETURN_IF_NOT(qnn_model_wrapper.IsConstantInput(scales_input_name),
+                      "QNN doesn't support a dynamic scales input for the ONNX Upsample op ", node_unit.Name().c_str());
+  }
+
+  // Only the first input of Onnx Upsample needs to be processed.
+  ORT_RETURN_IF_ERROR(ProcessInput(qnn_model_wrapper, inputs[0], logger, input_names));
+
+  return Status::OK();
+}
+
+Status UpsampleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
+                                                      const NodeUnit& node_unit,
+                                                      std::vector<std::string>&& input_names,
+                                                      const logging::Logger& logger,
+                                                      bool do_op_validation) const {
+  std::vector<std::string> param_tensor_names;
+  NodeAttrHelper node_helper(node_unit);
+  const std::string interp_mode = GetOnnxAttr(node_helper, onnx_mode_attr);
+
+  const auto& input_0 = node_unit.Inputs()[0];
+  std::vector<uint32_t> input_shape;
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(input_0.node_arg, input_shape),
+                    "QNN EP: Cannot get input shape for Onnx Upsample ", input_0.node_arg.Name().c_str());
+
+  const size_t input_rank = input_shape.size();
+  const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
+  std::string qnn_op_type = GetQnnOpType(node_unit.OpType());
+
+  if (is_npu_backend && input_rank == 4 && interp_mode != "cubic") {
+    // Translate to QNN's ResizeNearestNeighbor/ResizeBilinear instead of QNN's Resize to achieve better
+    // performance on the HTP backend. QNN's ResizeNearestNeighbor and ResizeBilinear are only supported
+    // when the input rank is 4.
+    qnn_op_type = (interp_mode == "nearest") ? QNN_OP_RESIZE_NEAREST_NEIGHBOR : QNN_OP_RESIZE_BILINEAR;
+
+    // Parameter 'align_corners'
+    Qnn_Scalar_t qnn_align_corners = QNN_SCALAR_INIT;
+    qnn_align_corners.dataType = QNN_DATATYPE_BOOL_8;
+    qnn_align_corners.bool8Value = false;
+    const std::string align_corners_param_name = (qnn_op_type == QNN_OP_RESIZE_BILINEAR)
+                                                     ? QNN_OP_RESIZE_BILINEAR_PARAM_ALIGN_CORNERS
+                                                     : QNN_OP_RESIZE_NEAREST_NEIGHBOR_PARAM_ALIGN_CORNERS;
+
+    ORT_RETURN_IF_ERROR(AddQnnScalar(qnn_model_wrapper, node_unit, param_tensor_names,
+                                     qnn_align_corners, align_corners_param_name));
+
+    // Parameter 'half_pixel_centers'
+    Qnn_Scalar_t qnn_half_pixel_centers = QNN_SCALAR_INIT;
+    qnn_half_pixel_centers.dataType = QNN_DATATYPE_BOOL_8;
+    qnn_half_pixel_centers.bool8Value = false;
+    const std::string half_pixel_centers_param_name = (qnn_op_type == QNN_OP_RESIZE_BILINEAR)
+                                                          ? QNN_OP_RESIZE_BILINEAR_PARAM_HALF_PIXEL_CENTERS
+                                                          : QNN_OP_RESIZE_NEAREST_NEIGHBOR_PARAM_HALF_PIXEL_CENTERS;
+
+    ORT_RETURN_IF_ERROR(AddQnnScalar(qnn_model_wrapper, node_unit, param_tensor_names,
+                                     qnn_half_pixel_centers, half_pixel_centers_param_name));
+
+    if (qnn_op_type == QNN_OP_RESIZE_BILINEAR) {
+      // Parameter 'antialias'
+      Qnn_Scalar_t qnn_antialias = QNN_SCALAR_INIT;
+      qnn_antialias.dataType = QNN_DATATYPE_BOOL_8;
+      qnn_antialias.bool8Value = false;
+
+      ORT_RETURN_IF_ERROR(AddQnnScalar(qnn_model_wrapper, node_unit, param_tensor_names,
+                                       qnn_antialias, QNN_OP_RESIZE_BILINEAR_PARAM_ANTIALIAS));
+    }
+  } else {
+    // Remain as QNN's Resize.
+    // Parameter 'exclude_outside'
+    Qnn_Scalar_t qnn_exclude_outside = QNN_SCALAR_INIT;
+    qnn_exclude_outside.dataType = QNN_DATATYPE_BOOL_8;
+    qnn_exclude_outside.bool8Value = false;
+
+    ORT_RETURN_IF_ERROR(AddQnnScalar(qnn_model_wrapper, node_unit, param_tensor_names,
+                                     qnn_exclude_outside, QNN_OP_RESIZE_PARAM_EXCLUDE_OUTSIDE));
+
+    // Parameter 'transformation_mode'
+    Qnn_Scalar_t qnn_transformation_mode = QNN_SCALAR_INIT;
+    qnn_transformation_mode.dataType = QNN_DATATYPE_UINT_32;
+    qnn_transformation_mode.uint32Value = (supported_modes.at(interp_mode) == QNN_OP_RESIZE_INTERPOLATION_MODE_NEAREST)
+                                              ? static_cast<uint32_t>(QNN_OP_RESIZE_TRANSFORMATION_MODE_HALF_PIXEL)
+                                              : static_cast<uint32_t>(QNN_OP_RESIZE_TRANSFORMATION_MODE_ASYMMETRIC);
+
+    ORT_RETURN_IF_ERROR(AddQnnScalar(qnn_model_wrapper, node_unit, param_tensor_names,
+                                     qnn_transformation_mode, QNN_OP_RESIZE_PARAM_TRANSFORMATION_MODE));
+
+    // Parameter 'interpolation_mode'
+    Qnn_Scalar_t qnn_interp_mode = QNN_SCALAR_INIT;
+    qnn_interp_mode.dataType = QNN_DATATYPE_UINT_32;
+    qnn_interp_mode.uint32Value = static_cast<uint32_t>(supported_modes.at(interp_mode));
+
+    ORT_RETURN_IF_ERROR(AddQnnScalar(qnn_model_wrapper, node_unit, param_tensor_names,
+                                     qnn_interp_mode, QNN_OP_RESIZE_PARAM_INTERPOLATION_MODE));
+
+    // Parameter 'nearest_mode'. Process only when 'interpolation_mode' is NEAREST.
+    if (qnn_interp_mode.uint32Value == QNN_OP_RESIZE_INTERPOLATION_MODE_NEAREST) {
+      Qnn_Scalar_t qnn_nearest_mode = QNN_SCALAR_INIT;
+      qnn_nearest_mode.dataType = QNN_DATATYPE_UINT_32;
+      qnn_nearest_mode.uint32Value = static_cast<uint32_t>(QNN_OP_RESIZE_NEAREST_MODE_ROUND_PREFER_FLOOR);
+
+      ORT_RETURN_IF_ERROR(AddQnnScalar(qnn_model_wrapper, node_unit, param_tensor_names,
+                                       qnn_nearest_mode, QNN_OP_RESIZE_PARAM_NEAREST_MODE));
+    }
+  }
+
+  ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
+                                     std::move(input_names),
+                                     std::move(param_tensor_names),
+                                     logger, do_op_validation, qnn_op_type));
+
+  return Status::OK();
+}
+
+Status UpsampleOpBuilder::OverrideOutputQuantParam(QnnModelWrapper& qnn_model_wrapper,
+                                                   const NodeUnit& node_unit,
+                                                   const logging::Logger& logger,
+                                                   const std::vector<std::string>& input_names,
+                                                   size_t output_index,
+                                                   Qnn_DataType_t qnn_data_type,
+                                                   QnnQuantParamsWrapper& quant_param) const {
+  if (!quant_param.IsPerTensor()) {
+    return Status::OK();
+  }
+
+  // Force the Resize op's output to use the same quantization parameters as the input if they are nearly equal.
+  // This helps the HTP backend employ certain optimizations.
+  return SetOutputQParamEqualToInputIfNearlyEqual(qnn_model_wrapper, node_unit, logger, input_names,
+                                                  0 /*input_index*/, output_index, qnn_data_type, quant_param);
+}
+
+void CreateUpsampleOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
+  op_registrations.AddOpBuilder(op_type, std::make_unique<UpsampleOpBuilder>());
+}
+
+}  // namespace qnn
+}  // namespace onnxruntime
diff --git a/onnxruntime/test/providers/qnn/upsample_op_test.cc b/onnxruntime/test/providers/qnn/upsample_op_test.cc
new file mode 100644
index 0000000000000..3371bbef44e1b
--- /dev/null
+++ b/onnxruntime/test/providers/qnn/upsample_op_test.cc
@@ -0,0 +1,114 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#if !defined(ORT_MINIMAL_BUILD)
+
+#include <string>
+#include <vector>
+
+#include "test/providers/qnn/qnn_test_utils.h"
+#include "core/graph/node_attr_utils.h"
+
+#include "core/graph/onnx_protobuf.h"
+#include "gtest/gtest.h"
+
+namespace onnxruntime {
+namespace test {
+
+// Runs a model with an Upsample operator on the QNN CPU backend. Checks the graph node assignment
+// and that inference outputs for QNN EP and CPU EP match.
+template <typename DataType>
+static void RunUpsampleTestOnCPU(const TestInputDef<DataType>& input_def,
+                                 const TestInputDef<float>& scales_def,
+                                 std::vector<ONNX_NAMESPACE::AttributeProto>&& attrs,
+                                 ExpectedEPNodeAssignment expected_ep_assignment,
+                                 int opset = 9) {
+  ProviderOptions provider_options;
+  provider_options["backend_type"] = "cpu";
+  provider_options["offload_graph_io_quantization"] = "0";
+
+  if (opset <= 7) {
+    // Opset 7 takes scales as an attribute instead of an input.
+    const std::vector<float>& scales = scales_def.GetRawData();
+    attrs.push_back(utils::MakeAttribute("scales", scales));
+
+    RunQnnModelTest(BuildOpTestCase<DataType>("Upsample", {input_def}, {}, attrs),
+                    provider_options,
+                    opset,
+                    expected_ep_assignment);
+  } else {
+    RunQnnModelTest(BuildOpTestCase<DataType, float>("Upsample", {input_def}, {scales_def}, attrs),
+                    provider_options,
+                    opset,
+                    expected_ep_assignment);
+  }
+}
+
+//
+// CPU tests:
+//
+
+// Test that Upsample with a dynamic scales input is not supported by QNN EP.
+TEST_F(QnnCPUBackendTests, Upsample_DynamicScales_Unsupported) {
+  RunUpsampleTestOnCPU(TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
+                       TestInputDef<float>({4}, false /* is_initializer */, {1.0f, 1.0f, 1.5f, 1.5f}),
+                       {utils::MakeAttribute("mode", "nearest")},  // Attributes
+                       ExpectedEPNodeAssignment::None,  // Should not be assigned to QNN EP.
+                       9);  // Opset
+}
+
+// Test Upsample with opset-9, mode `nearest`
+TEST_F(QnnCPUBackendTests, Upsample_4D_Nearest_opset9) {
+  RunUpsampleTestOnCPU(TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
+                       TestInputDef<float>({4}, true, {1.0f, 1.0f, 1.5f, 1.5f}),
+                       {utils::MakeAttribute("mode", "nearest")},  // Attributes
+                       ExpectedEPNodeAssignment::All,
+                       9);  // Opset
+}
+
+// Test Upsample with opset-9, mode `linear`
+TEST_F(QnnCPUBackendTests, Upsample_4D_Linear_opset9) {
+  RunUpsampleTestOnCPU(TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
+                       TestInputDef<float>({4}, true, {1.0f, 1.0f, 1.5f, 1.5f}),
+                       {utils::MakeAttribute("mode", "linear")},  // Attributes
+                       ExpectedEPNodeAssignment::All,
+                       9);  // Opset
+}
+
+// Test Upsample with opset-7, mode `nearest`
+TEST_F(QnnCPUBackendTests, Upsample_4D_Nearest_opset7) {
+  RunUpsampleTestOnCPU(TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
+                       TestInputDef<float>({4}, true, {1.0f, 1.0f, 1.5f, 1.5f}),
+                       {utils::MakeAttribute("mode", "nearest")},  // Attributes
+                       ExpectedEPNodeAssignment::All,
+                       7);  // Opset
+}
+
+// Test Upsample with opset-7, mode `linear`
+TEST_F(QnnCPUBackendTests, Upsample_4D_Linear_opset7) {
+  RunUpsampleTestOnCPU(TestInputDef<float>({1, 3, 4, 4}, false, -10.0f, 10.0f),
+                       TestInputDef<float>({4}, true, {1.0f, 1.0f, 1.5f, 1.5f}),
+                       {utils::MakeAttribute("mode", "linear")},  // Attributes
+                       ExpectedEPNodeAssignment::All,
+                       7);  // Opset
+}
+
+// Test Upsample with a 5D input
+TEST_F(QnnCPUBackendTests, Upsample_5D) {
+  RunUpsampleTestOnCPU(TestInputDef<float>({1, 3, 4, 4, 4}, false, -10.0f, 10.0f),
+                       TestInputDef<float>({5}, true, {1.0f, 1.0f, 1.5f, 1.5f, 1.5f}),
+                       {utils::MakeAttribute("mode", "nearest")},  // Attributes
+                       ExpectedEPNodeAssignment::All,
+                       9);  // Opset
+}
+
+/*
+QNN HTP backend tests for QDQ Upsample models are bypassed and cannot be enabled.
+
+ONNX Upsample was deprecated in opset 10, whereas ONNX QuantizeLinear and DequantizeLinear were only introduced
+in opset 10. These requirements are mutually exclusive, so it is not possible for these ops to coexist in the
+same opset version.
+*/
+
+}  // namespace test
+}  // namespace onnxruntime
+#endif  // !defined(ORT_MINIMAL_BUILD)
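
Note: for reference, a minimal sketch of how the new Upsample path can be exercised end to end through the
ONNX Runtime C++ API, assuming a QNN-enabled build and an opset-7/9 model containing an Upsample node. The
model filename is a placeholder, and the "backend_type" option value is taken from the CPU tests above; this
is an illustrative driver, not part of this change.

    // qnn_upsample_demo.cc -- hypothetical standalone driver.
    #include <onnxruntime_cxx_api.h>

    #include <string>
    #include <unordered_map>

    int main() {
      Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "qnn_upsample_demo");
      Ort::SessionOptions session_options;

      // Register the QNN EP. "backend_type" = "cpu" mirrors the provider option used by the
      // tests above; an HTP build would select the NPU backend instead.
      std::unordered_map<std::string, std::string> qnn_options;
      qnn_options["backend_type"] = "cpu";
      session_options.AppendExecutionProvider("QNN", qnn_options);

      // "upsample_model.onnx" is a placeholder for any opset-7/9 model with an Upsample node.
      // If QNN EP claims the node, the UpsampleOpBuilder added in this change translates it to
      // QNN's Resize during session initialization.
      Ort::Session session(env, ORT_TSTR("upsample_model.onnx"), session_options);
      return 0;
    }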