// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "core/providers/qnn/builder/opbuilder/base_op_builder.h"
#include "core/providers/qnn/builder/qnn_utils.h"
#include "core/providers/qnn/builder/qnn_model_wrapper.h"
#include "core/providers/qnn/builder/op_builder_factory.h"

namespace onnxruntime {
namespace qnn {

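// Op builder for the ONNX Upsample operator (deprecated in ONNX opset 10 in favor of Resize).
// Lowers Upsample to QNN's Resize family of ops.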
class UpsampleOpBuilder : public BaseOpBuilder {
 public:
  UpsampleOpBuilder() : BaseOpBuilder("UpsampleOpBuilder") {}
  ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(UpsampleOpBuilder);

  Status IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
                       const NodeUnit& node_unit,
                       const logging::Logger& logger) const final ORT_MUST_USE_RESULT;

 protected:
  Status ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
                       const NodeUnit& node_unit,
                       const logging::Logger& logger,
                       std::vector<std::string>& input_names,
                       bool do_op_validation) const override ORT_MUST_USE_RESULT;

  Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
                                     const NodeUnit& node_unit,
                                     std::vector<std::string>&& input_names,
                                     const logging::Logger& logger,
                                     bool do_op_validation) const override ORT_MUST_USE_RESULT;

  Status OverrideOutputQuantParam(QnnModelWrapper& qnn_model_wrapper,
                                  const NodeUnit& node_unit,
                                  const logging::Logger& logger,
                                  const std::vector<std::string>& input_names,
                                  size_t output_index,
                                  Qnn_DataType_t qnn_data_type,
                                  QnnQuantParamsWrapper& quant_param) const override ORT_MUST_USE_RESULT;

 private:
  const std::unordered_map<std::string, uint32_t> supported_modes = {
      {"nearest", QNN_OP_RESIZE_INTERPOLATION_MODE_NEAREST},
      {"linear", QNN_OP_RESIZE_INTERPOLATION_MODE_LINEAR},
      {"cubic", QNN_OP_RESIZE_INTERPOLATION_MODE_CUBIC}};

  // Info for the ONNX Upsample attribute: {<attribute_name>, <default_value>}
  const OnnxAttrInfo<std::string> onnx_mode_attr = {"mode", "nearest"};
};

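// Helper that wraps a Qnn_Scalar_t in a QnnParamWrapper, registers it with the model wrapper,
// and appends its parameter tensor name to param_tensor_names. It always returns Status::OK();
// the Status return type simply lets call sites use ORT_RETURN_IF_ERROR uniformly.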
static Status AddQnnScalar(QnnModelWrapper& qnn_model_wrapper,
                           const NodeUnit& node_unit,
                           std::vector<std::string>& param_tensor_names,
                           const Qnn_Scalar_t& qnn_scalar,
                           const std::string& qnn_scalar_param_name) {
  QnnParamWrapper qnn_param_wrapper(node_unit.Index(), node_unit.Name(), qnn_scalar_param_name, qnn_scalar);
  param_tensor_names.push_back(qnn_param_wrapper.GetParamTensorName());
  qnn_model_wrapper.AddParamWrapper(std::move(qnn_param_wrapper));

  return Status::OK();
}

Status UpsampleOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
                                        const NodeUnit& node_unit,
                                        const logging::Logger& logger) const {
  // Resize ops are sensitive to data layout. Nodes from the 1st call to GetCapability have not been through
  // the layout transformer yet (still NCHW), while nodes from the 2nd call have (NHWC). The NHWC case is
  // validated through QNN itself; the NCHW case from the 1st call is validated manually below.
  if (node_unit.Domain() == kMSInternalNHWCDomain) {
    return AddToModelBuilder(qnn_model_wrapper, node_unit, logger, true);
  }

  const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
  NodeAttrHelper node_helper(node_unit);

  // Check mode
  const std::string interp_mode = GetOnnxAttr(node_helper, onnx_mode_attr);
  ORT_RETURN_IF_NOT(supported_modes.find(interp_mode) != supported_modes.end(),
                    "QNN EP: Resize does not support mode ", interp_mode.c_str());

  const auto& input_0 = node_unit.Inputs()[0];
  std::vector<uint32_t> input_shape;
  ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(input_0.node_arg, input_shape),
                    "QNN EP: Cannot get input shape for Onnx Upsample ", input_0.node_arg.Name().c_str());
  const size_t input_rank = input_shape.size();

  ORT_RETURN_IF(is_npu_backend && (input_rank < 3 || input_rank > 5),
                "QNN EP: The input rank for Resize must be at least 3 and no greater than 5 on the HTP.");

  const auto& output_0 = node_unit.Outputs()[0];
  std::vector<uint32_t> output_shape;
  ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(output_0.node_arg, output_shape),
                    "QNN EP: Cannot get output shape for Onnx Upsample ", output_0.node_arg.Name().c_str(),
                    ". Dynamic scales input is not supported in QNN EP.");

  // Check that only the spatial dimensions (width, height) are resized. The batch_size (N) and channels (C) should
  // be untouched. This code runs before layout transformation, so we know that the current layout is "channel first"
  // (e.g., N, C, S1, S2, ..., SN).
  ORT_RETURN_IF_NOT(input_shape[0] == output_shape[0] && input_shape[1] == output_shape[1],
                    "QNN EP: Resize may only change the spatial dimensions.");

  if (!is_npu_backend) {
    ONNX_NAMESPACE::DataType input_data_type = input_0.node_arg.Type();
    ORT_RETURN_IF(input_data_type != ONNX_NAMESPACE::Utils::DataTypeUtils::ToType("float"),
                  "QNN EP: Data type ", input_data_type->c_str(),
                  " is not supported for Resize operator in CPU backend.");
  }

  return Status::OK();
}

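// Registers only the data input (input 0) with QNN. For opset > 7, Upsample takes 'scales' as a
// second input, which must be a constant initializer: QNN derives the scaling from the static
// output shape rather than from a dynamic tensor.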
Status UpsampleOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
                                        const NodeUnit& node_unit,
                                        const logging::Logger& logger,
                                        std::vector<std::string>& input_names,
                                        bool do_op_validation) const {
  const int opset_version = node_unit.SinceVersion();
  const auto& inputs = node_unit.Inputs();

  if (opset_version > 7 && do_op_validation) {
    const std::string& scales_input_name = inputs[1].node_arg.Name();
    ORT_RETURN_IF_NOT(qnn_model_wrapper.IsConstantInput(scales_input_name),
                      "QNN doesn't support dynamic scales input for ONNX Upsample op ", node_unit.Name().c_str());
  }

  // Only the first (data) input of ONNX Upsample needs to be registered with QNN.
  ORT_RETURN_IF_ERROR(ProcessInput(qnn_model_wrapper, inputs[0], logger, input_names));

  return Status::OK();
}

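// Sets the QNN op type and its parameters. On the NPU (HTP) backend with rank-4 input and a
// non-cubic mode, the node is lowered to ResizeNearestNeighbor/ResizeBilinear; otherwise it is
// lowered to the generic QNN Resize op.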
Status UpsampleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
                                                      const NodeUnit& node_unit,
                                                      std::vector<std::string>&& input_names,
                                                      const logging::Logger& logger,
                                                      bool do_op_validation) const {
  std::vector<std::string> param_tensor_names;
  NodeAttrHelper node_helper(node_unit);
  const std::string interp_mode = GetOnnxAttr(node_helper, onnx_mode_attr);

  const auto& input_0 = node_unit.Inputs()[0];
  std::vector<uint32_t> input_shape;
  ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(input_0.node_arg, input_shape),
                    "QNN EP: Cannot get input shape for Onnx Upsample ", input_0.node_arg.Name().c_str());

  const size_t input_rank = input_shape.size();
  const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
  std::string qnn_op_type = GetQnnOpType(node_unit.OpType());

  if (is_npu_backend && input_rank == 4 && interp_mode != "cubic") {
    // Lower to QNN's ResizeNearestNeighbor/ResizeBilinear instead of QNN's Resize for better performance
    // on the HTP backend. QNN's ResizeNearestNeighbor and ResizeBilinear only support rank-4 input.
    qnn_op_type = (interp_mode == "nearest") ? QNN_OP_RESIZE_NEAREST_NEIGHBOR : QNN_OP_RESIZE_BILINEAR;

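    // ONNX Upsample maps coordinates asymmetrically (x_input = x_output / scale), so both
    // 'align_corners' and 'half_pixel_centers' are left false below.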
    // Parameter 'align_corners'
    Qnn_Scalar_t qnn_align_corners = QNN_SCALAR_INIT;
    qnn_align_corners.dataType = QNN_DATATYPE_BOOL_8;
    qnn_align_corners.bool8Value = false;
    const std::string align_corners_param_name = (qnn_op_type == QNN_OP_RESIZE_BILINEAR)
                                                     ? QNN_OP_RESIZE_BILINEAR_PARAM_ALIGN_CORNERS
                                                     : QNN_OP_RESIZE_NEAREST_NEIGHBOR_PARAM_ALIGN_CORNERS;

    ORT_RETURN_IF_ERROR(AddQnnScalar(qnn_model_wrapper, node_unit, param_tensor_names,
                                     qnn_align_corners, align_corners_param_name));

    // Parameter 'half_pixel_centers'
    Qnn_Scalar_t qnn_half_pixel_centers = QNN_SCALAR_INIT;
    qnn_half_pixel_centers.dataType = QNN_DATATYPE_BOOL_8;
    qnn_half_pixel_centers.bool8Value = false;
    const std::string half_pixel_centers_param_name = (qnn_op_type == QNN_OP_RESIZE_BILINEAR)
                                                          ? QNN_OP_RESIZE_BILINEAR_PARAM_HALF_PIXEL_CENTERS
                                                          : QNN_OP_RESIZE_NEAREST_NEIGHBOR_PARAM_HALF_PIXEL_CENTERS;

    ORT_RETURN_IF_ERROR(AddQnnScalar(qnn_model_wrapper, node_unit, param_tensor_names,
                                     qnn_half_pixel_centers, half_pixel_centers_param_name));

    if (qnn_op_type == QNN_OP_RESIZE_BILINEAR) {
      // Parameter 'antialias'
      Qnn_Scalar_t qnn_antialias = QNN_SCALAR_INIT;
      qnn_antialias.dataType = QNN_DATATYPE_BOOL_8;
      qnn_antialias.bool8Value = false;

      ORT_RETURN_IF_ERROR(AddQnnScalar(qnn_model_wrapper, node_unit, param_tensor_names,
                                       qnn_antialias, QNN_OP_RESIZE_BILINEAR_PARAM_ANTIALIAS));
    }
  } else {
    // Remain as QNN's Resize.
    // Parameter 'exclude_outside'
    Qnn_Scalar_t qnn_exclude_outside = QNN_SCALAR_INIT;
    qnn_exclude_outside.dataType = QNN_DATATYPE_BOOL_8;
    qnn_exclude_outside.bool8Value = false;

    ORT_RETURN_IF_ERROR(AddQnnScalar(qnn_model_wrapper, node_unit, param_tensor_names,
                                     qnn_exclude_outside, QNN_OP_RESIZE_PARAM_EXCLUDE_OUTSIDE));

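    // ONNX Upsample has no coordinate_transformation_mode attribute, so the transformation mode is
    // fixed here: half_pixel for nearest interpolation and asymmetric otherwise.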
    // Parameter 'transformation_mode'
    Qnn_Scalar_t qnn_transformation_mode = QNN_SCALAR_INIT;
    qnn_transformation_mode.dataType = QNN_DATATYPE_UINT_32;
    qnn_transformation_mode.uint32Value = (supported_modes.at(interp_mode) == QNN_OP_RESIZE_INTERPOLATION_MODE_NEAREST)
                                              ? static_cast<uint32_t>(QNN_OP_RESIZE_TRANSFORMATION_MODE_HALF_PIXEL)
                                              : static_cast<uint32_t>(QNN_OP_RESIZE_TRANSFORMATION_MODE_ASYMMETRIC);

    ORT_RETURN_IF_ERROR(AddQnnScalar(qnn_model_wrapper, node_unit, param_tensor_names,
                                     qnn_transformation_mode, QNN_OP_RESIZE_PARAM_TRANSFORMATION_MODE));

    // Parameter 'interpolation_mode'
    Qnn_Scalar_t qnn_interp_mode = QNN_SCALAR_INIT;
    qnn_interp_mode.dataType = QNN_DATATYPE_UINT_32;
    qnn_interp_mode.uint32Value = static_cast<uint32_t>(supported_modes.at(interp_mode));

    ORT_RETURN_IF_ERROR(AddQnnScalar(qnn_model_wrapper, node_unit, param_tensor_names,
                                     qnn_interp_mode, QNN_OP_RESIZE_PARAM_INTERPOLATION_MODE));

    // Parameter 'nearest_mode'. Process only when 'interpolation_mode' is NEAREST.
    if (qnn_interp_mode.uint32Value == QNN_OP_RESIZE_INTERPOLATION_MODE_NEAREST) {
      Qnn_Scalar_t qnn_nearest_mode = QNN_SCALAR_INIT;
      qnn_nearest_mode.dataType = QNN_DATATYPE_UINT_32;
      qnn_nearest_mode.uint32Value = static_cast<uint32_t>(QNN_OP_RESIZE_NEAREST_MODE_ROUND_PREFER_FLOOR);

      ORT_RETURN_IF_ERROR(AddQnnScalar(qnn_model_wrapper, node_unit, param_tensor_names,
                                       qnn_nearest_mode, QNN_OP_RESIZE_PARAM_NEAREST_MODE));
    }
  }

  ORT_RETURN_IF_ERROR(ProcessOutputs(qnn_model_wrapper, node_unit,
                                     std::move(input_names),
                                     std::move(param_tensor_names),
                                     logger, do_op_validation, qnn_op_type));

  return Status::OK();
}

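// For per-tensor quantization, copies the input's quantization parameters to the output when the
// two are nearly equal, which helps the HTP backend apply certain optimizations.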
Status UpsampleOpBuilder::OverrideOutputQuantParam(QnnModelWrapper& qnn_model_wrapper,
                                                   const NodeUnit& node_unit,
                                                   const logging::Logger& logger,
                                                   const std::vector<std::string>& input_names,
                                                   size_t output_index,
                                                   Qnn_DataType_t qnn_data_type,
                                                   QnnQuantParamsWrapper& quant_param) const {
  if (!quant_param.IsPerTensor()) {
    return Status::OK();
  }

  // Force Resize op's output to use the same quantization parameters as the input if nearly equal.
  // This helps the HTP backend employ certain optimizations.
  return SetOutputQParamEqualToInputIfNearlyEqual(qnn_model_wrapper, node_unit, logger, input_names,
                                                  0 /*input_index*/, output_index, qnn_data_type, quant_param);
}

void CreateUpsampleOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
  op_registrations.AddOpBuilder(op_type, std::make_unique<UpsampleOpBuilder>());
}
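
// Minimal usage sketch (assuming the usual op_builder_factory.cc registration wiring):
//   CreateUpsampleOpBuilder("Upsample", op_registrations);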

} // namespace qnn
} // namespace onnxruntime