|
4 | 4 | # Licensed under the MIT License. See License.txt in the project root for
|
5 | 5 | # license information.
|
6 | 6 | # --------------------------------------------------------------------------
|
| 7 | +from __future__ import annotations |
7 | 8 |
|
8 | 9 | import itertools
|
| 10 | +import os |
| 11 | +import tempfile |
9 | 12 | import unittest
|
10 | 13 |
|
11 | 14 | import numpy as np
|
12 | 15 | import onnx
|
13 | 16 | from onnx import TensorProto, helper
|
14 |
| -from op_test_utils import TestDataFeeds, check_model_correctness, check_op_type_count, check_qtype_by_node_type |
| 17 | +from op_test_utils import ( |
| 18 | + TestDataFeeds, |
| 19 | + check_model_correctness, |
| 20 | + check_op_type_count, |
| 21 | + check_qtype_by_node_type, |
| 22 | + get_tensor_consumers_and_producers, |
| 23 | +) |
15 | 24 |
|
16 | 25 | from onnxruntime.quantization import QuantFormat, QuantType, quantize_dynamic, quantize_static
|
17 | 26 |
|
@@ -519,5 +528,159 @@ def test_pad_with_empty_string_input_name(self):
|
519 | 528 | self.assertNotEqual(name, "_quantized")
|
520 | 529 |
|
521 | 530 |
|
class TestQDQPad(unittest.TestCase):
    """Tests QDQ quantization of the Pad operator.

    Verifies that after static QDQ quantization, the DequantizeLinear feeding
    Pad and the QuantizeLinear consuming Pad's output share the same
    scale/zero-point initializers exactly in the configurations where the
    quantizer is expected to propagate quantization parameters through Pad
    (all non-constant modes, and constant mode with an implicit or zero
    padding value).
    """

    @classmethod
    def setUpClass(cls):
        cls._tmp_model_dir = tempfile.TemporaryDirectory(prefix="ort.qdq.pad_")

        # Note: swap with the commented line if you want to see the models in local test dir.
        cls._tmp_dir_path = cls._tmp_model_dir.name
        # cls._tmp_dir_path = "."

    @classmethod
    def tearDownClass(cls):
        cls._tmp_model_dir.cleanup()

    def build_pad_model(
        self,
        mode: str,
        constant_value: float | None = None,
        opset: int = 21,
        float_type: onnx.TensorProto.DataType = onnx.TensorProto.FLOAT,
    ) -> onnx.ModelProto:
        """Build a single-node float model: output_0 = Pad(input_0).

        Args:
            mode: Pad `mode` attribute ("constant", "reflect", "edge", or "wrap").
            constant_value: Optional padding value used only for "constant" mode.
            opset: Target ONNX opset. For opset < 11, Pad (opset 2) takes
                `pads`/`value` as attributes instead of inputs.
            float_type: Element type for the float input/output tensors.

        Returns:
            A shape-inferred, checker-validated onnx.ModelProto.
        """
        input_0 = onnx.helper.make_tensor_value_info("input_0", float_type, (3, 2))
        output_0 = onnx.helper.make_tensor_value_info("output_0", float_type, (3, 4))

        initializers = []
        pad_input_names = ["input_0"]
        attrs = {"mode": mode}

        pads_data = np.array([0, 2, 0, 0], dtype=np.int64)  # Pad two vals at beginning of axis 1.
        if opset >= 11:
            # Pad-11+ takes the pads (and optional constant_value) as inputs.
            initializers.append(onnx.numpy_helper.from_array(pads_data, "pads"))
            pad_input_names.append("pads")
        else:
            # Pad-2 takes the pads as an attribute.
            attrs["pads"] = pads_data.tolist()

        if mode == "constant" and constant_value is not None:
            if opset >= 11:
                initializers.append(onnx.helper.make_tensor("constant_value", float_type, [], [constant_value]))
                pad_input_names.append("constant_value")
            else:
                attrs["value"] = float(constant_value)

        pad_node = onnx.helper.make_node("Pad", pad_input_names, ["output_0"], name="Pad0", **attrs)

        graph = onnx.helper.make_graph(
            [pad_node],
            "PadFloat",
            [input_0],
            [output_0],
            initializer=initializers,
        )
        opset_imports = [onnx.helper.make_opsetid("", opset)]
        model = onnx.helper.make_model(graph, opset_imports=opset_imports)
        model = onnx.shape_inference.infer_shapes(model)
        onnx.checker.check_model(model, True)
        return model

    def test_qdq_pad_qparams(self):
        """
        Test that QDQ Pad has equal scale/zero-point for its input and output for certain configurations.
        """
        test_configs = [
            # Opset 21
            ("constant", None, 21, onnx.TensorProto.FLOAT),
            ("constant", None, 21, onnx.TensorProto.FLOAT16),
            ("constant", 0, 21, onnx.TensorProto.FLOAT),
            ("constant", 0, 21, onnx.TensorProto.FLOAT16),
            ("constant", 10.0, 21, onnx.TensorProto.FLOAT),
            ("constant", 10.0, 21, onnx.TensorProto.FLOAT16),
            ("reflect", None, 21, onnx.TensorProto.FLOAT),
            ("reflect", None, 21, onnx.TensorProto.FLOAT16),
            ("edge", None, 21, onnx.TensorProto.FLOAT),
            ("edge", None, 21, onnx.TensorProto.FLOAT16),
            ("wrap", None, 21, onnx.TensorProto.FLOAT),
            ("wrap", None, 21, onnx.TensorProto.FLOAT16),
            # Model with opset 10 will use pad of opset 2, which uses attributes instead of inputs.
            # Opset 10 Q/DQ ops don't support float16.
            ("constant", None, 10, onnx.TensorProto.FLOAT),
            ("constant", 0, 10, onnx.TensorProto.FLOAT),
            ("constant", 10.0, 10, onnx.TensorProto.FLOAT),
            ("reflect", None, 10, onnx.TensorProto.FLOAT),
            ("edge", None, 10, onnx.TensorProto.FLOAT),
        ]

        for pad_mode, constant_value, opset, float_type in test_configs:
            with self.subTest(pad_mode=pad_mode, constant_value=constant_value, opset=opset, float_type=float_type):
                label = f"_{pad_mode}_{constant_value}_opset{opset}_{onnx.TensorProto.DataType.Name(float_type)}"
                float_model_path = os.path.join(self._tmp_dir_path, f"pad{label}.float.onnx")
                qdq_model_path = os.path.join(self._tmp_dir_path, f"pad{label}.qdq.onnx")

                float_model = self.build_pad_model(pad_mode, constant_value, opset=opset, float_type=float_type)
                onnx.save_model(float_model, float_model_path)

                # Create a data reader
                np_dtype = onnx.helper.tensor_dtype_to_np_dtype(float_type)
                input_data_list = [
                    {"input_0": np.array([[1.0, 1.2], [2.3, 3.4], [4.5, 5.7]], dtype=np_dtype)},
                    {"input_0": np.array([[2.3, 3.4], [4.5, 5.7], [1.0, 1.2]], dtype=np_dtype)},
                ]
                data_reader = TestDataFeeds(input_data_list)

                # quantize model to QDQ
                quantize_static(
                    float_model_path,
                    qdq_model_path,
                    data_reader,
                    quant_format=QuantFormat.QDQ,
                    activation_type=QuantType.QUInt8,
                    weight_type=QuantType.QInt8,
                )

                expected_op_counts = {"DequantizeLinear": 2, "QuantizeLinear": 2, "Pad": 1}
                if constant_value is not None and opset >= 11:
                    expected_op_counts["DequantizeLinear"] += 1  # The constant padding value is quantized.
                check_op_type_count(self, qdq_model_path, **expected_op_counts)

                if pad_mode != "reflect":
                    # Do not check model correctness for 'reflect' mode because ONNX Runtime implementation does
                    # not match the ONNX reference implementation. See the following issue:
                    # https://github.com/microsoft/onnxruntime/issues/20801
                    data_reader.rewind()
                    check_model_correctness(self, float_model_path, qdq_model_path, data_reader.get_next())

                qdq_model = onnx.load_model(qdq_model_path)

                # Pad should share qparams with its input whenever the padded values are
                # either taken from the input itself or representable as exact zero.
                quant_output_same_as_input = False

                if pad_mode in ("reflect", "edge", "wrap"):
                    quant_output_same_as_input = True

                if pad_mode == "constant" and constant_value in (None, 0):
                    quant_output_same_as_input = True

                pad_node = next((node for node in qdq_model.graph.node if node.op_type == "Pad"), None)
                self.assertIsNotNone(pad_node)

                # Get the parent and child nodes of the Pad and check that they are DQ/Q.
                consumers, producers = get_tensor_consumers_and_producers(qdq_model)
                input_dq_node = producers.get(pad_node.input[0], None)
                self.assertIsNotNone(input_dq_node)
                self.assertEqual(input_dq_node.op_type, "DequantizeLinear")

                output_q_node = consumers.get(pad_node.output[0], [None])[0]
                self.assertIsNotNone(output_q_node)
                self.assertEqual(output_q_node.op_type, "QuantizeLinear")

                # Check that the Pad's input DQ uses the same scale/zp as the Pad's output Q.
                if quant_output_same_as_input:
                    self.assertEqual(input_dq_node.input[1], output_q_node.input[1])  # Same scale
                    self.assertEqual(input_dq_node.input[2], output_q_node.input[2])  # Same zero-point
                else:
                    self.assertNotEqual(input_dq_node.input[1], output_q_node.input[1])
                    self.assertNotEqual(input_dq_node.input[2], output_q_node.input[2])
| 684 | + |
# Allow running this test file directly (discovers and runs all TestCase classes).
if __name__ == "__main__":
    unittest.main()
|
0 commit comments