From fa76fc3d963f68bc10a62b5300c6c70d63784d1e Mon Sep 17 00:00:00 2001 From: Jambay Kinley Date: Fri, 21 Feb 2025 08:26:33 +0000 Subject: [PATCH 1/2] nodes_to_exclude in get_qnn_qdq_config --- .../quantization/execution_providers/qnn/quant_config.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py b/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py index 9384bab13f3cc..10dac2b87021d 100644 --- a/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py +++ b/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py @@ -55,6 +55,7 @@ def get_qnn_qdq_config( stride: int | None = None, calibration_providers: list[str] | None = None, op_types_to_quantize: list[str] | None = None, + nodes_to_exclude: list[str] | None = None, ) -> StaticQuantConfig: """ Returns a static quantization configuration suitable for running QDQ models on QNN EP. @@ -122,6 +123,8 @@ def get_qnn_qdq_config( calibration_providers: Execution providers to run the session during calibration. Default is None which uses [ "CPUExecutionProvider" ]. op_types_to_quantize: If set to None, all operator types will be quantized except for OP_TYPES_TO_EXCLUDE + nodes_to_exclude: List of node names to exclude from quantization. The nodes in this list will be excluded from + quantization when it is not None. 
Returns: A StaticQuantConfig object @@ -167,10 +170,13 @@ def get_qnn_qdq_config( ) op_types_to_quantize_set = set(op_types_to_quantize) if op_types_to_quantize else None + nodes_to_exclude_set = set(nodes_to_exclude) if nodes_to_exclude else None for node in model.graph.node: if op_types_to_quantize_set and node.op_type not in op_types_to_quantize_set: continue + if nodes_to_exclude_set and node.name in nodes_to_exclude_set: + continue op_types.add(node.op_type) qnn_compat.process_node(node) @@ -201,6 +207,7 @@ def get_qnn_qdq_config( op_types_to_quantize=op_types_to_quantize if op_types_to_quantize else list(op_types.difference(OP_TYPES_TO_EXCLUDE)), + nodes_to_exclude=nodes_to_exclude, per_channel=per_channel, use_external_data_format=(model_has_external_data or model.ByteSize() >= MODEL_SIZE_THRESHOLD), calibration_providers=calibration_providers, From c85f5dd7e62115f1fed673321c021fb18d3bee88 Mon Sep 17 00:00:00 2001 From: Jambay Kinley Date: Fri, 21 Feb 2025 00:40:06 -0800 Subject: [PATCH 2/2] lint fix Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- .../tools/quantization/execution_providers/qnn/quant_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py b/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py index 10dac2b87021d..ea995d4707ba3 100644 --- a/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py +++ b/onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py @@ -123,7 +123,7 @@ def get_qnn_qdq_config( calibration_providers: Execution providers to run the session during calibration. Default is None which uses [ "CPUExecutionProvider" ]. op_types_to_quantize: If set to None, all operator types will be quantized except for OP_TYPES_TO_EXCLUDE - nodes_to_exclude: List of node names to exclude from quantization. 
The nodes in this list will be excluded from + nodes_to_exclude: List of node names to exclude from quantization. The nodes in this list will be excluded from quantization when it is not None. Returns: