Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions onnxruntime/python/tools/quantization/onnx_quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1071,7 +1071,7 @@ def quantize_weight_per_channel(
scale_name,
zp_name,
QuantizedValueType.Initializer,
None,
channel_axis,
)
self.quantized_value_map[weight_name] = quantized_value

Expand All @@ -1096,8 +1096,9 @@ def _dequantize_value(self, value_name):
if self.model.model.producer_name != "onnx-quantizer" or (
self.model.model.producer_name == "onnx-quantizer" and scale_init is not None
):
# axis is not specified so scale_init must be a scalar.
assert scale_init is None or onnx.numpy_helper.to_array(scale_init).size == 1
# Per-tensor (axis=None) requires a scalar scale.
if quantized_value.axis is None:
assert scale_init is None or onnx.numpy_helper.to_array(scale_init).size == 1

dqlinear_name = value_name + "_DequantizeLinear"
dqlinear_node = self.model.find_node_by_name(dqlinear_name, self.new_nodes, self.model.graph())
Expand All @@ -1108,7 +1109,11 @@ def _dequantize_value(self, value_name):
quantized_value.zp_name,
]
dequantize_node = onnx.helper.make_node(
"DequantizeLinear", dqlinear_inputs, [value_name], dqlinear_name
"DequantizeLinear",
dqlinear_inputs,
[value_name],
dqlinear_name,
axis=quantized_value.axis,
)
return dequantize_node
else:
Expand Down
89 changes: 89 additions & 0 deletions onnxruntime/test/python/quantization/test_quant_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,95 @@ def get_next(self):
assert os.path.exists(preprocessed_path), f"missing output {preprocessed_path!r}"
assert os.path.exists(quantized_path), f"missing output {quantized_path!r}"

def test_dynamic_quantize_per_channel_emits_axis_attribute(self):
    """Per-channel dynamic quantization must emit axis on DequantizeLinear nodes.

    Regression test for https://github.com/microsoft/onnxruntime/issues/19997.
    `quantize_dynamic(per_channel=True)` previously constructed QuantizedValue
    with axis=None and built DequantizeLinear without an axis attribute,
    producing an invalid per-tensor dequantization for per-channel quantized
    weights.

    The weight feeds both a MatMul (which gets per-channel quantized) and an
    Identity node. Identity is not a quantizable op, so after the weight
    initializer has been quantized for the MatMul, the quantizer must insert a
    DequantizeLinear (via _dequantize_value) to restore a float input for
    Identity. Building that node previously hit an assertion error (a
    per-channel scale is not a scalar) and, had it succeeded, would have
    emitted a DequantizeLinear lacking the required axis attribute.
    """
    try:
        import numpy as np  # noqa: PLC0415
        import onnx  # noqa: PLC0415
        from onnx import TensorProto, helper, numpy_helper  # noqa: PLC0415

        from onnxruntime.quantization import QuantType, quantize_dynamic  # noqa: PLC0415
    except ImportError as exc:
        raise unittest.SkipTest(f"Required import missing: {exc}") from exc

    # Build a model: input (5, 4) @ weight (4, 8) -> output (5, 8).
    # The weight is also passed through an Identity node exposed as a second
    # graph output. Identity is not quantized, so the quantizer must
    # dequantize the per-channel-quantized weight before it — the exact code
    # path (_dequantize_value) fixed by this change.
    # Weight axis 1 (the last axis) is the output-feature axis that
    # per-channel quantization targets for MatMul weights.
    np.random.seed(42)
    weight_data = np.random.normal(0, 0.1, (4, 8)).astype(np.float32)
    weight_init = numpy_helper.from_array(weight_data, name="weight")

    input_vi = helper.make_tensor_value_info("input", TensorProto.FLOAT, [5, 4])
    output_vi = helper.make_tensor_value_info("output", TensorProto.FLOAT, [5, 8])
    weight_out_vi = helper.make_tensor_value_info("weight_out", TensorProto.FLOAT, [4, 8])

    matmul_node = helper.make_node("MatMul", ["input", "weight"], ["output"])
    identity_node = helper.make_node("Identity", ["weight"], ["weight_out"])

    graph = helper.make_graph(
        [matmul_node, identity_node],
        "test_graph",
        [input_vi],
        [output_vi, weight_out_vi],
        [weight_init],
    )
    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 17)])
    model.ir_version = 8

    with tempfile.TemporaryDirectory() as tmp:
        model_fp_path = os.path.join(tmp, "model_fp.onnx")
        model_q_path = os.path.join(tmp, "model_q.onnx")
        onnx.save(model, model_fp_path)

        # This must not raise AssertionError due to per-channel scale not being scalar.
        quantize_dynamic(
            model_fp_path,
            model_q_path,
            per_channel=True,
            weight_type=QuantType.QInt8,
        )

        q_model = onnx.load(model_q_path)

        # Find the DequantizeLinear node that dequantizes the weight initializer.
        init_names = {init.name for init in q_model.graph.initializer}
        dq_nodes = [n for n in q_model.graph.node if n.op_type == "DequantizeLinear"]
        self.assertGreater(len(dq_nodes), 0, "Expected at least one DequantizeLinear node")

        weight_dq = None
        for node in dq_nodes:
            if node.input[0] in init_names:
                weight_dq = node
                break
        self.assertIsNotNone(weight_dq, "No DequantizeLinear node found with a weight initializer as input")

        # The axis attribute must be present, and it must be -1: MatMulInteger
        # quantizes its weight along the last axis (the default channel axis
        # in __quantize_inputs), which quantize_weight_per_channel now records
        # on the QuantizedValue and _dequantize_value propagates to the node.
        axis_attrs = [attr for attr in weight_dq.attribute if attr.name == "axis"]
        self.assertEqual(len(axis_attrs), 1, "DequantizeLinear node is missing the 'axis' attribute")
        self.assertEqual(axis_attrs[0].i, -1, f"Expected axis=-1, got axis={axis_attrs[0].i}")

        # The scale initializer must be 1-D with size > 1 (truly per-channel, not collapsed).
        scale_name = weight_dq.input[1]
        scale_init = next((i for i in q_model.graph.initializer if i.name == scale_name), None)
        self.assertIsNotNone(scale_init, f"Scale initializer '{scale_name}' not found")
        scale_array = numpy_helper.to_array(scale_init)
        self.assertEqual(scale_array.ndim, 1, f"Expected 1-D scale, got shape {scale_array.shape}")
        self.assertGreater(scale_array.size, 1, "Scale has only one element; expected per-channel scale")


# Allow running this test file directly; verbosity=2 prints one line per test.
if __name__ == "__main__":
    unittest.main(verbosity=2)