Skip to content

Commit b286165

Browse files
authored
[5680954][ONNX][Autocast] Fix 0-dim scalar constant issue (#691)
## What does this PR do? **Type of change:** Bug fix **Overview:** Ops with 0-dim scalar constants were being forced to a shape of (1,) instead of remaining 0-dimensional. This PR fixes that issue. ## Usage ```python $ python -m modelopt.onnx.autocast --onnx_path=$MODEL_NAME.onnx ``` ## Testing Added unittest. ## Before your PR is "*Ready for review*" <!-- If you haven't finished some of the above items you can still open `Draft` PR. --> - **Make sure you read and follow [Contributor guidelines](https://github.com/NVIDIA/Model-Optimizer/blob/main/CONTRIBUTING.md)** and your commits are signed. - **Is this change backward compatible?**: Yes - **Did you write any new necessary tests?**: Yes - **Did you add or update any necessary documentation?**: No - **Did you update [Changelog](https://github.com/NVIDIA/Model-Optimizer/blob/main/CHANGELOG.rst)?**: No Signed-off-by: gcunhase <4861122+gcunhase@users.noreply.github.com>
1 parent fc7ebe2 commit b286165

3 files changed

Lines changed: 9 additions & 9 deletions

File tree

modelopt/onnx/autocast/precisionconverter.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1336,10 +1336,6 @@ def _convert_constant_values(self, const_node, cast_node: onnx.NodeProto) -> Non
13361336
else:
13371337
casted_data = original_data.astype(cast_dtype)
13381338

1339-
# Workaround for 0-dimensional tensors (scalars)
1340-
if casted_data.ndim == 0:
1341-
casted_data = casted_data.reshape(1)
1342-
13431339
# Create a new constant node with casted data
13441340
if cast_to_type == onnx.TensorProto.BFLOAT16:
13451341
# Create TensorProto manually for bfloat16

tests/_test_utils/onnx/lib_test_models.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def forward(self, x):
7575
class SimpleMLP(nn.Module):
7676
"""Simple toy model."""
7777

78-
def __init__(self, fi=16, f1=18, f2=20, fo=22):
78+
def __init__(self, fi=16, f1=18, f2=20, fo=22, bias_add=False):
7979
super().__init__()
8080
self.net = nn.Sequential(
8181
nn.Linear(fi, f1, bias=False),
@@ -84,10 +84,13 @@ def __init__(self, fi=16, f1=18, f2=20, fo=22):
8484
nn.ReLU(),
8585
nn.Linear(f2, fo, bias=False),
8686
)
87+
self.bias_add = bias_add
8788

8889
def forward(self, x):
8990
for mod in self.net:
9091
x = mod(x)
92+
if self.bias_add:
93+
x += 1e-4
9194
return x
9295

9396

tests/unit/onnx/test_autocast_quantize.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,20 +36,21 @@ def assert_nodes_are_quantized(nodes):
3636

3737

3838
@pytest.mark.parametrize("keep_io_types", [True, False])
39-
def test_autocast_quantize_int8(tmp_path, keep_io_types):
40-
model_torch = SimpleMLP()
39+
@pytest.mark.parametrize("bias_add", [True, False])
40+
def test_autocast_quantize_int8(tmp_path, keep_io_types, bias_add):
41+
model_torch = SimpleMLP(bias_add=bias_add)
4142
input_tensor = torch.randn(2, 16, 16)
4243
low_precision_type = "fp16"
4344

44-
onnx_path = os.path.join(tmp_path, "model.onnx")
45+
onnx_path = os.path.join(tmp_path, f"model{'_biasAdd' if bias_add else ''}.onnx")
4546
export_as_onnx(model_torch, input_tensor, onnx_filename=onnx_path)
4647

4748
# Convert model to low precision
4849
converted_model = convert_to_mixed_precision(
4950
onnx_path, keep_io_types=keep_io_types, low_precision_type=low_precision_type
5051
)
5152
converted_model_path = onnx_path.replace(
52-
".onnx", f".{low_precision_type}.{'keepIOTypes' if keep_io_types else ''}.onnx"
53+
".onnx", f".{low_precision_type}{'_keepIOTypes' if keep_io_types else ''}.onnx"
5354
)
5455
onnx.save(converted_model, converted_model_path)
5556

0 commit comments

Comments (0)