Merge branch 'main' into main

inisis · web-flow · commit 21b98911388d · 2025-11-17T17:35:47.000+08:00
Signed-off-by: inisis <desmond.yao@buaa.edu.cn>
diff --git a/modelopt/onnx/autocast/graphsanitizer.py b/modelopt/onnx/autocast/graphsanitizer.py
@@ -18,6 +18,7 @@
 import numpy as np
 import onnx
 import onnx_graphsurgeon as gs
+import onnxscript
 from onnx import helper, numpy_helper
 
 import modelopt.onnx.autocast.utils as utils
@@ -144,6 +145,7 @@ def convert_opset(self) -> None:
         """Convert the model to the given opset version.
 
         The method checks all opset imports and converts the model if any are below the minimum version.
+        Uses the onnxscript version converter (with fallback to the ONNX C API), which handles large models (>2GB) better.
         """
         # Check all opset imports
         default_opsets = list(self.model.opset_import)
@@ -163,10 +165,30 @@ def convert_opset(self) -> None:
         if any(op.version < self.min_opset for op in default_opsets):
             invalid_opsets = [op.version for op in default_opsets if op.version < self.min_opset]
             try:
-                self.model = onnx.version_converter.convert_version(self.model, self.min_opset)
+                logger.info(
+                    f"Converting model from opset {invalid_opsets} to {self.min_opset} using onnxscript..."
+                )
+
+                # Convert to onnxscript IR
+                model_ir = onnxscript.ir.serde.deserialize_model(self.model)
+
+                # onnxscript handles conversion of large models better than the standard ONNX version_converter
+                # Convert opset with fallback=True (automatically falls back to C API if needed)
+                onnxscript.version_converter.convert_version(
+                    model_ir, target_version=self.min_opset, fallback=True
+                )
+
+                # Convert back to ONNX proto
+                self.model = onnxscript.ir.serde.serialize_model(model_ir)
+                logger.info(f"Successfully converted model to opset {self.min_opset}")
             except Exception as e:
                 logger.warning(f"Failed to convert model to opset {self.min_opset}: {e!s}")
                 logger.warning(f"Attempting to continue with the original opsets: {invalid_opsets}")
+        else:
+            logger.debug(
+                f"No opset conversion needed. Current opset {[op.version for op in default_opsets]} >= min_opset "
+                f"{self.min_opset}"
+            )
 
     def set_ir_version(self, max_ir_version: int | None) -> None:
         """Set the model's IR version to the maximum supported version.
diff --git a/modelopt/onnx/autocast/precisionconverter.py b/modelopt/onnx/autocast/precisionconverter.py
@@ -68,7 +68,7 @@ class InitializerConsumerTracker:
 OP_TYPES_NOT_SUPPORTED_IN_LOW_PRECISION = ["Upsample", "NonMaxSuppression", "Celu"]
 
 # Temporarily block these ops in low precision, as they are not supported yet
-OP_TYPES_NOT_SUPPORTED_IN_LOW_PRECISION.extend(["Scan", "If", "Loop", "LSTM"])
+OP_TYPES_NOT_SUPPORTED_IN_LOW_PRECISION.extend(["Scan", "If", "Loop"])
 
 # Mapping of op types to indices of inputs that should not be converted to low precision.
 SKIP_LOW_PRECISION_MAPPING_FP16 = {"Resize": {2}}
diff --git a/setup.py b/setup.py
@@ -52,6 +52,6 @@
         "onnxruntime-gpu~=1.22.0 ; platform_machine != 'aarch64' and platform_system != 'Darwin' and platform_system != 'Windows'",  # noqa: E501
         "onnxruntime-directml==1.20.0; platform_system == 'Windows'",
-        "onnxscript",  # For test_onnx_dynamo_export unit test
+        "onnxscript",  # For autocast opset conversion and test_onnx_dynamo_export unit test
         "polygraphy>=0.49.22",
         "onnxslim",
     ],