Replace ONNX simplification package onnxsim with onnxslim (#478)

inisis · kevalmorabia97 · web-flow · commit 261858c9b89e · 2025-11-27T01:00:49.000+05:30
## What does this PR do? **Type of change:** Add onnxslim support **Overview:** [OnnxSlim](https://github.com/inisis/OnnxSlim) is under active development and committed to long-term support; it is easy to use and depends on very few packages. ## Usage ```bash $ python -m modelopt.onnx.quantization --onnx_path=$MODEL_NAME.onnx --simplify ``` ## Testing  ## Before your PR is "*Ready for review*"  - **Make sure you read and follow [Contributor guidelines](https://github.com/NVIDIA/TensorRT-Model-Optimizer/blob/main/CONTRIBUTING.md)** and your commits are signed. - **Is this change backward compatible?**: Yes/No  - **Did you write any new necessary tests?**: Yes/No - **Did you add or update any necessary documentation?**: Yes/No - **Did you update [Changelog](https://github.com/NVIDIA/TensorRT-Model-Optimizer/blob/main/CHANGELOG.rst)?**: Yes/No  ## Additional Information  --------- Signed-off-by: inisis <desmond.yao@buaa.edu.cn> Co-authored-by: Keval Morabia <28916987+kevalmorabia97@users.noreply.github.com>
diff --git a/.gitlab/tests.yml b/.gitlab/tests.yml
@@ -19,8 +19,6 @@ unit:
     TRANSFORMERS: latest
   image: python:3.$PYTHON
   before_script:
-    # Install cmake to build onnxsim from sdists for Python 3.12 until http://github.com/daquexian/onnx-simplifier/pull/353
-    - if [ "$PYTHON" = "12" ]; then apt-get update && apt-get install -y cmake; fi
     - pip install tox
   script:
     - tox -e py3$PYTHON-torch$TORCH-tf_$TRANSFORMERS-unit
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -28,6 +28,7 @@ Model Optimizer Changelog (Linux)
 **Misc**
 
 - Bump minimum recommended transformers version to 4.53.
+- Replace the ONNX simplification package ``onnxsim`` with ``onnxslim``.
 
 0.39 (2025-11-11)
 ^^^^^^^^^^^^^^^^^
diff --git a/modelopt/onnx/quantization/quantize.py b/modelopt/onnx/quantization/quantize.py
@@ -41,6 +41,7 @@
 import onnx
 import onnx.onnx_cpp2py_export.checker as C
 import onnx_graphsurgeon as gs
+import onnxslim
 
 from modelopt.onnx.logging_config import configure_logging, logger
 from modelopt.onnx.op_types import is_data_dependent_shape_op
@@ -133,16 +134,8 @@ def _preprocess_onnx(
     if simplify:
         logger.info("Attempting to simplify model")
         try:
-            import onnxsim
-        except ModuleNotFoundError as e:
-            logger.warning(
-                "onnxsim is not installed. Please install it with 'pip install onnxsim'."
-            )
-            raise e
-
-        try:
-            model_simp, check = onnxsim.simplify(onnx_model)
-            if check:
+            model_simp = onnxslim.slim(onnx_model, skip_fusion_patterns=["FusionGemm"])
+            if model_simp:
                 onnx_model = model_simp
                 onnx_path = os.path.join(output_dir, f"{model_name}_simp.onnx")
                 save_onnx(onnx_model, onnx_path, use_external_data_format)
diff --git a/setup.py b/setup.py
@@ -52,7 +52,7 @@
         "onnxruntime-gpu~=1.22.0 ; platform_machine != 'aarch64' and platform_system != 'Darwin' and platform_system != 'Windows'",  # noqa: E501
         "onnxruntime-directml==1.20.0; platform_system == 'Windows'",
         "onnxscript",  # For autocast opset conversion and test_onnx_dynamo_export unit test
-        "onnxsim ; python_version < '3.12' and platform_machine != 'aarch64'",
+        "onnxslim>=0.1.76",
         "polygraphy>=0.49.22",
     ],
     "hf": [
diff --git a/tests/gpu/onnx/test_simplify.py b/tests/gpu/onnx/test_simplify.py
@@ -57,14 +57,14 @@ def test_onnx_simplification(tmp_path):
         assert os.path.isfile(output_onnx_path), "Quantized ONNX was not found!"
 
         # Load the simplified model and check that the model doesn't contain Identity nodes,
-        #   only 3 layers (Conv->BN->Relu).
+        #   only 2 layers (Conv->Relu).
         graph = gs.import_onnx(onnx.load(simplified_onnx_path))
         identity_nodes = [n for n in graph.nodes if n.op == "Identity"]
         assert not identity_nodes, "Simplified ONNX model contains Identity nodes but it shouldn't."
-        assert len(graph.nodes) == 3, (
-            f"Number of nodes doesn't match the expected: {len(graph.nodes)} vs 3."
+        assert len(graph.nodes) == 2, (
+            f"Number of nodes doesn't match the expected: {len(graph.nodes)} vs 2."
         )
-        assert all(n.op in ["Conv", "BatchNormalization", "Relu"] for n in graph.nodes), (
+        assert all(n.op in ["Conv", "Relu"] for n in graph.nodes), (
             "Graph contains more ops than expected."
         )
 
diff --git a/tox.ini b/tox.ini
@@ -18,9 +18,6 @@ deps =
     torch28: torchvision~=0.23.0
     torch29: torchvision~=0.24.0
 
-    # Build onnxsim from sdists for Python 3.12 until http://github.com/daquexian/onnx-simplifier/pull/353
-    py312: onnxsim
-
     # Install megatron-core for special unit tests
     megatron-core
 
@@ -42,9 +39,6 @@ deps =
     # Make sure torch 2.9 is used
     torchvision~=0.24.0
 
-    # Build onnxsim from sdists for Python 3.12 until http://github.com/daquexian/onnx-simplifier/pull/353
-    py312: onnxsim
-
     # ONNX unit tests heavily rely on torch / torchvision
     onnx: .[onnx,dev-test]
     onnx: torchvision
@@ -80,9 +74,6 @@ commands_pre =
     # Install Eagle-3 test dependencies
     pip install tiktoken blobfile sentencepiece
 
-    # Build onnxsim from sdists for Python 3.12 until http://github.com/daquexian/onnx-simplifier/pull/353
-    py312: pip install onnxsim
-
     # NOTE: User is expected to have correct torch-cuda version pre-installed if using --current-env
     #   to avoid possible CUDA version mismatch
     pip install -e .[all,dev-test]