29 changes: 25 additions & 4 deletions .github/workflows/tripy-l0.yml
@@ -42,15 +42,15 @@ jobs:
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: build-new-container
- name: Build new container
if: steps.filter.outputs.local_container == 'true'
uses: docker/build-push-action@v6
with:
context: tripy/
tags: ${{ env.NEW_TEST_IMAGE }}
push: false

- name: pull-latest-container
- name: Pull latest container
if: steps.filter.outputs.local_container != 'true'
run: docker pull ${{ env.l0_image }}

@@ -63,10 +63,31 @@ jobs:
python3 docs/generate_rsts.py
sphinx-build build/doc_sources build/docs -c docs/ -j 4 -W -n

- name: run-test
- name: Run tests
uses: addnab/docker-run-action@v3
with:
image: ${{ env.l0_image }}
options: --gpus all -v ${{ github.workspace }}/tripy:/tripy
run: |
pytest --cov=tripy/ --cov-config=.coveragerc tests/ -v -m "not l1 and not manual" -n 4 --durations=15
pytest --cov=tripy/ --cov-config=.coveragerc tests/ -v -m "not l1 and not manual" -n 4 --durations=15 --ignore tests/performance

- name: Run performance benchmarks
uses: addnab/docker-run-action@v3
with:
image: ${{ env.l0_image }}
options: --gpus all -v ${{ github.workspace }}/tripy:/tripy
run: |
pytest tests/performance -v -m "not l1 and not manual" --benchmark-warmup=on --benchmark-json benchmark.json

- name: Store benchmark result
uses: benchmark-action/github-action-benchmark@v1
with:
tool: 'pytest'
output-file-path: ${{ github.workspace }}/tripy/benchmark.json
github-token: ${{ secrets.GITHUB_TOKEN }}
auto-push: true
# Show alert with commit comment on detecting possible performance regression
alert-threshold: '105%'
comment-on-alert: true
fail-on-alert: true
gh-pages-branch: benchmarks
2 changes: 1 addition & 1 deletion .github/workflows/tripy-l1.yml
@@ -53,4 +53,4 @@ jobs:
- name: l1-test
run: |
cd /tripy/
pytest --cov=tripy/ --cov-config=.coveragerc tests/ -v -m "not manual" -n 4 --durations=15
pytest --cov=tripy/ --cov-config=.coveragerc tests/ -v -m "l1 and not manual" -n 4 --durations=15 --ignore tests/performance
4 changes: 4 additions & 0 deletions tripy/pyproject.toml
@@ -55,6 +55,10 @@ test = [
"pytest-profiling==1.7.0",
"pytest-cov==4.1.0",
"pytest-xdist==3.6.1",
"pytest-benchmark==4.0.0",
"pytest-lazy-fixture==0.6.3",
# Triton is required for torch.compile
"triton==3.0.0",
"snakeviz==2.2.0",
"coverage==7.4.1",
"vulture==2.11",
34 changes: 31 additions & 3 deletions tripy/tests/README.md
@@ -10,15 +10,20 @@ The `tests/integration` directory captures the latter group of tests.

You can run all tests locally in the development container by running:
```bash
pytest tests/ -v
pytest tests/ -v -n 4 --dist worksteal --ignore tests/performance
pytest tests/performance -v
```

Performance tests are run separately because they must run serially to ensure
accurate measurements.

You can also provide marker arguments to only run specific test cadences
(see [the test cadence section](#test-cadence) below). For example, to run only
L0 tests, use:

```bash
pytest tests/ -v -m "not l1 and not manual" -n 4
pytest tests/ -v -m "not l1 and not manual" -n 4 --dist worksteal --ignore tests/performance
pytest tests/performance -v -m "not l1 and not manual"
```


@@ -56,7 +61,7 @@ http://localhost:8080/snakeviz/%2Ftripy%2Fprof%2Fcombined.prof
You can generate code coverage reports locally by running:

```bash
pytest --cov=tripy/ --cov-report=html --cov-config=.coveragerc tests/ -n 4 -v
pytest --cov=tripy/ --cov-report=html --cov-config=.coveragerc tests/ -v
```

To view the report, open the `htmlcov/index.html` file from the root directory in a browser.
@@ -125,3 +130,26 @@ Any caption other than `Example` will have a prefix of `Example: ` prepended to

**NOTE: The docstrings must *not* import `tripy`, `numpy`, or `torch`. They will be imported**
**automatically as `tp`, `np`, and `torch` respectively. Any other modules will need to be imported.**
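
As a hypothetical illustration (the function, its docstring, and the exact directive syntax below are assumptions, not taken from the Tripy codebase), a docstring example might look like this, using `tp` without importing it:

```python
def double(input):
    r"""
    Doubles the input tensor.

    .. code-block:: python
        :caption: Example

        # `tp` (and `np`/`torch`) are injected automatically; do not import them here.
        inp = tp.ones((2, 3))
        out = double(inp)
    """
    return input * 2.0
```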


### Performance Tests

In addition to functional tests, we also run performance tests of three kinds:

1. Regression tests, which compare current Tripy performance against historical data
to ensure we don't regress (a minimal sketch of such a test follows this list). We use the
[`pytest-benchmark`](https://pytest-benchmark.readthedocs.io/en/latest/)
plugin to gather data and the
[Continuous Benchmark GitHub Action](https://github.com/marketplace/actions/continuous-benchmark)
for regression testing.

You can view graphs and charts of the historical data by opening the
[`index.html` file from the `benchmarks` branch](https://github.com/NVIDIA/TensorRT-Incubator/blob/benchmarks/dev/bench/index.html)
in a browser.

2. Comparative tests, which compare Tripy and `torch.compile`.

3. Overhead tests, which measure the overhead that Tripy introduces compared
to running the underlying MLIR executable by itself. This is done by timing
an empty executable, since in that case all of the time is spent in the Tripy
wrapper code.
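
To make the regression-test flow concrete, below is a minimal sketch of a `pytest-benchmark` test; the test name and workload are illustrative and not taken from the Tripy test suite. The `benchmark` fixture comes from the plugin, and the timings it records are what `--benchmark-json` serializes for the Continuous Benchmark action.

```python
import tripy as tp


def test_add_regression(benchmark):
    a = tp.ones((256, 256), dtype=tp.float32)
    b = tp.ones((256, 256), dtype=tp.float32)

    def run():
        # eval() forces execution so that real work is measured rather than
        # just lazy graph construction.
        return (a + b).eval()

    # The `benchmark` fixture calls `run` repeatedly and records timing statistics.
    benchmark(run)
```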
4 changes: 2 additions & 2 deletions tripy/tests/backend/api/test_executable.py
@@ -66,12 +66,12 @@ def test_kwargs(self, single_return_executable):
(
[tp.ones((2, 2), dtype=tp.float32), tp.ones((2, 2), dtype=tp.float32)],
{"b": tp.ones((2, 2), dtype=tp.float32)},
"Extra keyword arguments: \['b'\]",
r"Extra keyword arguments: \['b'\]",
),
(
[tp.ones((2, 2), dtype=tp.float32), tp.ones((2, 2), dtype=tp.float32)],
{"c": tp.ones((2, 2), dtype=tp.float32)},
"Extra keyword arguments: \['c'\]",
r"Extra keyword arguments: \['c'\]",
),
],
)
2 changes: 1 addition & 1 deletion tripy/tests/frontend/module/test_module.py
@@ -46,7 +46,7 @@ def test_get_set_attr(self, network):

def test_incompatible_parameter_cannot_be_set(self, network):
with helper.raises(
tp.TripyException, match="New parameter shape: \[2, 3\] is not compatible with current shape: \[2\]"
tp.TripyException, match=r"New parameter shape: \[2, 3\] is not compatible with current shape: \[2\]"
):
network.param = tp.Parameter(tp.ones((2, 3)))

10 changes: 8 additions & 2 deletions tripy/tests/frontend/test_shape.py
@@ -50,13 +50,19 @@ class TestShapeScalar:
np.array(2, dtype=np.int32),
],
)
def test_scalar_shape(self, value):
def test_construction(self, value):
s = tp.ShapeScalar(value)

assert isinstance(s, tp.ShapeScalar)
assert s.trace_tensor.producer.inputs == []

def test_scalar_shape_str_method(self):
def test_int_conversion(self):
val = 4
s = tp.ShapeScalar(val)

assert int(s) == val

def test_str_method(self):
s = tp.ShapeScalar(12)
assert s.__str__() == f"shape_scalar(12)"

10 changes: 5 additions & 5 deletions tripy/tests/frontend/test_tensor.py
@@ -22,12 +22,12 @@
import numpy as np
import pytest
import torch
from tests.conftest import DATA_TYPE_TEST_CASES
from tests.helper import NUMPY_TO_TRIPY

import tripy as tp
from tests.conftest import DATA_TYPE_TEST_CASES
from tests.helper import NUMPY_TYPES, np_to_tripy_dtype
from tripy.utils.stack_info import SourceInfo
from tripy.common.utils import get_element_type
from tripy.utils.stack_info import SourceInfo


class TestTensor:
@@ -52,12 +52,12 @@ def test_tensor_device(self, kind):
assert isinstance(a.trace_tensor.producer, tp.frontend.trace.ops.Storage)
assert a.trace_tensor.producer.device.kind == kind

@pytest.mark.parametrize("dtype", NUMPY_TYPES)
@pytest.mark.parametrize("dtype", NUMPY_TO_TRIPY.keys())
def test_dtype_from_numpy(self, dtype):

np_array = np.array([1, 2, 3], dtype=dtype)
tensor = tp.Tensor(np_array)
tp_dtype = np_to_tripy_dtype(dtype)
tp_dtype = NUMPY_TO_TRIPY[dtype]
assert tensor.dtype == tp_dtype

def test_bool_tensor(self):
2 changes: 1 addition & 1 deletion tripy/tests/frontend/trace/ops/test_reshape.py
@@ -55,7 +55,7 @@ def test_incorrect_dims(self):

with helper.raises(
tp.TripyException,
match="number of output elements \(1\) doesn't match expected number of elements \(4\)",
match=r"number of output elements \(1\) doesn't match expected number of elements \(4\)",
has_stack_info_for=[a, b],
):
b.eval()
31 changes: 9 additions & 22 deletions tripy/tests/helper.py
@@ -102,35 +102,22 @@ def check_mlir(mlir, expected):


# Supported NumPy data types
NUMPY_TYPES = [
np.int8,
NUMPY_TO_TRIPY = {
bool: tp.bool,
np.int8: tp.int8,
np.int32: tp.int32,
np.int64: tp.int64,
np.float16: tp.float16,
np.float32: tp.float32,
# np.int16, # TODO(#247): Add support for int16
np.int32,
np.int64,
# np.uint8, # TODO(#247): Add support for uint8
# np.uint16, # TODO(#190): Add support for unsupported MLIR-TensorRT types.
# np.uint32, # TODO(#190): Add support for unsupported MLIR-TensorRT types.
# np.uint64, # TODO(#190): Add support for unsupported MLIR-TensorRT types.
np.float16,
np.float32,
# np.float64, # TODO(#247): Add support for float64
]


def np_to_tripy_dtype(dtype):
return {
bool: tp.bool,
np.int8: tp.int8,
np.int32: tp.int32,
np.int64: tp.int64,
np.float16: tp.float16,
np.float32: tp.float32,
}[dtype]

}

def torch_type_supported(data: np.ndarray):
unsupported_dtypes = [np.int16, np.uint16, np.uint32, np.uint64]
return data.dtype not in unsupported_dtypes
TRIPY_TO_NUMPY = {v: k for k, v in NUMPY_TO_TRIPY.items()}


TORCH_DTYPES = {
8 changes: 4 additions & 4 deletions tripy/tests/integration/test_cast.py
@@ -21,7 +21,7 @@

import tripy as tp
from tests.conftest import skip_if_older_than_sm89
from tests.helper import np_to_tripy_dtype
from tests.helper import NUMPY_TO_TRIPY


class TestCast:
@@ -50,8 +50,8 @@ class TestCast:
],
)
def test_cast(self, input_dtype, target_dtype):
tp_input_dtype = np_to_tripy_dtype(input_dtype)
tp_target_dtype = np_to_tripy_dtype(target_dtype)
tp_input_dtype = NUMPY_TO_TRIPY[input_dtype]
tp_target_dtype = NUMPY_TO_TRIPY[target_dtype]

# TODO(#222): Integer casts with negative numbers fail in many cases
input_tensor = tp.Tensor([0, 1, 2], dtype=tp_input_dtype)
@@ -71,7 +71,7 @@ def test_cast_quantized_dtypes_into_bool(self, source_dtype):

@pytest.mark.parametrize("target_dtype", [np.float32, np.int32, np.int64, np.int8])
def test_cast_from_bool(self, target_dtype):
tp_target_dtype = np_to_tripy_dtype(target_dtype)
tp_target_dtype = NUMPY_TO_TRIPY[target_dtype]

# in principle, it is not important what *specific* values we convert to,
# so long as false is mapped to 0 and true to nonzero
2 changes: 1 addition & 1 deletion tripy/tests/integration/test_stack.py
@@ -57,6 +57,6 @@ def test_stack_different_shapes(self):
b = tp.ones((4, 3))
with raises(
tp.TripyException,
match="error: shapes of operand \(0\) and \(1\) are not compatible at non-concat index 1:",
match=r"error: shapes of operand \(0\) and \(1\) are not compatible at non-concat index 1:",
):
tp.stack([a, b]).eval()
14 changes: 14 additions & 0 deletions tripy/tests/performance/__init__.py
@@ -0,0 +1,14 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
54 changes: 54 additions & 0 deletions tripy/tests/performance/cases/__init__.py
@@ -0,0 +1,54 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = []


# In order to make the pytest fixtures defined in this submodule visible, we
# need to import them in the test using their function names. To do so, we can
# export them via this file by making them local variables and adding them to `__all__`.
#
# Note that just importing the module is sufficient to update PERF_CASES, but does
# not make the actual fixture function visible to pytest.
def __discover_modules():
import importlib
import pkgutil

mods = [importlib.import_module("tests.performance.cases")]
while mods:
mod = mods.pop(0)

yield mod

if hasattr(mod, "__path__"):
mods.extend(
[
importlib.import_module(f"{mod.__name__}.{submod.name}")
for submod in pkgutil.iter_modules(mod.__path__)
]
)


modules = list(__discover_modules())[1:]

# Discover and import all perf fixtures.
from tests.performance.conftest import PERF_CASES

__perf_case_names = {case.name for case in PERF_CASES}

for mod in modules:
for name, obj in mod.__dict__.items():
if name in __perf_case_names:
locals()[name] = obj
__all__.append(name)
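
As a hypothetical sketch of how a test module might consume these re-exported fixtures (the module path below is an assumption for illustration, not part of this PR):

```python
# e.g. tests/performance/test_regression.py (hypothetical module path)
# Star-importing pulls the exported perf-case fixtures into this module's
# namespace so that pytest can resolve them when tests request them by name.
from tests.performance.cases import *  # noqa: F401,F403
```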