NVIDIA
diff --git a/‎mlir-tensorrt/CONTRIBUTING.md‎
Lines changed: 29 additions & 0 deletions b/‎mlir-tensorrt/CONTRIBUTING.md‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎mlir-tensorrt/Version.cmake‎
Lines changed: 1 addition & 1 deletion b/‎mlir-tensorrt/Version.cmake‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎mlir-tensorrt/executor/lib/Runtime/Backend/Lua/Modules/TensorRT/TensorRTModule.cpp‎
Lines changed: 0 additions & 6 deletions b/‎mlir-tensorrt/executor/lib/Runtime/Backend/Lua/Modules/TensorRT/TensorRTModule.cpp‎
Lines changed: 0 additions & 6 deletions
diff --git a/‎tripy/docs/packages.html‎
Lines changed: 16 additions & 0 deletions b/‎tripy/docs/packages.html‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎tripy/docs/post0_developer_guides/02-debugging.md‎
Lines changed: 1 addition & 0 deletions b/‎tripy/docs/post0_developer_guides/02-debugging.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎tripy/examples/__init__.py‎
Lines changed: 16 additions & 0 deletions b/‎tripy/examples/__init__.py‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎tripy/examples/diffusion/README.md‎
Lines changed: 39 additions & 0 deletions b/‎tripy/examples/diffusion/README.md‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎tripy/examples/diffusion/__init__.py‎
Lines changed: 16 additions & 0 deletions b/‎tripy/examples/diffusion/__init__.py‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎tripy/examples/diffusion/assets/torch_ref_fp16_fuji_steps50_seed420.png‎
461 KB b/‎tripy/examples/diffusion/assets/torch_ref_fp16_fuji_steps50_seed420.png‎
461 KB
diff --git a/‎tripy/examples/diffusion/compare_images.py‎
Lines changed: 142 additions & 0 deletions b/‎tripy/examples/diffusion/compare_images.py‎
Lines changed: 142 additions & 0 deletions
@@ -9,6 +9,35 @@ described here: https://llvm.org/docs/CodingStandards.html
 
 Python files are formatted using the [`black` formatter](https://black.readthedocs.io/en/stable/).
 
+## Development Environment
+
+This project provides a pre-configured CUDA 12.5 development environment using [Dev Containers](https://containers.dev/). We offer configurations for both `ubuntu` and `rockylinux8`, located in the `.devcontainer` directory.
+
+### VS Code (Recommended)
+1. Install the [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers).
+2. Open the project in VS Code.
+3. When prompted, click "Reopen in Container" and select your preferred OS configuration.
+
+VS Code will automatically build the container and connect to the development environment.
+
+### Dev Containers CLI
+If you are not using VS Code, you can manage the environment with the [Dev Containers CLI](https://github.com/devcontainers/cli).
+
+1.  Install the CLI.
+2.  Choose one of the available configurations from the `.devcontainer` directory (e.g., `cuda12.5-ubuntu-llvm17`).
+3.  From the project root, build and start the container by running the `up` command. Replace `<config-name>` with your chosen configuration.
+    ```bash
+    devcontainer up --workspace-folder . --config .devcontainer/<config-name>/devcontainer.json
+    ```
+    For example:
+    ```bash
+    devcontainer up --workspace-folder . --config .devcontainer/cuda12.5-ubuntu-llvm17/devcontainer.json
+    ```
+4.  To open a shell inside the running container, use the `exec` command:
+    ```bash
+    devcontainer exec --workspace-folder . --config .devcontainer/<config-name>/devcontainer.json /bin/bash
+    ```
+
 ## How to Submit a PR
 
 - Fork the repo on GitHub
 
@@ -1,5 +1,5 @@
 set(MLIR_TENSORRT_VERSION_MAJOR "0")
 set(MLIR_TENSORRT_VERSION_MINOR "1")
-set(MLIR_TENSORRT_VERSION_PATCH "42")
+set(MLIR_TENSORRT_VERSION_PATCH "43")
 set(MLIR_TENSORRT_VERSION
   "${MLIR_TENSORRT_VERSION_MAJOR}.${MLIR_TENSORRT_VERSION_MINOR}.${MLIR_TENSORRT_VERSION_PATCH}")
@@ -477,12 +477,6 @@ static Status setTensorAddressesOrReport(
   ADD_TENSORRT_MODULE_RANGE("set_tensor_addresses");
   unsigned idx = 0;
   for (auto &[name, ptr, dims] : buffers) {
-    constexpr intptr_t kMinAlignmentBytes = 256;
-    if (ptr % kMinAlignmentBytes != 0)
-      MTRT_WARNV("TensorRT input {0} (ptr = {1:X}) does not meet minimum "
-                 "alignment of {2} bytes",
-                 name, ptr, kMinAlignmentBytes);
-
     bool result =
         context->setTensorAddress(name.c_str(), reinterpret_cast<void *>(ptr));
 
 
@@ -216,6 +216,22 @@ <h1>Package Index</h1>
     <a
         href="https://github.com/NVIDIA/TensorRT-Incubator/releases/download/mlir-tensorrt-v0.1.42/mlir_tensorrt_runtime-0.1.42+cuda12.trt109-cp39-cp39-linux_x86_64.whl">mlir_tensorrt_runtime-0.1.42+cuda12.trt109-cp39-cp39-linux_x86_64.whl</a><br>
 
+    <a
+        href="https://github.com/NVIDIA/TensorRT-Incubator/releases/download/mlir-tensorrt-v0.1.43/mlir_tensorrt_compiler-0.1.43+cuda12.trt109-cp310-cp310-linux_x86_64.whl">mlir_tensorrt_compiler-0.1.43+cuda12.trt109-cp310-cp310-linux_x86_64.whl</a><br>
+    <a
+        href="https://github.com/NVIDIA/TensorRT-Incubator/releases/download/mlir-tensorrt-v0.1.43/mlir_tensorrt_compiler-0.1.43+cuda12.trt109-cp311-cp311-linux_x86_64.whl">mlir_tensorrt_compiler-0.1.43+cuda12.trt109-cp311-cp311-linux_x86_64.whl</a><br>
+    <a
+        href="https://github.com/NVIDIA/TensorRT-Incubator/releases/download/mlir-tensorrt-v0.1.43/mlir_tensorrt_compiler-0.1.43+cuda12.trt109-cp312-cp312-linux_x86_64.whl">mlir_tensorrt_compiler-0.1.43+cuda12.trt109-cp312-cp312-linux_x86_64.whl</a><br>
+    <a
+        href="https://github.com/NVIDIA/TensorRT-Incubator/releases/download/mlir-tensorrt-v0.1.43/mlir_tensorrt_compiler-0.1.43+cuda12.trt109-cp39-cp39-linux_x86_64.whl">mlir_tensorrt_compiler-0.1.43+cuda12.trt109-cp39-cp39-linux_x86_64.whl</a><br>
+    <a
+        href="https://github.com/NVIDIA/TensorRT-Incubator/releases/download/mlir-tensorrt-v0.1.43/mlir_tensorrt_runtime-0.1.43+cuda12.trt109-cp310-cp310-linux_x86_64.whl">mlir_tensorrt_runtime-0.1.43+cuda12.trt109-cp310-cp310-linux_x86_64.whl</a><br>
+    <a
+        href="https://github.com/NVIDIA/TensorRT-Incubator/releases/download/mlir-tensorrt-v0.1.43/mlir_tensorrt_runtime-0.1.43+cuda12.trt109-cp311-cp311-linux_x86_64.whl">mlir_tensorrt_runtime-0.1.43+cuda12.trt109-cp311-cp311-linux_x86_64.whl</a><br>
+    <a
+        href="https://github.com/NVIDIA/TensorRT-Incubator/releases/download/mlir-tensorrt-v0.1.43/mlir_tensorrt_runtime-0.1.43+cuda12.trt109-cp312-cp312-linux_x86_64.whl">mlir_tensorrt_runtime-0.1.43+cuda12.trt109-cp312-cp312-linux_x86_64.whl</a><br>
+    <a
+        href="https://github.com/NVIDIA/TensorRT-Incubator/releases/download/mlir-tensorrt-v0.1.43/mlir_tensorrt_runtime-0.1.43+cuda12.trt109-cp39-cp39-linux_x86_64.whl">mlir_tensorrt_runtime-0.1.43+cuda12.trt109-cp39-cp39-linux_x86_64.whl</a><br>
 
 </body>
 
 
@@ -12,6 +12,7 @@ We include some environment variables to enable extra debugging information from
 - `export TRIPY_MLIR_DEBUG_PATH=<mlir-debug-path>` sets the directory for IR dumps. The default path is `mlir-dumps`.
 - `export TRIPY_TRT_DEBUG_ENABLED=1` will dump TensorRT engines and their layer information.
 - `export TRIPY_TRT_DEBUG_PATH=<trt-debug-path>` sets the directory for TensorRT dumps. Default path is `tensorrt-dumps`.
+- `export MTRT_TENSORRT_NVTX=DETAILED` will enable detailed NVTX profiling verbosity for TensorRT layers.
 
 
 ## Using A Debugger
 
@@ -0,0 +1,16 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
@@ -0,0 +1,39 @@
+# Implementing Stable Diffusion
+
+## Introduction
+
+This example implements a Stable Diffusion model using Tripy.
+There are 3 components:
+
+1. `model.py` defines the model using `tripy.Module` and associated APIs. `clip_model.py`, `unet_model.py`, `vae_model.py` implement specific components of the diffusion model. All files live under the `models/` folder.
+2. `weight_loader.py` loads weights from a HuggingFace checkpoint.
+3. `example.py` runs the end-to-end example: it takes the input text as a command-line argument, runs inference, and saves the generated output.
+
+The model defaults to running in `float16`, but you can increase the precision by using the `--fp32` flag.
+
+## Running The Example
+
+1. Install prerequisites:
+
+    ```bash
+    python3 -m pip install -r requirements.txt
+    ```
+
+2. Run the example:
+
+    ```bash
+    python3 example.py --seed 420 --steps 50 --prompt "a beautiful photograph of Mt. Fuji during cherry blossom" --engine-dir fp16_engines --verbose
+    ```
+
+3. **[Optional]** Compare with the PyTorch reference image to verify accuracy:
+    ```bash
+    python3 compare_images.py
+    ```
+
+    <!--
+    Tripy: TEST: EXPECTED_STDOUT Start
+    ```
+    .*Passed: Images are similar.*SSIM.*0\.8
+    ```
+    Tripy: TEST: EXPECTED_STDOUT End
+    -->
@@ -0,0 +1,16 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
@@ -0,0 +1,142 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import argparse
+import os
+import numpy as np
+from PIL import Image
+from skimage.metrics import structural_similarity
+import glob
+
+
+def load_reference_image(image_path, verbose=False):
+    """Load reference image from file path."""
+    if not os.path.exists(image_path):
+        raise FileNotFoundError(f"Reference image not found: {image_path}")
+
+    if verbose:
+        print(f"[I] Loading reference image from {image_path}")
+    return Image.open(image_path)
+
+
+def load_tripy_image(image_path, verbose=False):
+    """Load tripy image from file path."""
+    if not os.path.exists(image_path):
+        raise FileNotFoundError(f"Tripy image not found: {image_path}")
+
+    if verbose:
+        print(f"[I] Loading tripy image from {image_path}")
+    return Image.open(image_path)
+
+
+def find_latest_image_in_output(output_dir="output", verbose=False):
+    """Find the most recent image in the output directory."""
+    if not os.path.exists(output_dir):
+        raise FileNotFoundError(f"Output directory not found: {output_dir}")
+
+    # Look for PNG files in the output directory
+    pattern = os.path.join(output_dir, "*.png")
+    image_files = glob.glob(pattern)
+
+    if not image_files:
+        raise FileNotFoundError(f"No PNG images found in {output_dir}")
+
+    image_files.sort(key=os.path.getmtime, reverse=True)
+
+    if verbose:
+        print(f"[I] Found {len(image_files)} images in {output_dir}")
+        print(f"[I] Using most recent image: {image_files[0]}")
+
+    return image_files[0]
+
+
+def compare_images(tripy_img, reference_img, threshold=0.80):
+    """Compare two images using structural similarity index."""
+    # Convert both images to grayscale numpy arrays for comparison
+    tripy_array = np.array(tripy_img.convert("L"))
+    reference_array = np.array(reference_img.convert("L"))
+
+    # Ensure both images have the same dimensions
+    if tripy_array.shape != reference_array.shape:
+        print(f"[W] Image shape mismatch: tripy {tripy_array.shape} vs reference {reference_array.shape}")
+        # Resize reference to match tripy output
+        reference_img_resized = reference_img.resize(tripy_img.size, Image.Resampling.LANCZOS)
+        reference_array = np.array(reference_img_resized.convert("L"))
+
+    # Calculate structural similarity
+    ssim = structural_similarity(tripy_array, reference_array)
+
+    if ssim >= threshold:
+        print(f"[I] Passed: Images are similar (SSIM >= {threshold})")
+        return True
+    else:
+        print(f"[I] Failed: Images are not similar enough (SSIM < {threshold})")
+        return False
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Compare tripy diffusion output with a reference image",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+
+    # Image loading options
+    parser.add_argument(
+        "--tripy-image",
+        type=str,
+        default=None,
+        help="Path to tripy output image to compare. If not specified, will use the most recent image in output/ directory",
+    )
+    parser.add_argument(
+        "--reference",
+        type=str,
+        default="assets/torch_ref_fp16_fuji_steps50_seed420.png",
+        help="Path to reference image file to compare against",
+    )
+
+    parser.add_argument("--threshold", type=float, default=0.80, help="SSIM threshold for considering images similar")
+    parser.add_argument(
+        "--verbose", action="store_true", default=False, help="Enable verbose output with timing and progress bars"
+    )
+
+    args = parser.parse_args()
+
+    # Load reference image
+    try:
+        reference_img = load_reference_image(args.reference)
+    except FileNotFoundError as e:
+        print(f"[E] {e}")
+        return 1
+
+    # Load tripy image
+    try:
+        if args.tripy_image:
+            tripy_img = load_tripy_image(args.tripy_image, args.verbose)
+        else:
+            image_path = find_latest_image_in_output(verbose=args.verbose)
+            tripy_img = load_tripy_image(image_path, args.verbose)
+    except FileNotFoundError as e:
+        print(f"[E] {e}")
+        return 1
+
+    is_similar = compare_images(tripy_img, reference_img, args.threshold)
+
+    return not is_similar
+
+
+if __name__ == "__main__":
+    exit(main())