Skip to content

Commit 992c265

Browse files
authored
Merge branch 'main' into mfl/raise-normalization
2 parents efb8d09 + f821499 commit 992c265

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1647
-78
lines changed

mlir-tensorrt/CONTRIBUTING.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,35 @@ described here: https://llvm.org/docs/CodingStandards.html
99

1010
Python files are formatted using the [`black` formatter](https://black.readthedocs.io/en/stable/).
1111

12+
## Development Environment
13+
14+
This project provides a pre-configured CUDA 12.5 development environment using [Dev Containers](https://containers.dev/). We offer configurations for both `ubuntu` and `rockylinux8`, located in the `.devcontainer` directory.
15+
16+
### VS Code (Recommended)
17+
1. Install the [Dev Containers extension](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers).
18+
2. Open the project in VS Code.
19+
3. When prompted, click "Reopen in Container" and select your preferred OS configuration.
20+
21+
VS Code will automatically build the container and connect to the development environment.
22+
23+
### Dev Containers CLI
24+
If you are not using VS Code, you can manage the environment with the [Dev Containers CLI](https://github.com/devcontainers/cli).
25+
26+
1. Install the CLI.
27+
2. Choose one of the available configurations from the `.devcontainer` directory (e.g., `cuda12.5-ubuntu-llvm17`).
28+
3. From the project root, build and start the container by running the `up` command. Replace `<config-name>` with your chosen configuration.
29+
```bash
30+
devcontainer up --workspace-folder . --config .devcontainer/<config-name>/devcontainer.json
31+
```
32+
For example:
33+
```bash
34+
devcontainer up --workspace-folder . --config .devcontainer/cuda12.5-ubuntu-llvm17/devcontainer.json
35+
```
36+
4. To open a shell inside the running container, use the `exec` command:
37+
```bash
38+
devcontainer exec --workspace-folder . --config .devcontainer/<config-name>/devcontainer.json /bin/bash
39+
```
40+
1241
## How to Submit a PR
1342

1443
- Fork the repo on GitHub

mlir-tensorrt/Version.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
set(MLIR_TENSORRT_VERSION_MAJOR "0")
22
set(MLIR_TENSORRT_VERSION_MINOR "1")
3-
set(MLIR_TENSORRT_VERSION_PATCH "42")
3+
set(MLIR_TENSORRT_VERSION_PATCH "43")
44
set(MLIR_TENSORRT_VERSION
55
"${MLIR_TENSORRT_VERSION_MAJOR}.${MLIR_TENSORRT_VERSION_MINOR}.${MLIR_TENSORRT_VERSION_PATCH}")

mlir-tensorrt/executor/lib/Runtime/Backend/Lua/Modules/TensorRT/TensorRTModule.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -477,12 +477,6 @@ static Status setTensorAddressesOrReport(
477477
ADD_TENSORRT_MODULE_RANGE("set_tensor_addresses");
478478
unsigned idx = 0;
479479
for (auto &[name, ptr, dims] : buffers) {
480-
constexpr intptr_t kMinAlignmentBytes = 256;
481-
if (ptr % kMinAlignmentBytes != 0)
482-
MTRT_WARNV("TensorRT input {0} (ptr = {1:X}) does not meet minimum "
483-
"alignment of {2} bytes",
484-
name, ptr, kMinAlignmentBytes);
485-
486480
bool result =
487481
context->setTensorAddress(name.c_str(), reinterpret_cast<void *>(ptr));
488482

tripy/docs/packages.html

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,22 @@ <h1>Package Index</h1>
216216
<a
217217
href="https://github.com/NVIDIA/TensorRT-Incubator/releases/download/mlir-tensorrt-v0.1.42/mlir_tensorrt_runtime-0.1.42+cuda12.trt109-cp39-cp39-linux_x86_64.whl">mlir_tensorrt_runtime-0.1.42+cuda12.trt109-cp39-cp39-linux_x86_64.whl</a><br>
218218

219+
<a
220+
href="https://github.com/NVIDIA/TensorRT-Incubator/releases/download/mlir-tensorrt-v0.1.43/mlir_tensorrt_compiler-0.1.43+cuda12.trt109-cp310-cp310-linux_x86_64.whl">mlir_tensorrt_compiler-0.1.43+cuda12.trt109-cp310-cp310-linux_x86_64.whl</a><br>
221+
<a
222+
href="https://github.com/NVIDIA/TensorRT-Incubator/releases/download/mlir-tensorrt-v0.1.43/mlir_tensorrt_compiler-0.1.43+cuda12.trt109-cp311-cp311-linux_x86_64.whl">mlir_tensorrt_compiler-0.1.43+cuda12.trt109-cp311-cp311-linux_x86_64.whl</a><br>
223+
<a
224+
href="https://github.com/NVIDIA/TensorRT-Incubator/releases/download/mlir-tensorrt-v0.1.43/mlir_tensorrt_compiler-0.1.43+cuda12.trt109-cp312-cp312-linux_x86_64.whl">mlir_tensorrt_compiler-0.1.43+cuda12.trt109-cp312-cp312-linux_x86_64.whl</a><br>
225+
<a
226+
href="https://github.com/NVIDIA/TensorRT-Incubator/releases/download/mlir-tensorrt-v0.1.43/mlir_tensorrt_compiler-0.1.43+cuda12.trt109-cp39-cp39-linux_x86_64.whl">mlir_tensorrt_compiler-0.1.43+cuda12.trt109-cp39-cp39-linux_x86_64.whl</a><br>
227+
<a
228+
href="https://github.com/NVIDIA/TensorRT-Incubator/releases/download/mlir-tensorrt-v0.1.43/mlir_tensorrt_runtime-0.1.43+cuda12.trt109-cp310-cp310-linux_x86_64.whl">mlir_tensorrt_runtime-0.1.43+cuda12.trt109-cp310-cp310-linux_x86_64.whl</a><br>
229+
<a
230+
href="https://github.com/NVIDIA/TensorRT-Incubator/releases/download/mlir-tensorrt-v0.1.43/mlir_tensorrt_runtime-0.1.43+cuda12.trt109-cp311-cp311-linux_x86_64.whl">mlir_tensorrt_runtime-0.1.43+cuda12.trt109-cp311-cp311-linux_x86_64.whl</a><br>
231+
<a
232+
href="https://github.com/NVIDIA/TensorRT-Incubator/releases/download/mlir-tensorrt-v0.1.43/mlir_tensorrt_runtime-0.1.43+cuda12.trt109-cp312-cp312-linux_x86_64.whl">mlir_tensorrt_runtime-0.1.43+cuda12.trt109-cp312-cp312-linux_x86_64.whl</a><br>
233+
<a
234+
href="https://github.com/NVIDIA/TensorRT-Incubator/releases/download/mlir-tensorrt-v0.1.43/mlir_tensorrt_runtime-0.1.43+cuda12.trt109-cp39-cp39-linux_x86_64.whl">mlir_tensorrt_runtime-0.1.43+cuda12.trt109-cp39-cp39-linux_x86_64.whl</a><br>
219235

220236
</body>
221237

tripy/docs/post0_developer_guides/02-debugging.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ We include some environment variables to enable extra debugging information from
1212
- `export TRIPY_MLIR_DEBUG_PATH=<mlir-debug-path>` sets the directory for IR dumps. The default path is `mlir-dumps`.
1313
- `export TRIPY_TRT_DEBUG_ENABLED=1` will dump TensorRT engines and their layer information.
1414
- `export TRIPY_TRT_DEBUG_PATH=<trt-debug-path>` sets the directory for TensorRT dumps. The default path is `tensorrt-dumps`.
15+
- `export MTRT_TENSORRT_NVTX=DETAILED` enables detailed NVTX profiling verbosity for TensorRT layers.
1516

1617

1718
## Using A Debugger

tripy/examples/__init__.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#
2+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#

tripy/examples/diffusion/README.md

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Implementing Stable Diffusion
2+
3+
## Introduction
4+
5+
This example implements a Stable Diffusion model using Tripy.
6+
There are 3 components:
7+
8+
1. `model.py` defines the model using `tripy.Module` and associated APIs. `clip_model.py`, `unet_model.py`, `vae_model.py` implement specific components of the diffusion model. All files live under the `models/` folder.
9+
2. `weight_loader.py` loads weights from a HuggingFace checkpoint.
10+
3. `example.py` runs the end-to-end example: it takes the input text as a command-line argument, runs inference, and saves the generated output.
11+
12+
The model defaults to running in `float16`, but you can increase the precision by using the `--fp32` flag.
13+
14+
## Running The Example
15+
16+
1. Install prerequisites:
17+
18+
```bash
19+
python3 -m pip install -r requirements.txt
20+
```
21+
22+
2. Run the example:
23+
24+
```bash
25+
python3 example.py --seed 420 --steps 50 --prompt "a beautiful photograph of Mt. Fuji during cherry blossom" --engine-dir fp16_engines --verbose
26+
```
27+
28+
3. **[Optional]** Compare the generated image against the PyTorch reference image to verify accuracy:
29+
```bash
30+
python3 compare_images.py
31+
```
32+
33+
<!--
34+
Tripy: TEST: EXPECTED_STDOUT Start
35+
```
36+
.*Passed: Images are similar.*SSIM.*0\.8
37+
```
38+
Tripy: TEST: EXPECTED_STDOUT End
39+
-->
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#
2+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
461 KB
Loading
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
#
2+
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
#
17+
18+
import argparse
19+
import os
20+
import numpy as np
21+
from PIL import Image
22+
from skimage.metrics import structural_similarity
23+
import glob
24+
25+
26+
def load_reference_image(image_path, verbose=False):
    """Open the reference image that the Tripy output is compared against.

    Args:
        image_path: Path to the reference image file.
        verbose: When True, print an informational message before opening the file.

    Returns:
        The object returned by ``Image.open(image_path)``.

    Raises:
        FileNotFoundError: If ``image_path`` does not name an existing regular file.
    """
    # Use `isfile` rather than `exists`: a directory would pass an existence check
    # and then fail inside `Image.open` with an exception type other than the
    # FileNotFoundError that `main` handles.
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Reference image not found: {image_path}")

    if verbose:
        print(f"[I] Loading reference image from {image_path}")
    return Image.open(image_path)
34+
35+
36+
def load_tripy_image(image_path, verbose=False):
    """Open the image produced by the Tripy diffusion example.

    Args:
        image_path: Path to the Tripy output image file.
        verbose: When True, print an informational message before opening the file.

    Returns:
        The object returned by ``Image.open(image_path)``.

    Raises:
        FileNotFoundError: If ``image_path`` does not name an existing regular file.
    """
    # Use `isfile` rather than `exists`: a directory would pass an existence check
    # and then fail inside `Image.open` with an exception type other than the
    # FileNotFoundError that `main` handles.
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Tripy image not found: {image_path}")

    if verbose:
        print(f"[I] Loading tripy image from {image_path}")
    return Image.open(image_path)
44+
45+
46+
def find_latest_image_in_output(output_dir="output", verbose=False):
    """Return the path of the most recently modified PNG file in ``output_dir``.

    Args:
        output_dir: Directory to search for ``*.png`` files (non-recursive).
        verbose: When True, print how many images were found and which one was chosen.

    Returns:
        Path of the PNG with the largest modification time, as reported by
        ``os.path.getmtime``.

    Raises:
        FileNotFoundError: If ``output_dir`` is not an existing directory, or if it
            contains no ``*.png`` entries.
    """
    # `isdir` rather than `exists`, so that a regular file passed by mistake is
    # reported as a missing directory instead of as "no PNG images".
    if not os.path.isdir(output_dir):
        raise FileNotFoundError(f"Output directory not found: {output_dir}")

    # Escape the directory part so glob metacharacters in its name (`[`, `*`, `?`)
    # are matched literally; only the trailing "*.png" acts as a pattern.
    pattern = os.path.join(glob.escape(output_dir), "*.png")
    image_files = glob.glob(pattern)

    if not image_files:
        raise FileNotFoundError(f"No PNG images found in {output_dir}")

    # Only the newest entry is needed, so take the maximum instead of sorting.
    latest = max(image_files, key=os.path.getmtime)

    if verbose:
        print(f"[I] Found {len(image_files)} images in {output_dir}")
        print(f"[I] Using most recent image: {latest}")

    return latest
65+
66+
67+
def compare_images(tripy_img, reference_img, threshold=0.80):
    """Decide whether two images are similar using the structural similarity index.

    Both images are converted to single-channel ("L" mode) arrays. If the array
    shapes differ, the reference image is resized to the Tripy image's size before
    the score is computed.

    Args:
        tripy_img: Image produced by Tripy; must provide ``convert`` and ``size``.
        reference_img: Reference image; must provide ``convert`` and ``resize``.
        threshold: Minimum SSIM score for the images to count as similar.

    Returns:
        True when the SSIM score is at least ``threshold``, otherwise False.
    """

    def as_grayscale(img):
        # "L" is the 8-bit single-channel mode requested by the original code.
        return np.array(img.convert("L"))

    tripy_gray = as_grayscale(tripy_img)
    reference_gray = as_grayscale(reference_img)

    # The similarity metric needs equally shaped inputs; adapt the reference.
    if tripy_gray.shape != reference_gray.shape:
        print(f"[W] Image shape mismatch: tripy {tripy_gray.shape} vs reference {reference_gray.shape}")
        resized_reference = reference_img.resize(tripy_img.size, Image.Resampling.LANCZOS)
        reference_gray = as_grayscale(resized_reference)

    score = structural_similarity(tripy_gray, reference_gray)

    # Keep the `>=` comparison (rather than testing `<`) so a NaN score is
    # still classified as a failure.
    is_similar = bool(score >= threshold)
    if not is_similar:
        print(f"[I] Failed: Images are not similar enough (SSIM < {threshold})")
        return False

    print(f"[I] Passed: Images are similar (SSIM >= {threshold})")
    return True
89+
90+
91+
def main(argv=None):
    """Compare a Tripy-generated image against a reference image using SSIM.

    Args:
        argv: Optional list of command-line arguments. When None (the default),
            argparse reads the process arguments.

    Returns:
        0 when the images are judged similar; 1 when they are not, or when either
        image (or the output directory) cannot be found.
    """
    parser = argparse.ArgumentParser(
        description="Compare tripy diffusion output with a reference image",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Image loading options
    parser.add_argument(
        "--tripy-image",
        type=str,
        default=None,
        help="Path to tripy output image to compare. If not specified, will use the most recent image in output/ directory",
    )
    parser.add_argument(
        "--reference",
        type=str,
        default="assets/torch_ref_fp16_fuji_steps50_seed420.png",
        help="Path to reference image file to compare against",
    )

    parser.add_argument("--threshold", type=float, default=0.80, help="SSIM threshold for considering images similar")
    # This script only prints extra informational messages in verbose mode.
    parser.add_argument("--verbose", action="store_true", default=False, help="Enable verbose output")

    args = parser.parse_args(argv)

    # Load reference image; forward the verbosity flag like the Tripy image path does.
    try:
        reference_img = load_reference_image(args.reference, args.verbose)
    except FileNotFoundError as e:
        print(f"[E] {e}")
        return 1

    # Load tripy image, falling back to the newest PNG in the output directory.
    try:
        if args.tripy_image:
            tripy_img = load_tripy_image(args.tripy_image, args.verbose)
        else:
            image_path = find_latest_image_in_output(verbose=args.verbose)
            tripy_img = load_tripy_image(image_path, args.verbose)
    except FileNotFoundError as e:
        print(f"[E] {e}")
        return 1

    is_similar = compare_images(tripy_img, reference_img, args.threshold)

    # Return an explicit integer exit status, matching the error paths above.
    return 0 if is_similar else 1
139+
140+
141+
if __name__ == "__main__":
    # The `exit` builtin is installed by the `site` module for interactive use and
    # is not guaranteed to exist (e.g. under `python -S`). Raising SystemExit
    # terminates with the same status without relying on it.
    raise SystemExit(main())

0 commit comments

Comments
 (0)