 import nvtripy as tp
 
 from transformers import CLIPTokenizer
-from examples.diffusion.models.clip_model import CLIPConfig
-from examples.diffusion.models.model import StableDiffusion, StableDiffusionConfig
-from examples.diffusion.weight_loader import load_from_diffusers
+from models.clip_model import CLIPConfig
+from models.model import StableDiffusion, StableDiffusionConfig
+from weight_loader import load_from_diffusers
 
 
 def compile_model(model, inputs, engine_path, verbose=False):
@@ -57,18 +57,19 @@ def compile_clip(model, engine_path, dtype=tp.int32, verbose=False):
     return compile_model(model, inputs, engine_path, verbose=verbose)
 
 
-def compile_unet(model, engine_path, dtype, verbose=False):
+def compile_unet(model, steps, engine_path, dtype, verbose=False):
     unconditional_context_shape = (1, 77, 768)
     conditional_context_shape = (1, 77, 768)
     latent_shape = (1, 4, 64, 64)
     inputs = (
         tp.InputInfo(unconditional_context_shape, dtype=dtype),
         tp.InputInfo(conditional_context_shape, dtype=dtype),
         tp.InputInfo(latent_shape, dtype=dtype),
+        tp.InputInfo((steps,), dtype=dtype),
+        tp.InputInfo((steps,), dtype=dtype),
+        tp.InputInfo((steps,), dtype=dtype),
         tp.InputInfo((1,), dtype=dtype),
-        tp.InputInfo((1,), dtype=dtype),
-        tp.InputInfo((1,), dtype=dtype),
-        tp.InputInfo((1,), dtype=dtype),
+        tp.InputInfo((1,), dtype=tp.int32),
     )
     return compile_model(model, inputs, engine_path, verbose=verbose)
 
@@ -90,6 +91,7 @@ def run_diffusion_loop(model, unconditional_context, context, latent, steps, gui
     torch_dtype = torch.float16 if dtype == tp.float16 else torch.float32
     idx_timesteps = list(range(1, 1000, 1000 // steps))
     num_timesteps = len(idx_timesteps)
+    print(f"num_timesteps: {num_timesteps}")
     timesteps = torch.tensor(idx_timesteps, dtype=torch_dtype, device="cuda")
     guidance = torch.tensor([guidance], dtype=torch_dtype, device="cuda")
 
@@ -104,14 +106,16 @@ def run_diffusion_loop(model, unconditional_context, context, latent, steps, gui
     iterator = list(range(num_timesteps))[::-1]
 
     for index in iterator:
+        idx = torch.tensor([index], dtype=torch.int32, device="cuda")
         latent = model(
             unconditional_context,
             context,
             latent,
-            tp.Tensor(timesteps[index : index + 1]),
-            tp.Tensor(alphas[index : index + 1]),
-            tp.Tensor(alphas_prev[index : index + 1]),
+            tp.Tensor(timesteps),
+            tp.Tensor(alphas),
+            tp.Tensor(alphas_prev),
             tp.Tensor(guidance),
+            tp.Tensor(idx),
         )
 
     return latent
@@ -161,8 +165,9 @@ def tripy_diffusion(args):
         os.mkdir(args.engine_dir)
 
     # Load existing engines if they exist, otherwise compile and save them
+    timesteps_size = len(list(range(1, 1000, 1000 // args.steps)))
     clip_compiled = compile_clip(model.text_encoder, engine_path=clip_path, verbose=args.verbose)
-    unet_compiled = compile_unet(model, engine_path=unet_path, dtype=dtype, verbose=args.verbose)
+    unet_compiled = compile_unet(model, timesteps_size, engine_path=unet_path, dtype=dtype, verbose=args.verbose)
     vae_compiled = compile_vae(model.decode, engine_path=vae_path, dtype=dtype, verbose=args.verbose)
 
     # Run through CLIP to get context from prompt
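
As a rough usage sketch of the changed `compile_unet` signature (assuming 50 denoising steps; the engine path and dtype below are placeholders, and `model` stands for the `StableDiffusion` instance built in `tripy_diffusion`), the new `steps` argument is derived from the schedule length and sizes the three new schedule inputs:

```python
import nvtripy as tp

steps = 50  # illustrative; the script derives this from args.steps
timesteps_size = len(list(range(1, 1000, 1000 // steps)))

# The UNet engine is now compiled against the full schedule length: the
# timesteps/alphas/alphas_prev tensors are passed whole as (steps,) inputs,
# while a (1,) int32 index selects the current step each iteration.
unet_compiled = compile_unet(
    model,                              # StableDiffusion instance (assumed)
    timesteps_size,                     # sizes the three (steps,) InputInfos
    engine_path="engines/unet_engine",  # placeholder path
    dtype=tp.float16,                   # assumed dtype
    verbose=True,
)
```

Binding the schedules once and selecting the step with an int32 index avoids slicing and re-uploading the schedule tensors on the host for every denoising iteration, which appears to be the motivation for the reshaped inputs.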