
Commit 8c8e5c4

Rebase to TOT, small fixes
1 parent 2646d24 commit 8c8e5c4

4 files changed: +10 −16 lines changed

tripy/examples/diffusion/example.py

Lines changed: 3 additions & 2 deletions
@@ -83,6 +83,7 @@ def get_alphas_cumprod(beta_start=0.00085, beta_end=0.0120, n_training_steps=100
 def run_diffusion_loop(model, unconditional_context, context, latent, steps, guidance, dtype):
     np_type = np.float16 if dtype == tp.float16 else np.float32
     idx_timesteps = list(range(1, 1000, 1000 // steps))
+    timesteps = np.array(idx_timesteps, dtype=np_type)
     guidance = np.array([guidance], dtype=np_type)
 
     print(f"[I] Running diffusion for {steps} timesteps...")
@@ -91,12 +92,12 @@ def run_diffusion_loop(model, unconditional_context, context, latent, steps, gui
     guidance = tp.Tensor(guidance)
 
     model.stream = tp.Stream()
-    for index in tqdm(range(len(idx_timesteps))):
+    for index, timestep in (t := tqdm(list(enumerate(timesteps))[::-1])):
         latent = model(
             unconditional_context,
             context,
             latent,
-            tp.Tensor(np.array([idx_timesteps[index]], dtype=np_type)),
+            tp.Tensor(np.array([timestep])),
             tp.Tensor(alphas[index : index + 1]),
             tp.Tensor(alphas_prev[index : index + 1]),
             guidance,
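
In isolation, the new loop pattern works like this: enumerate pairs each timestep value with its index into the alphas/alphas_prev schedules, the [::-1] slice walks that list in reverse so sampling starts from the noisiest timestep, and the walrus assignment keeps a handle t on the tqdm bar for progress updates. A standalone sketch (steps = 5 and the description text are illustrative, not from the commit):

import numpy as np
from tqdm import tqdm

steps = 5  # illustrative value, not from the commit
idx_timesteps = list(range(1, 1000, 1000 // steps))  # [1, 201, 401, 601, 801]
timesteps = np.array(idx_timesteps, dtype=np.float32)

# Walk the schedule in reverse: sampling denoises from the noisiest
# timestep back toward timestep 1, while `index` still addresses the
# alphas[index : index + 1] slices in their original order.
for index, timestep in (t := tqdm(list(enumerate(timesteps))[::-1])):
    t.set_description(f"step {index}: t={timestep}")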

tripy/examples/diffusion/helper.py

Lines changed: 0 additions & 7 deletions
@@ -34,12 +34,5 @@ def scaled_dot_product_attention(
     return tp.cast(tp.softmax((qk + attn_mask) if attn_mask is not None else qk, -1), query.dtype) @ value
 
 
-def sequential(input: tp.Tensor, ll: List[Callable[[tp.Tensor], tp.Tensor]]):
-    """
-    Applies a sequence of functions to `self` chaining the output of each function to the input of the next.
-    """
-    return reduce(lambda x, f: f(x), ll, input)
-
-
 def clamp(tensor: tp.Tensor, min: int, max: int):
     return tp.minimum(tp.maximum(tensor, tp.ones_like(tensor) * min), tp.ones_like(tensor) * max)
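
For reference, the deleted sequential helper was a plain left-fold over a list of callables; its role is taken over by tp.Sequential in unet_model.py below. A self-contained sketch of the same behavior (generic names, not repo code):

from functools import reduce
from typing import Callable, List

def sequential(x, layers: List[Callable]):
    # Fold left: the output of each callable becomes the input of the next.
    return reduce(lambda acc, f: f(acc), layers, x)

# (x + 1) * 2 with x = 3 gives 8
assert sequential(3, [lambda v: v + 1, lambda v: v * 2]) == 8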

tripy/examples/diffusion/unet_model.py

Lines changed: 6 additions & 6 deletions
@@ -22,7 +22,7 @@
 import tripy as tp
 from dataclasses import dataclass
 
-from examples.diffusion.helper import scaled_dot_product_attention, sequential
+from examples.diffusion.helper import scaled_dot_product_attention
 from examples.diffusion.vae_model import Upsample, Downsample
 
 
@@ -70,7 +70,7 @@ def __init__(self, config: UNetConfig, query_dim, context_dim, n_heads, d_head):
         self.to_v = tp.Linear(context_dim, n_heads * d_head, bias=False, dtype=config.dtype)
         self.num_heads = n_heads
         self.head_size = d_head
-        self.to_out = [tp.Linear(n_heads * d_head, query_dim, dtype=config.dtype)]
+        self.to_out = tp.Sequential(tp.Linear(n_heads * d_head, query_dim, dtype=config.dtype),)
         self.dtype = config.dtype
 
     def __call__(self, x, context=None):
@@ -83,7 +83,7 @@ def __call__(self, x, context=None):
             scaled_dot_product_attention(q, k, v, embedding_dim=self.head_size, dtype=self.dtype), 1, 2
         )
         h_ = tp.reshape(attention, (x.shape[0], -1, self.num_heads * self.head_size))
-        out = sequential(h_, self.to_out)
+        out = self.to_out(h_)
         return out
 
 
@@ -108,14 +108,14 @@ def __call__(self, x):
 
 class FeedForward(tp.Module):
     def __init__(self, config: UNetConfig, dim, mult=4):
-        self.net = [
+        self.net = tp.Sequential(
             GEGLU(config, dim, dim * mult),
             Dummy(),  # Accounts for Dropout layer, needed for weight loading
             tp.Linear(dim * mult, dim, dtype=config.dtype),
-        ]
+        )
 
     def __call__(self, x):
-        return sequential(x, self.net)
+        return self.net(x)
 
 
 class BasicTransformerBlock(tp.Module):
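
The replacement idiom stores submodules on a container object and chains them when the container is called, rather than keeping them in a bare Python list threaded through a free function; a container also registers its children, which weight loading relies on (hence the no-op Dummy() standing in for Dropout). A minimal plain-Python sketch of the idiom, not tripy's actual tp.Sequential implementation:

class Sequential:
    """Minimal sequential container: calls its submodules in order."""

    def __init__(self, *modules):
        self.modules = modules  # held in call order

    def __call__(self, x):
        for module in self.modules:
            x = module(x)  # each output feeds the next module
        return x

# Mirrors FeedForward above: net = Sequential(GEGLU(...), Dummy(), Linear(...))
net = Sequential(lambda v: v + 1, lambda v: v * 2)
assert net(3) == 8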

tripy/examples/diffusion/weight_loader.py

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@ def load_weights_from_hf(model, hf_model, dtype, debug=False):
         weight = hf_state_dict[key]
         if "norm" not in key:
             weight = weight.to(torch_dtype)
-        param = tp.Parameter(weight)
+        param = tp.Tensor(weight.contiguous())
         tripy_state_dict[key.removeprefix("text_model.")] = param
 
     model.load_state_dict(tripy_state_dict)
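
The added .contiguous() call guards against state-dict tensors that are non-contiguous views (e.g. transposed weights): wrapping such storage directly is unsafe if the consumer assumes a dense, row-major buffer. A stock-PyTorch illustration, independent of this repo:

import torch

w = torch.ones(4, 8).t()      # a transposed view: same storage, swapped strides
print(w.is_contiguous())      # False
dense = w.contiguous()        # materializes a dense, row-major copy
print(dense.is_contiguous())  # True: safe to hand off as a flat buffer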
