NVIDIA · albertocarpentieri · Jul 3, 2025 · Jul 3, 2025 · Jul 3, 2025 · Jul 3, 2025
diff --git a/examples/weather/corrdiff/train.py b/examples/weather/corrdiff/train.py
@@ -325,6 +325,7 @@ def main(cfg: DictConfig) -> None:
             find_unused_parameters=True,  # dist.find_unused_parameters,
             bucket_cap_mb=35,
             gradient_as_bucket_view=True,
+            static_graph=True
         )
     if cfg.wandb.watch_model and dist.rank == 0:
         wandb.watch(model)
@@ -681,7 +682,6 @@ def main(cfg: DictConfig) -> None:
                                         "img_clean": img_clean_valid,
                                         "img_lr": img_lr_valid,
                                         "augment_pipe": None,
-                                        "use_patch_grad_acc": use_patch_grad_acc,
                                     }
                                     if use_patch_grad_acc is not None:
                                         loss_valid_kwargs[

diff --git a/physicsnemo/models/diffusion/song_unet.py b/physicsnemo/models/diffusion/song_unet.py
@@ -20,6 +20,7 @@
 
 import numpy as np
 import nvtx
+import math
 import torch
 from torch.nn.functional import silu
 from torch.utils.checkpoint import checkpoint
@@ -336,7 +337,7 @@ def __init__(
             self.img_shape_x = img_resolution[1]
 
         # set the threshold for checkpointing based on image resolution
-        self.checkpoint_threshold = (self.img_shape_y >> checkpoint_level) + 1
+        self.checkpoint_threshold = (math.floor(math.sqrt(self.img_shape_x * self.img_shape_y)) >> checkpoint_level) + 1
 
         # Optional additive learned positition embed after the first conv
         self.additive_pos_embed = additive_pos_embed
@@ -552,10 +553,10 @@ def forward(self, x, noise_labels, class_labels, augment_labels=None):
                     else:
                         # For UNetBlocks check if we should use gradient checkpointing
                         if isinstance(block, UNetBlock):
-                            if x.shape[-1] > self.checkpoint_threshold:
+                            if math.floor(math.sqrt(x.shape[-2] * x.shape[-1])) > self.checkpoint_threshold:
                                 # self.checkpoint = checkpoint?
                                 # else: self.checkpoint  = lambda(block,x,emb:block(x,emb))
-                                x = checkpoint(block, x, emb)
+                                x = checkpoint(block, x, emb, use_reentrant=False)
                             else:
                                 # AssertionError: Only support NHWC layout.
                                 x = block(x, emb)
@@ -584,12 +585,12 @@ def forward(self, x, noise_labels, class_labels, augment_labels=None):
                             x = torch.cat([x, skips.pop()], dim=1)
                         # check for checkpointing on decoder blocks and up sampling blocks
                         if (
-                            x.shape[-1] > self.checkpoint_threshold and "_block" in name
+                            math.floor(math.sqrt(x.shape[-2] * x.shape[-1])) > self.checkpoint_threshold and "_block" in name
                         ) or (
-                            x.shape[-1] > (self.checkpoint_threshold / 2)
+                            math.floor(math.sqrt(x.shape[-2] * x.shape[-1])) > (self.checkpoint_threshold / 2)
                             and "_up" in name
                         ):
-                            x = checkpoint(block, x, emb)
+                            x = checkpoint(block, x, emb, use_reentrant=False)
                         else:
                             x = block(x, emb)
             return aux
@@ -1111,28 +1112,22 @@ def _get_positional_embedding(self):
         elif self.gridtype == "linear":
             if self.N_grid_channels != 2:
                 raise ValueError("N_grid_channels must be set to 2 for gridtype linear")
-            x = np.meshgrid(np.linspace(-1, 1, self.img_shape_y))
-            y = np.meshgrid(np.linspace(-1, 1, self.img_shape_x))
-            grid_x, grid_y = np.meshgrid(y, x)
+            y = np.meshgrid(np.linspace(-1, 1, self.img_shape_y))
+            x = np.meshgrid(np.linspace(-1, 1, self.img_shape_x))
+            grid_y, grid_x = np.meshgrid(x, y)
             grid = torch.from_numpy(
-                np.stack((grid_x, grid_y), axis=0)
+                np.stack((grid_y, grid_x), axis=0)
             )  # (2, img_shape_y, img_shape_x)
             grid.requires_grad = False
         elif self.gridtype == "sinusoidal" and self.N_grid_channels == 4:
             # print('sinusuidal grid added ......')
-            x1 = np.meshgrid(np.sin(np.linspace(0, 2 * np.pi, self.img_shape_y)))
-            x2 = np.meshgrid(np.cos(np.linspace(0, 2 * np.pi, self.img_shape_y)))
-            y1 = np.meshgrid(np.sin(np.linspace(0, 2 * np.pi, self.img_shape_x)))
-            y2 = np.meshgrid(np.cos(np.linspace(0, 2 * np.pi, self.img_shape_x)))
-            grid_x1, grid_y1 = np.meshgrid(y1, x1)
-            grid_x2, grid_y2 = np.meshgrid(y2, x2)
-            grid = torch.squeeze(
-                torch.from_numpy(
-                    np.expand_dims(
-                        np.stack((grid_x1, grid_y1, grid_x2, grid_y2), axis=0), axis=0
-                    )
-                )
-            )  # (4, img_shape_y, img_shape_x)
+            x1 = np.meshgrid(np.sin(np.linspace(0, 2 * np.pi, self.img_shape_x)))
+            x2 = np.meshgrid(np.cos(np.linspace(0, 2 * np.pi, self.img_shape_x)))
+            y1 = np.meshgrid(np.sin(np.linspace(0, 2 * np.pi, self.img_shape_y)))
+            y2 = np.meshgrid(np.cos(np.linspace(0, 2 * np.pi, self.img_shape_y)))
+            grid_x1, grid_y1 = np.meshgrid(x1, y1)  # default "xy" indexing: 1st output carries x (varies along last axis)
+            grid_x2, grid_y2 = np.meshgrid(x2, y2)  # both outputs have shape (img_shape_y, img_shape_x)
+            grid = torch.from_numpy(np.stack((grid_x1, grid_y1, grid_x2, grid_y2), axis=0))  # (4, img_shape_y, img_shape_x), same channel order as before the refactor
             grid.requires_grad = False
         elif self.gridtype == "sinusoidal" and self.N_grid_channels != 4:
             if self.N_grid_channels % 4 != 0:
@@ -1153,10 +1148,10 @@ def _get_positional_embedding(self):
             )  # (N_grid_channels, img_shape_y, img_shape_x)
             grid.requires_grad = False
         elif self.gridtype == "test" and self.N_grid_channels == 2:
-            idx_x = torch.arange(self.img_shape_y)
-            idx_y = torch.arange(self.img_shape_x)
-            mesh_x, mesh_y = torch.meshgrid(idx_x, idx_y)
-            grid = torch.stack((mesh_x, mesh_y), dim=0)  # (2, img_shape_y, img_shape_x)
+            idx_x = torch.arange(self.img_shape_x)
+            idx_y = torch.arange(self.img_shape_y)
+            mesh_y, mesh_x = torch.meshgrid(idx_y, idx_x)
+            grid = torch.stack((mesh_y, mesh_x), dim=0)  # (2, img_shape_y, img_shape_x)
         else:
             raise ValueError("Gridtype not supported.")
         return grid