[Trainer/bug] Ensure model is not in inference mode (CORE-72) (Comfy-Org#13400)

KohakuBlueleaf · web-flow · commit f350acdf213a · 2026-06-09T23:07:47.000-04:00
* Ensure the model is not in inference mode

* Force the clone to happen inside the training context (inference mode disabled) to avoid inference tensors

* Allow forcing a deepcopy when cloning the model patcher
diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py
@@ -379,10 +379,11 @@ def get_free_memory(self, device):
     def get_clone_model_override(self):
         return self.model, (self.backup, self.backup_buffers, self.object_patches_backup, self.pinned)
 
-    def clone(self, disable_dynamic=False, model_override=None):
+    def clone(self, disable_dynamic=False, model_override=None, force_deepcopy=False):
         class_ = self.__class__
-        if self.is_dynamic() and disable_dynamic:
-            class_ = ModelPatcher
+        if self.is_dynamic() and disable_dynamic or force_deepcopy:
+            if self.is_dynamic() and disable_dynamic:
+                class_ = ModelPatcher
             if model_override is None:
                 if self.cached_patcher_init is None:
                     raise RuntimeError("Cannot create non-dynamic delegate: cached_patcher_init is not initialized.")
diff --git a/comfy_extras/nodes_train.py b/comfy_extras/nodes_train.py
@@ -1149,45 +1149,45 @@ def execute(
         # Process conditioning
         positive = _process_conditioning(positive)
 
-        # Setup model and dtype
-        mp = model.clone()
-        use_grad_scaler = False
-        lora_dtype = node_helpers.string_to_torch_dtype(lora_dtype)
-        if training_dtype != "none":
-            dtype = node_helpers.string_to_torch_dtype(training_dtype)
-            mp.set_model_compute_dtype(dtype)
-        else:
-            # Detect model's native dtype for autocast
-            model_dtype = mp.model.get_dtype()
-            if model_dtype == torch.float16:
-                dtype = torch.float16
-                # GradScaler only supports float16 gradients, not bfloat16.
-                # Only enable it when lora params will also be in float16.
-                if lora_dtype != torch.bfloat16:
-                    use_grad_scaler = True
-                # Warn about fp16 accumulation instability during training
-                if PerformanceFeature.Fp16Accumulation in args.fast:
-                    logging.warning(
-                        "WARNING: FP16 model detected with fp16_accumulation enabled. "
-                        "This combination can be numerically unstable during training and may cause NaN values. "
-                        "Suggested fixes: 1) Set training_dtype to 'bf16', or 2) Disable fp16_accumulation (remove from --fast flags)."
-                    )
+        with torch.inference_mode(False):
+            # Setup model and dtype
+            mp = model.clone(force_deepcopy=True)
+            use_grad_scaler = False
+            lora_dtype = node_helpers.string_to_torch_dtype(lora_dtype)
+            if training_dtype != "none":
+                dtype = node_helpers.string_to_torch_dtype(training_dtype)
+                mp.set_model_compute_dtype(dtype)
             else:
-                # For fp8, bf16, or other dtypes, use bf16 autocast
-                dtype = torch.bfloat16
+                # Detect model's native dtype for autocast
+                model_dtype = mp.model.get_dtype()
+                if model_dtype == torch.float16:
+                    dtype = torch.float16
+                    # GradScaler only supports float16 gradients, not bfloat16.
+                    # Only enable it when lora params will also be in float16.
+                    if lora_dtype != torch.bfloat16:
+                        use_grad_scaler = True
+                    # Warn about fp16 accumulation instability during training
+                    if PerformanceFeature.Fp16Accumulation in args.fast:
+                        logging.warning(
+                            "WARNING: FP16 model detected with fp16_accumulation enabled. "
+                            "This combination can be numerically unstable during training and may cause NaN values. "
+                            "Suggested fixes: 1) Set training_dtype to 'bf16', or 2) Disable fp16_accumulation (remove from --fast flags)."
+                        )
+                else:
+                    # For fp8, bf16, or other dtypes, use bf16 autocast
+                    dtype = torch.bfloat16
 
-        # Prepare latents and compute counts
-        latents_dtype = dtype if dtype not in (None,) else torch.bfloat16
-        latents, num_images, multi_res = _prepare_latents_and_count(
-            latents, latents_dtype, bucket_mode
-        )
+            # Prepare latents and compute counts
+            latents_dtype = dtype if dtype not in (None,) else torch.bfloat16
+            latents, num_images, multi_res = _prepare_latents_and_count(
+                latents, latents_dtype, bucket_mode
+            )
 
-        # Validate and expand conditioning
-        positive = _validate_and_expand_conditioning(positive, num_images, bucket_mode)
+            # Validate and expand conditioning
+            positive = _validate_and_expand_conditioning(positive, num_images, bucket_mode)
 
-        with torch.inference_mode(False):
             # Setup models for training
-            mp.model.requires_grad_(False)
+            mp.model.requires_grad_(False).train()
 
             # Load existing LoRA weights if provided
             existing_weights, existing_steps = _load_existing_lora(existing_lora)