main: force cudnn.benchmark to false unless AutoTune is enabled (Comfy-Org#14390)

rattus128 · web-flow · commit 6d18f4adacea · 2026-06-10T13:54:32.000-04:00
Some custom nodes try to set this to true globally. That interferes with
dynamic VRAM: benchmark autotuning causes one-off allocation spikes that
can OOM, and it is especially high risk on Windows, where such
allocations might get serviced by the shared memory fallback.

Override it: re-apply Comfy's own setting after custom nodes are imported.
diff --git a/comfy/model_management.py b/comfy/model_management.py
@@ -534,8 +534,10 @@ def aotriton_supported(gpu_arch):
 except:
     pass
 
-if torch.cuda.is_available() and torch.backends.cudnn.is_available() and PerformanceFeature.AutoTune in args.fast:
-    torch.backends.cudnn.benchmark = True
+
+def set_cudnn_benchmark():
+    if torch.cuda.is_available() and torch.backends.cudnn.is_available():
+        torch.backends.cudnn.benchmark = PerformanceFeature.AutoTune in args.fast
 
 try:
     if torch_version_numeric >= (2, 5):
diff --git a/main.py b/main.py
@@ -490,6 +490,11 @@ def start_comfyui(asyncio_loop=None):
         init_custom_nodes=(not args.disable_all_custom_nodes) or len(args.whitelist_custom_nodes) > 0,
         init_api_nodes=not args.disable_api_nodes
     ))
+
+    # Re-apply Comfy's cuDNN benchmark policy after custom-node imports. Benchmark
+    # mode can request near-card-sized autotune workspaces, and some custom nodes set it at import time.
+    comfy.model_management.set_cudnn_benchmark()
+
     hook_breaker_ac10a0.restore_functions()
 
     cuda_malloc_warning()