From 5a9561288d4c9b48822e0f93422bdc70c94dd565 Mon Sep 17 00:00:00 2001 From: Qubitium Date: Fri, 26 Sep 2025 08:02:07 +0000 Subject: [PATCH] fix: skip cache flush when the original device is `cpu` and weights are offloaded to disk (device `meta`) --- src/accelerate/utils/modeling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/accelerate/utils/modeling.py b/src/accelerate/utils/modeling.py index e7c63d81075..830249ffb10 100644 --- a/src/accelerate/utils/modeling.py +++ b/src/accelerate/utils/modeling.py @@ -404,7 +404,7 @@ def set_module_tensor_to_device( module.weight = module.weight.cuda(device_index) # clean pre and post forward hook - if clear_cache and device != "cpu": + if clear_cache and device not in ("cpu", "meta"): clear_device_cache() # When handling tied weights, we update tied_params_map to keep track of the tied weights that have already been allocated on the device in