Skip to content

Commit 7b5a766

Browse files
authored
Merge pull request #172 from VE-FORBRYDERNE/accelerate-patch
Fix "is on the meta device" error when loading model with disk cache
2 parents 8ee7950 + 3233e78 commit 7b5a766

File tree

1 file changed

+10
-1
lines changed

1 file changed

+10
-1
lines changed

aiserver.py

Lines changed: 10 additions & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -2402,6 +2402,15 @@ def get_original_key(key):
2402  2402
if utils.num_shards is None or utils.current_shard >= utils.num_shards:
2403  2403
if utils.offload_index:
2404  2404
for name, tensor in utils.named_buffers:
2405+
dtype = tensor.dtype
2406+
if convert_to_float16 and breakmodel.primary_device != "cpu" and vars.hascuda and (vars.breakmodel or vars.usegpu):
2407+
dtype = torch.float16
2408+
if breakmodel.primary_device == "cpu" or (not vars.usegpu and not vars.breakmodel):
2409+
dtype = torch.float32
2410+
if name in model_dict and model_dict[name].dtype is not dtype:
2411+
model_dict[name] = model_dict[name].to(dtype)
2412+
if tensor.dtype is not dtype:
2413+
tensor = tensor.to(dtype)
2405  2414
if name not in utils.offload_index:
2406  2415
accelerate.utils.offload_weight(tensor, name, "accelerate-disk-cache", index=utils.offload_index)
2407  2416
accelerate.utils.save_offload_index(utils.offload_index, "accelerate-disk-cache")
@@ -2574,7 +2583,7 @@ def new_rebuild_tensor(storage: Union[torch_lazy_loader.LazyTensor, torch.Storag
2574  2583
if not args.colab or args.savemodel:
2575  2584
import shutil
2576  2585
tokenizer.save_pretrained("models/{}".format(vars.model.replace('/', '_')))
2577-
if(vars.fp32_model): # Use save_pretrained to convert fp32 models to fp16
2586+
if(vars.fp32_model and ("breakmodel" not in globals() or not breakmodel.disk_blocks)): # Use save_pretrained to convert fp32 models to fp16, unless we are using disk cache because save_pretrained is not supported in that case
2578  2587
model = model.half()
2579  2588
model.save_pretrained("models/{}".format(vars.model.replace('/', '_')), max_shard_size="500MiB")
2580  2589
else: # For fp16 models, we can just copy the model files directly

0 commit comments

Comments (0)