Update save.py

danielhanchen · danielhanchen · commit a6e86f43a6c6 · 2025-03-14T07:26:23.000-07:00
diff --git a/unsloth/save.py b/unsloth/save.py
@@ -2218,12 +2218,59 @@ def unsloth_convert_lora_to_ggml_and_save_locally(
 
 
 from .models.loader_utils import get_model_name
-from unsloth_zoo.saving_utils import merge_and_overwrite_lora
+from unsloth_zoo.saving_utils import (
+    merge_and_overwrite_lora,
+    prepare_saving,
+)
 from unsloth_zoo.llama_cpp import (
     install_llama_cpp,
-    convert_to_gguf,
+    convert_to_gguf as _convert_to_gguf,
 )
 
+@torch.inference_mode
+def save_to_gguf_generic(
+    model,
+    save_directory,
+    quantization_type = "Q8_0",
+    repo_id = None,
+    token = None,
+):
+    """Run GGUF conversion on `save_directory`; upload the result if `repo_id` is set.
+
+    `quantization_type` is forwarded to `_convert_to_gguf`. With a `repo_id`,
+    `model` is handed to `prepare_saving` and the `*.gguf*` files found in
+    `save_directory` are uploaded to that repo (`token` defaults to `get_token()`).
+    Returns whatever `_convert_to_gguf` returns; raises `RuntimeError` when an
+    upload is requested and no token is available.
+    """
+    if token is None and repo_id is not None: token = get_token()
+    if repo_id is not None and token is None:
+        raise RuntimeError("Unsloth: Please specify a token for uploading!")
+
+    # Clone llama.cpp when the Unsloth conversion script is not present under ./llama.cpp.
+    if not os.path.exists(os.path.join("llama.cpp", "unsloth_convert_hf_to_gguf.py")):
+        install_llama_cpp(just_clone_repo = True)
+    pass
+
+    metadata = _convert_to_gguf(
+        save_directory, print_output = True, quantization_type = quantization_type,
+    )
+    if repo_id is not None:
+        prepare_saving(
+            model, repo_id, push_to_hub = True,
+            max_shard_size = "50GB", private = True, token = token,
+        )
+        # Upload only here: a local-only save has no repo_id to upload to.
+        from huggingface_hub import HfApi
+        HfApi(token = token).upload_folder(
+            folder_path = save_directory, repo_id = repo_id,
+            repo_type = "model", allow_patterns = ["*.gguf*"],
+        )
+    pass
+    return metadata
+pass
+
+
 @torch.inference_mode
 def unsloth_generic_save(
     model,
@@ -2467,8 +2514,8 @@ def patch_saving_functions(model, vision = False):
         # Vision only 1 option
         model.push_to_hub_merged     = types.MethodType(unsloth_generic_push_to_hub_merged,     model)
         model.save_pretrained_merged = types.MethodType(unsloth_generic_save_pretrained_merged, model)
-        model.push_to_hub_gguf       = types.MethodType(not_implemented_save,                   model)
-        model.save_pretrained_gguf   = types.MethodType(not_implemented_save,                   model)
+        model.push_to_hub_gguf       = types.MethodType(save_to_gguf_generic,                   model)
+        model.save_pretrained_gguf   = types.MethodType(save_to_gguf_generic,                   model)
     pass
     return model
 pass