diff --git a/clip.hpp b/clip.hpp
index 2307ee3c..913ef4c6 100644
--- a/clip.hpp
+++ b/clip.hpp
@@ -6,7 +6,7 @@
 
 /*================================================== CLIPTokenizer ===================================================*/
 
-std::pair<std::unordered_map<std::string, float>, std::string> extract_and_remove_lora(std::string text) {
+static inline std::pair<std::unordered_map<std::string, float>, std::string> extract_and_remove_lora(std::string text) {
     std::regex re("<lora:([^:]+):([^>]+)>");
     std::smatch matches;
     std::unordered_map<std::string, float> filename2multiplier;
@@ -31,7 +31,7 @@ std::pair<std::unordered_map<std::string, float>, std::string> extract_and_remov
     return std::make_pair(filename2multiplier, text);
 }
 
-std::vector<std::pair<int, std::u32string>> bytes_to_unicode() {
+static inline std::vector<std::pair<int, std::u32string>> bytes_to_unicode() {
     std::vector<std::pair<int, std::u32string>> byte_unicode_pairs;
     std::set<int> byte_set;
     for (int b = static_cast<int>('!'); b <= static_cast<int>('~'); ++b) {
diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp
index d0604044..6576aa30 100644
--- a/examples/cli/main.cpp
+++ b/examples/cli/main.cpp
@@ -840,7 +840,7 @@ int main(int argc, const char* argv[]) {
     }
 
     if (params.mode == CONVERT) {
-        bool success = convert(params.model_path.c_str(), params.vae_path.c_str(), params.output_path.c_str(), params.wtype);
+        bool success = convert(params.model_path.c_str(), params.vae_path.c_str(), params.output_path.c_str(), params.wtype, params.prompt.c_str(), params.lora_model_dir.c_str());
         if (!success) {
             fprintf(stderr,
                     "convert '%s'/'%s' to '%s' failed\n",
@@ -1218,4 +1218,4 @@ int main(int argc, const char* argv[]) {
     free(input_image_buffer);
 
     return 0;
-}
\ No newline at end of file
+}
diff --git a/lora.hpp b/lora.hpp
index ee14bce2..3fa606a2 100644
--- a/lora.hpp
+++ b/lora.hpp
@@ -247,7 +247,7 @@ struct LoraModel : public GGMLRunner {
         std::set<std::string> applied_lora_tensors;
         for (auto it : model_tensors) {
             std::string k_tensor       = it.first;
-            struct ggml_tensor* weight = model_tensors[it.first];
+            struct ggml_tensor* weight = it.second;
 
             std::vector<std::string> keys = to_lora_keys(k_tensor, version);
             if (keys.size() == 0)
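Note on the lora.hpp hunk above: inside a range-for over `model_tensors`, indexing the map again with `model_tensors[it.first]` repeats the lookup the iterator has already performed; `it.second` is the same tensor pointer without the extra O(log n) walk. A minimal standalone sketch of the pattern (not project code, the map contents are made up):

#include <cstdio>
#include <map>
#include <string>

int main() {
    std::map<std::string, int> model_tensors{{"alpha", 1}, {"beta", 2}};
    for (const auto& it : model_tensors) {
        // int v = model_tensors[it.first];  // redundant re-lookup (old code)
        int v = it.second;                   // element already holds the value (new code)
        std::printf("%s = %d\n", it.first.c_str(), v);
    }
    return 0;
}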
diff --git a/model.cpp b/model.cpp
index 24da39f6..e42cd0c1 100644
--- a/model.cpp
+++ b/model.cpp
@@ -10,6 +10,8 @@
 #include "stable-diffusion.h"
 #include "util.h"
 #include "vocab.hpp"
+#include "clip.hpp"
+#include "lora.hpp"
 
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
@@ -1977,7 +1979,7 @@ bool ModelLoader::tensor_should_be_converted(const TensorStorage& tensor_storage
     return false;
 }
 
-bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type) {
+bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type, const std::unordered_map<std::string, float>& loras) {
     auto backend    = ggml_backend_cpu_init();
     size_t mem_size = 1 * 1024 * 1024;  // for padding
     mem_size += tensor_storages.size() * ggml_tensor_overhead();
@@ -1987,6 +1989,9 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
 
     gguf_context* gguf_ctx = gguf_init_empty();
 
+    // lora lookup table
+    std::map<std::string, struct ggml_tensor*> tensors;
+
     auto on_new_tensor_cb = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) -> bool {
         const std::string& name = tensor_storage.name;
 
@@ -2012,19 +2017,44 @@ bool ModelLoader::save_to_gguf_file(const std::string& file_path, ggml_type type
 
         gguf_add_tensor(gguf_ctx, tensor);
 
+        tensors[name] = tensor;
+
         return true;
     };
 
-    bool success = load_tensors(on_new_tensor_cb, backend);
-    ggml_backend_free(backend);
+    if (!load_tensors(on_new_tensor_cb, backend)) {
+        ggml_backend_free(backend);
+        ggml_free(ggml_ctx);
+        gguf_free(gguf_ctx);
+        return false;
+    }
+
     LOG_INFO("load tensors done");
-    LOG_INFO("trying to save tensors to %s", file_path.c_str());
-    if (success) {
-        gguf_write_to_file(gguf_ctx, file_path.c_str(), false);
+
+    for (const auto& [lora_path, lora_scale] : loras) {
+        LoraModel lora(backend, lora_path);
+        if (!lora.load_from_file()) {
+            LOG_WARN("load lora tensors from '%s' failed", lora_path.c_str());
+            ggml_backend_free(backend);
+            ggml_free(ggml_ctx);
+            gguf_free(gguf_ctx);
+            return false;
+        }
+
+        lora.multiplier = lora_scale;
+        lora.apply(tensors, get_sd_version(), 4);
+        lora.free_params_buffer();
+        LOG_INFO("applied '%s':%f", lora_path.c_str(), lora_scale);
     }
+
+    ggml_backend_free(backend);
+
+    LOG_INFO("trying to save tensors to %s", file_path.c_str());
+    gguf_write_to_file(gguf_ctx, file_path.c_str(), false);
+
     ggml_free(ggml_ctx);
     gguf_free(gguf_ctx);
-    return success;
+    return true;
 }
 
 int64_t ModelLoader::get_params_mem_size(ggml_backend_t backend, ggml_type type) {
@@ -2051,7 +2081,7 @@ int64_t ModelLoader::get_params_mem_size(ggml_backend_t backend, ggml_type type)
     return mem_size;
 }
 
-bool convert(const char* input_path, const char* vae_path, const char* output_path, sd_type_t output_type) {
+bool convert(const char* input_path, const char* vae_path, const char* output_path, sd_type_t output_type, const char* prompt, const char* lora_model_dir) {
     ModelLoader model_loader;
 
     if (!model_loader.init_from_file(input_path)) {
@@ -2065,6 +2095,38 @@ bool convert(const char* input_path, const char* vae_path, const char* output_pa
             return false;
         }
     }
-    bool success = model_loader.save_to_gguf_file(output_path, (ggml_type)output_type);
+
+    // process prompt for loras
+    std::unordered_map<std::string, float> loras;
+    if (prompt != nullptr && lora_model_dir != nullptr) {
+        auto result_pair = extract_and_remove_lora(prompt);
+        std::unordered_map<std::string, float> extracted_loras = result_pair.first;
+
+        for (auto& kv : extracted_loras) {
+            LOG_INFO("lora %s:%.2f", kv.first.c_str(), kv.second);
+
+            // save_to_gguf_file expects file paths
+            std::string st_file_path   = path_join(lora_model_dir, kv.first + ".safetensors");
+            std::string ckpt_file_path = path_join(lora_model_dir, kv.first + ".ckpt");
+            std::string file_path;
+            if (file_exists(st_file_path)) {
+                file_path = st_file_path;
+            } else if (file_exists(ckpt_file_path)) {
+                file_path = ckpt_file_path;
+            } else {
+                LOG_WARN("can not find %s or %s for lora %s", st_file_path.c_str(), ckpt_file_path.c_str(), kv.first.c_str());
+                continue;
+            }
+
+            LOG_INFO("found at '%s'", file_path.c_str());
+            loras[file_path] = kv.second;
+        }
+
+        if (result_pair.second != "") {
+            LOG_WARN("unused prompt after lora extraction: '%s'", result_pair.second.c_str());
+        }
+    }
+
+    bool success = model_loader.save_to_gguf_file(output_path, (ggml_type)output_type, loras);
     return success;
 }
diff --git a/model.h b/model.h
index 79c25337..efc1e722 100644
--- a/model.h
+++ b/model.h
@@ -221,7 +221,7 @@ class ModelLoader {
                       ggml_backend_t backend,
                       std::set<std::string> ignore_tensors = {});
 
-    bool save_to_gguf_file(const std::string& file_path, ggml_type type);
+    bool save_to_gguf_file(const std::string& file_path, ggml_type type, const std::unordered_map<std::string, float>& loras = {});
     bool tensor_should_be_converted(const TensorStorage& tensor_storage, ggml_type type);
     int64_t get_params_mem_size(ggml_backend_t backend, ggml_type type = GGML_TYPE_COUNT);
     ~ModelLoader() = default;
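Because the new `loras` parameter in model.h defaults to an empty map, existing callers of `save_to_gguf_file` keep compiling unchanged. A hedged sketch of both call forms (file names are hypothetical):

ModelLoader model_loader;
// old two-argument form still works, no loras applied:
model_loader.save_to_gguf_file("model.q4_0.gguf", GGML_TYPE_Q4_0);
// new form: lora file path -> multiplier, applied before the gguf is written:
model_loader.save_to_gguf_file("model.q4_0.gguf", GGML_TYPE_Q4_0,
                               {{"loras/detail.safetensors", 0.8f}});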
diff --git a/stable-diffusion.h b/stable-diffusion.h
index b4d6fc32..79779f31 100644
--- a/stable-diffusion.h
+++ b/stable-diffusion.h
@@ -257,7 +257,7 @@
 SD_API void free_upscaler_ctx(upscaler_ctx_t* upscaler_ctx);
 SD_API sd_image_t upscale(upscaler_ctx_t* upscaler_ctx, sd_image_t input_image, uint32_t upscale_factor);
 
-SD_API bool convert(const char* input_path, const char* vae_path, const char* output_path, enum sd_type_t output_type);
+SD_API bool convert(const char* input_path, const char* vae_path, const char* output_path, enum sd_type_t output_type, const char* prompt, const char* lora_model_dir);
 
 SD_API uint8_t* preprocess_canny(uint8_t* img,
                                  int width,
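Taken together, the extended `convert()` lets the CLI bake prompt-referenced LoRAs into the converted model: `extract_and_remove_lora` pulls `<lora:name:multiplier>` tags out of the prompt, each name is resolved under `lora_model_dir` as `name.safetensors` or `name.ckpt`, and the weights are applied to the tensor table before `gguf_write_to_file`. A hedged usage sketch of the new C API (the paths and the lora name are hypothetical):

#include "stable-diffusion.h"

int main() {
    // Looks for ./loras/detail.safetensors (or .ckpt) and bakes it in at 0.8.
    bool ok = convert("sd-v1-5.safetensors",       // input_path (hypothetical)
                      "",                          // vae_path: none
                      "sd-v1-5-detail-q8_0.gguf",  // output_path
                      SD_TYPE_Q8_0,
                      "<lora:detail:0.8>",         // prompt carrying the lora tag
                      "./loras");                  // lora_model_dir
    return ok ? 0 : 1;
}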