From 42c90d21ca0fb92c086e57ba5f7fa1caeeae2bd1 Mon Sep 17 00:00:00 2001 From: wheelspawn Date: Thu, 13 Jun 2024 14:37:23 -0500 Subject: [PATCH 1/6] fix issue #7924 --- convert-hf-to-gguf.py | 81 ++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 48 deletions(-) diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index 025405a2c6ce1..af4378e2f835e 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- from __future__ import annotations @@ -47,12 +46,11 @@ class Model: _model_classes: dict[str, type[Model]] = {} dir_model: Path - ftype: gguf.LlamaFileType + ftype: int is_big_endian: bool endianess: gguf.GGUFEndian use_temp_file: bool lazy: bool - model_name: str | None part_names: list[str] is_safetensors: bool hparams: dict[str, Any] @@ -65,7 +63,7 @@ class Model: # subclasses should define this! model_arch: gguf.MODEL_ARCH - def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool, model_name: str | None): + def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool): if type(self) is Model: raise TypeError(f"{type(self).__name__!r} should not be directly instantiated") self.dir_model = dir_model @@ -74,11 +72,10 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE self.use_temp_file = use_temp_file self.lazy = not eager - self.model_name = model_name - self.part_names = Model.get_model_part_names(self.dir_model, "model", ".safetensors") + self.part_names = Model.get_model_part_names(self.dir_model, ".safetensors") self.is_safetensors = len(self.part_names) > 0 if not self.is_safetensors: - self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin") + self.part_names = Model.get_model_part_names(self.dir_model, ".bin") self.hparams = Model.load_hparams(self.dir_model) self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer"]) self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count) @@ -96,7 +93,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, ftype_lw: str = ftype_up.lower() # allow templating the file name with the output ftype, useful with the "auto" ftype self.fname_out = fname_out.parent / fname_out.name.format(ftype_lw, outtype=ftype_lw, ftype=ftype_lw, OUTTYPE=ftype_up, FTYPE=ftype_up) - self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file) + self.gguf_writer = gguf.GGUFWriter(self.fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file) @classmethod def __init_subclass__(cls): @@ -140,7 +137,7 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]: from safetensors import safe_open ctx = cast(ContextManager[Any], safe_open(self.dir_model / part_name, framework="pt", device="cpu")) else: - ctx = contextlib.nullcontext(torch.load(str(self.dir_model / part_name), map_location="cpu", mmap=True, weights_only=True)) + ctx = contextlib.nullcontext(torch.load(str(self.dir_model / part_name), map_location="cpu", mmap=False, weights_only=True)) with ctx as model_part: tensor_names_from_parts.update(model_part.keys()) @@ -177,14 +174,14 @@ def match_model_tensor_name(self, 
name: str, key: gguf.MODEL_TENSOR, bid: int | return False return name == (key_name + suffix) - def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", ".bias")) -> str: + def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", ".bias", ".beta", ".gamma")) -> str: new_name = self.tensor_map.get_name(key=name, try_suffixes=try_suffixes) if new_name is None: raise ValueError(f"Can not map tensor {name!r}") return new_name def set_gguf_parameters(self): - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) + self.gguf_writer.add_name(self.dir_model.name) self.gguf_writer.add_block_count(self.block_count) if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx"], optional=True)) is not None: @@ -248,6 +245,9 @@ def write_tensors(self): # we don't need these if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): continue + + if name.startswith("bert."): + name = name.removeprefix("bert.") old_dtype = data_torch.dtype @@ -261,7 +261,7 @@ def write_tensors(self): if part.isdecimal(): bid = int(part) break - + for new_name, data in ((n, d.squeeze().numpy()) for n, d in self.modify_tensors(data_torch, name, bid)): data: np.ndarray = data # type hint n_dims = len(data.shape) @@ -326,21 +326,21 @@ def write_tensors(self): def write(self): self.write_tensors() - self.gguf_writer.write_header_to_file(self.fname_out) + self.gguf_writer.write_header_to_file() self.gguf_writer.write_kv_data_to_file() self.gguf_writer.write_tensors_to_file(progress=True) self.gguf_writer.close() def write_vocab(self): - self.gguf_writer.write_header_to_file(self.fname_out) + self.gguf_writer.write_header_to_file() self.gguf_writer.write_kv_data_to_file() self.gguf_writer.close() @staticmethod - def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]: + def get_model_part_names(dir_model: Path, suffix: str) -> list[str]: part_names: list[str] = [] for filename in os.listdir(dir_model): - if filename.startswith(prefix) and filename.endswith(suffix): + if filename.endswith(suffix): part_names.append(filename) part_names.sort() @@ -423,9 +423,6 @@ def get_vocab_base_pre(self, tokenizer) -> str: # NOTE: if you get an error here, you need to update the convert-hf-to-gguf-update.py script # or pull the latest version of the model from Huggingface # don't edit the hashes manually! 
- if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5": - # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B - res = "llama-bpe" if chkhsh == "049ecf7629871e3041641907f3de7c733e4dbfdc736f57d882ba0b0845599754": # ref: https://huggingface.co/deepseek-ai/deepseek-llm-7b-base res = "deepseek-llm" @@ -435,6 +432,9 @@ def get_vocab_base_pre(self, tokenizer) -> str: if chkhsh == "8aeee3860c56296a157a1fe2fad249ec40aa59b1bb5709f4ade11c4e6fe652ed": # ref: https://huggingface.co/tiiuae/falcon-7b res = "falcon" + if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f": + # ref: https://huggingface.co/google-bert/bert-base-uncased + res = "bert" if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f": # ref: https://huggingface.co/BAAI/bge-small-en-v1.5 res = "bert-bge" @@ -453,18 +453,12 @@ def get_vocab_base_pre(self, tokenizer) -> str: if chkhsh == "6221ad2852e85ce96f791f476e0b390cf9b474c9e3d1362f53a24a06dc8220ff": # ref: https://huggingface.co/smallcloudai/Refact-1_6-base res = "refact" - if chkhsh == "9c2227e4dd922002fb81bde4fc02b0483ca4f12911410dee2255e4987644e3f8": - # ref: https://huggingface.co/CohereForAI/c4ai-command-r-v01 - res = "command-r" if chkhsh == "e636dc30a262dcc0d8c323492e32ae2b70728f4df7dfe9737d9f920a282b8aea": # ref: https://huggingface.co/Qwen/Qwen1.5-7B res = "qwen2" if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166": # ref: https://huggingface.co/allenai/OLMo-1.7-7B-hf res = "olmo" - if chkhsh == "a8594e3edff7c29c003940395316294b2c623e09894deebbc65f33f1515df79e": - # ref: https://huggingface.co/databricks/dbrx-base - res = "dbrx" if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f": # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-en res = "jina-v2-en" @@ -477,9 +471,6 @@ def get_vocab_base_pre(self, tokenizer) -> str: if chkhsh == "c136ed14d01c2745d4f60a9596ae66800e2b61fa45643e72436041855ad4089d": # ref: https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct res = "smaug-bpe" - if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a": - # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code - res = "jina-v2-code" if res is None: logger.warning("\n") @@ -667,7 +658,7 @@ class GPTNeoXModel(Model): def set_gguf_parameters(self): block_count = self.hparams["num_hidden_layers"] - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) + self.gguf_writer.add_name(self.dir_model.name) self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"]) self.gguf_writer.add_embedding_length(self.hparams["hidden_size"]) self.gguf_writer.add_block_count(block_count) @@ -800,7 +791,7 @@ def set_vocab(self): def set_gguf_parameters(self): block_count = self.hparams["n_layers"] - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) + self.gguf_writer.add_name(self.dir_model.name) self.gguf_writer.add_context_length(self.hparams["max_seq_len"]) self.gguf_writer.add_embedding_length(self.hparams["d_model"]) self.gguf_writer.add_block_count(block_count) @@ -852,7 +843,7 @@ def set_gguf_parameters(self): raise ValueError("gguf: can not find ctx length parameter.") self.gguf_writer.add_file_type(self.ftype) - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) + self.gguf_writer.add_name(self.dir_model.name) self.gguf_writer.add_source_hf_repo(hf_repo) 
self.gguf_writer.add_tensor_data_layout("Meta AI original pth") self.gguf_writer.add_context_length(ctx_length) @@ -889,7 +880,7 @@ def set_gguf_parameters(self): else: raise ValueError("gguf: can not find ctx length parameter.") - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) + self.gguf_writer.add_name(self.dir_model.name) self.gguf_writer.add_source_hf_repo(hf_repo) self.gguf_writer.add_tensor_data_layout("Meta AI original pth") self.gguf_writer.add_context_length(ctx_length) @@ -1012,7 +1003,7 @@ def set_gguf_parameters(self): else: raise ValueError("gguf: can not find ctx length parameter.") - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) + self.gguf_writer.add_name(self.dir_model.name) self.gguf_writer.add_source_hf_repo(hf_repo) self.gguf_writer.add_tensor_data_layout("Meta AI original pth") self.gguf_writer.add_context_length(ctx_length) @@ -1208,7 +1199,7 @@ def set_gguf_parameters(self): hparams = self.hparams block_count = hparams["num_hidden_layers"] - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) + self.gguf_writer.add_name(self.dir_model.name) self.gguf_writer.add_context_length(hparams["max_position_embeddings"]) self.gguf_writer.add_embedding_length(hparams["hidden_size"]) self.gguf_writer.add_block_count(block_count) @@ -1683,7 +1674,7 @@ class GPT2Model(Model): model_arch = gguf.MODEL_ARCH.GPT2 def set_gguf_parameters(self): - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) + self.gguf_writer.add_name(self.dir_model.name) self.gguf_writer.add_block_count(self.hparams["n_layer"]) self.gguf_writer.add_context_length(self.hparams["n_ctx"]) self.gguf_writer.add_embedding_length(self.hparams["n_embd"]) @@ -2193,7 +2184,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter del bid # unused # we are only using BERT for embeddings so we don't need the pooling layer - if name in ("embeddings.position_ids", "pooler.dense.weight", "pooler.dense.bias"): + if name in ("embeddings.position_ids", "pooler.dense.weight", "pooler.dense.bias") or "cls." 
in name: return [] # we don't need these return [(self.map_tensor_name(name), data_torch)] @@ -2250,7 +2241,7 @@ def set_gguf_parameters(self): hparams = self.hparams block_count = hparams["num_hidden_layers"] - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) + self.gguf_writer.add_name(self.dir_model.name) self.gguf_writer.add_context_length(hparams["max_position_embeddings"]) self.gguf_writer.add_embedding_length(hparams["hidden_size"]) self.gguf_writer.add_block_count(block_count) @@ -2350,7 +2341,7 @@ def set_gguf_parameters(self): # Fail early for models which don't have a block expansion factor of 2 assert d_inner == 2 * d_model - self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) + self.gguf_writer.add_name(self.dir_model.name) self.gguf_writer.add_context_length(2**20) # arbitrary value; for those who use the default self.gguf_writer.add_embedding_length(d_model) self.gguf_writer.add_feed_forward_length(0) # unused, but seemingly required when loading @@ -2457,13 +2448,11 @@ def __init__(self, *args, **kwargs): def get_tensors(self): for name, data in super().get_tensors(): - if 'gated_layer' in name: + if 'gated_layers' in name: d1 = data[:self.intermediate_size, :] name1 = name.replace('gated_layers', 'gated_layers_w') - name1 = name1.replace('up_gated_layer', 'gated_layers_v') d2 = data[self.intermediate_size:, :] name2 = name.replace('gated_layers', 'gated_layers_v') - name2 = name2.replace('up_gated_layer', 'gated_layers_w') yield name1, d1 yield name2, d2 continue @@ -2848,13 +2837,8 @@ def main() -> None: hparams = Model.load_hparams(dir_model) with torch.inference_mode(): - try: - model_class = Model.from_model_architecture(hparams["architectures"][0]) - except NotImplementedError: - logger.error(f"Model {hparams['architectures'][0]} is not supported") - sys.exit(1) - - model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file, args.no_lazy, args.model_name) + model_class = Model.from_model_architecture(hparams["architectures"][0]) + model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file, args.no_lazy) logger.info("Set model parameters") model_instance.set_gguf_parameters() @@ -2876,3 +2860,4 @@ def main() -> None: if __name__ == '__main__': main() + From edb1cca353d6376899c07457e97ca5c96797cdfc Mon Sep 17 00:00:00 2001 From: wheelspawn Date: Thu, 13 Jun 2024 14:40:43 -0500 Subject: [PATCH 2/6] Revert "fix issue #7924" This reverts commit 42c90d21ca0fb92c086e57ba5f7fa1caeeae2bd1. --- convert-hf-to-gguf.py | 81 +++++++++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 33 deletions(-) diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index af4378e2f835e..025405a2c6ce1 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# -*- coding: utf-8 -*- from __future__ import annotations @@ -46,11 +47,12 @@ class Model: _model_classes: dict[str, type[Model]] = {} dir_model: Path - ftype: int + ftype: gguf.LlamaFileType is_big_endian: bool endianess: gguf.GGUFEndian use_temp_file: bool lazy: bool + model_name: str | None part_names: list[str] is_safetensors: bool hparams: dict[str, Any] @@ -63,7 +65,7 @@ class Model: # subclasses should define this! 
model_arch: gguf.MODEL_ARCH - def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool): + def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool, model_name: str | None): if type(self) is Model: raise TypeError(f"{type(self).__name__!r} should not be directly instantiated") self.dir_model = dir_model @@ -72,10 +74,11 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE self.use_temp_file = use_temp_file self.lazy = not eager - self.part_names = Model.get_model_part_names(self.dir_model, ".safetensors") + self.model_name = model_name + self.part_names = Model.get_model_part_names(self.dir_model, "model", ".safetensors") self.is_safetensors = len(self.part_names) > 0 if not self.is_safetensors: - self.part_names = Model.get_model_part_names(self.dir_model, ".bin") + self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin") self.hparams = Model.load_hparams(self.dir_model) self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer"]) self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count) @@ -93,7 +96,7 @@ def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, ftype_lw: str = ftype_up.lower() # allow templating the file name with the output ftype, useful with the "auto" ftype self.fname_out = fname_out.parent / fname_out.name.format(ftype_lw, outtype=ftype_lw, ftype=ftype_lw, OUTTYPE=ftype_up, FTYPE=ftype_up) - self.gguf_writer = gguf.GGUFWriter(self.fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file) + self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file) @classmethod def __init_subclass__(cls): @@ -137,7 +140,7 @@ def get_tensors(self) -> Iterator[tuple[str, Tensor]]: from safetensors import safe_open ctx = cast(ContextManager[Any], safe_open(self.dir_model / part_name, framework="pt", device="cpu")) else: - ctx = contextlib.nullcontext(torch.load(str(self.dir_model / part_name), map_location="cpu", mmap=False, weights_only=True)) + ctx = contextlib.nullcontext(torch.load(str(self.dir_model / part_name), map_location="cpu", mmap=True, weights_only=True)) with ctx as model_part: tensor_names_from_parts.update(model_part.keys()) @@ -174,14 +177,14 @@ def match_model_tensor_name(self, name: str, key: gguf.MODEL_TENSOR, bid: int | return False return name == (key_name + suffix) - def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", ".bias", ".beta", ".gamma")) -> str: + def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", ".bias")) -> str: new_name = self.tensor_map.get_name(key=name, try_suffixes=try_suffixes) if new_name is None: raise ValueError(f"Can not map tensor {name!r}") return new_name def set_gguf_parameters(self): - self.gguf_writer.add_name(self.dir_model.name) + self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) self.gguf_writer.add_block_count(self.block_count) if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx"], optional=True)) is not None: @@ -245,9 +248,6 @@ def write_tensors(self): # we don't need these if name.endswith((".attention.masked_bias", 
".attention.bias", ".rotary_emb.inv_freq")): continue - - if name.startswith("bert."): - name = name.removeprefix("bert.") old_dtype = data_torch.dtype @@ -261,7 +261,7 @@ def write_tensors(self): if part.isdecimal(): bid = int(part) break - + for new_name, data in ((n, d.squeeze().numpy()) for n, d in self.modify_tensors(data_torch, name, bid)): data: np.ndarray = data # type hint n_dims = len(data.shape) @@ -326,21 +326,21 @@ def write_tensors(self): def write(self): self.write_tensors() - self.gguf_writer.write_header_to_file() + self.gguf_writer.write_header_to_file(self.fname_out) self.gguf_writer.write_kv_data_to_file() self.gguf_writer.write_tensors_to_file(progress=True) self.gguf_writer.close() def write_vocab(self): - self.gguf_writer.write_header_to_file() + self.gguf_writer.write_header_to_file(self.fname_out) self.gguf_writer.write_kv_data_to_file() self.gguf_writer.close() @staticmethod - def get_model_part_names(dir_model: Path, suffix: str) -> list[str]: + def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]: part_names: list[str] = [] for filename in os.listdir(dir_model): - if filename.endswith(suffix): + if filename.startswith(prefix) and filename.endswith(suffix): part_names.append(filename) part_names.sort() @@ -423,6 +423,9 @@ def get_vocab_base_pre(self, tokenizer) -> str: # NOTE: if you get an error here, you need to update the convert-hf-to-gguf-update.py script # or pull the latest version of the model from Huggingface # don't edit the hashes manually! + if chkhsh == "0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5": + # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B + res = "llama-bpe" if chkhsh == "049ecf7629871e3041641907f3de7c733e4dbfdc736f57d882ba0b0845599754": # ref: https://huggingface.co/deepseek-ai/deepseek-llm-7b-base res = "deepseek-llm" @@ -432,9 +435,6 @@ def get_vocab_base_pre(self, tokenizer) -> str: if chkhsh == "8aeee3860c56296a157a1fe2fad249ec40aa59b1bb5709f4ade11c4e6fe652ed": # ref: https://huggingface.co/tiiuae/falcon-7b res = "falcon" - if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f": - # ref: https://huggingface.co/google-bert/bert-base-uncased - res = "bert" if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f": # ref: https://huggingface.co/BAAI/bge-small-en-v1.5 res = "bert-bge" @@ -453,12 +453,18 @@ def get_vocab_base_pre(self, tokenizer) -> str: if chkhsh == "6221ad2852e85ce96f791f476e0b390cf9b474c9e3d1362f53a24a06dc8220ff": # ref: https://huggingface.co/smallcloudai/Refact-1_6-base res = "refact" + if chkhsh == "9c2227e4dd922002fb81bde4fc02b0483ca4f12911410dee2255e4987644e3f8": + # ref: https://huggingface.co/CohereForAI/c4ai-command-r-v01 + res = "command-r" if chkhsh == "e636dc30a262dcc0d8c323492e32ae2b70728f4df7dfe9737d9f920a282b8aea": # ref: https://huggingface.co/Qwen/Qwen1.5-7B res = "qwen2" if chkhsh == "b6dc8df998e1cfbdc4eac8243701a65afe638679230920b50d6f17d81c098166": # ref: https://huggingface.co/allenai/OLMo-1.7-7B-hf res = "olmo" + if chkhsh == "a8594e3edff7c29c003940395316294b2c623e09894deebbc65f33f1515df79e": + # ref: https://huggingface.co/databricks/dbrx-base + res = "dbrx" if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f": # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-en res = "jina-v2-en" @@ -471,6 +477,9 @@ def get_vocab_base_pre(self, tokenizer) -> str: if chkhsh == "c136ed14d01c2745d4f60a9596ae66800e2b61fa45643e72436041855ad4089d": # ref: 
https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct res = "smaug-bpe" + if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a": + # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code + res = "jina-v2-code" if res is None: logger.warning("\n") @@ -658,7 +667,7 @@ class GPTNeoXModel(Model): def set_gguf_parameters(self): block_count = self.hparams["num_hidden_layers"] - self.gguf_writer.add_name(self.dir_model.name) + self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"]) self.gguf_writer.add_embedding_length(self.hparams["hidden_size"]) self.gguf_writer.add_block_count(block_count) @@ -791,7 +800,7 @@ def set_vocab(self): def set_gguf_parameters(self): block_count = self.hparams["n_layers"] - self.gguf_writer.add_name(self.dir_model.name) + self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) self.gguf_writer.add_context_length(self.hparams["max_seq_len"]) self.gguf_writer.add_embedding_length(self.hparams["d_model"]) self.gguf_writer.add_block_count(block_count) @@ -843,7 +852,7 @@ def set_gguf_parameters(self): raise ValueError("gguf: can not find ctx length parameter.") self.gguf_writer.add_file_type(self.ftype) - self.gguf_writer.add_name(self.dir_model.name) + self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) self.gguf_writer.add_source_hf_repo(hf_repo) self.gguf_writer.add_tensor_data_layout("Meta AI original pth") self.gguf_writer.add_context_length(ctx_length) @@ -880,7 +889,7 @@ def set_gguf_parameters(self): else: raise ValueError("gguf: can not find ctx length parameter.") - self.gguf_writer.add_name(self.dir_model.name) + self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) self.gguf_writer.add_source_hf_repo(hf_repo) self.gguf_writer.add_tensor_data_layout("Meta AI original pth") self.gguf_writer.add_context_length(ctx_length) @@ -1003,7 +1012,7 @@ def set_gguf_parameters(self): else: raise ValueError("gguf: can not find ctx length parameter.") - self.gguf_writer.add_name(self.dir_model.name) + self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) self.gguf_writer.add_source_hf_repo(hf_repo) self.gguf_writer.add_tensor_data_layout("Meta AI original pth") self.gguf_writer.add_context_length(ctx_length) @@ -1199,7 +1208,7 @@ def set_gguf_parameters(self): hparams = self.hparams block_count = hparams["num_hidden_layers"] - self.gguf_writer.add_name(self.dir_model.name) + self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) self.gguf_writer.add_context_length(hparams["max_position_embeddings"]) self.gguf_writer.add_embedding_length(hparams["hidden_size"]) self.gguf_writer.add_block_count(block_count) @@ -1674,7 +1683,7 @@ class GPT2Model(Model): model_arch = gguf.MODEL_ARCH.GPT2 def set_gguf_parameters(self): - self.gguf_writer.add_name(self.dir_model.name) + self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) self.gguf_writer.add_block_count(self.hparams["n_layer"]) self.gguf_writer.add_context_length(self.hparams["n_ctx"]) self.gguf_writer.add_embedding_length(self.hparams["n_embd"]) @@ -2184,7 +2193,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter del bid # unused # we are only using BERT for embeddings so we don't need the pooling 
layer - if name in ("embeddings.position_ids", "pooler.dense.weight", "pooler.dense.bias") or "cls." in name: + if name in ("embeddings.position_ids", "pooler.dense.weight", "pooler.dense.bias"): return [] # we don't need these return [(self.map_tensor_name(name), data_torch)] @@ -2241,7 +2250,7 @@ def set_gguf_parameters(self): hparams = self.hparams block_count = hparams["num_hidden_layers"] - self.gguf_writer.add_name(self.dir_model.name) + self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) self.gguf_writer.add_context_length(hparams["max_position_embeddings"]) self.gguf_writer.add_embedding_length(hparams["hidden_size"]) self.gguf_writer.add_block_count(block_count) @@ -2341,7 +2350,7 @@ def set_gguf_parameters(self): # Fail early for models which don't have a block expansion factor of 2 assert d_inner == 2 * d_model - self.gguf_writer.add_name(self.dir_model.name) + self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name) self.gguf_writer.add_context_length(2**20) # arbitrary value; for those who use the default self.gguf_writer.add_embedding_length(d_model) self.gguf_writer.add_feed_forward_length(0) # unused, but seemingly required when loading @@ -2448,11 +2457,13 @@ def __init__(self, *args, **kwargs): def get_tensors(self): for name, data in super().get_tensors(): - if 'gated_layers' in name: + if 'gated_layer' in name: d1 = data[:self.intermediate_size, :] name1 = name.replace('gated_layers', 'gated_layers_w') + name1 = name1.replace('up_gated_layer', 'gated_layers_v') d2 = data[self.intermediate_size:, :] name2 = name.replace('gated_layers', 'gated_layers_v') + name2 = name2.replace('up_gated_layer', 'gated_layers_w') yield name1, d1 yield name2, d2 continue @@ -2837,8 +2848,13 @@ def main() -> None: hparams = Model.load_hparams(dir_model) with torch.inference_mode(): - model_class = Model.from_model_architecture(hparams["architectures"][0]) - model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file, args.no_lazy) + try: + model_class = Model.from_model_architecture(hparams["architectures"][0]) + except NotImplementedError: + logger.error(f"Model {hparams['architectures'][0]} is not supported") + sys.exit(1) + + model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file, args.no_lazy, args.model_name) logger.info("Set model parameters") model_instance.set_gguf_parameters() @@ -2860,4 +2876,3 @@ def main() -> None: if __name__ == '__main__': main() - From f5e2558f3bff083d6d6ecd427799579247889c03 Mon Sep 17 00:00:00 2001 From: wheelspawn Date: Thu, 13 Jun 2024 14:45:46 -0500 Subject: [PATCH 3/6] fix issue #7924 --- convert-hf-to-gguf.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index 025405a2c6ce1..51b524ccef70f 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -177,7 +177,7 @@ def match_model_tensor_name(self, name: str, key: gguf.MODEL_TENSOR, bid: int | return False return name == (key_name + suffix) - def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", ".bias")) -> str: + def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", ".bias", ".beta", ".gamma")) -> str: new_name = self.tensor_map.get_name(key=name, try_suffixes=try_suffixes) if new_name is None: raise ValueError(f"Can not map tensor {name!r}") @@ -248,7 +248,10 @@ def write_tensors(self): 
# we don't need these if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): continue - + + if name.startswith("bert."): + name = name.removeprefix("bert.") + old_dtype = data_torch.dtype # convert any unsupported data types to float32 @@ -2193,7 +2196,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter del bid # unused # we are only using BERT for embeddings so we don't need the pooling layer - if name in ("embeddings.position_ids", "pooler.dense.weight", "pooler.dense.bias"): + if name in ("embeddings.position_ids", "pooler.dense.weight", "pooler.dense.bias") or "cls." in name: return [] # we don't need these return [(self.map_tensor_name(name), data_torch)] From a0c5a0e82fe78a80915c30278d1ff00c05378f4f Mon Sep 17 00:00:00 2001 From: wheelspawn Date: Thu, 13 Jun 2024 14:48:06 -0500 Subject: [PATCH 4/6] fix line --- convert-hf-to-gguf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index 51b524ccef70f..32bac2c9e3022 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -248,7 +248,7 @@ def write_tensors(self): # we don't need these if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): continue - + if name.startswith("bert."): name = name.removeprefix("bert.") From 7a5d932eaf492dd1fb6115dddf0e9d5657494f48 Mon Sep 17 00:00:00 2001 From: wheelspawn Date: Thu, 13 Jun 2024 21:59:23 -0500 Subject: [PATCH 5/6] move changes from local to BertModel --- convert-hf-to-gguf.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index 32bac2c9e3022..0365d57d30ee4 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -177,7 +177,7 @@ def match_model_tensor_name(self, name: str, key: gguf.MODEL_TENSOR, bid: int | return False return name == (key_name + suffix) - def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", ".bias", ".beta", ".gamma")) -> str: + def map_tensor_name(self, name: str, try_suffixes: Sequence[str] = (".weight", ".bias")) -> str: new_name = self.tensor_map.get_name(key=name, try_suffixes=try_suffixes) if new_name is None: raise ValueError(f"Can not map tensor {name!r}") @@ -249,9 +249,6 @@ def write_tensors(self): if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")): continue - if name.startswith("bert."): - name = name.removeprefix("bert.") - old_dtype = data_torch.dtype # convert any unsupported data types to float32 @@ -2194,12 +2191,15 @@ def phantom(tok): def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: del bid # unused - + + name = name.removeprefix("bert.") + # we are only using BERT for embeddings so we don't need the pooling layer if name in ("embeddings.position_ids", "pooler.dense.weight", "pooler.dense.bias") or "cls." 
in name: return [] # we don't need these - - return [(self.map_tensor_name(name), data_torch)] + + try_suffixes = (".weight", ".bias", ".beta", ".gamma") + return [(self.map_tensor_name(name, try_suffixes), data_torch)] @Model.register("NomicBertModel") From da43a545ef4905a5e4f532efff9ecc9c45c82e85 Mon Sep 17 00:00:00 2001 From: wheelspawn Date: Tue, 18 Jun 2024 10:04:58 -0500 Subject: [PATCH 6/6] rename normalization layers --- convert-hf-to-gguf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index 0365d57d30ee4..c7a66afaecdfa 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -2197,6 +2197,9 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter # we are only using BERT for embeddings so we don't need the pooling layer if name in ("embeddings.position_ids", "pooler.dense.weight", "pooler.dense.bias") or "cls." in name: return [] # we don't need these + + name = name.replace("gamma", "weight") + name = name.replace("beta", "bias") try_suffixes = (".weight", ".bias", ".beta", ".gamma") return [(self.map_tensor_name(name, try_suffixes), data_torch)]
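
Taken together, patches 3 through 6 converge on a small amount of BERT-specific name handling inside BertModel.modify_tensors: strip the "bert." checkpoint prefix, drop the pooler and pre-training "cls." tensors, then rename the legacy gamma/beta LayerNorm parameters. The following self-contained Python sketch restates that net effect for readers; normalize_bert_name is a hypothetical helper written here for illustration and does not exist in convert-hf-to-gguf.py.

    # Hypothetical distillation of the name handling patches 3-6 arrive at.
    def normalize_bert_name(name: str) -> str | None:
        """Map a Hugging Face BERT tensor name toward GGUF, or None to skip."""
        # BertForPreTraining-style checkpoints prefix every tensor with
        # "bert." (patch 5 moves this strip into BertModel.modify_tensors)
        name = name.removeprefix("bert.")

        # embeddings-only use: drop the pooler and the pre-training
        # "cls." heads (patch 3)
        if name in ("embeddings.position_ids",
                    "pooler.dense.weight",
                    "pooler.dense.bias") or "cls." in name:
            return None

        # older checkpoints name LayerNorm parameters gamma/beta (patch 6)
        name = name.replace("gamma", "weight")
        name = name.replace("beta", "bias")
        return name

    # e.g. "bert.encoder.layer.0.attention.output.LayerNorm.gamma"
    #   -> "encoder.layer.0.attention.output.LayerNorm.weight"
    # and "cls.predictions.transform.dense.weight" -> None (dropped)

After this normalization, the real code still resolves the result through self.map_tensor_name(name, try_suffixes), where patch 5 widens the suffix list to (".weight", ".bias", ".beta", ".gamma"); patch 6's rename makes the extra two suffixes largely a belt-and-braces fallback.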
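
Independent of the BERT-specific changes, both versions of Model.write_tensors shown in patches 1 and 2 derive the block id (bid) handed to modify_tensors from the first purely decimal dotted component of the tensor name. A minimal runnable restatement, with made-up example names:

    # bid extraction as done in Model.write_tensors: the first purely
    # decimal dotted name component, if any, is taken as the block index.
    def extract_bid(name: str) -> int | None:
        for part in name.split("."):
            if part.isdecimal():
                return int(part)
        return None

    assert extract_bid("encoder.layer.7.attention.self.query.weight") == 7
    assert extract_bid("embeddings.word_embeddings.weight") is None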
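
The unchanged context around the GGUFWriter construction also shows the output-filename templating that both patch 1 and its revert preserve ("allow templating the file name with the output ftype"). A short usage sketch; the checkpoint name and resolved type below are illustrative assumptions, not values from the patches:

    # Usage sketch of the fname_out templating kept intact by the series.
    from pathlib import Path

    fname_out = Path("models/bert-base-uncased-{ftype}.gguf")  # hypothetical
    ftype_up = "F16"  # resolved output type; its derivation is not shown in the hunks
    ftype_lw = ftype_up.lower()

    # mirrors: fname_out.parent / fname_out.name.format(ftype_lw, outtype=..., ...)
    out = fname_out.parent / fname_out.name.format(
        ftype_lw, outtype=ftype_lw, ftype=ftype_lw, OUTTYPE=ftype_up, FTYPE=ftype_up)
    print(out)  # models/bert-base-uncased-f16.gguf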