import torch
import torch.nn as nn
from transformers import PreTrainedModel
from transformers.modeling_outputs import CausalLMOutput
from .configuration_model import ModelConfig

class ModelLM(PreTrainedModel):
    config_class = ModelConfig
    base_model_prefix = "backbone"
    # Use the same weight tensor for the input embeddings and the output embeddings.
    _tied_weights_keys = ["lm_head.weight", "backbone.embed.weight"]

    def __init__(self, config: ModelConfig):
        super().__init__(config)

        # Minimal "backbone": a token embedding followed by a small MLP.
        self.backbone = nn.Module()
        self.backbone.embed = nn.Embedding(config.vocab_size, config.hidden_size)
        self.backbone.mlp = nn.Sequential(
            nn.Linear(config.hidden_size, config.hidden_size),
            nn.Tanh(),
        )

        # Projection back to the vocabulary; its weight gets tied to the embedding.
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)

        # Initializes weights and calls tie_weights().
        self.post_init()

    def get_input_embeddings(self):
        return self.backbone.embed

    def set_input_embeddings(self, value):
        self.backbone.embed = value

    def get_output_embeddings(self):
        return self.lm_head

    def set_output_embeddings(self, new_emb):
        self.lm_head = new_emb

    def tie_weights(self):
        out_emb = self.get_output_embeddings()  # lm_head (nn.Linear)
        in_emb = self.get_input_embeddings()    # nn.Embedding

        # If either side is missing, do nothing
        if out_emb is None or in_emb is None:
            return

        out_w = out_emb.weight
        in_w = in_emb.weight

        if in_w.device.type == "meta" and out_w.device.type != "meta":
            # IMPORTANT: rebind the Parameter, not just copy data
            in_emb.weight = out_w
            return

        if out_w.device.type == "meta" and in_w.device.type != "meta":
            out_emb.weight = in_w
            return

        # Default HF behavior (ties by reference or clones as needed)
        self._tie_or_clone_weights(out_emb, in_emb)

    def forward(self, input_ids=None, labels=None, **kwargs):
        # input_ids: (batch, seq_len)
        x = self.backbone.embed(input_ids)  # (B, T, H)
        x = self.backbone.mlp(x)            # (B, T, H)
        logits = self.lm_head(x)            # (B, T, V)

        loss = None
        if labels is not None:
            # Classic language-model loss with next-token prediction.
            shift_logits = logits[:, :-1, :].contiguous()
            shift_labels = labels[:, 1:].contiguous()
            loss = nn.CrossEntropyLoss()(
                shift_logits.view(-1, self.config.vocab_size),
                shift_labels.view(-1),
            )

        return CausalLMOutput(loss=loss, logits=logits)
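A minimal usage sketch, separate from the file above: it assumes ModelConfig accepts vocab_size and hidden_size keyword arguments (the two fields read in __init__) and that both classes are importable from the package; adjust the constructor call to the real configuration class. The assertion only checks that post_init() has tied the two weight tensors.

import torch

# Hypothetical config values, just to exercise the class defined above.
config = ModelConfig(vocab_size=100, hidden_size=32)
model = ModelLM(config)

# post_init() already called tie_weights(), so both modules share one Parameter.
assert model.lm_head.weight.data_ptr() == model.get_input_embeddings().weight.data_ptr()

input_ids = torch.randint(0, config.vocab_size, (2, 8))  # (batch=2, seq_len=8)
out = model(input_ids=input_ids, labels=input_ids)
print(out.loss, out.logits.shape)  # scalar loss, logits of shape (2, 8, 100)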