Commit e9b891f

committed
address refactoring feedback
Signed-off-by: savitha-eng <[email protected]>
1 parent 0abf0b2 commit e9b891f

File tree

16 files changed (+1100, -8 lines)

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
# Example Small Llama3 Checkpoint

This directory contains the model and tokenizer configuration for a small Llama3 model (~10M parameters) optimized for genomic sequences. This checkpoint is designed for testing and development purposes, allowing unit tests to run without requiring external paths or complex configuration.

## Contents

- **config.json**: Model configuration for a small Llama3 model (4 layers, 2048 hidden size)
- **tokenizer.json**: Fast tokenizer for nucleotide sequences (256 vocab size)
- **tokenizer_config.json**: Tokenizer configuration
- **special_tokens_map.json**: Special tokens mapping (EOS=0, PAD=1, BOS=2, UNK=3)

## Usage

Use this directory as the `model_tag` in your training configurations:

```yaml
# In your hydra config
model_tag: ./example_small_llama_checkpoint

dataset:
  tokenizer_path: ./example_small_llama_checkpoint  # Same directory for tokenizer
```

This eliminates the need for absolute paths and makes configurations portable across different environments.
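As a quick sanity check outside of training, the checkpoint directory can also be loaded directly through the standard Hugging Face `transformers` API. The sketch below is illustrative only: since this directory ships configuration and tokenizer files rather than trained weights, the model is built from the config with randomly initialized parameters, and the sample input is just an assumed nucleotide string.

```python
# Minimal sanity-check sketch (assumes the standard Hugging Face transformers API).
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

ckpt_dir = "./example_small_llama_checkpoint"

tokenizer = AutoTokenizer.from_pretrained(ckpt_dir)   # nucleotide tokenizer (256 vocab)
config = AutoConfig.from_pretrained(ckpt_dir)         # small Llama3 config
model = AutoModelForCausalLM.from_config(config)      # random weights; no trained checkpoint shipped

# Tokenize a short nucleotide sequence and run a single forward pass.
inputs = tokenizer("ACGTACGTACGT", return_tensors="pt")
outputs = model(**inputs)
print(outputs.logits.shape)  # (batch=1, sequence_length, vocab_size=256)
```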
## Model Parameters

- Layers: 4
- Hidden size: 2048
- Attention heads: 16
- Intermediate size: 8192
- Vocabulary size: 256 (nucleotide tokenizer)
- Max position embeddings: 8192
Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
{
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 2,
  "eos_token_id": 0,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 8192,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 16,
  "num_hidden_layers": 4,
  "num_key_value_heads": 16,
  "pad_token_id": 1,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 500000.0,
  "tie_word_embeddings": false,
  "transformers_version": "4.57.1",
  "use_cache": true,
  "vocab_size": 256
}
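For reference, an equivalent configuration can be reconstructed programmatically and used to instantiate the model. The following is a sketch whose field values simply mirror the JSON above; it is not necessarily how the checked-in file was generated.

```python
# Sketch: rebuild an equivalent config in code and instantiate the model.
# Values mirror config.json above; illustrative only.
from transformers import LlamaConfig, LlamaForCausalLM

config = LlamaConfig(
    vocab_size=256,
    hidden_size=2048,
    intermediate_size=8192,
    num_hidden_layers=4,
    num_attention_heads=16,
    num_key_value_heads=16,
    head_dim=128,
    max_position_embeddings=8192,
    rope_theta=500000.0,
    rms_norm_eps=1e-05,
    bos_token_id=2,
    eos_token_id=0,
    pad_token_id=1,
    tie_word_embeddings=False,
)

model = LlamaForCausalLM(config)  # randomly initialized small Llama3 model

# Writing the config back out produces a config.json like the one above.
config.save_pretrained("./example_small_llama_checkpoint")
```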
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
{
  "bos_token": "<BOS>",
  "eos_token": "<EOS>",
  "pad_token": "<PAD>",
  "unk_token": "<UNK>"
}
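The mapping between these special-token strings and the IDs quoted in the README (EOS=0, PAD=1, BOS=2, UNK=3) can be checked with a short sketch like the one below, assuming the tokenizer files in this directory load through `AutoTokenizer`.

```python
# Sketch: verify the special-token IDs stated in the README.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./example_small_llama_checkpoint")

assert tok.eos_token == "<EOS>" and tok.eos_token_id == 0
assert tok.pad_token == "<PAD>" and tok.pad_token_id == 1
assert tok.bos_token == "<BOS>" and tok.bos_token_id == 2
assert tok.unk_token == "<UNK>" and tok.unk_token_id == 3
```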
