Commit f2c4381 (1 parent: 927130d)

add apriel
7 files changed: +130 -3 lines

README.md (1 addition, 0 deletions)

```diff
@@ -13,6 +13,7 @@ pure C++ implementation based on [@ggerganov](https://github.com/ggerganov)'s [g
 
 **What's New:**
 
+* 2025-04-18: Apriel
 * 2025-04-15: GLM-4-0414
 * 2025-04-10: LlaMA4 (Language model)
 * 2025-03-27: Ling (Bailing)
```

convert.py (2 additions, 1 deletion)

```diff
@@ -1307,12 +1307,13 @@ def dump_config(f, config, ggml_type):
         f.write(struct.pack("i" * len(config_values), *config_values))
         config_values = [
             config.rope_theta,
+            config.head_dim,
             config.rope_scaling['original_max_position_embeddings'],
             config.rope_scaling['beta_fast'],
             config.rope_scaling['beta_slow'],
             config.rope_scaling['factor'],
         ]
-        f.write(struct.pack("<fifff", *config_values))
+        f.write(struct.pack("<fiifff", *config_values))
 
     @staticmethod
     def get_weight_names(config):
```
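The pack format grows from `<fifff` to `<fiifff`: in little-endian order, a float (`rope_theta`), now two 32-bit ints (`head_dim`, `original_max_position_embeddings`), then three floats (`beta_fast`, `beta_slow`, `factor`), with no padding between fields. A minimal sketch of a matching reader on the C++ side; the struct and function names are hypothetical, only the field order and types come from the diff:

```cpp
#include <cstdint>
#include <cstdio>

// Hypothetical mirror of the values convert.py packs with "<fiifff".
// Standard-size little-endian packing has no padding, so the fields are
// read one by one rather than with a single fread of the struct (whose
// in-memory layout could include padding).
struct AprielExtraConfig
{
    float   rope_theta;
    int32_t head_dim;
    int32_t rope_scaling_original_max_position_embeddings;
    float   rope_scaling_beta_fast;
    float   rope_scaling_beta_slow;
    float   rope_scaling_factor;
};

// Assumes a little-endian host, matching the "<" byte order on disk;
// a big-endian port would need byte swaps.
static bool read_apriel_extra_config(std::FILE *f, AprielExtraConfig &cfg)
{
    return std::fread(&cfg.rope_theta, sizeof cfg.rope_theta, 1, f) == 1
        && std::fread(&cfg.head_dim, sizeof cfg.head_dim, 1, f) == 1
        && std::fread(&cfg.rope_scaling_original_max_position_embeddings,
                      sizeof cfg.rope_scaling_original_max_position_embeddings, 1, f) == 1
        && std::fread(&cfg.rope_scaling_beta_fast, sizeof cfg.rope_scaling_beta_fast, 1, f) == 1
        && std::fread(&cfg.rope_scaling_beta_slow, sizeof cfg.rope_scaling_beta_slow, 1, f) == 1
        && std::fread(&cfg.rope_scaling_factor, sizeof cfg.rope_scaling_factor, 1, f) == 1;
}
```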

docs/models.md (3 additions, 0 deletions)

```diff
@@ -5,6 +5,9 @@
 * Adept Persimmon (`PersimmonForCausalLM`)
     * [x] [Chat-8B](https://huggingface.co/adept/persimmon-8b-chat)
 
+* Apriel (`AprielForCausalLM`)
+    * [x] [Instruct-5B](https://huggingface.co/ServiceNow-AI/Apriel-5B-Instruct/tree/a9a4831718a2fad437f25ace0d0259953fcaaa26)
+
 * Aquila (`AquilaForCausalLM`)
     * [x] [Chat2-7B](https://huggingface.co/BAAI/AquilaChat2-7B/tree/9905960de19ea9e573c0dc3fbdf54d4ddcc610d3), [Chat2-34B](https://huggingface.co/BAAI/AquilaChat2-34B/commit/5c7990b198c94b63dfbfa022462b9cf672dbcfa0), [Chat2-7B-16K](https://huggingface.co/BAAI/AquilaChat2-7B-16K/commit/fb46d48479d05086ccf6952f19018322fcbb54cd), [Chat2-34B-16K](https://huggingface.co/BAAI/AquilaChat2-34B-16K/tree/9f19774f3e7afad2fc3d51fe308eac5a2d88c8b1)
```

models/apriel.cpp (new file, 95 additions)

```cpp
struct Config : public llama::v3::Config
{
    int head_dim;
    // YaRN-style rope scaling parameters, written by convert.py from config.json.
    int rope_scaling_original_max_position_embeddings;
    float rope_scaling_beta_fast;
    float rope_scaling_beta_slow;
    float rope_scaling_factor;
};

class ChatHistoryEncoder : public BaseHistoryEncoder
{
public:
    void append_sys_prompt(std::vector<int> &ids) const override
    {
        std::ostringstream oss;
        ids.push_back(tokenizer->bos_token_id);
        oss << "<|system|>\n" << tokenizer->get_system_prompt() << "\n<|end|>\n";
        tokenizer->encode(oss.str(), ids);
    }

    void append_ai(int round_idx, const std::string &ai, std::vector<int> &ids) const override
    {
        append_ai_opening(round_idx, ids);
        tokenizer->encode(ai, ids);
        tokenizer->encode("\n<|end|>\n", ids);
    }

    void append_user(int round_idx, const std::string &user, std::vector<int> &ids) const override
    {
        append_user_opening(round_idx, ids);
        tokenizer->encode(user, ids);
        tokenizer->encode("\n<|end|>\n", ids);
    }

    void append_ai_opening(int round_idx, std::vector<int> &ids) const override
    {
        tokenizer->encode("<|assistant|>\n", ids);
    }

    void append_user_opening(int round_idx, std::vector<int> &ids) const override
    {
        tokenizer->encode("<|user|>\n", ids);
    }
};

static ChatHistoryEncoder _chat_encoder;

class Tokenizer : public BaseTokenizer
{
public:
    Tokenizer(const BaseConfig &config)
        : Tokenizer(config, &_chat_encoder)
    {}

    Tokenizer(const BaseConfig &config, BaseHistoryEncoder *encoder)
        : BaseTokenizer::BaseTokenizer(config, encoder)
    {
        sys_prompt = "You are a helpful AI assistant that provides accurate and concise information.";
    }

    size_t load(tokenizer::DataReader *buffer, int n_vocab) override
    {
        tp = new tokenizer::BPEProcessor2(
            {
                // Pre-tokenizer split regex (Unicode-category based word/number/punctuation split).
                "[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]*[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]+|[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]+[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]*|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
            }
        );
        size_t size = tp->Load(buffer, n_vocab);

        return size;
    }
};

class ConditionalGeneration : public llama::v2::GenericConditionalGeneration<LlamaBlock>
{
public:
    ConditionalGeneration() = default;

    ConditionalGeneration(const Config &config, const RuntimeConfig &runtime_config, ModelType type = ModelType::MODEL_TYPE_APRIEL)
        : llama::v2::GenericConditionalGeneration<LlamaBlock>(config, runtime_config, type,
              config.num_key_value_heads, config.head_dim, config.max_length, 12, false)
    {
        auto transformer = Base::get_typed_transformer<ModelClass2>();
        for (int i = 0; i < config.num_hidden_layers; i++)
        {
            auto &attention = transformer->layers[i].attention;
            attention.freq_base = config.rope_theta;

            attention.n_original_ctx = config.rope_scaling_original_max_position_embeddings;
            attention.beta_fast = config.rope_scaling_beta_fast;
            attention.beta_slow = config.rope_scaling_beta_slow;

            attention.freq_scale = 1 / config.rope_scaling_factor;
            attention.attn_factor = 1.0f;
            attention.ext_factor = 1.0f;
        }
    }
};
```
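Taken together, the encoder renders a ChatML-like template: each turn opens with a `<|system|>`, `<|user|>`, or `<|assistant|>` marker and closes with `<|end|>`. Inferred from the encoder methods above (not stated in the commit; the BOS token is prepended as a raw token id, so it is not shown), a single round ready for generation looks like:

```
<|system|>
You are a helpful AI assistant that provides accurate and concise information.
<|end|>
<|user|>
{user message}
<|end|>
<|assistant|>
```

Note that `append_ai_opening` is what primes generation, so the prompt ends with an open assistant header.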

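The constructor maps the YaRN parameters onto per-layer attention fields: `freq_scale = 1/factor` interpolates positions, while `beta_fast` and `beta_slow` bound the rotary dimensions over which interpolation ramps back to plain extrapolation. A self-contained sketch of that blend, following the widely used ggml-style YaRN formulation rather than this repository's exact code (function names here are illustrative):

```cpp
#include <algorithm>
#include <cmath>

// Dimension index below which positions complete fewer than n_rot full
// rotations over the original training context (YaRN "correction dim").
static float yarn_corr_dim(int rope_dim, int n_original_ctx, float n_rot, float freq_base)
{
    return rope_dim * std::log(n_original_ctx / (n_rot * 2.0f * 3.14159265f)) / (2.0f * std::log(freq_base));
}

// Ramp from 1 (keep the original frequency) down to 0 (fully interpolated)
// across the dimensions bounded by the beta_fast / beta_slow cutoffs.
static float yarn_ramp(float low, float high, int dim)
{
    float y = (dim / 2 - low) / std::max(0.001f, high - low);
    return 1.0f - std::clamp(y, 0.0f, 1.0f);
}

// Per-dimension rotation angle: blend of the interpolated angle
// (theta * freq_scale) and the extrapolated one, weighted by the ramp.
static float yarn_theta(float theta_extrap, int dim, int rope_dim, float freq_base,
                        int n_original_ctx, float freq_scale, float ext_factor,
                        float beta_fast, float beta_slow)
{
    float low  = std::floor(yarn_corr_dim(rope_dim, n_original_ctx, beta_fast, freq_base));
    float high = std::ceil (yarn_corr_dim(rope_dim, n_original_ctx, beta_slow, freq_base));
    float mix  = yarn_ramp(low, high, dim) * ext_factor; // ext_factor = 1.0f above
    float theta_interp = freq_scale * theta_extrap;      // freq_scale = 1 / rope_scaling_factor
    return theta_interp * (1.0f - mix) + theta_extrap * mix;
}
```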
scripts/models.json (16 additions, 0 deletions)

```diff
@@ -2463,5 +2463,21 @@
         }
       }
     }
+  },
+  "apriel": {
+    "brief": "Apriel is a family of models built for versatility, offering high throughput and efficiency across a wide range of tasks.",
+    "default": "5b",
+    "license": "MIT",
+    "variants": {
+      "5b": {
+        "default": "q8",
+        "quantized": {
+          "q8": {
+            "size": 5140774832,
+            "url": "chatllm_quantized_apriel/apriel-5b.bin"
+          }
+        }
+      }
+    }
   }
 }
```

src/layers.h (4 additions, 2 deletions)

```diff
@@ -1565,7 +1565,6 @@ namespace chatllm
           beta_fast(0.0f),
           beta_slow(0.0f),
           rope_dim(head_dim),
-          n_ctx(0),
           n_original_ctx(0),
           mrope_sections(nullptr),
           use_rope(true),
@@ -1586,7 +1585,6 @@ namespace chatllm
           beta_fast(0.0f),
           beta_slow(0.0f),
           rope_dim(rope_dim),
-          n_ctx(0),
           n_original_ctx(0),
           mrope_sections(nullptr),
           use_rope(true),
@@ -1958,6 +1956,10 @@ namespace chatllm
         LlamaBlock(InitContext *ctx, int hidden_size, int num_attention_heads, int intermediate_size, int num_kv_heads, int max_length)
             : LMBlock1(ctx, hidden_size, num_attention_heads, intermediate_size, num_kv_heads, max_length)
         {}
+
+        LlamaBlock(InitContext *ctx, int hidden_size, int num_attention_heads, int intermediate_size, int num_kv_heads, int head_dim, int max_length)
+            : LMBlock1(ctx, hidden_size, num_attention_heads, intermediate_size, num_kv_heads, head_dim, max_length)
+        {}
     };
 
     class Llama31SelfAttention : public RoPESelfAttention<BaseAttention>
```
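The new `LlamaBlock` overload matters because the older constructor leaves the head dimension implicit, presumably derived as `hidden_size / num_attention_heads`; Apriel carries `head_dim` independently in its config, and `ConditionalGeneration` passes `config.head_dim` straight through (see `models/apriel.cpp` above). A tiny sketch of the distinction, with illustrative names only:

```cpp
// Illustrative only: the implicit derivation the older constructor relies on,
// versus an explicit head_dim as carried by Apriel's config.
struct DimsSketch
{
    int hidden_size;
    int num_attention_heads;
    int head_dim; // may differ from hidden_size / num_attention_heads
};

static int effective_head_dim(const DimsSketch &d, bool has_explicit_head_dim)
{
    // Old path: assume the heads evenly tile the hidden dimension.
    // New path: trust the value read from config.json.
    return has_explicit_head_dim ? d.head_dim
                                 : d.hidden_size / d.num_attention_heads;
}
```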

src/models.cpp (9 additions, 0 deletions)

```diff
@@ -343,6 +343,8 @@ namespace chatllm
 
         MODEL_TYPE_SOLARPRO = 0x2300,
 
+        MODEL_TYPE_APRIEL = 0x2400,
+
         MODEL_TYPE_BCE_Embedding = 0x10000100,
         MODEL_TYPE_BCE_ReRanker = 0x10000101,
         MODEL_TYPE_BGE_M3 = 0x10000102,
@@ -2018,6 +2020,11 @@ namespace chatllm
         #include "../models/kimi.cpp"
     }
 
+    namespace apriel
+    {
+    #include "../models/apriel.cpp"
+    }
+
     template <class Config>
     void load_config(ModelLoader &loader, Config &config, const ModelObject::extra_args &args)
     {
@@ -2407,6 +2414,8 @@ namespace chatllm
             \
             CASE(KIMI_VL, kimi::vl, 1) \
             \
+            CASE(APRIEL, apriel, 1) \
+            \
             CASE(BCE_Embedding, bce::embedding, 1) \
             CASE(BCE_ReRanker, bce::ranker, 1) \
             CASE(BGE_M3, bge::embedding, 1) \
```
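Registering a model therefore touches three places: a `MODEL_TYPE_*` id, a namespace that textually includes the model's `.cpp`, and a `CASE` entry tying the two together. A self-contained sketch of that X-macro dispatch pattern, with hypothetical names (the repository's real `CASE` macro definition is not shown in this diff):

```cpp
#include <cstdio>

// Hypothetical sketch of the X-macro dispatch used above; names and
// signatures are illustrative, not the repository's actual definitions.
enum ModelTypeSketch { MODEL_TYPE_APRIEL = 0x2400 };

namespace apriel
{
    struct ConditionalGeneration { const char *name() const { return "apriel"; } };
}

// One row per model: CASE(TYPE, ns) maps MODEL_TYPE_##TYPE to namespace ns.
#define MODEL_CASES(CASE) \
    CASE(APRIEL, apriel)

#define MAKE_CASE(TYPE, ns) \
    case MODEL_TYPE_##TYPE: { ns::ConditionalGeneration m; std::printf("loaded %s\n", m.name()); break; }

int main()
{
    int type = MODEL_TYPE_APRIEL;
    switch (type)
    {
        MODEL_CASES(MAKE_CASE)
        default: std::printf("unknown model type 0x%x\n", type);
    }
    return 0;
}
```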
