Commit e2d5342: add index model
Author: Judd
Parent: e97d1bc

6 files changed, +122 -0 lines changed

README.md
Lines changed: 1 addition & 0 deletions

@@ -13,6 +13,7 @@ pure C++ implementation based on [@ggerganov](https://github.com/ggerganov)'s [g
 
 **What's New:**
 
+* 2024-06-23: Index
 * 2024-06-17: DeepSeek-Coder-V2 (Instruct & Base)
 * 2024-06-15: [Tool calling](./docs/tool_calling.md)
 * 2024-06-07: Qwen2

convert.py
Lines changed: 24 additions & 0 deletions

@@ -118,6 +118,8 @@ class ModelType(Enum):
 
     XVERSE        = 0x1900
 
+    Index         = 0x1a00
+
     BCE_Embedding = 0x10000100
     BCE_ReRanker  = 0x10000101
     BGE_M3        = 0x10000102

@@ -3144,6 +3146,26 @@ def get_weight_names(config):
 
         return weight_names
 
+class IndexConverter(BaseConverter):
+    MODEL_TYPE = ModelType.Index
+
+    @classmethod
+    def pp(cls, config, name: str, tensor):
+        if name == 'lm_head.weight':
+            return nn.Parameter(nn.functional.normalize(tensor)) if config.norm_head else 0
+        else:
+            return Llama3Converter.pp(config, name, tensor)
+
+    @staticmethod
+    def dump_config(f, config, ggml_type):
+        config.rope_theta = 10000.0
+
+        Llama3Converter.dump_config(f, config, ggml_type)
+
+    @staticmethod
+    def get_weight_names(config):
+        return Llama3Converter.get_weight_names(config)
+
 def convert_grok_1_base(args, vocab, ggml_type):
     def ffn_size(emb_size, widening_factor):
         _ffn_size = int(widening_factor * emb_size) * 2 // 3

@@ -3476,6 +3498,8 @@ def main():
         DeepSeekV2Converter.MODEL_TYPE = ModelType.DeepSeekV2
         print("DeepSeekV2 is not fully supported yet!!!!")
         DeepSeekV2Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
+    elif arch == 'IndexForCausalLM':
+        IndexConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     else:
         raise Exception(f'unknown model_type: {arch}')
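Side note on the `norm_head` branch in `IndexConverter.pp`: `nn.functional.normalize(tensor)` L2-normalizes `lm_head.weight` row by row, so output logits behave like scaled cosine similarities between hidden states and vocab embeddings. A minimal standalone sketch of that transform (toy shapes; not the converter's actual call path):

import torch
import torch.nn.functional as F

lm_head = torch.randn(8, 4)        # stand-in for the real [vocab_size, hidden_size] weight
normalized = F.normalize(lm_head)  # defaults: p=2 along dim=1, i.e. per vocab row
print(normalized.norm(dim=1))      # every row now has unit L2 norm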

docs/models.md
Lines changed: 1 addition & 0 deletions

@@ -19,6 +19,7 @@
 * [x] TigerBot: [Chat-7B](https://huggingface.co/TigerResearch/tigerbot-7b-chat), [Chat-13B](https://huggingface.co/TigerResearch/tigerbot-13b-chat-v5) (`-a TigerBot`)
 * [x] CodeFuse-DeepSeek: [33B](https://huggingface.co/codefuse-ai/CodeFuse-DeepSeek-33B) (`-a CodeFuseDeepSeek`)
 * [x] MAP-Neo: [Instruct-7B](https://huggingface.co/m-a-p/neo_7b_instruct_v0.1) (`-a MAP-Neo`)
+* [x] Index: [Chat-1.9B](https://huggingface.co/IndexTeam/Index-1.9B-Chat), [Character-1.9B](https://huggingface.co/IndexTeam/Index-1.9B-Character)
 
 For other models that use the `LlamaForCausalLM` architecture, for example, [aiXcoder-7B](https://huggingface.co/aiXcoder/aixcoder-7b-base), try `-a Yi`.

models.cpp
Lines changed: 11 additions & 0 deletions

@@ -156,6 +156,8 @@ namespace chatllm
 
         MODEL_TYPE_XVERSE = 0x1900,
 
+        MODEL_TYPE_INDEX = 0x1a00,
+
         MODEL_TYPE_BCE_Embedding = 0x10000100,
         MODEL_TYPE_BCE_ReRanker  = 0x10000101,
         MODEL_TYPE_BGE_M3        = 0x10000102,

@@ -308,6 +310,8 @@ namespace chatllm
             return "StarCoder2";
         case MODEL_TYPE_XVERSE:
             return "XVERSE";
+        case MODEL_TYPE_INDEX:
+            return "Index";
         default:
            CHATLLM_THROW << "unknown model type: " << model_type;
            return "???";

@@ -1289,6 +1293,11 @@ namespace chatllm
     #include "models/xverse.cpp"
     }
 
+    namespace index
+    {
+    #include "models/index.cpp"
+    }
+
     template <class Config>
     void load_config(ModelLoader &loader, Config &config, const ModelObject::extra_args &args)
     {

@@ -1558,6 +1567,8 @@ namespace chatllm
             \
             CASE(XVERSE,         xverse::dense, 1)  \
             \
+            CASE(INDEX,          index, 1)          \
+            \
             CASE(BCE_Embedding,  bce::embedding, 1) \
             CASE(BCE_ReRanker,   bce::ranker, 1)    \
             CASE(BGE_M3,         bge::embedding, 1) \
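The four C++ touch points above are pure registration: a new enum value, a `to_string` case, a namespace wrapping the included implementation, and a `CASE(...)` entry wiring `MODEL_TYPE_INDEX` to the `index` namespace. A rough Python analogue of that dispatch, illustrative only (the registry layout is made up, not the repo's structure):

MODEL_TYPE_XVERSE = 0x1900
MODEL_TYPE_INDEX  = 0x1a00  # added by this commit

REGISTRY = {
    MODEL_TYPE_XVERSE: ("XVERSE", "xverse::dense"),
    MODEL_TYPE_INDEX:  ("Index",  "index"),
}

def to_string(model_type: int) -> str:
    if model_type not in REGISTRY:
        raise ValueError(f"unknown model type: {model_type}")
    return REGISTRY[model_type][0]

print(to_string(MODEL_TYPE_INDEX))  # Index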

models/index.cpp
Lines changed: 60 additions & 0 deletions

@@ -0,0 +1,60 @@
+typedef llama::v3::Config Config;
+
+class ChatHistoryEncoder : public BaseHistoryEncoder
+{
+public:
+    void append_sys_prompt(std::vector<int> &ids) const override;
+    void append_pair(int round_idx, const std::string &user, const std::string &ai, std::vector<int> &ids) const override;
+    void do_append_user(int round_idx, const std::string &user, std::vector<int> &ids) const override;
+};
+
+static ChatHistoryEncoder _chat_encoder;
+
+class Tokenizer : public llama::v2::Tokenizer
+{
+public:
+    Tokenizer(const Config &config)
+        : llama::v2::Tokenizer(config, &_chat_encoder)
+    {
+        sys_prompt = "";
+        reserved_0_token_id = 3;
+        reserved_1_token_id = 4;
+    }
+public:
+    int reserved_0_token_id;
+    int reserved_1_token_id;
+};
+
+class ConditionalGeneration : public llama::v3::ConditionalGeneration
+{
+public:
+    ConditionalGeneration() = default;
+    ConditionalGeneration(const Config &config)
+        : llama::v3::ConditionalGeneration(config, ModelType::MODEL_TYPE_INDEX)
+    {}
+};
+
+void ChatHistoryEncoder::append_sys_prompt(std::vector<int> &ids) const
+{
+    if (tokenizer->get_system_prompt().size() > 0)
+    {
+        ids.push_back(tokenizer->pad_token_id);
+        tokenizer->encode(tokenizer->get_system_prompt(), ids);
+    }
+}
+
+void ChatHistoryEncoder::append_pair(int round_idx, const std::string &user, const std::string &ai, std::vector<int> &ids) const
+{
+    Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
+    do_append_user(round_idx, user, ids);
+    tok->encode(ai, ids);
+}
+
+void ChatHistoryEncoder::do_append_user(int round_idx, const std::string &user, std::vector<int> &ids) const
+{
+    Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
+    ids.push_back(tok->reserved_0_token_id);
+    tok->encode(user, ids);
+    ids.push_back(tok->reserved_1_token_id);
+}
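Net effect of the encoder above: an optional system prompt preceded by the pad token, then each round framed as reserved token 3, user text, reserved token 4, AI text. A hypothetical Python sketch of that token layout (the `encode` stub and the pad id 0 are placeholder assumptions; the C++ uses the real tokenizer and `tokenizer->pad_token_id`):

RESERVED_0, RESERVED_1, PAD = 3, 4, 0  # PAD id is assumed; see tokenizer->pad_token_id above

def encode(text):
    # stand-in for tokenizer->encode(text, ids); real ids come from the model's vocabulary
    return [ord(c) for c in text]

def build_prompt(system, rounds, current_user):
    ids = []
    if system:
        ids.append(PAD)
        ids += encode(system)
    for user, ai in rounds:                                  # completed rounds
        ids += [RESERVED_0] + encode(user) + [RESERVED_1] + encode(ai)
    ids += [RESERVED_0] + encode(current_user) + [RESERVED_1]  # model replies after this
    return ids

print(build_prompt("", [], "hi"))  # [3, 104, 105, 4]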

scripts/models.json
Lines changed: 25 additions & 0 deletions

@@ -1,4 +1,29 @@
 {
+    "index": {
+        "brief": "LLM developed by Bilibili.",
+        "default": "1.9b-chat",
+        "license": "https://huggingface.co/IndexTeam/Index-1.9B-Chat/blob/main/LICENSE",
+        "variants": {
+            "1.9b-chat": {
+                "default": "q8",
+                "quantized": {
+                    "q8": {
+                        "size": 2309982912,
+                        "url": "chatllm_quantized_index/index.bin"
+                    }
+                }
+            },
+            "1.9b-character": {
+                "default": "q8",
+                "quantized": {
+                    "q8": {
+                        "size": 2309982912,
+                        "url": "chatllm_quantized_index/index-ch.bin"
+                    }
+                }
+            }
+        }
+    },
     "glm-4": {
         "brief": "GLM-4-9B is the open-source version of the latest generation of pre-trained models in the GLM-4 series launched by Zhipu AI.",
         "default": "9b",
