Commit e2d5342: add index model
Author: Judd
Parent: e97d1bc

6 files changed, +122 -0 lines changed

README.md
Lines changed: 1 addition & 0 deletions

@@ -13,6 +13,7 @@ pure C++ implementation based on [@ggerganov](https://github.com/ggerganov)'s [g
 
 **What's New:**
 
+* 2024-06-23: Index
 * 2024-06-17: DeepSeek-Coder-V2 (Instruct & Base)
 * 2024-06-15: [Tool calling](./docs/tool_calling.md)
 * 2024-06-07: Qwen2

convert.py
Lines changed: 24 additions & 0 deletions

@@ -118,6 +118,8 @@ class ModelType(Enum):
 
     XVERSE        = 0x1900
 
+    Index         = 0x1a00
+
     BCE_Embedding = 0x10000100
     BCE_ReRanker  = 0x10000101
     BGE_M3        = 0x10000102

@@ -3144,6 +3146,26 @@ def get_weight_names(config):
 
         return weight_names
 
+class IndexConverter(BaseConverter):
+    MODEL_TYPE = ModelType.Index
+
+    @classmethod
+    def pp(cls, config, name: str, tensor):
+        if name == 'lm_head.weight':
+            return nn.Parameter(nn.functional.normalize(tensor)) if config.norm_head else 0
+        else:
+            return Llama3Converter.pp(config, name, tensor)
+
+    @staticmethod
+    def dump_config(f, config, ggml_type):
+        config.rope_theta = 10000.0
+
+        Llama3Converter.dump_config(f, config, ggml_type)
+
+    @staticmethod
+    def get_weight_names(config):
+        return Llama3Converter.get_weight_names(config)
+
 def convert_grok_1_base(args, vocab, ggml_type):
     def ffn_size(emb_size, widening_factor):
         _ffn_size = int(widening_factor * emb_size) * 2 // 3

@@ -3476,6 +3498,8 @@ def main():
         DeepSeekV2Converter.MODEL_TYPE = ModelType.DeepSeekV2
         print("DeepSeekV2 is not fully supported yet!!!!")
         DeepSeekV2Converter.convert(config, model_files, vocab, ggml_type, args.save_path)
+    elif arch == 'IndexForCausalLM':
+        IndexConverter.convert(config, model_files, vocab, ggml_type, args.save_path)
     else:
         raise Exception(f'unknown model_type: {arch}')
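Side note on the `norm_head` branch in `IndexConverter.pp`: `nn.functional.normalize(tensor)` L2-normalizes `lm_head.weight` row by row, so output logits behave like scaled cosine similarities between hidden states and vocab embeddings. A minimal standalone sketch of that transform (toy shapes; not the converter's actual call path):

import torch
import torch.nn.functional as F

lm_head = torch.randn(8, 4)        # stand-in for the real [vocab_size, hidden_size] weight
normalized = F.normalize(lm_head)  # defaults: p=2 along dim=1, i.e. per vocab row
print(normalized.norm(dim=1))      # every row now has unit L2 norm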

docs/models.md
Lines changed: 1 addition & 0 deletions

@@ -19,6 +19,7 @@
 * [x] TigerBot: [Chat-7B](https://huggingface.co/TigerResearch/tigerbot-7b-chat), [Chat-13B](https://huggingface.co/TigerResearch/tigerbot-13b-chat-v5) (`-a TigerBot`)
 * [x] CodeFuse-DeepSeek: [33B](https://huggingface.co/codefuse-ai/CodeFuse-DeepSeek-33B) (`-a CodeFuseDeepSeek`)
 * [x] MAP-Neo: [Instruct-7B](https://huggingface.co/m-a-p/neo_7b_instruct_v0.1) (`-a MAP-Neo`)
+* [x] Index: [Chat-1.9B](https://huggingface.co/IndexTeam/Index-1.9B-Chat), [Character-1.9B](https://huggingface.co/IndexTeam/Index-1.9B-Character)
 
 For other models that use the `LlamaForCausalLM` architecture, for example, [aiXcoder-7B](https://huggingface.co/aiXcoder/aixcoder-7b-base), try `-a Yi`.

models.cpp
Lines changed: 11 additions & 0 deletions

@@ -156,6 +156,8 @@ namespace chatllm
 
         MODEL_TYPE_XVERSE = 0x1900,
 
+        MODEL_TYPE_INDEX = 0x1a00,
+
         MODEL_TYPE_BCE_Embedding = 0x10000100,
         MODEL_TYPE_BCE_ReRanker  = 0x10000101,
         MODEL_TYPE_BGE_M3        = 0x10000102,

@@ -308,6 +310,8 @@ namespace chatllm
             return "StarCoder2";
         case MODEL_TYPE_XVERSE:
             return "XVERSE";
+        case MODEL_TYPE_INDEX:
+            return "Index";
         default:
            CHATLLM_THROW << "unknown model type: " << model_type;
            return "???";

@@ -1289,6 +1293,11 @@ namespace chatllm
     #include "models/xverse.cpp"
     }
 
+    namespace index
+    {
+    #include "models/index.cpp"
+    }
+
     template <class Config>
     void load_config(ModelLoader &loader, Config &config, const ModelObject::extra_args &args)
     {

@@ -1558,6 +1567,8 @@ namespace chatllm
             \
             CASE(XVERSE,         xverse::dense, 1)  \
             \
+            CASE(INDEX,          index, 1)          \
+            \
             CASE(BCE_Embedding,  bce::embedding, 1) \
             CASE(BCE_ReRanker,   bce::ranker, 1)    \
             CASE(BGE_M3,         bge::embedding, 1) \
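The four C++ touch points above are pure registration: a new enum value, a `to_string` case, a namespace wrapping the included implementation, and a `CASE(...)` entry wiring `MODEL_TYPE_INDEX` to the `index` namespace. A rough Python analogue of that dispatch, illustrative only (the registry layout is made up, not the repo's structure):

MODEL_TYPE_XVERSE = 0x1900
MODEL_TYPE_INDEX  = 0x1a00  # added by this commit

REGISTRY = {
    MODEL_TYPE_XVERSE: ("XVERSE", "xverse::dense"),
    MODEL_TYPE_INDEX:  ("Index",  "index"),
}

def to_string(model_type: int) -> str:
    if model_type not in REGISTRY:
        raise ValueError(f"unknown model type: {model_type}")
    return REGISTRY[model_type][0]

print(to_string(MODEL_TYPE_INDEX))  # Index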

models/index.cpp
Lines changed: 60 additions & 0 deletions

@@ -0,0 +1,60 @@
+typedef llama::v3::Config Config;
+
+class ChatHistoryEncoder : public BaseHistoryEncoder
+{
+public:
+    void append_sys_prompt(std::vector<int> &ids) const override;
+    void append_pair(int round_idx, const std::string &user, const std::string &ai, std::vector<int> &ids) const override;
+    void do_append_user(int round_idx, const std::string &user, std::vector<int> &ids) const override;
+};
+
+static ChatHistoryEncoder _chat_encoder;
+
+class Tokenizer : public llama::v2::Tokenizer
+{
+public:
+    Tokenizer(const Config &config)
+        : llama::v2::Tokenizer(config, &_chat_encoder)
+    {
+        sys_prompt = "";
+        reserved_0_token_id = 3;
+        reserved_1_token_id = 4;
+    }
+public:
+    int reserved_0_token_id;
+    int reserved_1_token_id;
+};
+
+class ConditionalGeneration : public llama::v3::ConditionalGeneration
+{
+public:
+    ConditionalGeneration() = default;
+    ConditionalGeneration(const Config &config)
+        : llama::v3::ConditionalGeneration(config, ModelType::MODEL_TYPE_INDEX)
+    {}
+};
+
+void ChatHistoryEncoder::append_sys_prompt(std::vector<int> &ids) const
+{
+    if (tokenizer->get_system_prompt().size() > 0)
+    {
+        ids.push_back(tokenizer->pad_token_id);
+        tokenizer->encode(tokenizer->get_system_prompt(), ids);
+    }
+}
+
+void ChatHistoryEncoder::append_pair(int round_idx, const std::string &user, const std::string &ai, std::vector<int> &ids) const
+{
+    Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
+    do_append_user(round_idx, user, ids);
+    tok->encode(ai, ids);
+}
+
+void ChatHistoryEncoder::do_append_user(int round_idx, const std::string &user, std::vector<int> &ids) const
+{
+    Tokenizer *tok = dynamic_cast<Tokenizer *>(tokenizer);
+    ids.push_back(tok->reserved_0_token_id);
+    tok->encode(user, ids);
+    ids.push_back(tok->reserved_1_token_id);
+}
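Net effect of the encoder above: an optional system prompt preceded by the pad token, then each round framed as reserved token 3, user text, reserved token 4, AI text. A hypothetical Python sketch of that token layout (the `encode` stub and the pad id 0 are placeholder assumptions; the C++ uses the real tokenizer and `tokenizer->pad_token_id`):

RESERVED_0, RESERVED_1, PAD = 3, 4, 0  # PAD id is assumed; see tokenizer->pad_token_id above

def encode(text):
    # stand-in for tokenizer->encode(text, ids); real ids come from the model's vocabulary
    return [ord(c) for c in text]

def build_prompt(system, rounds, current_user):
    ids = []
    if system:
        ids.append(PAD)
        ids += encode(system)
    for user, ai in rounds:                                  # completed rounds
        ids += [RESERVED_0] + encode(user) + [RESERVED_1] + encode(ai)
    ids += [RESERVED_0] + encode(current_user) + [RESERVED_1]  # model replies after this
    return ids

print(build_prompt("", [], "hi"))  # [3, 104, 105, 4]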

scripts/models.json
Lines changed: 25 additions & 0 deletions

@@ -1,4 +1,29 @@
 {
+    "index": {
+        "brief": "LLM developed by Bilibili.",
+        "default": "1.9b-chat",
+        "license": "https://huggingface.co/IndexTeam/Index-1.9B-Chat/blob/main/LICENSE",
+        "variants": {
+            "1.9b-chat": {
+                "default": "q8",
+                "quantized": {
+                    "q8": {
+                        "size": 2309982912,
+                        "url": "chatllm_quantized_index/index.bin"
+                    }
+                }
+            },
+            "1.9b-character": {
+                "default": "q8",
+                "quantized": {
+                    "q8": {
+                        "size": 2309982912,
+                        "url": "chatllm_quantized_index/index-ch.bin"
+                    }
+                }
+            }
+        }
+    },
     "glm-4": {
         "brief": "GLM-4-9B is the open-source version of the latest generation of pre-trained models in the GLM-4 series launched by Zhipu AI.",
         "default": "9b",
