Skip to content

Commit bf4af66

Browse files
committed
add Seed-Coder
1 parent 39fe642 commit bf4af66

File tree

5 files changed

+71
-8
lines changed

5 files changed

+71
-8
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ pure C++ implementation based on [@ggerganov](https://github.com/ggerganov)'s [g
1313

1414
**What's New:**
1515

16+
* 2025-05-11: Seed-Coder
1617
* 2025-04-30: QWen3, MiMo
1718
* 2025-02-17: [ggml updated](https://github.com/ggml-org/llama.cpp/tree/658987cfc9d752dca7758987390d5fb1a7a0a54a) again
1819
* 2025-04-18: Apriel

docs/models.md

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -128,6 +128,8 @@
128128
* [x] Reke-Flash: [Flash-3](https://huggingface.co/RekaAI/reka-flash-3/tree/69cea64942e4db4809b757ae2b0d312b4b610263) (`-a Reka-Flash-3`)
129129
* [x] Nemotron: [Llama-3.1-Nemotron-Nano-8B](https://huggingface.co/nvidia/Llama-3.1-Nemotron-Nano-8B-v1/tree/42f62a403ee352e019834442673256e3fe3de275)
130130
* [x] LlaMA-4: [Scout-Instruct](https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct/tree/7dab2f5f854fe665b6b2f1eccbd3c48e5f627ad8), [Maverick-Instruct](https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct/tree/f0ee6477b90b7a6aaefd2cfdf1b4a05d36184137)
131+
* [x] Seed-Coder: [Instruct-8B](https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Instruct/tree/7b934eb8f2ce8f40191fa26d12236eb8bc3a77aa),
132+
[Reasoning-8B](https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Reasoning/tree/47f18f877ea7d6aa5c0d87a474f30d420b22bd98) (`--name Seed-Coder`)
131133

132134
For other models that using `LlamaForCausalLM` architecture, for example, [aiXcoder-7B](https://huggingface.co/aiXcoder/aixcoder-7b-base), try `-a Yi`.
133135

@@ -256,7 +258,7 @@ Please use `--format completion` for these models.
256258

257259
* LlaMA-like (`LlamaForCausalLM`):
258260
* [x] DeepSeek: [Coder-Base-1.3B](https://huggingface.co/deepseek-ai/deepseek-coder-1.3b-base) (`-a DeepSeekCoder`), [Coder-Base-6.7B](https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-base) (`-a DeepSeekCoder`)
259-
261+
* [x] Seed-Coder: [Base-8B](https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base/tree/44d3e28414b052f1fc8f9b58740edd2d8f2c2c31) (`--name Seed-Coder`)
260262

261263
* Mistral (`MistralForCausalLM`, `MixtralForCausalLM`)
262264
* [x] Mistral: [Base-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1), [Base-7B-v0.3](https://huggingface.co/mistralai/Mistral-7B-v0.3)

models/llama.cpp

Lines changed: 32 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -195,11 +195,23 @@ namespace v3
195195
{}
196196

197197
Tokenizer(const BaseConfig &config, BaseHistoryEncoder *encoder)
198-
: BaseTokenizer::BaseTokenizer(config, encoder)
198+
: BaseTokenizer::BaseTokenizer(config, encoder), is_seed_coder(false)
199199
{
200200
sys_prompt = "";
201201
}
202202

203+
bool load_config(const json::JSON &config) override
204+
{
205+
auto name = config["model_name"];
206+
if (name.IsString())
207+
{
208+
is_seed_coder = name.ToString() == "Seed-Coder";
209+
if (is_seed_coder)
210+
sys_prompt = "You are an AI programming assistant, utilizing the Seed-Coder model, developed by ByteDance Seed, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.\n\n";
211+
}
212+
return BaseTokenizer::load_config(config);
213+
}
214+
203215
size_t load(tokenizer::DataReader *buffer, int n_vocab) override
204216
{
205217
tp = new tokenizer::BPEProcessor2(
@@ -230,24 +242,37 @@ namespace v3
230242

231243
void encode_header(const std::string &text, std::vector<int> &ids) const
232244
{
233-
ids.push_back(start_header_id);
234-
encode(text, ids);
235-
ids.push_back(end_header_id);
236-
ids.push_back(nl_token_id);
237-
ids.push_back(nl_token_id);
245+
// compatible with Seed-Coder
246+
if (start_header_id >= 0)
247+
{
248+
ids.push_back(start_header_id);
249+
encode(text, ids);
250+
ids.push_back(end_header_id);
251+
ids.push_back(nl_token_id);
252+
ids.push_back(nl_token_id);
253+
}
254+
else
255+
{
256+
ids.push_back(bos_token_id);
257+
encode(text, ids);
258+
ids.push_back(nl_token_id);
259+
}
238260
}
239261

240262
void encode_content(const std::string &text, std::vector<int> &ids) const
241263
{
242264
encode(text, ids);
243-
ids.push_back(eot_id);
265+
ids.push_back(eot_id >= 0 ? eot_id : eos_token_id);
244266
}
245267

246268
public:
247269
int start_header_id;
248270
int end_header_id;
249271
int eot_id;
250272
int nl_token_id;
273+
274+
private:
275+
bool is_seed_coder;
251276
};
252277

253278
void ChatHistoryEncoder::append_ai(int round_idx, const std::string &ai, std::vector<int> &ids) const

scripts/models.json

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2590,5 +2590,39 @@
25902590
}
25912591
}
25922592
}
2593+
},
2594+
"seed-coder": {
2595+
"brief": "Seed-Coder, a powerful, transparent, and parameter-efficient family of open-source code models at the 8B scale, featuring base, instruct, and reasoning variants.",
2596+
"default": "8b-it",
2597+
"license": "MIT",
2598+
"variants": {
2599+
"8b-it": {
2600+
"default": "q8",
2601+
"quantized": {
2602+
"q8": {
2603+
"size": 8771479824,
2604+
"url": "chatllm_quantized_seed-coder/seed-coder-8b-it.bin"
2605+
}
2606+
}
2607+
},
2608+
"8b-base": {
2609+
"default": "q8",
2610+
"quantized": {
2611+
"q8": {
2612+
"size": 8771479696,
2613+
"url": "chatllm_quantized_seed-coder/seed-coder-8b-base.bin"
2614+
}
2615+
}
2616+
},
2617+
"8b-reasoning": {
2618+
"default": "q8",
2619+
"quantized": {
2620+
"q8": {
2621+
"size": 8771479856,
2622+
"url": "chatllm_quantized_seed-coder/seed-coder-8b-reasoning.bin"
2623+
}
2624+
}
2625+
}
2626+
}
25932627
}
25942628
}

src/models.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2257,6 +2257,7 @@ namespace chatllm
22572257
oss << "Model name : " << loader.model_name;
22582258
if (loader.model_native_name.size() > 0)
22592259
oss << " (" << loader.model_native_name << ")";
2260+
oss << " (" << std::hex << std::setw(8) << std::setfill('0') << model_type << ")";
22602261
oss << std::endl;
22612262

22622263
oss << "Model type : " << to_string(purpose);

0 commit comments

Comments (0)