diff --git a/lmdeploy/cli/serve.py b/lmdeploy/cli/serve.py index ecd9fdae05..d54aca06b8 100644 --- a/lmdeploy/cli/serve.py +++ b/lmdeploy/cli/serve.py @@ -236,7 +236,7 @@ def api_server(args): communicator=args.communicator, enable_metrics=args.enable_metrics, hf_overrides=args.hf_overrides) - chat_template_config = get_chat_template(args.chat_template) + chat_template_config = get_chat_template(args.chat_template, args.model_path) from lmdeploy.messages import VisionConfig vision_config = VisionConfig(args.vision_max_batch_size) diff --git a/lmdeploy/cli/utils.py b/lmdeploy/cli/utils.py index c165b01e1e..91161d3702 100644 --- a/lmdeploy/cli/utils.py +++ b/lmdeploy/cli/utils.py @@ -7,6 +7,10 @@ from collections import defaultdict from typing import Any, List +from lmdeploy.utils import get_logger + +logger = get_logger('lmdeploy') + class DefaultsAndTypesHelpFormatter(argparse.HelpFormatter): """Formatter to output default value and type in help information.""" @@ -64,10 +68,12 @@ def get_lora_adapters(adapters: List[str]): return output -def get_chat_template(chat_template: str): +def get_chat_template(chat_template: str, model_path: str = None): """Get chat template config. - Args chat_template(str): it could be a builtin chat template name, or a chat template json file + Args: + chat_template(str): it could be a builtin chat template name, or a chat template json file + model_path(str): the model path, used to check deprecated chat template names """ import os @@ -76,12 +82,19 @@ def get_chat_template(chat_template: str): if os.path.isfile(chat_template): return ChatTemplateConfig.from_json(chat_template) else: - from lmdeploy.model import MODELS + from lmdeploy.model import DEPRECATED_CHAT_TEMPLATE_NAMES, MODELS, REMOVED_CHAT_TEMPLATE_NAMES + if chat_template in REMOVED_CHAT_TEMPLATE_NAMES: + raise ValueError(f"The chat template '{chat_template}' has been removed. " + f'Please refer to the latest chat templates in ' + f'https://lmdeploy.readthedocs.io/en/latest/advance/chat_template.html') + if chat_template in DEPRECATED_CHAT_TEMPLATE_NAMES: + logger.warning(f"The chat template '{chat_template}' is deprecated and fallback to hf chat template.") + chat_template = 'hf' assert chat_template in MODELS.module_dict.keys(), \ f"chat template '{chat_template}' is not " \ f'registered. 
The builtin chat templates are: ' \ f'{MODELS.module_dict.keys()}' - return ChatTemplateConfig(model_name=chat_template) + return ChatTemplateConfig(model_name=chat_template, model_path=model_path) else: return None diff --git a/lmdeploy/model.py b/lmdeploy/model.py index 7e03d6344b..f2abb12020 100644 --- a/lmdeploy/model.py +++ b/lmdeploy/model.py @@ -12,6 +12,53 @@ logger = get_logger('lmdeploy') MODELS = Registry('model', locations=['lmdeploy.model']) +DEPRECATED_CHAT_TEMPLATE_NAMES = [ + 'deepseek-v3', + 'deepseek-r1', + 'deepseek-coder', + 'cogvlm2', + 'internlm2', + 'internlm3', + 'internvl-internlm2', + 'internvl2-internlm2', + 'internvl2_5', + 'internvl-zh-hermes2', + 'internvl2-phi3', + 'internvl-phi3', + 'llama3', + 'llama3_1', + 'llama3_2', + 'llama4', + 'minicpmv-2d6', + 'minicpm3', + 'qwen2d5', + 'qwen2d5-vl', + 'qwq_preview', + 'qwq', + 'qwen3', + 'interns1', + 'intern-s1', + 'gemma', + 'yi', + 'yi-vl', + 'phi-3', + 'phi-4', + 'chatglm3', + 'glm4', + 'codegeex4', + 'molmo', +] +REMOVED_CHAT_TEMPLATE_NAMES = [ + 'llama', + 'wizardlm', + 'solar', + 'internlm-xcomposer2', + 'internlm-xcomposer2d5', + 'puyu', + 'ultracm', + 'ultralm', +] + def random_uuid() -> str: """Return a random uuid.""" @@ -51,6 +98,7 @@ class ChatTemplateConfig: """ # noqa: E501 model_name: str + model_path: Optional[str] = None system: Optional[str] = None meta_instruction: Optional[str] = None eosys: Optional[str] = None @@ -68,6 +116,13 @@ class ChatTemplateConfig: def chat_template(self): attrs = {key: value for key, value in dataclasses.asdict(self).items() if value is not None} attrs.pop('model_name', None) + if self.model_name in REMOVED_CHAT_TEMPLATE_NAMES: + logger.warning(f'The builtin chat template {self.model_name} is removed and fallback to base model.') + self.model_name = 'base' + if self.model_name in DEPRECATED_CHAT_TEMPLATE_NAMES: + logger.warning(f'The builtin chat template {self.model_name} is deprecated. ' + '"AutoTokenizer.apply_chat_template" is used instead') + self.model_name = 'hf' if self.model_name in MODELS.module_dict.keys(): model: BaseModel = MODELS.get(self.model_name)(**attrs) else: @@ -106,7 +161,6 @@ def from_json(cls, file_or_string): return cls(**json_data) -@MODELS.register_module(name='llama') @MODELS.register_module(name='base') class BaseModel: """Base model.""" @@ -234,54 +288,6 @@ def messages2prompt(self, messages, sequence_start=True, **kwargs): return ret -@MODELS.register_module(name=['deepseek-v3']) -class DeepseekV3(BaseChatTemplate): - - def __init__(self, user='<|User|>', assistant='<|Assistant|>', eoa='<|end▁of▁sentence|>', **kwargs): - super().__init__(user=user, assistant=assistant, eoa=eoa, **kwargs) - - def get_prompt(self, prompt, sequence_start=True): - if sequence_start: - return '<|begin▁of▁sentence|>' + super().get_prompt(prompt, sequence_start) - return super().get_prompt(prompt, sequence_start) - - def messages2prompt(self, messages, sequence_start=True, **kwargs): - if sequence_start and not isinstance(messages, str): - return '<|begin▁of▁sentence|>' + super().messages2prompt(messages, sequence_start, **kwargs) - return super().messages2prompt(messages, sequence_start, **kwargs) - - @classmethod - def match(cls, model_path: str) -> Optional[str]: - """Return the model_name that was registered to MODELS. - - Args: - model_path (str): the model path used for matching. 
- """ - path = model_path.lower() - if 'deepseek-v3' in path: - return 'deepseek-v3' - - -@MODELS.register_module(name=['deepseek-r1']) -class DeepseekR1(DeepseekV3): - - def messages2prompt(self, messages, sequence_start=True, **kwargs): - if sequence_start and not isinstance(messages, str): - return super().messages2prompt(messages, sequence_start, **kwargs) + '\n' - return super().messages2prompt(messages, sequence_start, **kwargs) - - @classmethod - def match(cls, model_path: str) -> Optional[str]: - """Return the model_name that was registered to MODELS. - - Args: - model_path (str): the model path used for matching. - """ - path = model_path.lower() - if 'deepseek-r1' in path: - return 'deepseek-r1' - - @MODELS.register_module(name='cogvlm') class CogVLM(BaseChatTemplate): """Chat template of CogVLM model.""" @@ -318,26 +324,6 @@ def match(cls, model_path: str) -> Optional[str]: return 'cogvlm' -@MODELS.register_module(name='cogvlm2') -class CogVLM2(CogVLM): - """Chat template of CogVLM2 model.""" - - def __init__(self, eoa='<|end_of_text|>', stop_words=['<|end_of_text|>'], **kwargs): - super().__init__(eoa=eoa, stop_words=stop_words, **kwargs) - - @classmethod - def match(cls, model_path: str) -> Optional[str]: - """Return the model_name that was registered to MODELS. - - Args: - model_path (str): the model path used for matching. - """ - path = model_path.lower() - if 'cogvlm2' in path: - return 'cogvlm2' - - -@MODELS.register_module(name='wizardlm') @MODELS.register_module(name='vicuna') class Vicuna(BaseChatTemplate): """Chat template of vicuna model.""" @@ -453,232 +439,6 @@ def match(cls, model_path: str) -> Optional[str]: return 'internlm' -@MODELS.register_module(name='internlm3') -@MODELS.register_module(name='internlm2') -class InternLM2Chat7B(InternLMChat7B): - """Chat template and generation parameters of InternLM2-Chat-7B.""" - - def __init__(self, - system='<|im_start|>system\n', - user='<|im_start|>user\n', - assistant='<|im_start|>assistant\n', - environment='<|im_start|>environment\n', - plugin='<|plugin|>', - interpreter='<|interpreter|>', - eosys='<|im_end|>\n', - eoh='<|im_end|>\n', - eoa='<|im_end|>', - eoenv='<|im_end|>\n', - separator='\n', - stop_words=['<|im_end|>', '<|action_end|>'], - **kwargs): - self.plugin = plugin - self.interpreter = interpreter - self.environment = environment - self.eoenv = eoenv - super(InternLM2Chat7B, self).__init__(system=system, - user=user, - assistant=assistant, - eosys=eosys, - eoh=eoh, - eoa=eoa, - separator=separator, - stop_words=stop_words, - **kwargs) - - @classmethod - def match(cls, model_path: str) -> Optional[str]: - """Return the model_name that was registered to MODELS. - - Args: - model_path (str): the model path used for matching. - """ - path = model_path.lower() - if 'internlm2' in path and ('chat' in path or 'math' in path): - return 'internlm2' - - if 'internlm3' in path and ('instruct' in path): - return 'internlm3' - - def messages2prompt(self, messages, sequence_start=True, tools=None, **kwargs): - """Return the prompt that is concatenated with other elements in the - chat template. 
- - Args: - messages (str | List): user's input prompt - Returns: - str: the concatenated prompt - """ - if isinstance(messages, str): - return self.get_prompt(messages, sequence_start) - box_map = dict(user=self.user, - assistant=self.assistant, - system=self.system, - environment=self.environment, - tool=self.environment) - eox_map = dict(user=self.eoh, - assistant=self.eoa + self.separator, - system=self.eosys, - environment=self.eoenv, - tool=self.eoenv) - name_map = dict(plugin=self.plugin, interpreter=self.interpreter) - ret = '' - if self.meta_instruction is not None and sequence_start: - if len(messages) and messages[0]['role'] != 'system': - ret += f'{self.system}{self.meta_instruction}{self.eosys}' - - if tools: - tools_prompt = dict( - role='system', - name='plugin', # only support internlm2 - content=json.dumps(tools, ensure_ascii=False)) - insert_index = 0 - if messages[0]['role'] == 'system': - insert_index = 1 - messages.insert(insert_index, tools_prompt) - for message in messages: - role = message['role'] - content = get_text(message['content']) - if role == 'assistant' and message.get('tool_calls', None) is not None: - for tool_call in message['tool_calls']: - function = tool_call.get('function', {}) - function['name'] = function.get('name', '') - function['parameters'] = function.get('parameters', function.get('arguments', '')) - function.pop('arguments') - if isinstance(function['parameters'], str): - function['parameters'] = json.loads(function['parameters']) - content += f'<|action_start|><|plugin|>\n{json.dumps(function, ensure_ascii=False)}<|action_end|>' - if 'name' in message and message['name'] in name_map: - begin = box_map[role].strip() + f" name={name_map[message['name']]}\n" - else: - begin = box_map[role] - ret += f'{begin}{content}{eox_map[role]}' - if len(messages) and messages[-1]['role'] == 'assistant': - return ret[:-len(eox_map['assistant'])] # prefix of response - ret += f'{self.assistant}' - return ret - - -@MODELS.register_module(name='internvl-internlm2') -class InternVLInternLM2Chat(InternLM2Chat7B): - - def __init__(self, meta_instruction='You are an AI assistant whose name is InternLM (书生·浦语).', **kwargs): - super().__init__(meta_instruction=meta_instruction, **kwargs) - - @classmethod - def match(cls, model_path: str) -> Optional[str]: - """Return the model_name that was registered to MODELS. - - Args: - model_path (str): the model path used for matching. - """ - path = model_path.lower() - if 'internvl' in path and 'v1-5' in path: - if 'mini' in path and '4b' in path: - # use internvl-phi3 template - return None - return 'internvl-internlm2' - - if 'chemvlm' in path: - return 'internvl-internlm2' - - -@MODELS.register_module(name='internvl2-internlm2') -class InternVL2InternLM2(InternLM2Chat7B): - - def __init__(self, - meta_instruction='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。', - eosys='<|im_end|>', - eoh='<|im_end|>', - separator='', - stop_words=['<|im_start|>', '<|im_end|>'], - **kwargs): - super().__init__(meta_instruction=meta_instruction, - eosys=eosys, - separator=separator, - eoh=eoh, - stop_words=stop_words, - **kwargs) - - @classmethod - def match(cls, model_path: str) -> Optional[str]: - """Return the model_name that was registered to MODELS. - - Args: - model_path (str): the model path used for matching. 
- """ - path = model_path.lower() - if ('internvl2' in path and 'internvl2-4b' not in path) or 'mono-internvl' in path: - if 'internvl2.5' in path or 'internvl2_5' in path: - return None - return 'internvl2-internlm2' - - -@MODELS.register_module(name='internvl2_5') -class InternVL2_5(InternLM2Chat7B): - - def __init__( - self, - meta_instruction='你是书生·万象,英文名是InternVL,是由上海人工智能实验室、清华大学及多家合作单位联合开发的多模态大语言模型。', # noqa - **kwargs): - super().__init__(meta_instruction=meta_instruction, **kwargs) - - @classmethod - def match(cls, model_path: str) -> Optional[str]: - """Return the model_name that was registered to MODELS. - - Args: - model_path (str): the model path used for matching. - """ - path = model_path.lower() - if 'internvl2.5' in path or 'internvl2_5' in path or 'internvl3' in path: - return 'internvl2_5' - - -@MODELS.register_module(name=['internlm-xcomposer2', 'internlm-xcomposer2d5']) -class InternLMXComposer2Chat7B(InternLMChat7B): - """Chat template and generation parameters of InternLM-XComposer2-7b.""" - - def __init__( - self, - system='[UNUSED_TOKEN_146]system\n', - meta_instruction="""You are an AI assistant whose name is InternLM-XComposer (浦语·灵笔). -- InternLM-XComposer (浦语·灵笔) is a multi-modality conversational language model that is developed by Shanghai AI Laboratory (上海人工智能实验室). It is designed to be helpful, honest, and harmless. -- InternLM-XComposer (浦语·灵笔) can understand and communicate fluently in the language chosen by the user such as English and 中文. -- InternLM-XComposer (浦语·灵笔) is capable of comprehending and articulating responses effectively based on the provided image.""", # noqa - user='[UNUSED_TOKEN_146]user\n', - assistant='[UNUSED_TOKEN_146]assistant\n', - eosys='[UNUSED_TOKEN_145]\n', - eoh='[UNUSED_TOKEN_145]\n', - eoa='[UNUSED_TOKEN_145]\n', - separator='\n', - stop_words=['[UNUSED_TOKEN_145]'], - **kwargs): - super().__init__(system=system, - meta_instruction=meta_instruction, - user=user, - assistant=assistant, - eosys=eosys, - eoh=eoh, - eoa=eoa, - separator=separator, - stop_words=stop_words, - **kwargs) - - @classmethod - def match(cls, model_path: str) -> Optional[str]: - """Return the model_name that was registered to MODELS. - - Args: - model_path (str): the model path used for matching. - """ - path = model_path.lower() - if 'internlm' in path and 'xcomposer2' in path: - if '2d5' in path: - return 'internlm-xcomposer2d5' - return 'internlm-xcomposer2' - - @MODELS.register_module(name='baichuan2') class Baichuan2(BaseChatTemplate): """Chat template and generation parameters of Baichuan2-7B-Base and @@ -699,42 +459,6 @@ def match(cls, model_path: str) -> Optional[str]: return 'baichuan2' -@MODELS.register_module(name='puyu') -class Puyu(BaseChatTemplate): - """Chat template of puyu model.This is only for internal usage in Shanghai - AI Laboratory.""" - - def __init__(self, - meta_instruction='', - system='', - eosys='', - user='', - eoh='', - assistant='', - eoa='', - stop_words=None, - **kwargs): - super().__init__(meta_instruction=meta_instruction, - system=system, - eosys=eosys, - user=user, - eoh=eoh, - assistant=assistant, - eoa=eoa, - stop_words=stop_words, - **kwargs) - - @classmethod - def match(cls, model_path: str) -> Optional[str]: - """Return the model_name that was registered to MODELS. - - Args: - model_path (str): the model path used for matching. 
- """ - if 'puyu' in model_path.lower(): - return 'puyu' - - @MODELS.register_module(name='llama2') class Llama2(BaseChatTemplate): """Chat template of LLaMA2 model.""" @@ -772,40 +496,32 @@ def match(cls, model_path: str) -> Optional[str]: return 'llama2' -@MODELS.register_module(name='llama3') -class Llama3(BaseChatTemplate): - """Chat template of LLaMA3 model.""" +@MODELS.register_module(name='qwen') +class Qwen7BChat(BaseChatTemplate): + """Chat template for Qwen-7B-Chat.""" def __init__(self, - system='<|start_header_id|>system<|end_header_id|>\n\n', - meta_instruction=None, - eosys='<|eot_id|>', - assistant='<|start_header_id|>assistant<|end_header_id|>\n\n', - eoa='<|eot_id|>', - user='<|start_header_id|>user<|end_header_id|>\n\n', - eoh='<|eot_id|>', - stop_words=['<|eot_id|>', '<|end_of_text|>'], + system='<|im_start|>system\n', + meta_instruction='You are a helpful assistant.', + eosys='<|im_end|>\n', + user='<|im_start|>user\n', + eoh='<|im_end|>\n', + assistant='<|im_start|>assistant\n', + eoa='<|im_end|>', + separator='\n', + stop_words=['<|im_end|>'], **kwargs): super().__init__(system=system, meta_instruction=meta_instruction, eosys=eosys, - assistant=assistant, - eoa=eoa, user=user, eoh=eoh, + assistant=assistant, + eoa=eoa, + separator=separator, stop_words=stop_words, **kwargs) - def get_prompt(self, prompt, sequence_start=True): - if sequence_start: - return '<|begin_of_text|>' + super().get_prompt(prompt, sequence_start) - return super().get_prompt(prompt, sequence_start) - - def messages2prompt(self, messages, sequence_start=True, **kwargs): - if sequence_start and not isinstance(messages, str): - return '<|begin_of_text|>' + super().messages2prompt(messages, sequence_start, **kwargs) - return super().messages2prompt(messages, sequence_start, **kwargs) - @classmethod def match(cls, model_path: str) -> Optional[str]: """Return the model_name that was registered to MODELS. @@ -813,918 +529,44 @@ def match(cls, model_path: str) -> Optional[str]: Args: model_path (str): the model path used for matching. """ - # reject InternVL2-Llama3-76B - if 'internvl2' in model_path.lower(): - return None - if 'llama-3-' in model_path.lower() or 'llama3-' in model_path.lower(): - return 'llama3' - - -@MODELS.register_module(name=['llama3_1', 'llama3_2']) -class Llama3_1(Llama3): - """Chat template of LLaMA3.1 model.""" + model_path = model_path.lower() + if 'qwen' in model_path and not any(keyword in model_path for keyword in ('qwen2.5', 'qwq', 'qwen3')): + return 'qwen' - def __init__( - self, - tool="""# Tool Instructions -- Always execute python code in messages that you share. -- When looking for real time information use relevant functions if available else fallback to brave_search +@MODELS.register_module(name='codellama') +class CodeLlama(Llama2): + def __init__(self, meta_instruction='', suffix_first=False, stop_words=None, **kwargs): + super().__init__(meta_instruction=meta_instruction, stop_words=stop_words, **kwargs) + caps = ['completion', 'infilling', 'chat', 'python'] + assert self.capability in caps, \ + f'{self.capability} is not supported. 
' \ + f'The supported capabilities are: {caps}' + self.meta_instruction = meta_instruction + self.suffix_first = suffix_first + self.stop_words = stop_words + if self.capability == 'infilling': + if self.stop_words is None: + self.stop_words = [''] -You have access to the following functions: + def get_prompt(self, prompt, sequence_start=True): + if self.capability == 'infilling': + return self._infill_prompt(prompt) + elif self.capability == 'chat': + return super().get_prompt(prompt, sequence_start) + else: # python speicalist + return prompt -""", # noqa - eotool=""" - -If a you choose to call a function ONLY reply in the following format: -<{start_tag}={function_name}>{parameters}{end_tag} -where - -start_tag => ` a JSON dict with the function argument name as key and function argument value as value. -end_tag => `` - -Here is an example, -{"example_name": "example_value"} - -Reminder: -- Function calls MUST follow the specified format -- Required parameters MUST be specified -- Only call one function at a time -- Put the entire function call reply on one line" -- Always add your sources when using search results to answer the user query\n\n""", # noqa - knowledge='Cutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\n', - meta_instruction='You are a helpful assistant.', - ipython='<|start_header_id|>ipython<|end_header_id|>\n\n', - eoi='<|eot_id|>', - stop_words=['<|eot_id|>', '<|end_of_text|>', '<|eom_id|>'], - **kwargs): - super().__init__(meta_instruction=meta_instruction, stop_words=stop_words, **kwargs) - self.ipython = ipython - self.eoi = eoi - self.tool = tool - self.eotool = eotool - self.knowledge = knowledge - - def messages2prompt(self, messages, sequence_start=True, tools=None, **kwargs): - """Return the prompt that is concatenated with other elements in the - chat template. 
- - Args: - messages (str | List): user's input prompt - Returns: - str: the concatenated prompt - """ - if isinstance(messages, str): - return self.get_prompt(messages, sequence_start) - box_map = dict(user=self.user, - ipython=self.ipython, - tool=self.ipython, - assistant=self.assistant, - system=self.system) - eox_map = dict(user=self.eoh, - ipython=self.eoi, - tool=self.eoi, - assistant=self.eoa + self.separator, - system=self.eosys) - ret = '' - tool_prompt = '' - if tools is not None: - for tool in tools: - tool_prompt += "Use the function '{}' to: {}\n{}\n".format(tool['name'], tool['description'], - json.dumps(tool, ensure_ascii=False)) - if self.meta_instruction is not None and sequence_start: - if len(messages) and messages[0]['role'] != 'system': - if tools is None: - ret += f'{self.system}{self.knowledge}{self.meta_instruction}{self.eosys}' - else: - ret += f'{self.system}{self.knowledge}{self.tool}{tool_prompt}{self.eotool}{self.meta_instruction}{self.eosys}' # noqa - for message in messages: - role = message['role'] - content = get_text(message['content']) - if role == 'assistant' and ('<|python_tag|>' in content or '' in content): - ret += f'{box_map[role]}{content}<|eom_id|>' - elif role == 'system' and tools is not None: - ret += f'{box_map[role]}{self.tool}{tool_prompt}{self.eotool}{content}{eox_map[role]}' - else: - ret += f'{box_map[role]}{content}{eox_map[role]}' - if sequence_start and not isinstance(messages, str): - ret = '<|begin_of_text|>' + ret - if len(messages) and messages[-1]['role'] == 'assistant': - return ret[:-len(eox_map['assistant'])] # prefix of response - ret += f'{self.assistant}' - return ret - - @classmethod - def match(cls, model_path: str) -> Optional[str]: - """Return the model_name that was registered to MODELS. - - Args: - model_path (str): the model path used for matching. - """ - if 'llama-3.1-' in model_path.lower() or 'llama3.1-' in model_path.lower(): - return 'llama3_1' - if 'llama-3.2-' in model_path.lower() or 'llama3.2-' in model_path.lower(): - return 'llama3_1' - - -@MODELS.register_module(name='minicpmv-2d6') -@MODELS.register_module(name='minicpm3') -@MODELS.register_module(name='qwen') -class Qwen7BChat(BaseChatTemplate): - """Chat template for Qwen-7B-Chat.""" - - def __init__(self, - system='<|im_start|>system\n', - meta_instruction='You are a helpful assistant.', - eosys='<|im_end|>\n', - user='<|im_start|>user\n', - eoh='<|im_end|>\n', - assistant='<|im_start|>assistant\n', - eoa='<|im_end|>', - separator='\n', - stop_words=['<|im_end|>'], - **kwargs): - super().__init__(system=system, - meta_instruction=meta_instruction, - eosys=eosys, - user=user, - eoh=eoh, - assistant=assistant, - eoa=eoa, - separator=separator, - stop_words=stop_words, - **kwargs) - - @classmethod - def match(cls, model_path: str) -> Optional[str]: - """Return the model_name that was registered to MODELS. - - Args: - model_path (str): the model path used for matching. - """ - model_path = model_path.lower() - if 'qwen' in model_path and not any(keyword in model_path for keyword in ('qwen2.5', 'qwq', 'qwen3')): - return 'qwen' - if 'minicpm-v-2_6' in model_path: - return 'minicpmv-2d6' - if 'minicpm3-' in model_path: - return 'minicpm3' - - -@MODELS.register_module(name='qwen2d5') -class Qwen2d5Chat(Qwen7BChat): - """Chat template for Qwen2.5-Instruct series.""" - - def __init__( - self, - system='<|im_start|>system\n', - meta_instruction='You are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.', - eosys='<|im_end|>\n', - user='<|im_start|>user\n', - eoh='<|im_end|>\n', - assistant='<|im_start|>assistant\n', - eoa='<|im_end|>', - separator='\n', - tools="""\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n""", # noqa - eotools="""\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{"name": , "arguments": }\n""", # noqa - stop_words=['<|im_end|>'], - **kwargs): - - self.tools = tools - self.eotools = eotools - super().__init__(system=system, - meta_instruction=meta_instruction, - eosys=eosys, - user=user, - eoh=eoh, - assistant=assistant, - eoa=eoa, - separator=separator, - stop_words=stop_words, - **kwargs) - - def messages2prompt(self, messages, sequence_start=True, tools=None, **kwargs): - """Return the prompt that is concatenated with other elements in the - chat template. - - Args: - messages (str | List): user's input prompt - Returns: - str: the concatenated prompt - """ - if isinstance(messages, str): - return self.get_prompt(messages, sequence_start) - box_map = dict(user=self.user, assistant=self.assistant, system=self.system) - ret = '' - tool_prompt = '' - if tools is not None and len(tools) > 0: - for tool in tools: - tool_prompt += self.separator - tool_prompt += f'{{"type": "function", "function": {json.dumps(tool, ensure_ascii=False)}}}' - if len(messages) and messages[0]['role'] == 'system': - ret += f"{self.system}{messages[0]['content']}{self.tools}{tool_prompt}{self.eotools}{self.eosys}" - else: - ret += f'{self.system}{self.meta_instruction}{self.tools}{tool_prompt}{self.eotools}{self.eosys}' - else: - if self.meta_instruction is not None and sequence_start: - if len(messages) and messages[0]['role'] == 'system': - ret += f"{self.system}{messages[0]['content']}{self.eosys}" - else: - ret += f'{self.system}{self.meta_instruction}{self.eosys}' - - for index, message in enumerate(messages): - if (message['role'] == 'user' or (message['role'] == 'system' and index != 0) - or (message['role'] == 'assistant' and message.get('tool_calls') is None)): - ret += f"{box_map[message['role']]}{get_text(message['content'])}{self.eosys}" - elif message['role'] == 'assistant': - ret += '<|im_start|>assistant' - if message.get('content') is not None: - ret += f"{self.separator}{get_text(message['content'])}" - - if message.get('tool_calls') is not None: - tool_calls = message['tool_calls'] - for tool_call in tool_calls: - if tool_call.get('function') is not None: - tool_call = tool_call['function'] - if isinstance(tool_call['arguments'], str): - tool_call['arguments'] = json.loads(tool_call['arguments']) - ret += f'{self.separator}{self.separator}{{"name": "{tool_call["name"]}", "arguments": {json.dumps(tool_call["arguments"], ensure_ascii=False)}}}{self.separator}' # noqa - ret += self.eosys - if message['role'] == 'tool': - if index == 0 or messages[index - 1]['role'] != 'tool': - ret += '<|im_start|>user' - ret += f"{self.separator}{self.separator}{message['content']}{self.separator}" # noqa - if index == len(messages) - 1 or messages[index + 1]['role'] != 'tool': - ret += f'{self.eoh}' - ret += f'{self.assistant}' - return ret - - @classmethod - def match(cls, model_path: str) -> Optional[str]: - """Return the model_name that was registered to MODELS. - - Args: - model_path (str): the model path used for matching. 
- """ - lower_path = model_path.lower() - if ('qwen2.5' in lower_path or 'qwen2_5' in lower_path) and 'vl' not in lower_path: - return 'qwen2d5' - - -@MODELS.register_module(name='qwen2d5-vl') -class Qwen2d5VL(Qwen2d5Chat): - - def __init__(self, meta_instruction='You are a helpful assistant.', **kwargs): - super().__init__(meta_instruction=meta_instruction, **kwargs) - - @classmethod - def match(cls, model_path: str) -> Optional[str]: - """Return the model_name that was registered to MODELS. - - Args: - model_path (str): the model path used for matching. - """ - lower_path = model_path.lower() - if ('qwen2.5' in lower_path or 'qwen2_5' in lower_path) and 'vl' in lower_path: - return 'qwen2d5-vl' - - -@MODELS.register_module(name='qwq_preview') -class QwQPreview(Qwen2d5Chat): - - def __init__( - self, - meta_instruction='You are a helpful and harmless assistant. You are Qwen developed by Alibaba. You should think step-by-step.', # noqa - **kwargs): - super().__init__(meta_instruction=meta_instruction, **kwargs) - - @classmethod - def match(cls, model_path: str) -> Optional[str]: - """Return the model_name that was registered to MODELS. - - Args: - model_path (str): the model path used for matching. - """ - lower_path = model_path.lower() - if 'qwq' in lower_path and 'preview' in lower_path: - return 'qwq_preview' - - -@MODELS.register_module(name='qwq') -class QwQ(Qwen2d5Chat): - - def __init__(self, meta_instruction='', **kwargs): - super().__init__(meta_instruction=meta_instruction, **kwargs) - - def messages2prompt(self, messages, sequence_start=True, tools=None, **kwargs): - if isinstance(messages, str): - return self.get_prompt(messages, sequence_start) - return super().messages2prompt(messages, sequence_start, tools, **kwargs) + '\n' - - @classmethod - def match(cls, model_path: str) -> Optional[str]: - """Return the model_name that was registered to MODELS. - - Args: - model_path (str): the model path used for matching. - """ - lower_path = model_path.lower() - if 'qwq' in lower_path and 'preview' not in lower_path: - return 'qwq' - - -@MODELS.register_module(name='qwen3') -class Qwen3(Qwen2d5Chat): - - def __init__(self, meta_instruction='', **kwargs): - super().__init__(meta_instruction=meta_instruction, **kwargs) - - def messages2prompt(self, messages, sequence_start=True, tools=None, enable_thinking=None, **kwargs): - if isinstance(messages, str): - return self.get_prompt(messages, sequence_start) - prompt = super().messages2prompt(messages, sequence_start, tools, **kwargs) - - if enable_thinking is False: - prompt += '\n\n\n\n' - - return prompt - - @classmethod - def match(cls, model_path: str) -> Optional[str]: - """Return the model_name that was registered to MODELS. - - Args: - model_path (str): the model path used for matching. - """ - lower_path = model_path.lower() - if 'qwen3' in lower_path: - return 'qwen3' - - -@MODELS.register_module(name='codellama') -class CodeLlama(Llama2): - - def __init__(self, meta_instruction='', suffix_first=False, stop_words=None, **kwargs): - super().__init__(meta_instruction=meta_instruction, stop_words=stop_words, **kwargs) - caps = ['completion', 'infilling', 'chat', 'python'] - assert self.capability in caps, \ - f'{self.capability} is not supported. 
' \ - f'The supported capabilities are: {caps}' - self.meta_instruction = meta_instruction - self.suffix_first = suffix_first - self.stop_words = stop_words - if self.capability == 'infilling': - if self.stop_words is None: - self.stop_words = [''] - - def get_prompt(self, prompt, sequence_start=True): - if self.capability == 'infilling': - return self._infill_prompt(prompt) - elif self.capability == 'chat': - return super().get_prompt(prompt, sequence_start) - else: # python speicalist - return prompt - - def _infill_prompt(self, prompt): - prefix, suffix = prompt.split('') - if self.suffix_first: - # format as "
<PRE> <SUF>{suf} <MID> {pre}"
-            prompt = f'<PRE> <SUF>{suffix} <MID> {prefix}'
-        else:
-            # format as "<PRE> {pre} <SUF>{suf} <MID>"
-            prompt = f'<PRE> {prefix} <SUF>{suffix} <MID>'
-        return prompt
-
-    @classmethod
-    def match(cls, model_path: str) -> Optional[str]:
-        """Return the model_name that was registered to MODELS.
-
-        Args:
-            model_path (str): the model path used for matching.
-        """
-        if 'codellama' in model_path.lower():
-            return 'codellama'
-
-
-@MODELS.register_module(name='chatglm')
-class ChatGLM2(BaseModel):
-
-    def __init__(self, user='问:', eoh='\n\n', assistant='答:', eoa='\n\n', **kwargs):
-        super().__init__(**kwargs)
-        self._user = user
-        self._assistant = assistant
-        self._eoh = eoh
-        self._eoa = eoa
-        self.count = 0
-
-    def get_prompt(self, prompt, sequence_start=True):
-        """Get prompt."""
-        # need more check
-        # https://github.com/THUDM/ChatGLM2-6B/issues/48
-        # [64790, 64792] to be prepended
-        self.count += 1
-        ret = f'[Round {self.count}]\n\n'
-        ret += f'{self._user}{prompt}{self._eoh}'
-        ret += f'{self._assistant}'
-        return ret
-
-    def messages2prompt(self, messages, sequence_start=True, **kwargs):
-        """Message to prompt."""
-        if isinstance(messages, str):
-            return self.get_prompt(messages, sequence_start)
-        ret = ''
-        count = 0
-        for message in messages:
-            role = message['role']
-            content = get_text(message['content'])
-            if role == 'user':
-                count += 1
-                ret += f'[Round {count}]\n\n'
-                ret += f'{self._user}{content}{self._eoh}'
-                ret += f'{self._assistant}'
-            if role == 'assistant':
-                ret += f'{content}'
-        return ret
-
-    @classmethod
-    def match(cls, model_path: str) -> Optional[str]:
-        """Return the model_name that was registered to MODELS.
-
-        Args:
-            model_path (str): the model path used for matching.
-        """
-        path = model_path.lower()
-        if 'chatglm2' in path:
-            return 'chatglm'
-
-
-@MODELS.register_module(name='solar')
-class SOLAR(BaseChatTemplate):
-    """Chat template of SOLAR model.
-
-    `https://huggingface.co/upstage/SOLAR-0-70b-16bit`
-    """
-
-    def __init__(self,
-                 system='### System:\n',
-                 eosys='\n\n',
-                 user='### User:\n',
-                 eoh='\n\n',
-                 assistant='### Assistant:\n',
-                 meta_instruction='',
-                 **kwargs):
-        super().__init__(**kwargs)
-        self.system = system
-        self.eosys = eosys
-        self.user = user
-        self.eoh = eoh
-        self.assistant = assistant
-        self.meta_instruction = meta_instruction
-
-    @classmethod
-    def match(cls, model_path: str) -> Optional[str]:
-        """Return the model_name that was registered to MODELS.
-
-        Args:
-            model_path (str): the model path used for matching.
-        """
-        if 'solar' in model_path.lower():
-            return 'solar'
-
-
-@MODELS.register_module(name=['ultracm', 'ultralm'])
-class UltraChat(BaseChatTemplate):
-    """Template of UltraCM and UltraLM models.
-
-    `https://huggingface.co/openbmb/UltraCM-13b` `https://huggingface.co/openbmb/UltraLM-13b`
-    """
-
-    def __init__(
-            self,
-            system='User: ',
-            meta_instruction="""A one-turn chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, very detailed, and polite answers to the user's questions.""",  # noqa: E501
-            eosys='\n',
-            user='User: ',
-            eoh='\n',
-            assistant='Assistant: ',
-            eoa='</s>',
-            separator='\n',
-            stop_words=['</s>'],
-            **kwargs):
-        super().__init__(system=system,
-                         meta_instruction=meta_instruction,
-                         eosys=eosys,
-                         user=user,
-                         eoh=eoh,
-                         assistant=assistant,
-                         eoa=eoa,
-                         separator=separator,
-                         stop_words=stop_words,
-                         **kwargs)
-
-    @classmethod
-    def match(cls, model_path: str) -> Optional[str]:
-        """Return the model_name that was registered to MODELS.
-
-        Args:
-            model_path (str): the model path used for matching.
-        """
-        if 'ultracm' in model_path.lower():
-            return 'ultracm'
-        if 'ultralm' in model_path.lower():
-            return 'ultralm'
-
-
-@MODELS.register_module(name=['yi'])
-class Yi(BaseChatTemplate):
-    """Chat template of Yi model."""
-
-    def __init__(self,
-                 system='<|im_start|>system\n',
-                 meta_instruction=None,
-                 eosys='<|im_end|>\n',
-                 user='<|im_start|>user\n',
-                 eoh='<|im_end|>\n',
-                 assistant='<|im_start|>assistant\n',
-                 eoa='<|im_end|>',
-                 separator='\n',
-                 stop_words=['<|im_end|>', '<|endoftext|>'],
-                 **kwargs):
-        super().__init__(system=system,
-                         meta_instruction=meta_instruction,
-                         eosys=eosys,
-                         user=user,
-                         eoh=eoh,
-                         assistant=assistant,
-                         eoa=eoa,
-                         separator=separator,
-                         stop_words=stop_words,
-                         **kwargs)
-
-    @classmethod
-    def match(cls, model_path: str) -> Optional[str]:
-        """Return the model_name that was registered to MODELS.
-
-        Args:
-            model_path (str): the model path used for matching.
-        """
-        path = model_path.lower()
-        if 'yi' in path and 'vl' not in path:
-            return 'yi'
-
-
-@MODELS.register_module(name=['mistral', 'mixtral'])
-class MistralChat(BaseChatTemplate):
-    """Template of Mistral and Mixtral Instruct models.
-
-    `https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1`
-    `https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1`
-    """
-
-    def __init__(self, user='[INST] ', eoh=' [/INST]', eoa='', **kwargs):
-        super().__init__(user=user, eoh=eoh, eoa=eoa, **kwargs)
-
-    @classmethod
-    def match(cls, model_path: str) -> Optional[str]:
-        """Return the model_name that was registered to MODELS.
-
-        Args:
-            model_path (str): the model path used for matching.
-        """
-        model_path = model_path.lower()
-        if 'instruct' in model_path or 'llava' in model_path:
-            if 'mistral' in model_path:
-                return 'mistral'
-            if 'mixtral' in model_path:
-                return 'mixtral'
-
-
-@MODELS.register_module(name=['gemma'])
-class Gemma(BaseChatTemplate):
-    """Template of Gemma models.
-
-    `https://huggingface.co/google/gemma-7b-it`
-    """
-
-    def __init__(self,
-                 user='<start_of_turn>user\n',
-                 eoh='<end_of_turn>\n',
-                 assistant='<start_of_turn>model\n',
-                 eoa='<end_of_turn>\n',
-                 stop_words=['<end_of_turn>'],
-                 **kwargs):
-        super().__init__(user=user, eoh=eoh, assistant=assistant, eoa=eoa, stop_words=stop_words, **kwargs)
-
-    @classmethod
-    def match(cls, model_path: str) -> Optional[str]:
-        """Return the model_name that was registered to MODELS.
-
-        Args:
-            model_path (str): the model path used for matching.
-        """
-        if 'gemma' in model_path.lower():
-            return 'gemma'
-
-
-@MODELS.register_module(name=['deepseek'])
-class Deepseek(BaseChatTemplate):
-
-    def __init__(self,
-                 eosys='\n\n',
-                 user='User: ',
-                 eoh='\n\n',
-                 assistant='Assistant: ',
-                 eoa='<|end▁of▁sentence|>',
-                 **kwargs):
-        super().__init__(eosys=eosys, user=user, eoh=eoh, assistant=assistant, eoa=eoa, **kwargs)
-
-    def get_prompt(self, prompt, sequence_start=True):
-        if self.capability == 'chat':
-            return super().get_prompt(prompt, sequence_start)[:-1]
-        return super().get_prompt(prompt, sequence_start)
-
-    def messages2prompt(self, messages, sequence_start=True, **kwargs):
-        if isinstance(messages, str):
-            return self.get_prompt(messages, sequence_start)
-        return super().messages2prompt(messages, sequence_start, **kwargs)[:-1]
-
-    @classmethod
-    def match(cls, model_path: str) -> Optional[str]:
-        """Return the model_name that was registered to MODELS.
-
-        Args:
-            model_path (str): the model path used for matching.
-        """
-        path = model_path.lower()
-        if 'deepseek' in path and 'chat' in path and 'vl' not in path:
-            return 'deepseek'
-
-
-@MODELS.register_module(name=['internvl-zh'])
-class InternVLZH(BaseChatTemplate):
-
-    def __init__(self, user='<human>: ', eoh=' ', assistant='<bot>: ', eoa='</s>', **kwargs):
-        super().__init__(user=user, eoh=eoh, assistant=assistant, eoa=eoa, **kwargs)
-
-    def get_prompt(self, prompt, sequence_start=True):
-        if self.capability == 'chat':
-            return super().get_prompt(prompt, sequence_start)[:-1]
-        return super().get_prompt(prompt, sequence_start)
-
-    def messages2prompt(self, messages, sequence_start=True, **kwargs):
-        if isinstance(messages, str):
-            return self.get_prompt(messages, sequence_start)
-        return super().messages2prompt(messages, sequence_start, **kwargs)[:-1]
-
-    @classmethod
-    def match(cls, model_path: str) -> Optional[str]:
-        """Return the model_name that was registered to MODELS.
-
-        Args:
-            model_path (str): the model path used for matching.
-        """
-        path = model_path.lower()
-        if 'internvl-chat' in path and 'v1-1' in path:
-            return 'internvl-zh'
-
-
-@MODELS.register_module(name=['deepseek-vl'])
-class DeepseekVL(BaseChatTemplate):
-
-    def __init__(
-            self,
-            meta_instruction="""You are a helpful language and vision assistant. You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language.""",  # noqa: E501
-            eosys='\n\n',
-            user='User: ',
-            eoh='\n\n',
-            assistant='Assistant: ',
-            eoa='<|end▁of▁sentence|>',
-            **kwargs):
-        super().__init__(meta_instruction=meta_instruction,
-                         eosys=eosys,
-                         user=user,
-                         eoh=eoh,
-                         assistant=assistant,
-                         eoa=eoa,
-                         **kwargs)
-
-    def get_prompt(self, prompt, sequence_start=True):
-        if self.capability == 'chat':
-            return super().get_prompt(prompt, sequence_start)[:-1]
-        return super().get_prompt(prompt, sequence_start)
-
-    def messages2prompt(self, messages, sequence_start=True, **kwargs):
-        if isinstance(messages, str):
-            return self.get_prompt(messages, sequence_start)
-        return super().messages2prompt(messages, sequence_start, **kwargs)[:-1]
-
-    @classmethod
-    def match(cls, model_path: str) -> Optional[str]:
-        """Return the model_name that was registered to MODELS.
-
-        Args:
-            model_path (str): the model path used for matching.
-        """
-        path = model_path.lower()
-        if 'deepseek-vl' in path and 'chat' in path:
-            return 'deepseek-vl'
-
-
-@MODELS.register_module(name=['deepseek-vl2'])
-class DeepseekVL2(BaseChatTemplate):
-
-    def __init__(self,
-                 meta_instruction='',
-                 eosys='',
-                 user='<|User|>: ',
-                 eoh='\n\n',
-                 assistant='<|Assistant|>: ',
-                 eoa='<|end▁of▁sentence|>',
-                 **kwargs):
-        super().__init__(meta_instruction=meta_instruction,
-                         eosys=eosys,
-                         user=user,
-                         eoh=eoh,
-                         assistant=assistant,
-                         eoa=eoa,
-                         **kwargs)
-
-    def get_prompt(self, prompt, sequence_start=True):
-        return super().get_prompt(prompt, sequence_start)[:-1]
-
-    def messages2prompt(self, messages, sequence_start=True, **kwargs):
-        if isinstance(messages, str):
-            return self.get_prompt(messages, sequence_start)
-        return super().messages2prompt(messages, sequence_start, **kwargs)[:-1]
-
-    @classmethod
-    def match(cls, model_path: str) -> Optional[str]:
-        """Return the model_name that was registered to MODELS.
-
-        Args:
-            model_path (str): the model path used for matching.
-        """
-        path = model_path.lower()
-        if 'deepseek-vl2' in path:
-            return 'deepseek-vl2'
-
-
-@MODELS.register_module(name='deepseek-coder')
-class DeepSeek(BaseChatTemplate):
-    """Chat template of deepseek model."""
-
-    def __init__(
-            self,
-            system='',
-            meta_instruction="""You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer\n""",  # noqa: E501
-            eosys='',
-            user='### Instruction:\n',
-            eoh='\n',
-            assistant='### Response:\n',
-            eoa='\n<|EOT|>',
-            separator='\n',
-            stop_words=['<|EOT|>'],
-            **kwargs):
-        super().__init__(system=system,
-                         meta_instruction=meta_instruction,
-                         eosys=eosys,
-                         user=user,
-                         eoh=eoh,
-                         assistant=assistant,
-                         eoa=eoa,
-                         separator=separator,
-                         stop_words=stop_words,
-                         **kwargs)
-
-    @classmethod
-    def match(cls, model_path: str) -> Optional[str]:
-        """Return the model_name that was registered to MODELS.
-
-        Args:
-            model_path (str): the model path used for matching.
-        """
-        path = model_path.lower()
-        if 'deepseek-coder' in path:
-            return 'deepseek-coder'
-
-
-@MODELS.register_module(name=['yi-vl'])
-class YiVL(BaseChatTemplate):
-
-    def __init__(
-            self,
-            meta_instruction="""This is a chat between an inquisitive human and an AI assistant. Assume the role of the AI assistant. Read all the images carefully, and respond to the human's questions with informative, helpful, detailed and polite answers. 这是一个好奇的人类和一个人工智能助手之间的对话。假设你扮演这个AI助手的角色。仔细阅读所有的图像,并对人类的问题做出信息丰富、有帮助、详细的和礼貌的回答。\n\n""",  # noqa: E501
-            user='### Human: ',
-            eoh='\n',
-            assistant='### Assistant:',
-            eoa='\n',
-            stop_words=['###'],
-            **kwargs):
-        super().__init__(meta_instruction=meta_instruction,
-                         user=user,
-                         eoh=eoh,
-                         assistant=assistant,
-                         eoa=eoa,
-                         stop_words=stop_words,
-                         **kwargs)
-
-    @classmethod
-    def match(cls, model_path: str) -> Optional[str]:
-        """Return the model_name that was registered to MODELS.
-
-        Args:
-            model_path (str): the model path used for matching.
-        """
-        path = model_path.lower()
-        if 'yi-vl' in path:
-            return 'yi-vl'
-
-
-@MODELS.register_module(name=['llava-chatml', 'internvl-zh-hermes2'])
-class ChatmlDirect(BaseChatTemplate):
-
-    def __init__(self,
-                 system='<|im_start|>system\n',
-                 meta_instruction='Answer the questions.',
-                 eosys='<|im_end|>',
-                 user='<|im_start|>user\n',
-                 eoh='<|im_end|>',
-                 assistant='<|im_start|>assistant\n',
-                 eoa='<|im_end|>',
-                 separator='',
-                 **kwargs):
-        super().__init__(system,
-                         meta_instruction=meta_instruction,
-                         eosys=eosys,
-                         user=user,
-                         eoh=eoh,
-                         assistant=assistant,
-                         eoa=eoa,
-                         separator=separator,
-                         **kwargs)
-
-    @classmethod
-    def match(cls, model_path: str) -> Optional[str]:
-        """Return the model_name that was registered to MODELS.
-
-        Args:
-            model_path (str): the model path used for matching.
-        """
-        path = model_path.lower()
-        if 'llava' in path and 'v1.6-34b' in path:
-            return 'llava-chatml'
-        if 'internvl-chat' in path and 'v1-2' in path:
-            return 'internvl-zh-hermes2'
-
-
-@MODELS.register_module(name='phi-4')
-@MODELS.register_module(name='phi-3')
-class Phi3Instruct(BaseChatTemplate):
-    """Chat template of InternLM model."""
-
-    def __init__(self,
-                 system='<|system|>\n',
-                 meta_instruction=None,
-                 eosys='<|end|>\n',
-                 user='<|user|>\n',
-                 eoh='<|end|>\n',
-                 assistant='<|assistant|>\n',
-                 eoa='<|end|>\n',
-                 separator='',
-                 stop_words=['<|end|>', '<|endoftext|>', '<|assistant|>'],
-                 **kwargs):
-        super().__init__(system=system,
-                         meta_instruction=meta_instruction,
-                         eosys=eosys,
-                         user=user,
-                         eoh=eoh,
-                         assistant=assistant,
-                         eoa=eoa,
-                         separator=separator,
-                         stop_words=stop_words,
-                         **kwargs)
-
-    @classmethod
-    def match(cls, model_path: str) -> Optional[str]:
-        """Return the model_name that was registered to MODELS.
-
-        Args:
-            model_path (str): the model path used for matching.
-        """
-        path = model_path.lower()
-        if all([c in path for c in ['phi-3', 'instruct']]):
-            return 'phi-3'
-        if all([c in path for c in ['phi-4', 'instruct']]):
-            return 'phi-4'
-
-
-@MODELS.register_module(name='internvl2-phi3')
-class InternVL2Phi3(Phi3Instruct):
-
-    def __init__(self, meta_instruction='你是由上海人工智能实验室联合商汤科技开发的书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。', **kwargs):
-        super().__init__(meta_instruction=meta_instruction, **kwargs)
+    def _infill_prompt(self, prompt):
+        prefix, suffix = prompt.split('<FILL>')
+        if self.suffix_first:
+            # format as "<PRE> <SUF>{suf} <MID> {pre}"
+            prompt = f'<PRE> <SUF>{suffix} <MID> {prefix}'
+        else:
+            # format as "<PRE> {pre} <SUF>{suf} <MID>"
+            prompt = f'<PRE> {prefix} <SUF>{suffix} <MID>'
+        return prompt
 
     @classmethod
     def match(cls, model_path: str) -> Optional[str]:
@@ -1733,66 +575,49 @@ def match(cls, model_path: str) -> Optional[str]:
         Args:
             model_path (str): the model path used for matching.
         """
-        path = model_path.lower()
-        if 'internvl2-4b' in path:
-            return 'internvl2-phi3'
+        if 'codellama' in model_path.lower():
+            return 'codellama'
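
For readers skimming the relocated CodeLlama template, the fill-in-the-middle layout built by `_infill_prompt` above can be sketched standalone as follows (editor-added illustration; the helper name and sample snippet are hypothetical, not part of this patch):

def build_infill_prompt(code_with_hole: str, suffix_first: bool = False) -> str:
    # Split the snippet on the <FILL> marker, then lay out the CodeLlama
    # infilling tokens: <PRE> prefix, <SUF> suffix, <MID> completion target.
    prefix, suffix = code_with_hole.split('<FILL>')
    if suffix_first:
        return f'<PRE> <SUF>{suffix} <MID> {prefix}'
    return f'<PRE> {prefix} <SUF>{suffix} <MID>'


print(build_infill_prompt('def add(a, b):\n    return <FILL>\n'))
# -> <PRE> def add(a, b):\n    return  <SUF>\n <MID>
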
 
 
-@MODELS.register_module(name='chatglm3')
-class ChatGLM3(BaseChatTemplate):
-    """Chat template of chatglm3 model."""
+@MODELS.register_module(name='chatglm')
+class ChatGLM2(BaseModel):
 
-    def __init__(self,
-                 system='<|system|>\n ',
-                 meta_instruction=None,
-                 eosys='',
-                 user='<|user|>\n ',
-                 eoh='',
-                 assistant='<|assistant|>\n ',
-                 eoa='',
-                 separator='',
-                 stop_words=['<eos>'],
-                 **kwargs):
-        super().__init__(system=system,
-                         meta_instruction=meta_instruction,
-                         eosys=eosys,
-                         user=user,
-                         eoh=eoh,
-                         assistant=assistant,
-                         eoa=eoa,
-                         separator=separator,
-                         stop_words=stop_words,
-                         **kwargs)
-        self.start = '[gMASK]sop'
+    def __init__(self, user='问:', eoh='\n\n', assistant='答:', eoa='\n\n', **kwargs):
+        super().__init__(**kwargs)
+        self._user = user
+        self._assistant = assistant
+        self._eoh = eoh
+        self._eoa = eoa
+        self.count = 0
 
     def get_prompt(self, prompt, sequence_start=True):
-        """Return the prompt that is concatenated with other elements in the
-        chat template.
-
-        Args:
-            prompt (str): user's input prompt
-            sequence_start (bool): indicator for the first round chat of a
-               session sequence
-        Returns:
-            str: the concatenated prompt
-        """
-        prompt = super().get_prompt(prompt, sequence_start)
-        if sequence_start:
-            prompt = self.start + prompt
-        return prompt
+        """Get prompt."""
+        # need more check
+        # https://github.com/THUDM/ChatGLM2-6B/issues/48
+        # [64790, 64792] to be prepended
+        self.count += 1
+        ret = f'[Round {self.count}]\n\n'
+        ret += f'{self._user}{prompt}{self._eoh}'
+        ret += f'{self._assistant}'
+        return ret
 
     def messages2prompt(self, messages, sequence_start=True, **kwargs):
-        """Return the prompt that is concatenated with other elements in the
-        chat template.
-
-        Args:
-            messages (str | List): user's input prompt
-        Returns:
-            str: the concatenated prompt
-        """
+        """Message to prompt."""
         if isinstance(messages, str):
             return self.get_prompt(messages, sequence_start)
-        return self.start + super().messages2prompt(messages, sequence_start, **kwargs)
+        ret = ''
+        count = 0
+        for message in messages:
+            role = message['role']
+            content = get_text(message['content'])
+            if role == 'user':
+                count += 1
+                ret += f'[Round {count}]\n\n'
+                ret += f'{self._user}{content}{self._eoh}'
+                ret += f'{self._assistant}'
+            if role == 'assistant':
+                ret += f'{content}'
+        return ret
 
     @classmethod
     def match(cls, model_path: str) -> Optional[str]:
@@ -1802,22 +627,20 @@ def match(cls, model_path: str) -> Optional[str]:
             model_path (str): the model path used for matching.
         """
         path = model_path.lower()
-        if 'chatglm3' in path:
-            return 'chatglm3'
+        if 'chatglm2' in path:
+            return 'chatglm'
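
As a quick reference for the ChatGLM2 template kept above, here is a hedged, editor-added sketch of the round-numbered prompt its `messages2prompt` builds (the sample messages are illustrative):

from lmdeploy.model import MODELS

chatglm2 = MODELS.get('chatglm')()  # the template registered above
messages = [
    {'role': 'user', 'content': 'Hello'},
    {'role': 'assistant', 'content': 'Hi, how can I help?'},
    {'role': 'user', 'content': 'Summarize this patch'},
]
prompt = chatglm2.messages2prompt(messages)
# Roughly: '[Round 1]\n\n问:Hello\n\n答:Hi, how can I help?'
#          '[Round 2]\n\n问:Summarize this patch\n\n答:'
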
 
 
-@MODELS.register_module(name='glm4')
-class Glm4Chat(ChatGLM3):
-    """Chat template of glm-4 model."""
+@MODELS.register_module(name=['mistral', 'mixtral'])
+class MistralChat(BaseChatTemplate):
+    """Template of Mistral and Mixtral Instruct models.
 
-    def __init__(self,
-                 system='<|system|>\n',
-                 user='<|user|>\n',
-                 assistant='<|assistant|>\n',
-                 stop_words=['<|user|>', '<|endoftext|>', '<|observation|>'],
-                 **kwargs):
-        super().__init__(system=system, user=user, assistant=assistant, stop_words=stop_words, **kwargs)
-        self.start = '[gMASK]'
+    `https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1`
+    `https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1`
+    """
+
+    def __init__(self, user='[INST] ', eoh=' [/INST]', eoa='', **kwargs):
+        super().__init__(user=user, eoh=eoh, eoa=eoa, **kwargs)
 
     @classmethod
     def match(cls, model_path: str) -> Optional[str]:
@@ -1826,36 +649,29 @@ def match(cls, model_path: str) -> Optional[str]:
         Args:
             model_path (str): the model path used for matching.
         """
-        path = model_path.lower()
-        if 'glm-4' in path:
-            return 'glm4'
+        model_path = model_path.lower()
+        if 'instruct' in model_path or 'llava' in model_path:
+            if 'mistral' in model_path:
+                return 'mistral'
+            if 'mixtral' in model_path:
+                return 'mixtral'
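
Likewise, a brief editor-added sketch of the single-turn prompt the MistralChat template above yields; the output shown is approximate and assumes the default (empty) system fields:

from lmdeploy.model import MODELS

mistral = MODELS.get('mistral')()
print(mistral.get_prompt('Write a haiku about GPUs'))
# Approximately: '[INST] Write a haiku about GPUs [/INST]'
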
 
 
-@MODELS.register_module(name='codegeex4')
-class CodeGeeX4Chat(BaseChatTemplate):
-    """Chat template of THUDM/codegeex4-all-9b model."""
+@MODELS.register_module(name=['internvl-zh'])
+class InternVLZH(BaseChatTemplate):
 
-    def __init__(self,
-                 system='<|system|>\n',
-                 meta_instruction='你是一位智能编程助手,你叫CodeGeeX。你会为用户回答关于编程、代码、计算机方面的任何问题,并提供格式规范、可以执行、准确安全的代码,并在必要时提供详细的解释。',
-                 eosys='',
-                 user='<|user|>\n',
-                 eoh='',
-                 assistant='<|assistant|>\n',
-                 eoa='',
-                 separator='',
-                 stop_words=['<|endoftext|>', '<|user|>', '<|observation|>'],
-                 **kwargs):
-        super().__init__(system=system,
-                         meta_instruction=meta_instruction,
-                         eosys=eosys,
-                         user=user,
-                         eoh=eoh,
-                         assistant=assistant,
-                         eoa=eoa,
-                         separator=separator,
-                         stop_words=stop_words,
-                         **kwargs)
+    def __init__(self, user=': ', eoh=' ', assistant=': ', eoa='', **kwargs):
+        super().__init__(user=user, eoh=eoh, assistant=assistant, eoa=eoa, **kwargs)
+
+    def get_prompt(self, prompt, sequence_start=True):
+        if self.capability == 'chat':
+            return super().get_prompt(prompt, sequence_start)[:-1]
+        return super().get_prompt(prompt, sequence_start)
+
+    def messages2prompt(self, messages, sequence_start=True, **kwargs):
+        if isinstance(messages, str):
+            return self.get_prompt(messages, sequence_start)
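+        # [:-1] strips the trailing space of the assistant marker (': ') so that
+        # generation starts right after the colon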
+        return super().messages2prompt(messages, sequence_start, **kwargs)[:-1]
 
     @classmethod
     def match(cls, model_path: str) -> Optional[str]:
@@ -1865,58 +681,39 @@ def match(cls, model_path: str) -> Optional[str]:
             model_path (str): the model path used for matching.
         """
         path = model_path.lower()
-        if 'codegeex4' in path:
-            return 'codegeex4'
+        if 'internvl-chat' in path and 'v1-1' in path:
+            return 'internvl-zh'
 
 
-@MODELS.register_module(name='internvl-phi3')
-class InternVLPhi3(Phi3Instruct):
-    """Chat template of InternVL Chat 4B model."""
+@MODELS.register_module(name=['deepseek-vl'])
+class DeepseekVL(BaseChatTemplate):
 
-    def __init__(self,
-                 meta_instruction='You are an AI assistant whose name is Phi-3.',
-                 eosys='<|end|>',
-                 eoh='<|end|>',
-                 eoa='<|end|>',
-                 separator='',
-                 **kwargs):
+    def __init__(
+            self,
+            meta_instruction="""You are a helpful language and vision assistant. You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language.""",  # noqa: E501
+            eosys='\n\n',
+            user='User: ',
+            eoh='\n\n',
+            assistant='Assistant: ',
+            eoa='<|end▁of▁sentence|>',
+            **kwargs):
         super().__init__(meta_instruction=meta_instruction,
                          eosys=eosys,
+                         user=user,
                          eoh=eoh,
+                         assistant=assistant,
                          eoa=eoa,
-                         separator=separator,
                          **kwargs)
 
-    @classmethod
-    def match(cls, model_path: str) -> Optional[str]:
-        """Return the model_name that was registered to MODELS.
-
-        Args:
-            model_path (str): the model path used for matching.
-        """
-        path = model_path.lower()
-        if all([c in path for c in ['mini-internvl-chat', '4b', 'v1-5']]):
-            return 'internvl-phi3'
-
-
-@MODELS.register_module(name='molmo')
-class Molmo(BaseChatTemplate):
+    def get_prompt(self, prompt, sequence_start=True):
+        if self.capability == 'chat':
+            return super().get_prompt(prompt, sequence_start)[:-1]
+        return super().get_prompt(prompt, sequence_start)
 
-    def __init__(self,
-                 user=' User: ',
-                 eoh='',
-                 assistant=' Assistant:',
-                 eoa='',
-                 separator=' ',
-                 stop_words=['<|endoftext|>'],
-                 **kwargs):
-        super().__init__(user=user,
-                         eoh=eoh,
-                         assistant=assistant,
-                         eoa=eoa,
-                         separator=separator,
-                         stop_words=stop_words,
-                         **kwargs)
+    def messages2prompt(self, messages, sequence_start=True, **kwargs):
+        if isinstance(messages, str):
+            return self.get_prompt(messages, sequence_start)
+        return super().messages2prompt(messages, sequence_start, **kwargs)[:-1]
 
     @classmethod
     def match(cls, model_path: str) -> Optional[str]:
@@ -1926,142 +723,36 @@ def match(cls, model_path: str) -> Optional[str]:
             model_path (str): the model path used for matching.
         """
         path = model_path.lower()
-        if 'molmo' in path:
-            return 'molmo'
+        if 'deepseek-vl' in path and 'chat' in path:
+            return 'deepseek-vl'
 
 
-@MODELS.register_module(name='llama4')
-class Llama4(BaseChatTemplate):
+@MODELS.register_module(name=['deepseek-vl2'])
+class DeepseekVL2(BaseChatTemplate):
 
     def __init__(self,
-                 system='<|header_start|>system<|header_end|>\n\n',
-                 user='<|header_start|>user<|header_end|>\n\n',
-                 assistant='<|header_start|>assistant<|header_end|>\n\n',
-                 eosys='<|eot|>',
-                 eoh='<|eot|>',
-                 eoa='<|eot|>',
-                 separator='',
-                 stop_words=['<|end_of_text|>', '<|eom|>', '<|eot|>'],
+                 meta_instruction='',
+                 eosys='',
+                 user='<|User|>: ',
+                 eoh='\n\n',
+                 assistant='<|Assistant|>: ',
+                 eoa='<|end▁of▁sentence|>',
                  **kwargs):
-        super().__init__(system=system,
+        super().__init__(meta_instruction=meta_instruction,
                          eosys=eosys,
                          user=user,
                          eoh=eoh,
                          assistant=assistant,
                          eoa=eoa,
-                         separator=separator,
-                         stop_words=stop_words,
                          **kwargs)
 
-    @classmethod
-    def match(cls, model_path: str) -> Optional[str]:
-        """Return the model_name that was registered to MODELS.
-
-        Args:
-            model_path (str): the model path used for matching.
-        """
-        path = model_path.lower()
-        if 'llama-4' in path:
-            return 'llama4'
-
-
-@MODELS.register_module(name='intern-s1')
-@MODELS.register_module(name='interns1')
-class InternS1(InternVL2_5):
-
-    def __init__(
-            self,
-            tool='\n\nYour response should consist of a reasoning step (**thought**) followed immediately by a function call in valid JSON format. Wrap each function call using the `<|action_start|><|plugin|>` and `<|action_end|>` tags.\n\n**Format example:**\n\n```\n(Your thought goes here...)\n\n<|action_start|><|plugin|>\n{\n    "name": "tool_name",\n    "parameters": {\n        "parameter1": "value1",\n        "parameter2": "value2"\n    }\n}\n<|action_end|>\n```\n\n# External Tools\nYou have access to these tools:\n',  # noqa: E501
-            eotool='',
-            meta_instruction='You are an expert reasoner with extensive experience in all areas. You approach problems through systematic thinking and rigorous reasoning. Your response should reflect deep understanding and precise logical thinking, making your solution path and reasoning clear to others. Please put your thinking process within <think>...</think> tags.',  # noqa: E501
-            **kwargs):
-        super(InternVL2_5, self).__init__(meta_instruction=meta_instruction, **kwargs)
-
-        self.tool = tool or ''
-        self.eotool = eotool or ''
-
-    def messages2prompt(self, messages, sequence_start=True, tools=None, enable_thinking=None, **kwargs):
-        """Return the prompt that is concatenated with other elements in the
-        chat template.
+    def get_prompt(self, prompt, sequence_start=True):
+        return super().get_prompt(prompt, sequence_start)[:-1]
 
-        Args:
-            messages (str | List): user's input prompt
-        Returns:
-            str: the concatenated prompt
-        """
+    def messages2prompt(self, messages, sequence_start=True, **kwargs):
         if isinstance(messages, str):
             return self.get_prompt(messages, sequence_start)
-        box_map = dict(user=self.user,
-                       assistant=self.assistant,
-                       system=self.system,
-                       environment=self.environment,
-                       tool=self.environment)
-        eox_map = dict(user=self.eoh,
-                       assistant=self.eoa + self.separator,
-                       system=self.eosys,
-                       environment=self.eoenv,
-                       tool=self.eoenv)
-        name_map = dict(plugin=self.plugin, interpreter=self.interpreter)
-
-        ret = ''
-
-        if tools:
-            tools_prompt = dict(
-                role='system',
-                name='plugin',  # only support internlm2
-                content=f'{self.tool}{json.dumps(tools, ensure_ascii=False, indent=2)}{self.eotool}')
-
-            if messages[0]['role'] == 'system':
-                tools_prompt['content'] = messages[0]['content'] + tools_prompt['content']
-                messages[0] = tools_prompt
-            else:
-                if self.meta_instruction is not None and sequence_start and enable_thinking is not False:
-                    tools_prompt['content'] = self.meta_instruction + tools_prompt['content']
-                else:
-                    tools_prompt['content'] = tools_prompt['content'].lstrip('\n')
-                messages.insert(0, tools_prompt)
-        elif self.meta_instruction is not None and sequence_start:
-            if len(messages):
-                if messages[0]['role'] != 'system' and enable_thinking is not False:
-                    ret += f'{self.system}{self.meta_instruction}{eox_map["system"]}'
-        # find index of last user input section
-        last_user_idx = -1
-        for idx in range(len(messages) - 1, -1, -1):
-            if messages[idx]['role'] == 'user':
-                last_user_idx = idx
-                break
-
-        for idx, message in enumerate(messages):
-            role = message['role']
-            content = get_text(message['content'])
-            if last_user_idx != -1 and idx > last_user_idx and message.get('reasoning_content', None) is not None:
-                content = f'<think>\n{message["reasoning_content"]}\n</think>\n\n{content}'
-            if role == 'assistant' and message.get('tool_calls', None) is not None:
-                for tool_call in message['tool_calls']:
-                    function = tool_call.get('function', {})
-                    function['name'] = function.get('name', '')
-                    function['parameters'] = function.get('parameters', function.get('arguments', ''))
-                    function.pop('arguments')
-                    if isinstance(function['parameters'], str):
-                        function['parameters'] = json.loads(function['parameters'])
-                    content += f'<|action_start|><|plugin|>\n{json.dumps(function, ensure_ascii=False)}\n<|action_end|>'
-
-            if 'name' in message:
-                begin = box_map[role].strip()
-                if message['name'] in name_map:
-                    begin = begin + f" name={name_map[message['name']]}\n"
-                elif role == 'tool':
-                    begin = begin + f" name={name_map['plugin']}\n"
-            else:
-                begin = box_map[role]
-            ret += f'{begin}{content}{eox_map[role]}'
-        if len(messages) and messages[-1]['role'] == 'assistant':
-            return ret[:-len(eox_map['assistant'])]  # prefix of response
-        ret += f'{self.assistant}'
-
-        if enable_thinking is not False:
-            ret += '<think>'
-        return ret
+        return super().messages2prompt(messages, sequence_start, **kwargs)[:-1]
 
     @classmethod
     def match(cls, model_path: str) -> Optional[str]:
@@ -2071,29 +762,24 @@ def match(cls, model_path: str) -> Optional[str]:
             model_path (str): the model path used for matching.
         """
         path = model_path.lower()
-        if 'intern-s1' in path or 'interns1' in path:
-            return 'intern-s1'
+        if 'deepseek-vl2' in path:
+            return 'deepseek-vl2'
 
 
-@MODELS.register_module(name='gpt-oss')
-class GptOss(BaseChatTemplate):
+@MODELS.register_module(name=['llava-chatml'])
+class ChatmlDirect(BaseChatTemplate):
 
     def __init__(self,
-                 system='<|start|>system<|message|>',
-                 meta_instruction=(
-                     'You are ChatGPT, a large language model trained by OpenAI.\n'
-                     'Knowledge cutoff: 2024-06\nCurrent date: 2025-08-06\n\n'
-                     'Reasoning: medium\n\n'
-                     '# Valid channels: analysis, commentary, final. Channel must be included for every message.'),
-                 user='<|start|>user<|message|>',
-                 assistant='<|start|>assistant',
-                 eosys='<|end|>',
-                 eoh='<|end|>',
-                 eoa='<|end|>',
+                 system='<|im_start|>system\n',
+                 meta_instruction='Answer the questions.',
+                 eosys='<|im_end|>',
+                 user='<|im_start|>user\n',
+                 eoh='<|im_end|>',
+                 assistant='<|im_start|>assistant\n',
+                 eoa='<|im_end|>',
                  separator='',
-                 stop_words=['<|return|>'],
                  **kwargs):
-        super().__init__(system=system,
+        super().__init__(system,
                          meta_instruction=meta_instruction,
                          eosys=eosys,
                          user=user,
@@ -2101,7 +787,6 @@ def __init__(self,
                          assistant=assistant,
                          eoa=eoa,
                          separator=separator,
-                         stop_words=stop_words,
                          **kwargs)
 
     @classmethod
@@ -2112,8 +797,78 @@ def match(cls, model_path: str) -> Optional[str]:
             model_path (str): the model path used for matching.
         """
         path = model_path.lower()
-        if 'gpt-oss' in path:
-            return 'gpt-oss'
+        if 'llava' in path and 'v1.6-34b' in path:
+            return 'llava-chatml'
+
+
+@MODELS.register_module(name=['hf'])
+class HFChatTemplate(BaseChatTemplate):
+    """Chat template for HuggingFace models with `apply_chat_template`
+    method."""
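+
+    # A rough usage sketch (the model path below is only illustrative; any model whose
+    # tokenizer ships a chat template works):
+    #   template = HFChatTemplate(model_path='Qwen/Qwen3-8B')
+    #   prompt = template.messages2prompt([{'role': 'user', 'content': 'hi'}])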
+
+    def __init__(self, model_path: str = '', **kwargs):
+        try:
+            from transformers import AutoTokenizer
+            self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+            self.system_start, self.system_end = self._role_instruction('system')
+            self.user_start, self.user_end = self._role_instruction('user')
+            self.assistant_start, self.assistant_end = self._role_instruction('assistant')
+            self.stop_words = self.tokenizer.eos_token
+        except Exception as e:
+            raise ValueError(f'Failed to apply the chat template of {model_path}: {e}')
+
+    def get_prompt(self, prompt, sequence_start=True, **kwargs):
+        messages = [{'role': 'user', 'content': prompt}]
+        return self.messages2prompt(messages, sequence_start, **kwargs)
+
+    def messages2prompt(self, messages, sequence_start=True, **kwargs):
+        if isinstance(messages, str):
+            messages = [{'role': 'user', 'content': messages}]
+        assert all(isinstance(m, dict) and 'role' in m and 'content' in m for m in messages), \
+            'Each message should be a dict with "role" and "content" keys.'
+
+        if 'enable_thinking' in kwargs and kwargs['enable_thinking'] is None:
+            # Workaround for internlm/Intern-S1: the chat template expects a <think> tag appended,
+            # but when enable_thinking=None is specified, the <think> tag is omitted.
+            kwargs.pop('enable_thinking')
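+        # If the conversation already ends with an assistant message, treat it as a response
+        # prefix: skip the generation prompt here and strip the assistant end tag below so the
+        # model continues the unfinished reply.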
+        add_generation_prompt = messages[-1]['role'] != 'assistant'
+        if sequence_start:
+            prompt = self.tokenizer.apply_chat_template(messages,
+                                                        tokenize=False,
+                                                        add_generation_prompt=add_generation_prompt,
+                                                        **kwargs)
+        else:
+            # Prepend a sentinel system message to avoid the influence of the default system role in the tokenizer's chat template
+            sentinel_messages = [{'role': 'system', 'content': 'This is a sentinel position'}]
+            sentinel_prompt = self.tokenizer.apply_chat_template(sentinel_messages,
+                                                                 tokenize=False,
+                                                                 add_generation_prompt=False)
+            prompt = self.tokenizer.apply_chat_template(sentinel_messages + messages,
+                                                        tokenize=False,
+                                                        add_generation_prompt=add_generation_prompt,
+                                                        **kwargs)
+            # remove the sentinel part
+            prompt = prompt[len(sentinel_prompt):]
+
+        if messages[-1]['role'] == 'assistant' and len(self.assistant_end) > 0:
+            prompt = prompt[:-len(self.assistant_end)]  # prefix of response to let the model complete the response
+        return prompt
+
+    def _role_instruction(self, role):
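+        """Infer the text wrapping a role's content by rendering a sentinel message and
+        splitting the rendered prompt around the sentinel."""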
+        messages = [{'role': role, 'content': 'sentinel'}]
+        prompt = self.tokenizer.apply_chat_template(messages, tokenize=False)
+        role_pos = prompt.find('sentinel')
+        role_start = prompt[:role_pos]
+        role_end = prompt[role_pos + len('sentinel'):]
+        return role_start, role_end
+
+    @classmethod
+    def match(cls, model_path: str) -> Optional[str]:
+        """Return the model_name that was registered to MODELS.
+
+        Args:
+            model_path (str): the model path used for matching.
+        """
+        try:
+            cls(model_path)
+        except Exception:
+            return None
+        return 'hf'
 
 
 def best_match_model(query: str) -> Optional[str]:
@@ -2123,8 +878,16 @@ def best_match_model(query: str) -> Optional[str]:
         query (str): the input query. Could be a model path.
 
     Return:
-        str: the possible model name.
+        str: the possible builtin chat template name.
     """
+
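+    # Templates tried first: 'deepseek-vl2' and 'codellama' take precedence over the generic
+    # 'hf' fallback, which in turn is preferred over the builtin templates iterated below.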
+    priorities = ['deepseek-vl2', 'codellama', 'hf']
+
+    for name in priorities:
+        chat_template = MODELS.module_dict[name]
+        if chat_template.match(query):
+            return name
+
     for name, model in MODELS.module_dict.items():
         matched_name = model.match(query)  # cache the result to avoid matching twice
         if matched_name:
diff --git a/tests/test_lmdeploy/test_model.py b/tests/test_lmdeploy/test_model.py
index e8c0cf665b..03323bdba6 100644
--- a/tests/test_lmdeploy/test_model.py
+++ b/tests/test_lmdeploy/test_model.py
@@ -2,64 +2,113 @@
 
 from lmdeploy.model import MODELS, best_match_model
 
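+# Models whose tokenizers ship a chat template; the 'hf' chat template is expected to
+# reproduce `AutoTokenizer.apply_chat_template` for each of them.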
+HF_MODELS_WITH_CHAT_TEMPLATES = [
+    'Qwen/Qwen1.5-7B-Chat',
+    'Qwen/Qwen2.5-7B-Instruct',
+    'Qwen/Qwen3-8B',
+    'Qwen/QwQ-32B',
+    'Qwen/QwQ-32B-Preview',
+    'Qwen/QwQ-32B-AWQ',
+    'Qwen/Qwen2.5-VL-7B-Instruct',
+    'Qwen/Qwen2-VL-7B-Instruct',
+    'internlm/internlm2-chat-7b',
+    'internlm/internlm2_5-7b-chat',
+    'internlm/internlm3-8b-instruct',
+    'internlm/Intern-S1',
+    'internlm/Intern-S1-mini',
+    'OpenGVLab/InternVL-Chat-V1-2',
+    'OpenGVLab/InternVL-Chat-V1-5',
+    'OpenGVLab/Mini-InternVL-Chat-2B-V1-5',
+    'OpenGVLab/InternVL2-2B',
+    'OpenGVLab/InternVL2-4B',
+    'OpenGVLab/InternVL2-8B',
+    'OpenGVLab/InternVL2_5-2B',
+    'OpenGVLab/InternVL2_5-4B',
+    'OpenGVLab/InternVL2_5-8B',
+    'OpenGVLab/InternVL3-2B',
+    'OpenGVLab/InternVL3-8B',
+    'OpenGVLab/InternVL3-9B',
+    'OpenGVLab/InternVL3_5-1B',
+    'OpenGVLab/InternVL3_5-4B',
+    'OpenGVLab/InternVL3_5-8B',
+    'OpenGVLab/InternVL3_5-GPT-OSS-20B-A4B-Preview',
+    'AI4Chem/ChemVLM-8B',
+    'deepseek-ai/DeepSeek-V2-Lite',
+    'deepseek-ai/DeepSeek-V3',
+    'deepseek-ai/DeepSeek-R1',
+    'deepseek-ai/DeepSeek-R1-Zero',
+    'deepseek-ai/DeepSeek-V3.1',
+    'deepseek-ai/deepseek-coder-1.3b-instruct',
+    'deepseek-ai/DeepSeek-R1-Distill-Llama-8B',
+    'deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B',
+    'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B',
+    'zai-org/chatglm3-6b',
+    'zai-org/glm-4-9b-chat',
+    'zai-org/codegeex4-all-9b',
+    'zai-org/cogvlm2-llama3-chat-19B',
+    'microsoft/Phi-3-mini-128k-instruct',
+    'microsoft/Phi-3-vision-128k-instruct',
+    'microsoft/Phi-3.5-mini-instruct',
+    'microsoft/Phi-3.5-vision-instruct',
+    'microsoft/Phi-3.5-MoE-instruct',
+    '01-ai/Yi-1.5-34B-Chat',
+    # Accessing the following models requires authentication
+    # 'openbmb/MiniCPM-V-2_6',
+    # 'google/gemma-3-4b-it',
+]
+
+
+@pytest.mark.parametrize('model_path', HF_MODELS_WITH_CHAT_TEMPLATES)
+def test_HFChatTemplate_get_prompt_sequence_start_True(model_path):
+    model = MODELS.get('hf')(model_path=model_path)
+    prompt = 'How to apply chat template using transformers?'
+    messages = [{'role': 'user', 'content': prompt}]
 
-@pytest.mark.parametrize('model_path_and_name', [
-    ('internlm/internlm-chat-7b', ['internlm']),
-    ('internlm/internlm2-1_8b', ['base']),
-    ('models--internlm--internlm-chat-7b/snapshots/1234567', ['internlm']),
-    ('Qwen/Qwen-7B-Chat', ['qwen']),
-    ('Qwen/Qwen2.5-7B-Instruct', ['qwen2d5']),
-    ('Qwen/Qwen2.5-VL-7B-Instruct', ['qwen2d5-vl']),
-    ('Qwen/Qwen3-32B', ['qwen3']),
-    ('Qwen/Qwen3-235B-A22B', ['qwen3']),
-    ('codellama/CodeLlama-7b-hf', ['codellama']),
-    ('upstage/SOLAR-0-70b', ['solar', 'solar-70b']),
-    ('meta-llama/Llama-2-7b-chat-hf', ['llama2']),
-    ('THUDM/chatglm2-6b', ['chatglm']),
-    ('01-ai/Yi-6B-200k', ['yi', 'yi-200k']),
-    ('01-ai/Yi-34B-Chat', ['yi']),
-    ('01-ai/Yi-6B-Chat', ['yi', 'yi-chat']),
-    ('WizardLM/WizardLM-70B-V1.0', ['wizardlm']),
-    ('codellama/CodeLlama-34b-Instruct-hf', ['codellama']),
-    ('deepseek-ai/deepseek-coder-6.7b-instruct', ['deepseek-coder']),
-    ('deepseek-ai/deepseek-vl-7b-chat', ['deepseek-vl']),
-    ('deepseek-ai/deepseek-moe-16b-chat', ['deepseek']),
-    ('internlm/internlm-xcomposer2-4khd-7b', ['internlm-xcomposer2']),
-    ('internlm/internlm-xcomposer2d5-7b', ['internlm-xcomposer2d5']),
-    ('workspace', ['base']),
-    ('OpenGVLab/InternVL2_5-1B', ['internvl2_5']),
-    ('OpenGVLab/InternVL3-1B', ['internvl2_5']),
-])
-@pytest.mark.parametrize('suffix', ['', '-w4', '-4bit', '-16bit'])
-def test_best_match_model(model_path_and_name, suffix):
-    if model_path_and_name[0] == 'internlm/internlm2-1_8b' and suffix:
-        return  # internlm/internlm2-1_8b-suffix will got None
-    deduced_name = best_match_model(model_path_and_name[0] + suffix)
+    from transformers import AutoTokenizer
+    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    expected = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    assert model.get_prompt(prompt, sequence_start=True) == expected
+
+
+@pytest.mark.parametrize('model_path', HF_MODELS_WITH_CHAT_TEMPLATES)
+def test_HFChatTemplate_message2prompt_sequence_start_True(model_path):
+    model = MODELS.get('hf')(model_path=model_path)
+    prompt = 'How to apply chat template using transformers?'
+    messages = [{'role': 'user', 'content': prompt}]
+
+    from transformers import AutoTokenizer
+    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    expected = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    assert model.messages2prompt(prompt, sequence_start=True) == expected
+    assert model.messages2prompt(messages, sequence_start=True) == expected
+
+
+@pytest.mark.parametrize('model_path', HF_MODELS_WITH_CHAT_TEMPLATES)
+def test_best_match_model_hf(model_path):
+    assert best_match_model(model_path) == 'hf'
+
+
+@pytest.mark.parametrize(
+    'model_path_and_name',
+    [
+        ('internlm/internlm-chat-7b', ['internlm']),
+        ('internlm/internlm2-1_8b', ['base']),
+        ('Qwen/Qwen-7B-Chat', ['qwen']),
+        ('codellama/CodeLlama-7b-hf', ['codellama']),
+        # ('meta-llama/Llama-2-7b-chat-hf', ['llama2']),
+        ('THUDM/chatglm2-6b', ['chatglm']),
+        ('codellama/CodeLlama-34b-Instruct-hf', ['codellama']),
+        ('deepseek-ai/deepseek-vl-7b-chat', ['deepseek-vl']),
+    ])
+def test_best_match_model(model_path_and_name):
+    deduced_name = best_match_model(model_path_and_name[0])
     if deduced_name is not None:
         assert deduced_name in model_path_and_name[1], f'expect {model_path_and_name[1]}, but got {deduced_name}'
     else:
         assert deduced_name in model_path_and_name[1], f'expect {model_path_and_name[1]}, but got {deduced_name}'
 
 
-@pytest.mark.parametrize('model_name', ['llama2', 'base', 'yi', 'qwen-7b', 'vicuna'])
-@pytest.mark.parametrize('meta_instruction', ['[fake meta_instruction]'])
-def test_model_config(model_name, meta_instruction):
-    from lmdeploy.model import ChatTemplateConfig
-    chat_template = ChatTemplateConfig(model_name, meta_instruction=meta_instruction).chat_template
-    prompt = chat_template.get_prompt('')
-    if model_name == 'base':
-        assert prompt == ''
-    else:
-        assert meta_instruction in prompt
-
-
 def test_base_model():
-    model = MODELS.get('llama')()
-    assert model is not None
-    assert model.capability == 'chat'
-    assert model.get_prompt('test') == 'test'
-    assert model.stop_words is None
-
     model = MODELS.get('internlm')(capability='completion')
     assert model.capability == 'completion'
     assert model.get_prompt('hi') == 'hi'
@@ -85,7 +134,7 @@ def test_vicuna():
 
 
 def test_prefix_response():
-    model = MODELS.get('internlm2')()
+    model = MODELS.get('hf')(model_path='Qwen/Qwen3-8B')
     messages = [dict(role='assistant', content='prefix test')]
     prompt = model.messages2prompt(messages)
     assert prompt[-len('prefix test'):] == 'prefix test'
@@ -111,142 +160,6 @@ def test_internlm_chat():
         assert _prompt is None
 
 
-def test_internlm_tool_call():
-    messages = []
-    messages.append({
-        'role':
-        'system',
-        'name':
-        'plugin',
-        'content':
-        '[{"description": "Compute the sum of two numbers", "name": "add", "parameters": {"type": "object", "properties": {"a": {"type": "int", "description": "A number"}, "b": {"type": "int", "description": "A number"}}, "required": ["a", "b"]}}, {"description": "Calculate the product of two numbers", "name": "mul", "parameters": {"type": "object", "properties": {"a": {"type": "int", "description": "A number"}, "b": {"type": "int", "description": "A number"}}, "required": ["a", "b"]}}]'  # noqa
-    })
-    messages.append({'role': 'user', 'content': 'Compute (3+5)*2'})
-    messages.append({
-        'content':
-        '(3+5)*2 = 8*2 =',
-        'role':
-        'assistant',
-        'tool_calls': [{
-            'id': '1',
-            'function': {
-                'arguments': '{"a": 8, "b": 2}',
-                'name': 'mul'
-            },
-            'type': 'function'
-        }]
-    })
-    messages.append({'role': 'tool', 'content': '3+5=16', 'tool_call_id': '1'})
-    model = MODELS.get('internlm2')(capability='chat')
-    assert model.messages2prompt(
-        messages
-    ) == """<|im_start|>system name=<|plugin|>\n[{"description": "Compute the sum of two numbers", "name": "add", "parameters": {"type": "object", "properties": {"a": {"type": "int", "description": "A number"}, "b": {"type": "int", "description": "A number"}}, "required": ["a", "b"]}}, {"description": "Calculate the product of two numbers", "name": "mul", "parameters": {"type": "object", "properties": {"a": {"type": "int", "description": "A number"}, "b": {"type": "int", "description": "A number"}}, "required": ["a", "b"]}}]<|im_end|>\n<|im_start|>user\nCompute (3+5)*2<|im_end|>\n<|im_start|>assistant\n(3+5)*2 = 8*2 =<|action_start|><|plugin|>\n{"name": "mul", "parameters": {"a": 8, "b": 2}}<|action_end|><|im_end|>\n<|im_start|>environment\n3+5=16<|im_end|>\n<|im_start|>assistant\n"""  # noqa
-
-
-def test_messages2prompt4internlm2_chat():
-    model = MODELS.get('internlm2')()
-    # Test with a single message
-    messages = [
-        {
-            'role': 'system',
-            'name': 'interpreter',
-            'content': 'You have access to python environment.'
-        },
-        {
-            'role': 'user',
-            'content': 'use python drwa a line'
-        },
-        {
-            'role': 'assistant',
-            'content': '<|action_start|><|interpreter|>\ncode<|action_end|>\n'
-        },
-        {
-            'role': 'environment',
-            'name': 'interpreter',
-            'content': "[{'type': 'image', 'content': 'image url'}]"
-        },
-    ]
-    tools = [{
-        'type': 'function',
-        'function': {
-            'name': 'add',
-            'description': 'Compute the sum of two numbers',
-            'parameters': {
-                'type': 'object',
-                'properties': {
-                    'a': {
-                        'type': 'int',
-                        'description': 'A number',
-                    },
-                    'b': {
-                        'type': 'int',
-                        'description': 'A number',
-                    },
-                },
-                'required': ['a', 'b'],
-            },
-        }
-    }]
-    import json
-    expected_prompt = (model.system.strip() + ' name=<|interpreter|>\nYou have access to python environment.' +
-                       model.eosys + model.system.strip() +
-                       f' name={model.plugin}\n{json.dumps(tools, ensure_ascii=False)}' + model.eosys + model.user +
-                       'use python drwa a line' + model.eoh + model.assistant +
-                       '<|action_start|><|interpreter|>\ncode<|action_end|>\n' + model.eoa + model.separator +
-                       model.environment.strip() +
-                       " name=<|interpreter|>\n[{'type': 'image', 'content': 'image url'}]" + model.eoenv +
-                       model.assistant)
-    actual_prompt = model.messages2prompt(messages, tools=tools)
-    assert actual_prompt == expected_prompt
-
-    # Test with a message where 'name' is not in name_map
-    messages_invalid_name = [
-        {
-            'role': 'system',
-            'name': 'invalid_name',
-            'content': 'You have access to python environment.'
-        },
-        {
-            'role': 'user',
-            'content': 'use python draw a line'
-        },
-        {
-            'role': 'assistant',
-            'content': '\ncode\n'
-        },
-        {
-            'role': 'environment',
-            'name': 'invalid_name',
-            'content': "[{'type': 'image', 'content': 'image url'}]"
-        },
-    ]
-    expected_prompt_invalid_name = (model.system.strip() + '\nYou have access to python environment.' + model.eosys +
-                                    model.user + 'use python draw a line' + model.eoh + model.assistant + '\ncode\n' +
-                                    model.eoa + model.separator + model.environment.strip() +
-                                    "\n[{'type': 'image', 'content': 'image url'}]" + model.eoenv + model.assistant)
-    actual_prompt_invalid_name = model.messages2prompt(messages_invalid_name)
-    assert actual_prompt_invalid_name == expected_prompt_invalid_name
-
-
-def test_llama3_1():
-    model = MODELS.get('llama3_1')()
-    messages = [dict(role='user', content='Can you check the top 5 trending songs on spotify?')]
-    tools = [{
-        'name': 'spotify_trending_songs',
-        'description': 'Get top trending songs on Spotify',
-        'parameters': {
-            'n': {
-                'param_type': 'int',
-                'description': 'Number of trending songs to get',
-                'required': True
-            }
-        },
-    }]
-    actual_prompt = model.messages2prompt(messages, tools=tools)
-    expected_prompt = '<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nCutting Knowledge Date: December 2023\nToday Date: 26 Jul 2024\n\n# Tool Instructions\n- Always execute python code in messages that you share.\n- When looking for real time information use relevant functions if available else fallback to brave_search\n\n\n\nYou have access to the following functions:\n\nUse the function \'spotify_trending_songs\' to: Get top trending songs on Spotify\n{"name": "spotify_trending_songs", "description": "Get top trending songs on Spotify", "parameters": {"n": {"param_type": "int", "description": "Number of trending songs to get", "required": true}}}\n\n\nIf a you choose to call a function ONLY reply in the following format:\n<{start_tag}={function_name}>{parameters}{end_tag}\nwhere\n\nstart_tag => ` a JSON dict with the function argument name as key and function argument value as value.\nend_tag => ``\n\nHere is an example,\n{"example_name": "example_value"}\n\nReminder:\n- Function calls MUST follow the specified format\n- Required parameters MUST be specified\n- Only call one function at a time\n- Put the entire function call reply on one line"\n- Always add your sources when using search results to answer the user query\n\nYou are a helpful assistant.<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nCan you check the top 5 trending songs on spotify?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n'  # noqa
-    assert actual_prompt == expected_prompt
-
-
 def test_baichuan():
     prompt = 'hello, can u introduce yourself'
     model = MODELS.get('baichuan2')(capability='completion')
@@ -279,15 +192,6 @@ def test_llama2():
         assert _prompt is None
 
 
-def test_llama3():
-    conversation = [{'role': 'user', 'content': 'Are you ok?'}]
-
-    from lmdeploy.model import Llama3
-    t = Llama3(model_name='llama', capability='chat')
-    prompt = t.messages2prompt(conversation)
-    assert prompt == '<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\nAre you ok?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n'  # noqa
-
-
 def test_qwen():
     prompt = 'hello, can u introduce yourself'
     model = MODELS.get('qwen')(capability='completion')
@@ -306,310 +210,6 @@ def test_qwen():
         assert _prompt is None
 
 
-def test_qwen2d5():
-    prompt = 'hello, can u introduce yourself'
-    model = MODELS.get('qwen2d5')(capability='completion')
-    assert model.get_prompt(prompt, sequence_start=True) == prompt
-    assert model.get_prompt(prompt, sequence_start=False) == prompt
-
-    model = MODELS.get('qwen2d5')(capability='chat')
-
-    # No tool call
-    messages = [dict(role='user', content='What\'s the temperature in San Francisco now?')]
-    no_tool_prompt = ('<|im_start|>system\nYou are Qwen, created by Alibaba '
-                      'Cloud. You are a helpful '
-                      "assistant.<|im_end|>\n<|im_start|>user\nWhat's the "
-                      'temperature in San Francisco '
-                      'now?<|im_end|>\n<|im_start|>assistant\n')
-    assert model.messages2prompt(messages) == no_tool_prompt
-    assert model.messages2prompt(messages, tools=[]) == no_tool_prompt
-
-    messages.append({'role': 'assistant', 'content': 'I don\'t know.'})
-    no_tool_prompt = ('<|im_start|>system\nYou are Qwen, created by Alibaba '
-                      'Cloud. You are a helpful '
-                      "assistant.<|im_end|>\n<|im_start|>user\nWhat's the "
-                      'temperature in San Francisco '
-                      "now?<|im_end|>\n<|im_start|>assistant\nI don't "
-                      'know.<|im_end|>\n<|im_start|>assistant\n')
-    assert model.messages2prompt(messages) == no_tool_prompt
-    # Single tool call
-    tools = [{
-        'name': 'get_current_temperature',
-        'description': 'Get current temperature at a location.',
-        'parameters': {
-            'type': 'object',
-            'properties': {
-                'location': {
-                    'type': 'string',
-                    'description': 'The location to get the temperature for,'
-                    ' in the format \'City, State, Country\'.'
-                },
-                'unit': {
-                    'type': 'string',
-                    'enum': ['celsius', 'fahrenheit'],
-                    'description': 'The unit to return the temperature in. Defaults to '
-                    '\'celsius\'.'
-                }
-            },
-            'required': ['location']
-        }
-    }]
-
-    messages = [dict(role='user', content='What\'s the temperature in San Francisco now?')]
-    tool_prompt = ('<|im_start|>system\nYou are Qwen, created by Alibaba '
-                   'Cloud. You are a helpful assistant.\n\n# Tools\n\nYou '
-                   'may call one or more functions to assist with the user '
-                   'query.\n\nYou are provided with function signatures '
-                   "within  XML tags:\n\n{\"type\": "
-                   "\"function\", \"function\": {\"name\": "
-                   "\"get_current_temperature\", \"description\": \"Get "
-                   "current temperature at a location.\", \"parameters\": {"
-                   "\"type\": \"object\", \"properties\": {\"location\": {"
-                   "\"type\": \"string\", \"description\": \"The location to "
-                   "get the temperature for, in the format 'City, State, "
-                   "Country'.\"}, \"unit\": {\"type\": \"string\", \"enum\": "
-                   "[\"celsius\", \"fahrenheit\"], \"description\": \"The "
-                   'unit to return the temperature in. Defaults to '
-                   "'celsius'.\"}}, \"required\": ["
-                   "\"location\"]}}}\n\n\nFor each function call, "
-                   'return a json object with function name and arguments '
-                   'within  XML tags:\n\n{'
-                   "\"name\": , \"arguments\": "
-                   '}\n<|im_end|>\n<|im_start'
-                   "|>user\nWhat's the temperature in San Francisco "
-                   'now?<|im_end|>\n<|im_start|>assistant\n')
-    assert model.messages2prompt(messages, tools=tools) == tool_prompt
-    # tool call send back
-    messages.append(
-        dict(role='assistant',
-             content='',
-             tool_calls=[{
-                 'id': '0',
-                 'function': {
-                     'arguments': '{"location": "San Francisco, CA, USA", "unit": "celsius"}',
-                     'name': 'get_current_temperature'
-                 },
-                 'type': 'function'
-             }]))
-    messages.append(
-        dict(role='tool',
-             name='get_current_temperature',
-             content={
-                 'temperature': 26.1,
-                 'location': 'San Francisco, California, USA',
-                 'unit': 'celsius'
-             },
-             tool_call_id='0'))
-    tool_prompt = ('<|im_start|>system\nYou are Qwen, created by Alibaba '
-                   'Cloud. You are a helpful assistant.\n\n# Tools\n\nYou '
-                   'may call one or more functions to assist with the user '
-                   'query.\n\nYou are provided with function signatures '
-                   "within  XML tags:\n\n{\"type\": "
-                   "\"function\", \"function\": {\"name\": "
-                   "\"get_current_temperature\", \"description\": \"Get "
-                   "current temperature at a location.\", \"parameters\": {"
-                   "\"type\": \"object\", \"properties\": {\"location\": {"
-                   "\"type\": \"string\", \"description\": \"The location to "
-                   "get the temperature for, in the format 'City, State, "
-                   "Country'.\"}, \"unit\": {\"type\": \"string\", \"enum\": "
-                   "[\"celsius\", \"fahrenheit\"], \"description\": \"The "
-                   'unit to return the temperature in. Defaults to '
-                   "'celsius'.\"}}, \"required\": ["
-                   "\"location\"]}}}\n\n\nFor each function call, "
-                   'return a json object with function name and arguments '
-                   'within  XML tags:\n\n{'
-                   "\"name\": , \"arguments\": "
-                   '}\n<|im_end|>\n<|im_start'
-                   "|>user\nWhat's the temperature in San Francisco "
-                   'now?<|im_end|>\n<|im_start|>assistant\n\n\n'
-                   '{"name": "get_current_temperature", "arguments": '
-                   '{"location": "San Francisco, CA, USA", "unit": '
-                   '"celsius"}}\n<|im_end|>\n<|im_start|>'
-                   'user\n\n{'
-                   "'temperature': 26.1, 'location': 'San Francisco, "
-                   "California, USA', 'unit': "
-                   "'celsius'}\n<|im_end|>\n<|im_start"
-                   '|>assistant\n')
-    assert model.messages2prompt(messages, tools=tools) == tool_prompt
-    # Multi tool calling
-    tools = [{
-        'name': 'get_current_temperature',
-        'description': 'Get current temperature at a location.',
-        'parameters': {
-            'type': 'object',
-            'properties': {
-                'location': {
-                    'type': 'string',
-                    'description': 'The location to get the temperature for, in the format '
-                    '\'City, State, Country\'.'
-                },
-                'unit': {
-                    'type': 'string',
-                    'enum': ['celsius', 'fahrenheit'],
-                    'description': 'The unit to return the temperature in.'
-                    ' Defaults to \'celsius\'.'
-                }
-            },
-            'required': ['location']
-        }
-    }, {
-        'name': 'get_temperature_date',
-        'description': 'Get temperature at a location and date.',
-        'parameters': {
-            'type': 'object',
-            'properties': {
-                'location': {
-                    'type': 'string',
-                    'description': 'The location to get the temperature for,'
-                    ' in the format \'City, State, Country\'.'
-                },
-                'date': {
-                    'type': 'string',
-                    'description': 'The date to get the temperature for,'
-                    ' in the format \'Year-Month-Day\'.'
-                },
-                'unit': {
-                    'type': 'string',
-                    'enum': ['celsius', 'fahrenheit'],
-                    'description': 'The unit to return the temperature in.'
-                    ' Defaults to \'celsius\'.'
-                }
-            },
-            'required': ['location', 'date']
-        }
-    }]
-    messages = [
-        dict(role='user',
-             content='Today is 2024-11-14, What\'s the temperature in'
-             ' San Francisco now? How about tomorrow?')
-    ]
-    tool_prompt = ('<|im_start|>system\nYou are Qwen, created by Alibaba '
-                   'Cloud. You are a helpful assistant.\n\n# Tools\n\nYou '
-                   'may call one or more functions to assist with the user '
-                   'query.\n\nYou are provided with function signatures '
-                   "within  XML tags:\n\n{\"type\": "
-                   "\"function\", \"function\": {\"name\": "
-                   "\"get_current_temperature\", \"description\": \"Get "
-                   "current temperature at a location.\", \"parameters\": {"
-                   "\"type\": \"object\", \"properties\": {\"location\": {"
-                   "\"type\": \"string\", \"description\": \"The location to "
-                   "get the temperature for, in the format 'City, State, "
-                   "Country'.\"}, \"unit\": {\"type\": \"string\", \"enum\": "
-                   "[\"celsius\", \"fahrenheit\"], \"description\": \"The "
-                   'unit to return the temperature in. Defaults to '
-                   "'celsius'.\"}}, \"required\": [\"location\"]}}}\n{"
-                   "\"type\": \"function\", \"function\": {\"name\": "
-                   "\"get_temperature_date\", \"description\": \"Get "
-                   "temperature at a location and date.\", \"parameters\": {"
-                   "\"type\": \"object\", \"properties\": {\"location\": {"
-                   "\"type\": \"string\", \"description\": \"The location to "
-                   "get the temperature for, in the format 'City, State, "
-                   "Country'.\"}, \"date\": {\"type\": \"string\", "
-                   "\"description\": \"The date to get the temperature for, "
-                   "in the format 'Year-Month-Day'.\"}, \"unit\": {\"type\": "
-                   "\"string\", \"enum\": [\"celsius\", \"fahrenheit\"], "
-                   "\"description\": \"The unit to return the temperature "
-                   "in. Defaults to 'celsius'.\"}}, \"required\": ["
-                   "\"location\", \"date\"]}}}\n\n\nFor each "
-                   'function call, return a json object with function name '
-                   'and arguments within  XML '
-                   "tags:\n\n{\"name\": , "
-                   "\"arguments\": "
-                   '}\n<|im_end|>\n<|im_start'
-                   "|>user\nToday is 2024-11-14, What's the temperature in "
-                   'San Francisco now? How about '
-                   'tomorrow?<|im_end|>\n<|im_start|>assistant\n')
-    assert model.messages2prompt(messages, tools=tools) == tool_prompt
-
-    messages.append(
-        dict(role='tool',
-             name='get_current_temperature',
-             content={
-                 'temperature': 26.1,
-                 'location': 'San Francisco, California, USA',
-                 'unit': 'celsius'
-             },
-             tool_call_id='0'))
-    messages.append(
-        dict(role='tool',
-             name='get_temperature_date',
-             content={
-                 'temperature': 25.9,
-                 'location': 'San Francisco, California, USA',
-                 'date': '2024-11-15',
-                 'unit': 'celsius'
-             },
-             tool_call_id='1'))
-    tool_prompt = ('<|im_start|>system\nYou are Qwen, created by Alibaba '
-                   'Cloud. You are a helpful assistant.\n\n# Tools\n\nYou '
-                   'may call one or more functions to assist with the user '
-                   'query.\n\nYou are provided with function signatures '
-                   "within  XML tags:\n\n{\"type\": "
-                   "\"function\", \"function\": {\"name\": "
-                   "\"get_current_temperature\", \"description\": \"Get "
-                   "current temperature at a location.\", \"parameters\": {"
-                   "\"type\": \"object\", \"properties\": {\"location\": {"
-                   "\"type\": \"string\", \"description\": \"The location to "
-                   "get the temperature for, in the format 'City, State, "
-                   "Country'.\"}, \"unit\": {\"type\": \"string\", \"enum\": "
-                   "[\"celsius\", \"fahrenheit\"], \"description\": \"The "
-                   'unit to return the temperature in. Defaults to '
-                   "'celsius'.\"}}, \"required\": [\"location\"]}}}\n{"
-                   "\"type\": \"function\", \"function\": {\"name\": "
-                   "\"get_temperature_date\", \"description\": \"Get "
-                   "temperature at a location and date.\", \"parameters\": {"
-                   "\"type\": \"object\", \"properties\": {\"location\": {"
-                   "\"type\": \"string\", \"description\": \"The location to "
-                   "get the temperature for, in the format 'City, State, "
-                   "Country'.\"}, \"date\": {\"type\": \"string\", "
-                   "\"description\": \"The date to get the temperature for, "
-                   "in the format 'Year-Month-Day'.\"}, \"unit\": {\"type\": "
-                   "\"string\", \"enum\": [\"celsius\", \"fahrenheit\"], "
-                   "\"description\": \"The unit to return the temperature "
-                   "in. Defaults to 'celsius'.\"}}, \"required\": ["
-                   "\"location\", \"date\"]}}}\n\n\nFor each "
-                   'function call, return a json object with function name '
-                   'and arguments within  XML '
-                   "tags:\n\n{\"name\": , "
-                   "\"arguments\": "
-                   '}\n<|im_end|>\n<|im_start'
-                   "|>user\nToday is 2024-11-14, What's the temperature in "
-                   'San Francisco now? How about '
-                   'tomorrow?<|im_end|>\n<|im_start|>user\n\n{'
-                   "'temperature': 26.1, 'location': 'San Francisco, "
-                   "California, USA', 'unit': "
-                   "'celsius'}\n\n\n{"
-                   "'temperature': 25.9, 'location': 'San Francisco, "
-                   "California, USA', 'date': '2024-11-15', 'unit': "
-                   "'celsius'}\n<|im_end|>\n<|im_start"
-                   '|>assistant\n')
-    assert model.messages2prompt(messages, tools=tools) == tool_prompt
-
-
-def test_qwen2d5_vl():
-    prompt = 'hello, can u introduce yourself'
-    model = MODELS.get('qwen2d5-vl')(capability='completion')
-    assert model.get_prompt(prompt, sequence_start=True) == prompt
-    assert model.get_prompt(prompt, sequence_start=False) == prompt
-
-    model = MODELS.get('qwen2d5-vl')(capability='chat')
-
-    messages = [dict(role='user', content='What\'s the temperature in San Francisco now?')]
-    res = ('<|im_start|>system\nYou are a helpful '
-           "assistant.<|im_end|>\n<|im_start|>user\nWhat's the "
-           'temperature in San Francisco '
-           'now?<|im_end|>\n<|im_start|>assistant\n')
-    assert model.messages2prompt(messages) == res
-
-    messages.append({'role': 'assistant', 'content': 'I don\'t know.'})
-    res = ('<|im_start|>system\nYou are a helpful '
-           "assistant.<|im_end|>\n<|im_start|>user\nWhat's the "
-           'temperature in San Francisco '
-           "now?<|im_end|>\n<|im_start|>assistant\nI don't "
-           'know.<|im_end|>\n<|im_start|>assistant\n')
-    assert model.messages2prompt(messages) == res
-
-
 def test_codellama_completion():
     model = MODELS.get('codellama')(capability='completion')
     prompt = """\
@@ -664,257 +264,6 @@ def test_codellama_others():
     assert model is None
 
 
-def test_deepseek():
-    model = MODELS.get('deepseek')()
-    messages = [{
-        'role': 'system',
-        'content': 'you are a helpful assistant'
-    }, {
-        'role': 'user',
-        'content': 'who are you'
-    }, {
-        'role': 'assistant',
-        'content': 'I am an AI'
-    }, {
-        'role': 'user',
-        'content': 'hi'
-    }]
-    from transformers import AutoTokenizer
-    tokenizer = AutoTokenizer.from_pretrained('deepseek-ai/DeepSeek-V2-Lite', trust_remote_code=True)
-    ref = tokenizer.apply_chat_template(messages, tokenize=False)
-    res = '<|begin▁of▁sentence|>' + model.messages2prompt(messages)
-    assert res.startswith(ref)
-
-
-def test_deepseek_coder():
-    model = MODELS.get('deepseek-coder')()
-    messages = [{
-        'role': 'system',
-        'content': 'you are a helpful assistant'
-    }, {
-        'role': 'user',
-        'content': 'who are you'
-    }, {
-        'role': 'assistant',
-        'content': 'I am an AI'
-    }, {
-        'role': 'user',
-        'content': 'hi'
-    }]
-    from transformers import AutoTokenizer
-    tokenizer = AutoTokenizer.from_pretrained('deepseek-ai/deepseek-coder-1.3b-instruct', trust_remote_code=True)
-    ref = tokenizer.apply_chat_template(messages, tokenize=False)
-    res = '<|begin▁of▁sentence|>' + model.messages2prompt(messages)
-    assert res.startswith(ref)
-
-
-def test_chatglm3():
-    model_path_and_name = 'THUDM/chatglm3-6b'
-    deduced_name = best_match_model(model_path_and_name)
-    assert deduced_name == 'chatglm3'
-    model = MODELS.get(deduced_name)()
-    messages = [{
-        'role': 'system',
-        'content': 'you are a helpful assistant'
-    }, {
-        'role': 'user',
-        'content': 'who are you'
-    }, {
-        'role': 'assistant',
-        'content': 'I am an AI'
-    }, {
-        'role': 'user',
-        'content': 'AGI is?'
-    }]
-    from transformers import AutoTokenizer
-    tokenizer = AutoTokenizer.from_pretrained(model_path_and_name, trust_remote_code=True)
-    ref = tokenizer.apply_chat_template(messages, tokenize=False)
-    res = model.messages2prompt(messages)
-    assert res.startswith(ref)
-
-
-def test_glm4():
-    model_path_and_name = 'THUDM/glm-4-9b-chat'
-    deduced_name = best_match_model(model_path_and_name)
-    assert deduced_name == 'glm4'
-
-    model = MODELS.get(deduced_name)()
-    # check stop words
-    assert model.stop_words == ['<|user|>', '<|endoftext|>', '<|observation|>']
-    messages = [{
-        'role': 'system',
-        'content': 'you are a helpful assistant'
-    }, {
-        'role': 'user',
-        'content': 'who are you'
-    }, {
-        'role': 'assistant',
-        'content': 'I am an AI'
-    }, {
-        'role': 'user',
-        'content': 'AGI is?'
-    }]
-    from transformers import AutoTokenizer
-    tokenizer = AutoTokenizer.from_pretrained(model_path_and_name, trust_remote_code=True)
-    ref = tokenizer.apply_chat_template(messages, tokenize=False)
-    res = model.messages2prompt(messages)
-    assert res.startswith(ref)
-
-
-def test_internvl_phi3():
-    assert best_match_model('OpenGVLab/InternVL-Chat-V1-5') == 'internvl-internlm2'
-    assert best_match_model('OpenGVLab/Mini-InternVL-Chat-2B-V1-5') == 'internvl-internlm2'
-
-    model_path_and_name = 'OpenGVLab/Mini-InternVL-Chat-4B-V1-5'
-    deduced_name = best_match_model(model_path_and_name)
-    assert deduced_name == 'internvl-phi3'
-
-    model = MODELS.get(deduced_name)()
-    messages = [{
-        'role': 'user',
-        'content': 'who are you'
-    }, {
-        'role': 'assistant',
-        'content': 'I am an AI'
-    }, {
-        'role': 'user',
-        'content': 'hi'
-    }]
-    res = model.messages2prompt(messages)
-    from huggingface_hub import hf_hub_download
-    hf_hub_download(repo_id=model_path_and_name, filename='conversation.py', local_dir='.')
-
-    try:
-        import os
-
-        from conversation import get_conv_template
-        template = get_conv_template('phi3-chat')
-        template.append_message(template.roles[0], messages[0]['content'])
-        template.append_message(template.roles[1], messages[1]['content'])
-        ref = template.get_prompt()
-        assert res.startswith(ref)
-        if os.path.exists('conversation.py'):
-            os.remove('conversation.py')
-    except ImportError:
-        pass
-
-
-def test_internvl2():
-    model = MODELS.get('internvl2-internlm2')()
-    messages = [{'role': 'user', 'content': 'who are you'}, {'role': 'assistant', 'content': 'I am an AI'}]
-    expected = '<|im_start|>system\n你是由上海人工智能实验室联合商汤科技开发的'\
-        '书生多模态大模型,英文名叫InternVL, 是一个有用无害的人工智能助手。'\
-        '<|im_end|><|im_start|>user\nwho are you<|im_end|><|im_start|>'\
-        'assistant\nI am an AI'
-    res = model.messages2prompt(messages)
-    assert res == expected
-
-
-def test_chemvlm():
-    deduced_name = best_match_model('AI4Chem/ChemVLM-8B')
-
-    assert deduced_name == 'internvl-internlm2'
-    model = MODELS.get(deduced_name)()
-    messages = [{'role': 'user', 'content': 'who are you'}, {'role': 'assistant', 'content': 'I am an AI'}]
-    expected = '<|im_start|>system\nYou are an AI assistant whose name is '\
-        'InternLM (书生·浦语).<|im_end|>\n<|im_start|>user\nwho are you'\
-        '<|im_end|>\n<|im_start|>assistant\nI am an AI'
-    res = model.messages2prompt(messages)
-    assert res == expected
-
-
-def test_codegeex4():
-    model_path_and_name = 'THUDM/codegeex4-all-9b'
-    deduced_name = best_match_model(model_path_and_name)
-    assert deduced_name == 'codegeex4'
-    model = MODELS.get(deduced_name)()
-    messages = [{
-        'role': 'system',
-        'content': 'you are a helpful assistant'
-    }, {
-        'role': 'user',
-        'content': 'who are you'
-    }, {
-        'role': 'assistant',
-        'content': 'I am an AI'
-    }, {
-        'role': 'user',
-        'content': 'AGI is?'
-    }]
-    from transformers import AutoTokenizer
-    tokenizer = AutoTokenizer.from_pretrained(model_path_and_name, trust_remote_code=True)
-    ref = tokenizer.apply_chat_template(messages, tokenize=False)
-    res = model.messages2prompt(messages)
-    assert res.startswith(ref)
-
-
-@pytest.mark.parametrize('model_path_and_name', [
-    'microsoft/Phi-3-mini-128k-instruct',
-    'microsoft/Phi-3-vision-128k-instruct',
-    'microsoft/Phi-3.5-mini-instruct',
-    'microsoft/Phi-3.5-vision-instruct',
-    'microsoft/Phi-3.5-MoE-instruct',
-])
-def test_phi3(model_path_and_name):
-    deduced_name = best_match_model(model_path_and_name)
-    assert deduced_name == 'phi-3'
-    model = MODELS.get(deduced_name)()
-    messages = [{
-        'role': 'system',
-        'content': 'you are a helpful assistant'
-    }, {
-        'role': 'user',
-        'content': 'who are you'
-    }, {
-        'role': 'assistant',
-        'content': 'I am an AI'
-    }, {
-        'role': 'user',
-        'content': 'AGI is?'
-    }]
-    from transformers import AutoTokenizer
-    tokenizer = AutoTokenizer.from_pretrained(model_path_and_name, trust_remote_code=True)
-    ref = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    res = model.messages2prompt(messages)
-    assert res.startswith(ref)
-
-
-@pytest.mark.parametrize('model_path_or_name', [
-    'deepseek-ai/DeepSeek-R1-Distill-Llama-8B',
-    'deepseek-ai/DeepSeek-R1-Distill-Llama-70B',
-    'deepseek-ai/DeepSeek-R1',
-    'deepseek-ai/DeepSeek-R1-Zero',
-    'deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B',
-    'deepseek-ai/DeepSeek-R1-Distill-Qwen-7B',
-    'deepseek-ai/DeepSeek-R1-Distill-Qwen-14B',
-    'deepseek-ai/DeepSeek-R1-Distill-Qwen-32B',
-    'deepseek-ai/DeepSeek-V3',
-])
-def test_deepseek_r1(model_path_or_name):
-    from transformers import AutoTokenizer
-
-    tokenizer = AutoTokenizer.from_pretrained(model_path_or_name, trust_remote_code=True)
-    deduced_name = best_match_model(model_path_or_name)
-    chat_template = MODELS.get(deduced_name)()
-
-    messages = [{
-        'role': 'system',
-        'content': 'you are a helpful assistant'
-    }, {
-        'role': 'user',
-        'content': 'who are you'
-    }, {
-        'role': 'assistant',
-        'content': 'I am an AI'
-    }, {
-        'role': 'user',
-        'content': 'AGI is?'
-    }]
-    ref = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    lm_res = chat_template.messages2prompt(messages)
-    assert ref == lm_res
-
-
 @pytest.mark.parametrize(
     'model_path_or_name',
     ['deepseek-ai/deepseek-vl2-tiny', 'deepseek-ai/deepseek-vl2-small', 'deepseek-ai/deepseek-vl2'])
@@ -944,36 +293,6 @@ def test_deepseek_vl2(model_path_or_name):
     assert ref == lm_res
 
 
-@pytest.mark.parametrize('model_path_or_name', [
-    'Qwen/QwQ-32B',
-    'Qwen/QwQ-32B-Preview',
-    'Qwen/QwQ-32B-AWQ',
-])
-def test_qwq(model_path_or_name):
-    from transformers import AutoTokenizer
-
-    tokenizer = AutoTokenizer.from_pretrained(model_path_or_name, trust_remote_code=True)
-    deduced_name = best_match_model(model_path_or_name)
-    chat_template = MODELS.get(deduced_name)()
-
-    messages = [{
-        'role': 'system',
-        'content': 'you are a helpful assistant'
-    }, {
-        'role': 'user',
-        'content': 'who are you'
-    }, {
-        'role': 'assistant',
-        'content': 'I am an AI'
-    }, {
-        'role': 'user',
-        'content': 'AGI is?'
-    }]
-    ref = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-    lm_res = chat_template.messages2prompt(messages)
-    assert ref == lm_res
-
-
 @pytest.mark.parametrize('model_path', ['Qwen/Qwen3-30B-A3B', 'Qwen/Qwen2.5-7B-Instruct'])
 @pytest.mark.parametrize('enable_thinking', [True, False, None])
 def test_qwen3(model_path, enable_thinking):
@@ -981,7 +300,8 @@ def test_qwen3(model_path, enable_thinking):
 
     tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
     chat_template_name = best_match_model(model_path)
-    chat_template = MODELS.get(chat_template_name)()
+    assert chat_template_name == 'hf'
+    chat_template = MODELS.get(chat_template_name)(model_path=model_path)
 
     messages = [{
         'role': 'system',
@@ -1018,7 +338,7 @@ def test_interns1(model_path, enable_thinking, has_user_sys):
         pytest.skip(reason=f'{model_path} not exists')
 
     chat_template_name = best_match_model(model_path)
-    chat_template = MODELS.get(chat_template_name)()
+    chat_template = MODELS.get(chat_template_name)(model_path=model_path)
 
     messages = [{
         'role': 'system',
@@ -1047,118 +367,45 @@ def test_interns1(model_path, enable_thinking, has_user_sys):
     assert ref == lm_res
 
 
-@pytest.mark.parametrize('model_path', ['internlm/Intern-S1'])
-@pytest.mark.parametrize('enable_thinking', [None, True, False])
-@pytest.mark.parametrize('has_user_sys', [True, False])
-def test_interns1_tools(model_path, enable_thinking, has_user_sys):
-    from transformers import AutoTokenizer
-    try:
-        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-    except OSError:
-        pytest.skip(reason=f'{model_path} not exists')
+@pytest.mark.parametrize('model_path', ['Qwen/Qwen1.5-7B-Chat', 'Qwen/Qwen2.5-7B-Instruct', 'Qwen/Qwen3-8B'])
+def test_HFChatTemplate_get_prompt_sequence_start_False_Qwen(model_path):
+    model = MODELS.get('hf')(model_path=model_path)
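+    # the HF-backed template is expected to expose Qwen's end-of-turn token as its stop word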
+    assert model.stop_words == '<|im_end|>'
 
-    chat_template_name = best_match_model(model_path)
-    chat_template = MODELS.get(chat_template_name)()
-
-    tools = [
-        {
-            'type': 'function',
-            'function': {
-                'name': 'find_user_id_by_name_zip',
-                'description':
-                'Find user id by first name, last name, and zip code. If the user is not found, the function will return an error message. By default, find user id by email, and only call this function if the user is not found by email or cannot remember email.',  # noqa: E501
-                'parameters': {
-                    'type': 'object',
-                    'properties': {
-                        'first_name': {
-                            'type': 'string',
-                            'description': "The first name of the customer, such as 'John'."
-                        },
-                        'last_name': {
-                            'type': 'string',
-                            'description': "The last name of the customer, such as 'Doe'."
-                        },
-                        'zip': {
-                            'type': 'string',
-                            'description': "The zip code of the customer, such as '12345'."
-                        }
-                    },
-                    'required': ['first_name', 'last_name', 'zip']
-                }
-            }
-        },
-        {
-            'type': 'function',
-            'function': {
-                'name': 'get_order_details',
-                'description': 'Get the status and details of an order.',
-                'parameters': {
-                    'type': 'object',
-                    'properties': {
-                        'order_id': {
-                            'type':
-                            'string',
-                            'description':
-                            "The order id, such as '#W0000000'. Be careful there is a '#' symbol at the beginning of the order id."  # noqa: E501
-                        }
-                    },
-                    'required': ['order_id']
-                }
-            }
-        }
-    ]
-    messages = [
-        {
-            'role': 'system',
-            'content': 'You are a helpful assistant'
-        },
-        {
-            'role': 'user',
-            'content': "Hi there! I'm looking to return a couple of items from a recent order."
-        },
-        {
-            'role':
-            'assistant',
-            'content':
-            'Could you please provide your email address associated with the account, or share your first name, last name, and zip code?',  # noqa: E501
-            'reasoning_content':
-            'Okay, the user wants to return some items from a recent order. Let me start by authenticating their identity...'  # noqa: E501
-        },
-        {
-            'role': 'user',
-            'content': 'Sure, my name is Omar Anderson and my zip code is 19031.'
-        },
-        {
-            'role':
-            'assistant',
-            'content':
-            '',
-            'reasoning_content':
-            "Since he didn't provide an email, I should use the find_user_id_by_name_zip function. Let me...",  # noqa: E501
-            'tool_calls': [{
-                'function': {
-                    'arguments': '{"first_name": "Omar", "last_name": "Anderson", "zip": "19031"}',
-                    'name': 'find_user_id_by_name_zip'
-                },
-                'id': 'chatcmpl-tool-a9f439084bfc4af29fee2e5105050a38',
-                'type': 'function'
-            }]
-        },
-        {
-            'content': 'omar_anderson_3203',
-            'name': 'find_user_id_by_name_zip',
-            'role': 'tool'
-        }
-    ]
-    if not has_user_sys:
-        messages = messages[1:]
-    if enable_thinking is None:
-        ref = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True, tools=tools)
-    else:
-        ref = tokenizer.apply_chat_template(messages,
-                                            tokenize=False,
-                                            add_generation_prompt=True,
-                                            tools=tools,
-                                            enable_thinking=enable_thinking)
-    lm_res = chat_template.messages2prompt(messages, enable_thinking=enable_thinking, tools=tools)
-    assert ref == lm_res
+    prompt = 'How to apply chat template using transformers?'
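+    # with sequence_start=False only the user turn and the generation prompt are expected,
+    # i.e. no BOS token and no default system message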
+    assert model.get_prompt(prompt,
+                            sequence_start=False) == f'<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n'
+
+
+@pytest.mark.parametrize('model_path', ['internlm/Intern-S1', 'internlm/Intern-S1-mini'])
+def test_InternS1_thinking(model_path):
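+    # placeholder: thinking-mode checks for Intern-S1 via the 'hf' template are not implemented yet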
+    pass
+
+
+@pytest.mark.parametrize('model_path', [''])
+def test_InternVL(model_path):
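+    # placeholder: InternVL coverage via the 'hf' template is not implemented yet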
+    pass
+
+
+@pytest.mark.parametrize('model_path', [''])
+def test_HFChatTemplate_llama(model_path):
+    # TODO: add a Hugging Face access token to the GitHub CI so the gated Llama checkpoints can be downloaded
+    pass
+
+
+@pytest.mark.parametrize('model_path', ['deepseek-ai/DeepSeek-V3'])
+def test_HFChatTemplate_DeepSeek_V3(model_path):
+    model = MODELS.get('hf')(model_path=model_path)
+    assert model.stop_words == '<|end▁of▁sentence|>'
+
+    prompt = 'How to apply chat template using transformers?'
+    assert model.get_prompt(prompt, sequence_start=False) == f'<|User|>{prompt}<|Assistant|>'
+
+
+@pytest.mark.parametrize('model_path', ['deepseek-ai/DeepSeek-R1'])
+def test_HFChatTemplate_DeepSeek_thinking(model_path):
+    model = MODELS.get('hf')(model_path=model_path)
+    assert model.stop_words == '<|end▁of▁sentence|>'
+
+    prompt = 'How to apply chat template using transformers?'
+    assert model.get_prompt(prompt, sequence_start=False) == f'<|User|>{prompt}<|Assistant|>\n'
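+
+
+@pytest.mark.parametrize('model_path', ['deepseek-ai/DeepSeek-R1'])
+def test_HFChatTemplate_messages2prompt_matches_tokenizer(model_path):
+    # A minimal sketch mirroring the removed per-model tests: it assumes the 'hf'
+    # template's messages2prompt defers to tokenizer.apply_chat_template with
+    # add_generation_prompt=True; the test name and the strict equality check are
+    # illustrative assumptions, not guarantees about the class's behavior.
+    from transformers import AutoTokenizer
+
+    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    model = MODELS.get('hf')(model_path=model_path)
+
+    messages = [{
+        'role': 'user',
+        'content': 'who are you'
+    }, {
+        'role': 'assistant',
+        'content': 'I am an AI'
+    }, {
+        'role': 'user',
+        'content': 'AGI is?'
+    }]
+    ref = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    assert model.messages2prompt(messages) == ref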