diff --git a/examples/tool_chat_template_deepseekv31.jinja b/examples/tool_chat_template_deepseekv31.jinja
new file mode 100644
index 000000000000..863be69d60b6
--- /dev/null
+++ b/examples/tool_chat_template_deepseekv31.jinja
@@ -0,0 +1,91 @@
+{% if not add_generation_prompt is defined %}
+ {% set add_generation_prompt = false %}
+{% endif %}
+{% if not thinking is defined %}
+ {% set thinking = false %}
+{% endif %}
+{% set ns = namespace(is_first=false, is_tool=false, system_prompt='', is_first_sp=true, is_last_user=false) %}
+{%- for message in messages %}
+ {%- if message['role'] == 'system' %}
+ {%- if ns.is_first_sp %}
+ {% set ns.system_prompt = ns.system_prompt + message['content'] %}
+ {% set ns.is_first_sp = false %}
+ {%- else %}
+ {% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}
+ {%- endif %}
+ {%- endif %}
+{%- endfor %}
+
+{% if tools is defined and tools is not none %}
+ {% set tool_ns = namespace(text='## Tools\nYou have access to the following tools:\n') %}
+ {% for tool in tools %}
+ {% set tool_ns.text = tool_ns.text + '\n### ' + tool.function.name + '\nDescription: ' + tool.function.description + '\n\nParameters: ' + (tool.function.parameters | tojson) + '\n' %}
+ {% endfor %}
+ {% set tool_ns.text = tool_ns.text + "\nIMPORTANT: ALWAYS adhere to this exact format for tool use:\n<|tool▁calls▁begin|><|tool▁call▁begin|>tool_call_name<|tool▁sep|>tool_call_arguments<|tool▁call▁end|>{{additional_tool_calls}}<|tool▁calls▁end|>\n\nWhere:\n\n- `tool_call_name` must be an exact match to one of the available tools\n- `tool_call_arguments` must be valid JSON that strictly follows the tool's Parameters Schema\n- For multiple tool calls, chain them directly without separators or spaces\n" %}
+ {% set ns.system_prompt = ns.system_prompt + '\n\n' + tool_ns.text %}
+{% endif %}
+
+{{ bos_token }}{{ ns.system_prompt }}
+{%- for message in messages %}
+ {%- if message['role'] == 'user' %}
+ {%- set ns.is_tool = false -%}
+ {%- set ns.is_first = false -%}
+ {%- set ns.is_last_user = true -%}
+ {{'<|User|>' + message['content']}}
+ {%- endif %}
+ {%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}
+ {%- if ns.is_last_user %}
+ {{'<|Assistant|>'}}
+ {%- endif %}
+ {%- set ns.is_last_user = false -%}
+ {%- set ns.is_first = false %}
+ {%- set ns.is_tool = false -%}
+ {%- for tool in message['tool_calls'] %}
+ {%- if not ns.is_first %}
+ {%- if message['content'] is none %}
+ {{'<|tool▁calls▁begin|><|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments']|tojson + '<|tool▁call▁end|>'}}
+ {%- else %}
+ {{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments']|tojson + '<|tool▁call▁end|>'}}
+ {%- endif %}
+ {%- set ns.is_first = true -%}
+ {%- else %}
+ {{'<|tool▁call▁begin|>'+ tool['function']['name'] + '<|tool▁sep|>' + tool['function']['arguments']|tojson + '<|tool▁call▁end|>'}}
+ {%- endif %}
+ {%- endfor %}
+ {{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}
+ {%- endif %}
+ {%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none) %}
+ {%- if ns.is_last_user %}
+ {{'<|Assistant|>'}}
+ {%- if message['prefix'] is defined and message['prefix'] and thinking %}
+ {{'<think>'}}
+ {%- else %}
+ {{'</think>'}}
+ {%- endif %}
+ {%- endif %}
+ {%- set ns.is_last_user = false -%}
+ {%- if ns.is_tool %}
+ {{message['content'] + '<|end▁of▁sentence|>'}}
+ {%- set ns.is_tool = false -%}
+ {%- else %}
+ {%- set content = message['content'] -%}
+ {%- if '</think>' in content %}
+ {%- set content = content.split('</think>', 1)[1] -%}
+ {%- endif %}
+ {{content + '<|end▁of▁sentence|>'}}
+ {%- endif %}
+ {%- endif %}
+ {%- if message['role'] == 'tool' %}
+ {%- set ns.is_last_user = false -%}
+ {%- set ns.is_tool = true -%}
+ {{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}
+ {%- endif %}
+{%- endfor -%}
+{%- if add_generation_prompt and ns.is_last_user and not ns.is_tool %}
+ {{'<|Assistant|>'}}
+ {%- if not thinking %}
+ {{'</think>'}}
+ {%- else %}
+ {{'<think>'}}
+ {%- endif %}
+{% endif %}
diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index f04902ae1c76..16edb9134d80 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -38,6 +38,8 @@
from vllm.multimodal.utils import MediaConnector
from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer
+from vllm.utils import random_uuid
+
logger = init_logger(__name__)
@@ -1005,3 +1007,11 @@ def apply_mistral_chat_template(
messages=messages,
**kwargs,
)
+
+
+def make_tool_call_id(id_type: str = "random", func_name=None, idx=None):
+ if id_type == "kimi_k2":
+ return f"functions.{func_name}:{idx}"
+ else:
+ # by default return random
+ return f"chatcmpl-tool-{random_uuid()}"
diff --git a/vllm/entrypoints/openai/tool_parsers/__init__.py b/vllm/entrypoints/openai/tool_parsers/__init__.py
index d1c3afa64b96..4e646ff7e3fc 100644
--- a/vllm/entrypoints/openai/tool_parsers/__init__.py
+++ b/vllm/entrypoints/openai/tool_parsers/__init__.py
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
from .abstract_tool_parser import ToolParser, ToolParserManager
+from .deepseekv31_tool_parser import DeepSeekV31ToolParser
from .granite_20b_fc_tool_parser import Granite20bFCToolParser
from .granite_tool_parser import GraniteToolParser
from .hermes_tool_parser import Hermes2ProToolParser
@@ -14,5 +15,5 @@
"ToolParser", "ToolParserManager", "Granite20bFCToolParser",
"GraniteToolParser", "Hermes2ProToolParser", "MistralToolParser",
"Internlm2ToolParser", "Llama3JsonToolParser", "JambaToolParser",
- "PythonicToolParser"
+ "PythonicToolParser", "DeepSeekV31ToolParser",
]
diff --git a/vllm/entrypoints/openai/tool_parsers/deepseekv31_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/deepseekv31_tool_parser.py
new file mode 100644
index 000000000000..ff9188190f3f
--- /dev/null
+++ b/vllm/entrypoints/openai/tool_parsers/deepseekv31_tool_parser.py
@@ -0,0 +1,367 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from collections.abc import Sequence
+from typing import Union
+
+import regex as re
+
+from vllm.entrypoints.chat_utils import make_tool_call_id
+from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
+ DeltaFunctionCall, DeltaMessage,
+ DeltaToolCall,
+ ExtractedToolCallInformation,
+ FunctionCall, ToolCall)
+from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
+ ToolParser, ToolParserManager)
+from vllm.logger import init_logger
+from vllm.transformers_utils.tokenizer import AnyTokenizer
+
+logger = init_logger(__name__)
+
+
+@ToolParserManager.register_module("deepseek_v31")
+class DeepSeekV31ToolParser(ToolParser):
+
+ def __init__(self, tokenizer: AnyTokenizer):
+ super().__init__(tokenizer)
+
+ self.current_tool_name_sent: bool = False
+ self.prev_tool_call_arr: list[dict] = []
+ self.current_tool_id: int = -1
+ self.streamed_args_for_tool: list[str] = (
+ []) # map what has been streamed for each tool so far to a list
+
+ self.tool_calls_start_token: str = "<|tool▁calls▁begin|>"
+ self.tool_calls_end_token: str = "<|tool▁calls▁end|>"
+
+ self.tool_call_start_token: str = "<|tool▁call▁begin|>"
+ self.tool_call_end_token: str = "<|tool▁call▁end|>"
+
+ self.tool_call_regex = re.compile(
+ r"<\|tool▁call▁begin\|>(?P<function_name>.*?)<\|tool▁sep\|>(?P<function_arguments>.*?)<\|tool▁call▁end\|>"
+ )
+
+ self.stream_tool_call_portion_regex = re.compile(
+ r"(?P<function_name>.*)<\|tool▁sep\|>(?P<function_arguments>.*)")
+
+ self.stream_tool_call_name_regex = re.compile(
+ r"(?P<function_name>.*)<\|tool▁sep\|>")
+
+ if not self.model_tokenizer:
+ raise ValueError(
+ "The model tokenizer must be passed to the ToolParser "
+ "constructor during construction.")
+ self.tool_calls_start_token_id = self.vocab.get(
+ self.tool_calls_start_token)
+ self.tool_calls_end_token_id = self.vocab.get(
+ self.tool_calls_end_token)
+
+ self.tool_call_start_token_id = self.vocab.get(
+ self.tool_call_start_token)
+ self.tool_call_end_token_id = self.vocab.get(self.tool_call_end_token)
+
+ if (self.tool_calls_start_token_id is None
+ or self.tool_calls_end_token_id is None):
+ raise RuntimeError(
+ "DeepSeek-V3.1 Tool parser could not locate tool call "
+ "start/end tokens in the tokenizer!")
+
+ def extract_tool_calls(
+ self,
+ model_output: str,
+ request: ChatCompletionRequest,
+ ) -> ExtractedToolCallInformation:
+
+ # sanity check; avoid unnecessary processing
+ if self.tool_calls_start_token not in model_output:
+ return ExtractedToolCallInformation(tools_called=False,
+ tool_calls=[],
+ content=model_output)
+
+ else:
+ try:
+ # findall returns one (name, arguments) tuple per well-formed
+ # tool-call span delimited by the tool_call start/end tokens;
+ # text outside those spans (e.g. leading content before the
+ # first call) is handled separately below
+ function_call_tuples = self.tool_call_regex.findall(
+ model_output)
+
+ tool_calls = []
+ for match in function_call_tuples:
+ function_name, function_args = match
+ tool_calls.append(
+ ToolCall(
+ type="function",
+ function=FunctionCall(name=function_name,
+ arguments=function_args),
+ ))
+
+ content = model_output[:model_output.
+ find(self.tool_calls_start_token)]
+ return ExtractedToolCallInformation(
+ tools_called=True,
+ tool_calls=tool_calls,
+ content=content if content else None,
+ )
+
+ except Exception:
+ logger.exception(
+ "Error in extracting tool call from response.")
+ return ExtractedToolCallInformation(tools_called=False,
+ tool_calls=[],
+ content=model_output)
+
+ def extract_tool_calls_streaming(
+ self,
+ previous_text: str,
+ current_text: str,
+ delta_text: str,
+ previous_token_ids: Sequence[int],
+ current_token_ids: Sequence[int],
+ delta_token_ids: Sequence[int],
+ request: ChatCompletionRequest,
+ ) -> Union[DeltaMessage, None]:
+
+ logger.debug("delta_text: %s", delta_text)
+ logger.debug("delta_token_ids: %s", delta_token_ids)
+ # check to see if we should be streaming a tool call - is there a
+ if self.tool_calls_start_token_id not in current_token_ids:
+ logger.debug("No tool call tokens found!")
+ return DeltaMessage(content=delta_text)
+ delta_text = delta_text.replace(self.tool_calls_start_token,
+ "").replace(self.tool_calls_end_token,
+ "")
+ try:
+
+ # figure out where we are in the parsing by counting tool call
+ # start & end tags
+ prev_tool_start_count = previous_token_ids.count(
+ self.tool_call_start_token_id)
+ prev_tool_end_count = previous_token_ids.count(
+ self.tool_call_end_token_id)
+ cur_tool_start_count = current_token_ids.count(
+ self.tool_call_start_token_id)
+ cur_tool_end_count = current_token_ids.count(
+ self.tool_call_end_token_id)
+ tool_call_portion = None
+ text_portion = None
+
+ # case: if we're generating text, OR rounding out a tool call
+ if (cur_tool_start_count == cur_tool_end_count
+ and prev_tool_end_count == cur_tool_end_count
+ and self.tool_call_end_token not in delta_text):
+ logger.debug("Generating text content! skipping tool parsing.")
+ return DeltaMessage(content=delta_text)
+
+ if self.tool_call_end_token in delta_text:
+ logger.debug("tool_call_end_token in delta_text")
+ full_text = current_text + delta_text
+ tool_call_portion = full_text.split(
+ self.tool_call_start_token)[-1].split(
+ self.tool_call_end_token)[0].rstrip()
+ delta_text = delta_text.split(
+ self.tool_call_end_token)[0].rstrip()
+ text_portion = delta_text.split(
+ self.tool_call_end_token)[-1].lstrip()
+
+ # case -- we're starting a new tool call
+ if (cur_tool_start_count > cur_tool_end_count
+ and cur_tool_start_count > prev_tool_start_count):
+ if len(delta_token_ids) > 1:
+ tool_call_portion = current_text.split(
+ self.tool_call_start_token)[-1]
+ else:
+ tool_call_portion = None
+ delta = None
+
+ text_portion = None
+
+ # set cursors and state appropriately
+ self.current_tool_id += 1
+ self.current_tool_name_sent = False
+ self.streamed_args_for_tool.append("")
+ logger.debug("Starting on a new tool %s", self.current_tool_id)
+
+ # case -- we're updating an existing tool call
+ elif (cur_tool_start_count > cur_tool_end_count
+ and cur_tool_start_count == prev_tool_start_count):
+
+ # get the portion of the text that's the tool call
+ tool_call_portion = current_text.split(
+ self.tool_call_start_token)[-1]
+ text_portion = None
+
+ # case -- the current tool call is being closed.
+ elif (cur_tool_start_count == cur_tool_end_count
+ and cur_tool_end_count >= prev_tool_end_count):
+ if self.prev_tool_call_arr is None or len(
+ self.prev_tool_call_arr) == 0:
+ logger.debug(
+ "attempting to close tool call, but no tool call")
+ return None
+ diff = self.prev_tool_call_arr[self.current_tool_id].get(
+ "arguments")
+ if diff:
+ diff = (diff.encode("utf-8").decode("unicode_escape")
+ if isinstance(diff, str) else diff)
+ if '"}' not in delta_text:
+ return None
+ end_loc = delta_text.rindex('"}')
+ diff = delta_text[:end_loc] + '"}'
+ logger.debug(
+ "Finishing tool and found diff that had not "
+ "been streamed yet: %s",
+ diff,
+ )
+ self.streamed_args_for_tool[self.current_tool_id] += diff
+ return DeltaMessage(tool_calls=[
+ DeltaToolCall(
+ index=self.current_tool_id,
+ function=DeltaFunctionCall(
+ arguments=diff).model_dump(exclude_none=True),
+ )
+ ])
+
+ # case -- otherwise we're just generating text
+ else:
+ text = delta_text.replace(self.tool_call_start_token, "")
+ text = text.replace(self.tool_call_end_token, "")
+ delta = DeltaMessage(tool_calls=[], content=text)
+ return delta
+
+ current_tool_call = dict()
+ if tool_call_portion:
+ current_tool_call_matches = (
+ self.stream_tool_call_portion_regex.match(
+ tool_call_portion))
+ if current_tool_call_matches:
+ tool_name, tool_args = current_tool_call_matches.groups()
+ current_tool_call["name"] = tool_name
+ current_tool_call["arguments"] = tool_args
+ else:
+ current_tool_call_name_matches = (
+ self.stream_tool_call_name_regex.match(
+ tool_call_portion))
+ if current_tool_call_name_matches:
+ tool_name = current_tool_call_name_matches.group(1)
+ current_tool_call["name"] = tool_name
+ current_tool_call["arguments"] = ""
+ else:
+ logger.debug("Not enough token")
+ return None
+
+ # case - we haven't sent the tool name yet. If it's available, send
+ # it. otherwise, wait until it's available.
+ if not self.current_tool_name_sent:
+ if current_tool_call is None:
+ return None
+ function_name: Union[str, None] = current_tool_call.get("name")
+ if function_name:
+ self.current_tool_name_sent = True
+ return DeltaMessage(tool_calls=[
+ DeltaToolCall(
+ index=self.current_tool_id,
+ type="function",
+ id=make_tool_call_id(),
+ function=DeltaFunctionCall(
+ name=function_name).model_dump(
+ exclude_none=True),
+ )
+ ])
+ else:
+ return None
+
+ # case -- otherwise, send the tool call delta
+
+ # if the tool call portion is None, send the delta as text
+ if tool_call_portion is None:
+ # if there's text but not tool calls, send that -
+ # otherwise None to skip chunk
+ delta = (DeltaMessage(
+ content=delta_text) if text_portion is not None else None)
+ return delta
+
+ # now, the nitty-gritty of tool calls
+ # now we have the portion to parse as tool call.
+
+ logger.debug("Trying to parse current tool call with ID %s",
+ self.current_tool_id)
+
+ # if we're starting a new tool call, push an empty object in as
+ # a placeholder for the arguments
+ if len(self.prev_tool_call_arr) <= self.current_tool_id:
+ self.prev_tool_call_arr.append({})
+
+ # main logic for tool parsing here - compare prev. partially-parsed
+ # JSON to the current partially-parsed JSON
+ prev_arguments = self.prev_tool_call_arr[self.current_tool_id].get(
+ "arguments")
+ cur_arguments = current_tool_call.get("arguments")
+
+ logger.debug("diffing old arguments: %s", prev_arguments)
+ logger.debug("against new ones: %s", cur_arguments)
+
+ # case -- no arguments have been created yet. skip sending a delta.
+ if not cur_arguments and not prev_arguments:
+ logger.debug("Skipping text %s - no arguments", delta_text)
+ delta = None
+
+ # case -- prev arguments are defined, but non are now.
+ # probably impossible, but not a fatal error - just keep going
+ elif not cur_arguments and prev_arguments:
+ logger.error("should be impossible to have arguments reset "
+ "mid-call. skipping streaming anything.")
+ delta = None
+
+ # case -- we now have the first info about arguments available from
+ # autocompleting the JSON
+ elif cur_arguments and not prev_arguments:
+
+ delta = DeltaMessage(tool_calls=[
+ DeltaToolCall(
+ index=self.current_tool_id,
+ function=DeltaFunctionCall(
+ arguments=cur_arguments).model_dump(
+ exclude_none=True),
+ )
+ ])
+ self.streamed_args_for_tool[
+ self.current_tool_id] = cur_arguments
+
+ # last case -- we have an update to existing arguments.
+ elif cur_arguments and prev_arguments:
+ if (isinstance(delta_text, str)
+ and cur_arguments != prev_arguments
+ and len(cur_arguments) > len(prev_arguments)
+ and cur_arguments.startswith(prev_arguments)):
+ delta_arguments = cur_arguments[len(prev_arguments):]
+ logger.debug("got diff %s", delta_text)
+
+ delta = DeltaMessage(tool_calls=[
+ DeltaToolCall(
+ index=self.current_tool_id,
+ function=DeltaFunctionCall(
+ arguments=delta_arguments).model_dump(
+ exclude_none=True),
+ )
+ ])
+ self.streamed_args_for_tool[
+ self.current_tool_id] = cur_arguments
+ else:
+ delta = None
+
+ # handle saving the state for the current tool into
+ # the "prev" list for use in diffing for the next iteration
+ if self.current_tool_id == len(self.prev_tool_call_arr) - 1:
+ self.prev_tool_call_arr[
+ self.current_tool_id] = current_tool_call
+ else:
+ self.prev_tool_call_arr.append(current_tool_call)
+
+ return delta
+
+ except Exception:
+ logger.exception("Error trying to handle streaming tool call.")
+ return None # do not stream a delta. skip this token ID.