Skip to content

Commit 9aac495

Browse files
committed
Update: add ToolParser and MoE config for Hunyuan A13B (vllm-project#20820)
1 parent 5de0883 commit 9aac495

File tree

5 files changed

+646
-1
lines changed

5 files changed

+646
-1
lines changed

benchmarks/kernels/benchmark_moe.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,11 @@ def main(args: argparse.Namespace):
585585
topk = config.num_experts_per_tok
586586
intermediate_size = config.moe_intermediate_size
587587
shard_intermediate_size = 2 * intermediate_size // args.tp_size
588+
elif config.architectures[0] in ("HunYuanMoEV1ForCausalLM"):
589+
E = config.num_experts
590+
topk = config.moe_topk[0]
591+
intermediate_size = config.moe_intermediate_size[0]
592+
shard_intermediate_size = 2 * intermediate_size // args.tp_size
588593
else:
589594
# Support for llama4
590595
config = config.get_text_config()
@@ -741,3 +746,4 @@ def _distribute(method: str, inputs: list[Any]) -> list[Any]:
741746
args = parser.parse_args()
742747

743748
main(args)
749+
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
{# Chat template for Hunyuan A13B. Two modes: with `tools` (function-calling
   prompt, Chinese instructions, <tool_calls>/<tool_response> markup) and
   without (plain chat). NOTE(review): assumes rendering with
   trim_blocks/lstrip_blocks (HF chat-template defaults) — confirm. #}
{% set loop_messages = messages %}
{% if tools %}
{% set weekday_map = {'Monday': '星期一', 'Tuesday': '星期二', 'Wednesday': '星期三', 'Thursday': '星期四', 'Friday': '星期五', 'Saturday': '星期六', 'Sunday': '星期日'} %}
{% set weekday_cn = weekday_map[strftime_now('%A')] %}
{% set datetime_str = strftime_now('%Y-%m-%d %H:%M:%S') %}
{% set datetime_str = datetime_str + ' ' + weekday_cn %}
{% for message in loop_messages %}
{% if 'content' in message %}
{% set content = message['content'] %}
{% else %}
{% set content = '' %}
{% endif %}
{% if loop.index0 == 0 %}
{% set content_tmp = '你是一位函数组合专家。你会得到一个问题和一组可能的函数。根据问题,你需要进行一个或多个函数/工具调用以实现目的。
如果没有一个函数可以使用,请直接使用自然语言回复用户,以助手:开头。
如果给定的问题缺少函数所需的参数,请使用自然语言进行提问,向用户询问必要信息,以助手:开头。
如果调用结果已经足够回答用户问题,请对历史结果进行总结,使用自然语言回复用户,以助手:开头。
你应该只在工具调用部分返回函数调用。如果你决定调用任何函数,你必须将其格式化为<tool_calls>[{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...]</tool_calls>。你不应该在回复中包含任何其他文本。以下是你可以调用的函数列表,格式为JSON。
' %}
{% set content_tmp = content_tmp + '
' + tools | tojson + '
' %}
{% if message['role'] == 'system' %}
{% set content_tmp = content_tmp + '
额外要求:
' + content + '

如果你决定返回函数调用,请将其格式化为<tool_calls>[{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...]</tool_calls>,不得包含其他文本。如果额外要求里有格式要求,请忽略,以此处为准。
否则,请参考开头说的三种情况,以助手:开头进行回复。

如果额外要求里有时间信息,就以额外要求里的时间为准,否则,参考当前时间:' + datetime_str %}
{% set content = '<|startoftext|>' + content_tmp + '<|extra_4|>' %}
{% elif message['role'] == 'user' %}
{% set content_tmp = content_tmp + '
如果你决定返回函数调用,请将其格式化为<tool_calls>[{"name": "func_name1", "arguments": {"argument1": "value1", "argument2": "value2"}},...]</tool_calls>,不得包含其他文本。
否则,请参考开头说的三种情况,以助手:开头进行回复。

当前时间:' + datetime_str %}
{% set content_tmp = '<|startoftext|>' + content_tmp + '<|extra_4|>'%}
{% set content = content_tmp + '用户:' + content + '<|extra_0|>' %}
{% endif %}
{% else %}
{% if message['role'] == 'user' %}
{% set content = '用户:' + content + '<|extra_0|>' %}
{% elif message['role'] == 'assistant' %}
{% if 'tool_calls' in message %}
{% set tool_calls = message['tool_calls'] %}
{% set ns = namespace(tool_calls="[") %}
{% for tool_call in tool_calls %}
{% set function = tool_call['function'] %}
{% set name = function['name'] %}
{% set ns.tool_calls = ns.tool_calls + '{"name": "' + name + '", '%}
{% set arguments = function['arguments'] %}
{% if arguments is not string %}
{% set arguments = arguments | tojson %}
{% endif %}
{% set ns.tool_calls = ns.tool_calls + '"arguments": ' + arguments + '}' %}
{% if not loop.last %}
{% set ns.tool_calls = ns.tool_calls + ', '%}
{% endif %}
{% endfor %}
{% set ns.tool_calls = ns.tool_calls + ']' %}
{% set content = content + '<tool_calls>' + ns.tool_calls + '</tool_calls>' %}
{% else %}
{% set content = '助手:' + content %}
{% endif %}
{% set content = content + '<|eos|>' %}
{% elif message['role'] == 'tool' %}
{% if content is not string %}
{# FIX: was `{set content = content | tojson }` — an invalid tag that Jinja
   emitted verbatim instead of serializing non-string tool output. #}
{% set content = content | tojson %}
{% endif %}
{% set content = '<tool_response>' + content + '</tool_response>' %}
{% set content = content + '<|extra_0|>' %}
{% endif %}
{% endif %}
{{- content -}}
{% endfor %}
{% else %}
{% set context = {'has_head': true} %}
{% for message in loop_messages %}
{% if 'content' in message %}
{% set content = message['content'] %}
{% else %}
{% set content = '' %}
{% endif %}
{% if loop.index0 == 0 %}
{% if content == '' %}
{% set _ = context.update({'has_head': false}) %}
{% elif message['role'] == 'system' %}
{% set content = '<|startoftext|>' + content + '<|extra_4|>' %}
{% endif %}
{% endif %}
{% if message['role'] == 'user' %}
{% if loop.index0 == 1 and not context.has_head %}
{% set content = '<|startoftext|>' + content %}
{% endif %}
{% if loop.index0 == 1 and context.has_head %}
{% set content = content + '<|extra_0|>' %}
{% else %}
{% set content = '<|startoftext|>' + content + '<|extra_0|>' %}
{% endif %}
{% elif message['role'] == 'assistant' %}
{% set content = content + '<|eos|>' %}
{% elif message['role'] == 'tool' %}
{% set content = content + '<|extra_0|>' %}
{% endif %}
{{- content -}}
{% endfor %}
{% endif %}
{%- if enable_thinking is defined and enable_thinking is false %}
{{- '<think>\n\n</think>\n' }}
{%- endif %}
113+
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
# ruff: noqa: E501
4+
5+
import json
6+
from unittest.mock import MagicMock
7+
8+
import pytest
9+
10+
from tests.entrypoints.openai.tool_parsers.utils import (
11+
run_tool_extraction, run_tool_extraction_streaming)
12+
from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
13+
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
14+
15+
16+
def make_tool_call(name, arguments):
    """Build a function-type ToolCall with JSON-encoded arguments."""
    serialized_args = json.dumps(arguments)
    call = FunctionCall(name=name, arguments=serialized_args)
    return ToolCall(type="function", function=call)
20+
21+
22+
# TODO: add reason prefix and suffix.
23+
24+
25+
@pytest.mark.parametrize(
    "model_output,expected_tool_calls,expected_content",
    [
        # No tool call
        ("How can I help you today?", [], "How can I help you today?"),
        # Single tool call, no content
        (
            "<tool_calls>[{\"name\": \"get_weather\", \"arguments\": {\"city\": \"San Francisco\", \"metric\": \"celsius\"}}]</tool_calls>", #noqa: E501
            [
                make_tool_call("get_weather", {
                    "city": "San Francisco",
                    "metric": "celsius"
                })
            ],
            None),
        # Multiple tool calls
        (
            "<tool_calls>[{\"name\": \"get_weather\", \"arguments\": {\"city\": \"San Francisco\", \"metric\": \"celsius\"}}, {\"name\": \"register_user\", \"arguments\": {\"name\": \"John Doe\", \"age\": 37, \"address\": {\"city\": \"San Francisco\", \"state\": \"CA\"}, \"role\": null, \"passed_test\": true, \"aliases\": [\"John\", \"Johnny\"]}}]</tool_calls>", #noqa: E501
            [
                make_tool_call("get_weather", {
                    "city": "San Francisco",
                    "metric": "celsius"
                }),
                make_tool_call(
                    "register_user", {
                        "name": "John Doe",
                        "age": 37,
                        "address": {
                            "city": "San Francisco",
                            "state": "CA"
                        },
                        "role": None,
                        "passed_test": True,
                        "aliases": ["John", "Johnny"]
                    })
            ],
            None),
        # Content before tool call
        (
            "I will call the tool now. <tool_calls>[{\"name\": \"get_weather\", \"arguments\": {\"city\": \"Boston\"}}]</tool_calls>", #noqa: E501
            [make_tool_call("get_weather", {"city": "Boston"})],
            "I will call the tool now. "),
        # Content after tool call (should be stripped)
        (
            "<tool_calls>[{\"name\": \"get_weather\", \"arguments\": {\"city\": \"Seattle\"}}]</tool_calls>\nThank you!", #noqa: E501
            [make_tool_call("get_weather", {"city": "Seattle"})],
            None),
        # Deeply nested argument object
        (
            "<tool_calls>[{\"name\": \"complex_tool\", \"arguments\": {\"level1\": {\"level2\": {\"level3\": {\"value\": 123}}}}}]</tool_calls>",
            [
                make_tool_call(
                    "complex_tool",
                    {"level1": {
                        "level2": {
                            "level3": {
                                "value": 123
                            }
                        }
                    }})
            ],
            None,
        ),
    ])
def test_hunyuan_a13b_tool_parser_extract(model_output, expected_tool_calls,
                                          expected_content):
    """Non-streaming extraction: parse one full model output at once."""
    tokenizer_stub = MagicMock()
    parser: ToolParser = ToolParserManager.get_tool_parser(
        "hunyuan_a13b")(tokenizer_stub)
    content, actual_calls = run_tool_extraction(parser,
                                                model_output,
                                                streaming=False)

    # The parser assigns random call ids; copy the expected ids over before
    # comparing so equality only checks names/arguments.
    for actual, expected in zip(actual_calls, expected_tool_calls):
        actual.id = expected.id
    assert actual_calls == expected_tool_calls
    assert content == expected_content
102+
103+
104+
# Streaming test: simulate incremental output
105+
@pytest.mark.parametrize("model_deltas,expected_tool_calls", [
    ([
        "<tool_calls>[{\"name\": \"get_weather\", ",
        "\"arguments\": {\"city\": \"San Francisco\", ",
        "\"metric\": \"celsius\"}}]", "</tool_calls>"
    ], [
        make_tool_call("get_weather", {
            "city": "San Francisco",
            "metric": "celsius"
        })
    ]),
    ([
        "<tool_calls>[{\"name\":", " \"get_weather\",", " \"arguments\":",
        " {\"city\": \"Boston\"}", "}]", "</tool_calls>"
    ], [make_tool_call("get_weather", {"city": "Boston"})]),
    ([
        "", "<tool_calls>[{\"name\":", " \"get_weather\",", " \"arguments\":",
        " {\"city\": \"Boston\"}", "}]", "</tool_calls>", "\n</answer>"
    ], [make_tool_call("get_weather", {"city": "Boston"})]),
    pytest.param([
        "<tool_calls>[{\"name\": \"complex_tool\",", " \"arguments\": ",
        " {\"level1\": {\"level2\": ", "{\"level3\": {\"value\": 123}}}}}",
        "]</tool_calls>"
    ], [
        make_tool_call("complex_tool",
                       {"level1": {
                           "level2": {
                               "level3": {
                                   "value": 123
                               }
                           }
                       }})
    ],
               marks=pytest.mark.xfail(
                   reason="stream parsing not support nested json yet.")),
])
def test_hunyuan_a13b_tool_parser_streaming(model_deltas, expected_tool_calls):
    """Streaming extraction: feed the output as incremental deltas."""
    tokenizer_stub = MagicMock()

    parser: ToolParser = ToolParserManager.get_tool_parser(
        "hunyuan_a13b")(tokenizer_stub)
    reconstructor = run_tool_extraction_streaming(
        parser, model_deltas, assert_one_tool_per_delta=False)

    # The parser assigns random call ids; copy the expected ids over before
    # comparing so equality only checks names/arguments.
    for actual, expected in zip(reconstructor.tool_calls, expected_tool_calls):
        actual.id = expected.id

    assert reconstructor.tool_calls == expected_tool_calls

vllm/entrypoints/openai/tool_parsers/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from .granite_20b_fc_tool_parser import Granite20bFCToolParser
77
from .granite_tool_parser import GraniteToolParser
88
from .hermes_tool_parser import Hermes2ProToolParser
9+
from .hunyuan_a13b_tool_parser import HunyuanA13BToolParser
910
from .internlm2_tool_parser import Internlm2ToolParser
1011
from .jamba_tool_parser import JambaToolParser
1112
from .llama4_pythonic_tool_parser import Llama4PythonicToolParser
@@ -19,5 +20,5 @@
1920
"GraniteToolParser", "Hermes2ProToolParser", "MistralToolParser",
2021
"Internlm2ToolParser", "Llama3JsonToolParser", "JambaToolParser",
2122
"Llama4PythonicToolParser", "PythonicToolParser", "Phi4MiniJsonToolParser",
22-
"DeepSeekV3ToolParser"
23+
"DeepSeekV3ToolParser", "HunyuanA13BToolParser"
2324
]

0 commit comments

Comments
 (0)