Commit 34a9842

[Misc] Refactor tokenizer interface (vllm-project#29693)
Signed-off-by: DarkLight1337 <[email protected]>
1 parent f223ed4 commit 34a9842

119 files changed: 758 additions, 827 deletions

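Read together, the hunks below implement one mechanical refactor: the `AnyTokenizer` alias is renamed to `TokenizerLike`, tokenizer imports move from `vllm.transformers_utils` to the new `vllm.tokenizers` package, and the `tests/tokenization` directory becomes `tests/tokenizers_`. A minimal before/after sketch, using only import paths that appear in the hunks below; the helper function is illustrative, not part of the commit:

    # Old import path (removed lines below):
    # from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer

    # New import path (added lines below):
    from vllm.tokenizers import MistralTokenizer, TokenizerLike

    def token_count(tokenizer: TokenizerLike, text: str) -> int:
        # TokenizerLike is the common interface annotation, so helpers like
        # this stay agnostic about the concrete tokenizer class.
        return len(tokenizer.encode(text))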

.buildkite/test-amd.yaml

Lines changed: 2 additions & 2 deletions

@@ -316,15 +316,15 @@ steps:
   source_file_dependencies:
   - vllm/
   - tests/engine
-  - tests/tokenization
+  - tests/tokenizers_
   - tests/test_sequence
   - tests/test_config
   - tests/test_logger
   - tests/test_vllm_port
   commands:
   - pytest -v -s engine test_sequence.py test_config.py test_logger.py test_vllm_port.py
   # OOM in the CI unless we run this separately
-  - pytest -v -s tokenization
+  - pytest -v -s tokenizers_

 - label: V1 Test e2e + engine # 30min
   timeout_in_minutes: 45

.buildkite/test-pipeline.yaml

Lines changed: 2 additions & 2 deletions

@@ -282,15 +282,15 @@ steps:
   source_file_dependencies:
   - vllm/
   - tests/engine
-  - tests/tokenization
+  - tests/tokenizers_
   - tests/test_sequence
   - tests/test_config
   - tests/test_logger
   - tests/test_vllm_port
   commands:
   - pytest -v -s engine test_sequence.py test_config.py test_logger.py test_vllm_port.py
   # OOM in the CI unless we run this separately
-  - pytest -v -s tokenization
+  - pytest -v -s tokenizers_

 - label: V1 Test e2e + engine # 30min
   timeout_in_minutes: 45

benchmarks/backend_request_func.py

Lines changed: 1 addition & 1 deletion

@@ -620,7 +620,7 @@ def get_tokenizer(
         kwargs["use_fast"] = False
     if tokenizer_mode == "mistral":
         try:
-            from vllm.transformers_utils.tokenizer import MistralTokenizer
+            from vllm.tokenizers import MistralTokenizer
         except ImportError as e:
             raise ImportError(
                 "MistralTokenizer requires vllm package.\n"

docs/features/reasoning_outputs.md

Lines changed: 2 additions & 3 deletions

@@ -216,14 +216,13 @@ You can add a new `ReasoningParser` similar to [vllm/reasoning/deepseek_r1_reaso
 # import the required packages

 from vllm.reasoning import ReasoningParser, ReasoningParserManager
-from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
-                                              DeltaMessage)
+from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage

 # define a reasoning parser and register it to vllm
 # the name list in register_module can be used
 # in --reasoning-parser.
 class ExampleParser(ReasoningParser):
-    def __init__(self, tokenizer: AnyTokenizer):
+    def __init__(self, tokenizer: TokenizerLike):
         super().__init__(tokenizer)

     def extract_reasoning_streaming(
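The doc snippet cuts off inside the class body; per its comment about register_module, the parser is exposed to --reasoning-parser by registering it with the manager. A rough sketch, where the "example" name and the elided body are illustrative rather than taken from the diff:

    from vllm.reasoning import ReasoningParser, ReasoningParserManager

    # Names passed to register_module become valid --reasoning-parser values.
    @ReasoningParserManager.register_module(["example"])
    class ExampleParser(ReasoningParser):
        ...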

docs/features/tool_calling.md

Lines changed: 1 addition & 1 deletion

@@ -422,7 +422,7 @@ Here is a summary of a plugin file:
 # in --tool-call-parser. you can define as many
 # tool parsers as you want here.
 class ExampleToolParser(ToolParser):
-    def __init__(self, tokenizer: AnyTokenizer):
+    def __init__(self, tokenizer: TokenizerLike):
         super().__init__(tokenizer)

     # adjust request. e.g.: set skip special tokens
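The tool-parser plugin follows the same registration pattern as the reasoning parser above; a parallel sketch under the same caveats (the "example" name is illustrative):

    from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager

    # Names passed to register_module become valid --tool-call-parser values.
    @ToolParserManager.register_module(["example"])
    class ExampleToolParser(ToolParser):
        ...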

tests/entrypoints/openai/test_serving_engine.py

Lines changed: 1 addition & 1 deletion

@@ -10,7 +10,7 @@
 from vllm.config import ModelConfig
 from vllm.entrypoints.openai.serving_engine import OpenAIServing
 from vllm.entrypoints.openai.serving_models import OpenAIServingModels
-from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer
+from vllm.tokenizers import MistralTokenizer


 @pytest.fixture()

tests/entrypoints/openai/tool_parsers/conftest.py

Lines changed: 2 additions & 2 deletions

@@ -4,9 +4,9 @@
 import pytest
 from transformers import AutoTokenizer

-from vllm.transformers_utils.tokenizer import AnyTokenizer
+from vllm.tokenizers import TokenizerLike


 @pytest.fixture(scope="function")
-def default_tokenizer() -> AnyTokenizer:
+def default_tokenizer() -> TokenizerLike:
     return AutoTokenizer.from_pretrained("gpt2")
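This shared fixture is what the parser tests below consume; a minimal usage sketch (the test itself is illustrative, only the fixture name and annotation come from the diff):

    from vllm.tokenizers import TokenizerLike

    def test_tokenizer_round_trip(default_tokenizer: TokenizerLike) -> None:
        # pytest injects the gpt2 tokenizer built in conftest.py above.
        ids = default_tokenizer.encode("hello world")
        assert default_tokenizer.decode(ids) == "hello world"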

tests/entrypoints/openai/tool_parsers/test_hermes_tool_parser.py

Lines changed: 6 additions & 6 deletions

@@ -7,7 +7,7 @@

 from vllm.entrypoints.openai.protocol import ChatCompletionRequest
 from vllm.entrypoints.openai.tool_parsers.hermes_tool_parser import Hermes2ProToolParser
-from vllm.transformers_utils.tokenizer import AnyTokenizer
+from vllm.tokenizers import TokenizerLike

 from ....utils import RemoteOpenAIServer

@@ -270,14 +270,14 @@ async def test_streaming_product_tool_call():


 @pytest.fixture
-def qwen_tokenizer() -> AnyTokenizer:
+def qwen_tokenizer() -> TokenizerLike:
     from vllm.transformers_utils.tokenizer import get_tokenizer

     return get_tokenizer("Qwen/Qwen3-32B")


 @pytest.fixture
-def hermes_parser(qwen_tokenizer: AnyTokenizer) -> Hermes2ProToolParser:
+def hermes_parser(qwen_tokenizer: TokenizerLike) -> Hermes2ProToolParser:
     return Hermes2ProToolParser(qwen_tokenizer)


@@ -291,7 +291,7 @@ def any_chat_request() -> ChatCompletionRequest:


 def test_hermes_parser_streaming_just_forward_text(
-    qwen_tokenizer: AnyTokenizer,
+    qwen_tokenizer: TokenizerLike,
     hermes_parser: Hermes2ProToolParser,
     any_chat_request: ChatCompletionRequest,
 ) -> None:
@@ -323,7 +323,7 @@ def test_hermes_parser_streaming_just_forward_text(


 def test_hermes_parser_streaming_failure_case_bug_19056(
-    qwen_tokenizer: AnyTokenizer,
+    qwen_tokenizer: TokenizerLike,
     hermes_parser: Hermes2ProToolParser,
     any_chat_request: ChatCompletionRequest,
 ) -> None:
@@ -357,7 +357,7 @@ def test_hermes_parser_streaming_failure_case_bug_19056(


 def test_hermes_parser_streaming(
-    qwen_tokenizer: AnyTokenizer,
+    qwen_tokenizer: TokenizerLike,
     hermes_parser: Hermes2ProToolParser,
     any_chat_request: ChatCompletionRequest,
 ) -> None:

tests/entrypoints/openai/tool_parsers/test_llama3_json_tool_parser.py

Lines changed: 2 additions & 2 deletions

@@ -7,11 +7,11 @@

 from vllm.entrypoints.openai.protocol import ExtractedToolCallInformation
 from vllm.entrypoints.openai.tool_parsers.llama_tool_parser import Llama3JsonToolParser
-from vllm.transformers_utils.tokenizer import AnyTokenizer
+from vllm.tokenizers import TokenizerLike


 @pytest.fixture
-def parser(default_tokenizer: AnyTokenizer):
+def parser(default_tokenizer: TokenizerLike):
     return Llama3JsonToolParser(default_tokenizer)

tests/entrypoints/openai/tool_parsers/test_llama4_pythonic_tool_parser.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
)
1212
from vllm.entrypoints.openai.protocol import FunctionCall
1313
from vllm.entrypoints.openai.tool_parsers import ToolParser, ToolParserManager
14-
from vllm.transformers_utils.tokenizer import AnyTokenizer
14+
from vllm.tokenizers import TokenizerLike
1515

1616
# Test cases similar to pythonic parser but with Llama4 specific format
1717
SIMPLE_FUNCTION_OUTPUT = "[get_weather(city='LA', metric='C')]"
@@ -64,7 +64,7 @@
6464

6565

6666
@pytest.mark.parametrize("streaming", [True, False])
67-
def test_no_tool_call(streaming: bool, default_tokenizer: AnyTokenizer):
67+
def test_no_tool_call(streaming: bool, default_tokenizer: TokenizerLike):
6868
tool_parser: ToolParser = ToolParserManager.get_tool_parser("llama4_pythonic")(
6969
default_tokenizer
7070
)
@@ -208,7 +208,7 @@ def test_tool_call(
208208
streaming: bool,
209209
model_output: str,
210210
expected_tool_calls: list[FunctionCall],
211-
default_tokenizer: AnyTokenizer,
211+
default_tokenizer: TokenizerLike,
212212
):
213213
tool_parser: ToolParser = ToolParserManager.get_tool_parser("llama4_pythonic")(
214214
default_tokenizer
@@ -224,7 +224,7 @@ def test_tool_call(
224224
assert actual.function == expected
225225

226226

227-
def test_streaming_tool_call_with_large_steps(default_tokenizer: AnyTokenizer):
227+
def test_streaming_tool_call_with_large_steps(default_tokenizer: TokenizerLike):
228228
tool_parser: ToolParser = ToolParserManager.get_tool_parser("llama4_pythonic")(
229229
default_tokenizer
230230
)
@@ -246,7 +246,7 @@ def test_streaming_tool_call_with_large_steps(default_tokenizer: AnyTokenizer):
246246

247247

248248
@pytest.mark.parametrize("streaming", [False])
249-
def test_regex_timeout_handling(streaming: bool, default_tokenizer: AnyTokenizer):
249+
def test_regex_timeout_handling(streaming: bool, default_tokenizer: TokenizerLike):
250250
"""test regex timeout is handled gracefully"""
251251
tool_parser: ToolParser = ToolParserManager.get_tool_parser("llama4_pythonic")(
252252
default_tokenizer
