Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ keywords = ["machine learning", "inference"]
requires-python = ">=3.8, <4"
dependencies = [
"pydantic",
"sentencepiece",
"tiktoken",
"torch>=1.10.0",
"transformers>=4.38.0",
"triton; platform_system == 'Linux' and platform_machine == 'x86_64'",
Expand Down
17 changes: 15 additions & 2 deletions python/xgrammar/tokenizer_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,15 @@
from enum import Enum
from typing import Any, Dict, List, Optional, Union

import sentencepiece
import tiktoken
try:
import sentencepiece
except ImportError:
sentencepiece = None
try:
import tiktoken
except ImportError:
tiktoken = None

from transformers import PreTrainedTokenizerBase, PreTrainedTokenizerFast

from .base import XGRObject, _core
Expand Down Expand Up @@ -95,6 +102,9 @@ def __init__(

@staticmethod
def _is_tiktoken_tokenizer(tokenizer: PreTrainedTokenizerBase) -> bool:
if tiktoken is None:
return False

# Helper to check whether the tokenizer is a tiktoken tokenizer.
has_tiktoken_encoding = hasattr(tokenizer, "tokenizer") and isinstance(
tokenizer.tokenizer, tiktoken.Encoding
Expand All @@ -110,6 +120,9 @@ def _is_tiktoken_tokenizer(tokenizer: PreTrainedTokenizerBase) -> bool:

@staticmethod
def _is_sentencepiece_tokenizer(tokenizer: PreTrainedTokenizerBase) -> bool:
if sentencepiece is None:
return False

# Helper to check whether the tokenizer is a SentencePiece tokenizer.
has_sp_model_attr = hasattr(tokenizer, "sp_model") and isinstance(
tokenizer.sp_model, sentencepiece.SentencePieceProcessor
Expand Down
Loading