Skip to content

Commit 3591ff1

Browse files
committed
Sync codebase
1 parent 4560a88 commit 3591ff1

File tree

5 files changed

+41
-3
lines changed

5 files changed

+41
-3
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "tiktoken"
3-
version = "0.9.0"
3+
version = "0.10.0"
44
edition = "2021"
55
rust-version = "1.57.0"
66

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "tiktoken"
3-
version = "0.9.0"
3+
version = "0.10.0"
44
description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models"
55
readme = "README.md"
66
license = { file = "LICENSE" }

tiktoken/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@
55
from .registry import get_encoding as get_encoding
66
from .registry import list_encoding_names as list_encoding_names
77

8-
__version__ = "0.9.0"
8+
__version__ = "0.10.0"

tiktoken/model.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,16 @@
77
MODEL_PREFIX_TO_ENCODING: dict[str, str] = {
88
"o1-": "o200k_base",
99
"o3-": "o200k_base",
10+
"o4-mini-": "o200k_base",
1011
# chat
12+
"gpt-4.5-": "o200k_base",
13+
"gpt-4.1-": "o200k_base",
1114
"chatgpt-4o-": "o200k_base",
1215
"gpt-4o-": "o200k_base", # e.g., gpt-4o-2024-05-13
1316
"gpt-4-": "cl100k_base", # e.g., gpt-4-0314, etc., plus gpt-4-32k
1417
"gpt-3.5-turbo-": "cl100k_base", # e.g, gpt-3.5-turbo-0301, -0401, etc.
1518
"gpt-35-turbo-": "cl100k_base", # Azure deployment name
19+
"gpt-oss-": "o200k_harmony",
1620
# fine-tuned
1721
"ft:gpt-4o": "o200k_base",
1822
"ft:gpt-4": "cl100k_base",
@@ -25,7 +29,9 @@
2529
# reasoning
2630
"o1": "o200k_base",
2731
"o3": "o200k_base",
32+
"o4-mini": "o200k_base",
2833
# chat
34+
"gpt-4.1": "o200k_base",
2935
"gpt-4o": "o200k_base",
3036
"gpt-4": "cl100k_base",
3137
"gpt-3.5-turbo": "cl100k_base",

tiktoken_ext/openai_public.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,11 +120,43 @@ def o200k_base():
120120
}
121121

122122

123+
def o200k_harmony():
    """Build the constructor arguments for the ``o200k_harmony`` encoding.

    The split pattern and mergeable ranks are taken unchanged from
    ``o200k_base()``. The special-token table starts from the base table and
    is extended with the explicitly numbered entries below (ids 199998 to
    200012) followed by generated ``<|reserved_N|>`` placeholders for ids
    200013 through 201087.

    Returns:
        A dict with the keys ``name``, ``pat_str``, ``mergeable_ranks`` and
        ``special_tokens``.
    """
    base = o200k_base()
    pattern = base["pat_str"]
    ranks = base["mergeable_ranks"]

    # Explicitly numbered tokens; a name already present in the base table
    # keeps its position and takes the id given here.
    explicit_tokens = {
        "<|startoftext|>": 199998,
        "<|endoftext|>": 199999,
        "<|reserved_200000|>": 200000,
        "<|reserved_200001|>": 200001,
        "<|return|>": 200002,
        "<|constrain|>": 200003,
        "<|reserved_200004|>": 200004,
        "<|channel|>": 200005,
        "<|start|>": 200006,
        "<|end|>": 200007,
        "<|message|>": 200008,
        "<|reserved_200009|>": 200009,
        "<|reserved_200010|>": 200010,
        "<|reserved_200011|>": 200011,
        "<|call|>": 200012,
    }
    # Placeholder names for every id from 200013 up to and including 201087.
    reserved_tokens = {f"<|reserved_{i}|>": i for i in range(200013, 201088)}

    # NOTE(review): base entries are carried over untouched unless renamed
    # above; if the base table assigns an id inside 200013..201087, that id
    # will be shared with a reserved placeholder -- confirm against
    # o200k_base.
    special_tokens = dict(base["special_tokens"])
    special_tokens.update(explicit_tokens)
    special_tokens.update(reserved_tokens)

    return {
        "name": "o200k_harmony",
        "pat_str": pattern,
        "mergeable_ranks": ranks,
        "special_tokens": special_tokens,
    }
152+
153+
123154
# Registry of encoding names to the functions that build their constructor
# arguments; each key is spelled exactly like the function it maps to.
ENCODING_CONSTRUCTORS = dict(
    gpt2=gpt2,
    r50k_base=r50k_base,
    p50k_base=p50k_base,
    p50k_edit=p50k_edit,
    cl100k_base=cl100k_base,
    o200k_base=o200k_base,
    o200k_harmony=o200k_harmony,
)

0 commit comments

Comments
 (0)