Skip to content

Commit c031d57

Browse files
committed
feat(GPT Token Estimator): expose all models supported by tiktoken (and thus remove pricing)
Fix CorentinTh#1109, CorentinTh#1334
1 parent 2b06b5b commit c031d57

File tree

5 files changed

+207
-32
lines changed

5 files changed

+207
-32
lines changed

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,6 @@
153153
"fuse.js": "^7.1.0",
154154
"generate-schema": "^2.6.0",
155155
"get-timezone-offset": "^1.0.5",
156-
"gpt-tokens": "^1.3.14",
157156
"hash-wasm": "^4.12.0",
158157
"haversine": "^1.1.1",
159158
"heic-convert": "^2.1.0",
@@ -184,6 +183,7 @@
184183
"jq-wasm": "^1.0.0",
185184
"js-base64": "^3.7.7",
186185
"js-beautify": "^1.15.4",
186+
"js-tiktoken": "^1.0.21",
187187
"jsbarcode": "^3.12.1",
188188
"json-analyzer": "^1.2.2",
189189
"json-editor-vue": "^0.18.0",
@@ -236,6 +236,7 @@
236236
"netstack.js": "^2.1.2",
237237
"nginx-config-formatter": "^1.4.5",
238238
"node-forge": "^1.3.1",
239+
"openai-chat-tokens": "^0.2.8",
239240
"openpgp": "^6.2.0",
240241
"oui-data": "^1.1.391",
241242
"parse-duration": "^2.1.2",

pnpm-lock.yaml

Lines changed: 10 additions & 15 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/tools/gpt-token-estimator/gpt-token-estimator.vue

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
<script setup lang="ts">
22
import { useI18n } from 'vue-i18n';
3-
import { GPTTokens } from 'gpt-tokens';
43
import JSON5 from 'json5';
5-
import type { supportModelType } from 'gpt-tokens';
4+
import type { TiktokenModel } from 'js-tiktoken';
5+
import { GPTTokens } from './gpt-tokens.service';
66
import TextareaCopyable from '@/components/TextareaCopyable.vue';
77
import { useValidation } from '@/composable/validation';
88
import { useQueryParamOrStorage } from '@/composable/queryParams';
@@ -60,21 +60,19 @@ const outputTokenCosts = computed(() => {
6060
return {
6161
error: '',
6262
usedTokens: '0',
63-
usedUSD: '0',
6463
promptUsedTokens: '0',
6564
completionUsedTokens: '0',
6665
};
6766
}
6867
6968
const tokens = new GPTTokens({
70-
model: model.value as supportModelType,
69+
model: model.value as TiktokenModel,
7170
messages: messagesArray,
7271
tools: toolsArray,
7372
});
7473
return {
7574
error: '',
7675
usedTokens: tokens.usedTokens.toString(),
77-
usedUSD: tokens.usedUSD.toString(),
7876
promptUsedTokens: tokens.promptUsedTokens.toString(),
7977
completionUsedTokens: tokens.completionUsedTokens.toString(),
8078
};
@@ -83,7 +81,6 @@ const outputTokenCosts = computed(() => {
8381
return {
8482
error: e.toString(),
8583
usedTokens: '',
86-
usedUSD: '',
8784
promptUsedTokens: '',
8885
completionUsedTokens: '',
8986
};
@@ -156,17 +153,14 @@ const outputTokenCosts = computed(() => {
156153
</c-alert>
157154

158155
<div v-if="!outputTokenCosts.error">
159-
<n-form-item :label="t('tools.gpt-token-estimator.texts.label-used-tokens')">
160-
<TextareaCopyable :value="outputTokenCosts.usedTokens" />
156+
<n-form-item :label="t('tools.gpt-token-estimator.texts.label-used-tokens')" label-placement="left">
157+
<InputCopyable :value="outputTokenCosts.usedTokens" />
161158
</n-form-item>
162-
<n-form-item :label="t('tools.gpt-token-estimator.texts.label-prompt-tokens')">
163-
<TextareaCopyable :value="outputTokenCosts.promptUsedTokens" />
159+
<n-form-item :label="t('tools.gpt-token-estimator.texts.label-prompt-tokens')" label-placement="left">
160+
<InputCopyable :value="outputTokenCosts.promptUsedTokens" />
164161
</n-form-item>
165-
<n-form-item :label="t('tools.gpt-token-estimator.texts.label-completion-tokens')">
166-
<TextareaCopyable :value="outputTokenCosts.completionUsedTokens" />
167-
</n-form-item>
168-
<n-form-item :label="t('tools.gpt-token-estimator.texts.label-used-usd')">
169-
<TextareaCopyable :value="outputTokenCosts.usedUSD" />
162+
<n-form-item :label="t('tools.gpt-token-estimator.texts.label-completion-tokens')" label-placement="left">
163+
<InputCopyable :value="outputTokenCosts.completionUsedTokens" />
170164
</n-form-item>
171165
</div>
172166
</div>
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
import type { Tiktoken, TiktokenModel } from 'js-tiktoken';
2+
import { encodingForModel } from 'js-tiktoken';
3+
import { promptTokensEstimate } from 'openai-chat-tokens';
4+
5+
interface MessageItem {
6+
name?: string
7+
role: 'system' | 'user' | 'assistant'
8+
content: string
9+
}
10+
11+
export class GPTTokens {
12+
protected static modelEncodingCache = new Map<TiktokenModel, Tiktoken>();
13+
14+
protected static getEncodingForModelCached(model: TiktokenModel): Tiktoken {
15+
const modelEncodingCache = GPTTokens.modelEncodingCache;
16+
if (!modelEncodingCache.has(model)) {
17+
try {
18+
modelEncodingCache.set(model, encodingForModel(model));
19+
}
20+
catch (e: any) {
21+
throw new Error(`Model '${model}' not found: ${e.toString()}`);
22+
}
23+
}
24+
25+
return modelEncodingCache.get(model)!;
26+
}
27+
28+
constructor(options: {
29+
model?: TiktokenModel
30+
messages?: GPTTokens['messages']
31+
tools?: GPTTokens['tools']
32+
debug?: boolean
33+
}) {
34+
const {
35+
model,
36+
messages,
37+
tools,
38+
debug = false,
39+
} = options;
40+
41+
this.model = model as TiktokenModel;
42+
this.debug = debug;
43+
this.messages = messages;
44+
this.tools = tools;
45+
46+
this.checkOptions();
47+
}
48+
49+
private checkOptions() {
50+
if (!this.messages && !this.tools) {
51+
throw new Error('Must set one of messages | function');
52+
}
53+
54+
if (this.tools && !this.messages) {
55+
throw new Error('Function must set messages');
56+
}
57+
}
58+
59+
public static get supportModels() {
60+
return ['davinci-002', 'babbage-002', 'text-davinci-003', 'text-davinci-002', 'text-davinci-001', 'text-curie-001', 'text-babbage-001', 'text-ada-001', 'davinci', 'curie', 'babbage', 'ada', 'code-davinci-002', 'code-davinci-001', 'code-cushman-002', 'code-cushman-001', 'davinci-codex', 'cushman-codex', 'text-davinci-edit-001', 'code-davinci-edit-001', 'text-embedding-ada-002', 'text-embedding-3-small', 'text-embedding-3-large', 'text-similarity-davinci-001', 'text-similarity-curie-001', 'text-similarity-babbage-001', 'text-similarity-ada-001', 'text-search-davinci-doc-001', 'text-search-curie-doc-001', 'text-search-babbage-doc-001', 'text-search-ada-doc-001', 'code-search-babbage-code-001', 'code-search-ada-code-001', 'gpt2', 'gpt-3.5-turbo', 'gpt-35-turbo', 'gpt-3.5-turbo-0301', 'gpt-3.5-turbo-0613', 'gpt-3.5-turbo-1106', 'gpt-3.5-turbo-0125', 'gpt-3.5-turbo-16k', 'gpt-3.5-turbo-16k-0613', 'gpt-3.5-turbo-instruct', 'gpt-3.5-turbo-instruct-0914', 'gpt-4', 'gpt-4-0314', 'gpt-4-0613', 'gpt-4-32k', 'gpt-4-32k-0314', 'gpt-4-32k-0613', 'gpt-4-turbo', 'gpt-4-turbo-2024-04-09', 'gpt-4-turbo-preview', 'gpt-4-1106-preview', 'gpt-4-0125-preview', 'gpt-4-vision-preview', 'gpt-4o', 'gpt-4o-2024-05-13', 'gpt-4o-2024-08-06', 'gpt-4o-2024-11-20', 'gpt-4o-mini-2024-07-18', 'gpt-4o-mini', 'gpt-4o-search-preview', 'gpt-4o-search-preview-2025-03-11', 'gpt-4o-mini-search-preview', 'gpt-4o-mini-search-preview-2025-03-11', 'gpt-4o-audio-preview', 'gpt-4o-audio-preview-2024-12-17', 'gpt-4o-audio-preview-2024-10-01', 'gpt-4o-mini-audio-preview', 'gpt-4o-mini-audio-preview-2024-12-17', 'o1', 'o1-2024-12-17', 'o1-mini', 'o1-mini-2024-09-12', 'o1-preview', 'o1-preview-2024-09-12', 'o1-pro', 'o1-pro-2025-03-19', 'o3', 'o3-2025-04-16', 'o3-mini', 'o3-mini-2025-01-31', 'o4-mini', 'o4-mini-2025-04-16', 'chatgpt-4o-latest', 'gpt-4o-realtime', 'gpt-4o-realtime-preview-2024-10-01', 'gpt-4o-realtime-preview-2024-12-17', 'gpt-4o-mini-realtime-preview', 'gpt-4o-mini-realtime-preview-2024-12-17', 
'gpt-4.1', 'gpt-4.1-2025-04-14', 'gpt-4.1-mini', 'gpt-4.1-mini-2025-04-14', 'gpt-4.1-nano', 'gpt-4.1-nano-2025-04-14', 'gpt-4.5-preview', 'gpt-4.5-preview-2025-02-27', 'gpt-5', 'gpt-5-2025-08-07', 'gpt-5-nano', 'gpt-5-nano-2025-08-07', 'gpt-5-mini', 'gpt-5-mini-2025-08-07', 'gpt-5-chat-latest'];
61+
}
62+
63+
public readonly debug;
64+
public readonly model;
65+
public readonly messages?: MessageItem [];
66+
67+
public readonly tools?: {
68+
type: 'function'
69+
function: {
70+
name: string
71+
description?: string
72+
parameters: Record<string, unknown>
73+
}
74+
} [];
75+
76+
// Used Tokens (total)
77+
public get usedTokens(): number {
78+
if (this.tools) {
79+
return promptTokensEstimate({
80+
messages: this.messages!,
81+
functions: this.tools.map(item => item.function),
82+
});
83+
}
84+
85+
if (this.messages) {
86+
return this.promptUsedTokens + this.completionUsedTokens;
87+
}
88+
89+
return 0;
90+
}
91+
92+
// Used Tokens (prompt)
93+
public get promptUsedTokens() {
94+
return GPTTokens.num_tokens_from_messages(this.promptMessages, this.model);
95+
}
96+
97+
// Used Tokens (completion)
98+
public get completionUsedTokens() {
99+
return this.completionMessage
100+
? GPTTokens.contentUsedTokens(this.model, this.completionMessage)
101+
: 0;
102+
}
103+
104+
public static contentUsedTokens(model: TiktokenModel, content: string) {
105+
const encoding = GPTTokens.getEncodingForModelCached(model);
106+
107+
return encoding.encode(content).length;
108+
}
109+
110+
private get lastMessage() {
111+
return this.messages![this.messages!.length - 1];
112+
}
113+
114+
private get promptMessages() {
115+
return this.lastMessage.role === 'assistant' ? this.messages!.slice(0, -1) : this.messages!;
116+
}
117+
118+
private get completionMessage() {
119+
return this.lastMessage.role === 'assistant'
120+
? this.lastMessage.content
121+
: '';
122+
}
123+
124+
/**
125+
* Return the number of tokens in a list of messages.
126+
* @param messages A list of messages.
127+
* @param model The model to use for encoding.
128+
* @returns The number of tokens in the messages.
129+
* @throws If the model is not supported.
130+
*/
131+
private static num_tokens_from_messages(messages: MessageItem [], model: TiktokenModel) {
132+
let tokens_per_message!: number;
133+
let tokens_per_name !: number;
134+
135+
let num_tokens = 0;
136+
137+
if (model === 'gpt-3.5-turbo-0301') {
138+
tokens_per_message = 4;
139+
tokens_per_name = -1;
140+
}
141+
else {
142+
tokens_per_message = 3;
143+
tokens_per_name = 1;
144+
}
145+
146+
const encoding = GPTTokens.getEncodingForModelCached(model);
147+
148+
// This is a port of the Python code from
149+
//
150+
// Python => Typescript by gpt-4
151+
//
152+
// https://notebooks.githubusercontent.com/view/ipynb?browser=edge&bypass_fastly=true&color_mode=dark&commit=d67c4181abe9dfd871d382930bb778b7014edc66&device=unknown_device&docs_host=https%3A%2F%2Fdocs.github.com&enc_url=68747470733a2f2f7261772e67697468756275736572636f6e74656e742e636f6d2f6f70656e61692f6f70656e61692d636f6f6b626f6f6b2f643637633431383161626539646664383731643338323933306262373738623730313465646336362f6578616d706c65732f486f775f746f5f636f756e745f746f6b656e735f776974685f74696b746f6b656e2e6970796e62&logged_in=true&nwo=openai%2Fopenai-cookbook&path=examples%2FHow_to_count_tokens_with_tiktoken.ipynb&platform=mac&repository_id=468576060&repository_type=Repository&version=114#6d8d98eb-e018-4e1f-8c9e-19b152a97aaf
153+
154+
for (const message of messages) {
155+
num_tokens += tokens_per_message;
156+
157+
for (const [key, value] of Object.entries(message)) {
158+
if (typeof value !== 'string') {
159+
continue;
160+
}
161+
162+
num_tokens += encoding.encode(value as string).length;
163+
if (key === 'name') {
164+
num_tokens += tokens_per_name;
165+
}
166+
}
167+
}
168+
169+
// Supplementary
170+
// encoding.free()
171+
172+
// every reply is primed with <|start|>assistant<|message|>
173+
return num_tokens + 3;
174+
}
175+
176+
public static encode(model: TiktokenModel, text: string) {
177+
const encoding = GPTTokens.getEncodingForModelCached(model);
178+
return encoding.encode(text);
179+
}
180+
181+
public static decode(model: TiktokenModel, tokens: number[]) {
182+
const encoding = GPTTokens.getEncodingForModelCached(model);
183+
return encoding.decode(tokens);
184+
}
185+
}

src/tools/gpt-token-estimator/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ export const tool = defineTool({
66
name: t('tools.gpt-token-estimator.title'),
77
path: '/gpt-token-estimator',
88
description: t('tools.gpt-token-estimator.description'),
9-
keywords: ['gpt', 'token', 'estimator'],
9+
keywords: ['gpt', 'llm', 'openai', 'token', 'estimator'],
1010
component: () => import('./gpt-token-estimator.vue'),
1111
icon: CurrencyDollar,
1212
createdAt: new Date('2024-08-15'),

0 commit comments

Comments
 (0)