5 changes: 1 addition & 4 deletions api/app/clients/AnthropicClient.js
@@ -1,9 +1,6 @@
// const { Agent, ProxyAgent } = require('undici');
const BaseClient = require('./BaseClient');
- const {
- encoding_for_model: encodingForModel,
- get_encoding: getEncoding,
- } = require('@dqbd/tiktoken');
+ const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
const Anthropic = require('@anthropic-ai/sdk');

const HUMAN_PROMPT = '\n\nHuman:';
47 changes: 17 additions & 30 deletions api/app/clients/BaseClient.js
@@ -272,7 +272,9 @@ class BaseClient {
* @returns {Object} An object with three properties: `context`, `remainingContextTokens`, and `messagesToRefine`. `context` is an array of messages that fit within the token limit. `remainingContextTokens` is the number of tokens remaining within the limit after adding the messages to the context. `messagesToRefine` is an array of messages that were not added to the context because they would have exceeded the token limit.
*/
async getMessagesWithinTokenLimit(messages) {
- let currentTokenCount = 0;
+ // Every reply is primed with <|start|>assistant<|message|>, so we
+ // start with 3 tokens for the label after all messages have been counted.
+ let currentTokenCount = 3;
let context = [];
let messagesToRefine = [];
let refineIndex = -1;
@@ -562,44 +564,29 @@ class BaseClient {
* Algorithm adapted from "6. Counting tokens for chat API calls" of
* https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
*
- * An additional 2 tokens need to be added for metadata after all messages have been counted.
+ * An additional 3 tokens need to be added for assistant label priming after all messages have been counted.
*
- * @param {*} message
+ * @param {Object} message
*/
getTokenCountForMessage(message) {
- let tokensPerMessage;
- let nameAdjustment;
- if (this.modelOptions.model.startsWith('gpt-4')) {
- tokensPerMessage = 3;
- nameAdjustment = 1;
- } else {
- tokensPerMessage = 4;
- nameAdjustment = -1;
- }
+ // Note: gpt-3.5-turbo and gpt-4 may update over time. Use default for these as well as for unknown models
+ let tokensPerMessage = 3;
+ let tokensPerName = 1;

- if (this.options.debug) {
- console.debug('getTokenCountForMessage', message);
+ if (this.modelOptions.model === 'gpt-3.5-turbo-0301') {
+ tokensPerMessage = 4;
+ tokensPerName = -1;
}

- // Map each property of the message to the number of tokens it contains
- const propertyTokenCounts = Object.entries(message).map(([key, value]) => {
- if (key === 'tokenCount' || typeof value !== 'string') {
- return 0;
+ let numTokens = tokensPerMessage;
+ for (let [key, value] of Object.entries(message)) {
+ numTokens += this.getTokenCount(value);
+ if (key === 'name') {
+ numTokens += tokensPerName;
}
- // Count the number of tokens in the property value
- const numTokens = this.getTokenCount(value);
-
- // Adjust by `nameAdjustment` tokens if the property key is 'name'
- const adjustment = key === 'name' ? nameAdjustment : 0;
- return numTokens + adjustment;
- });
-
- if (this.options.debug) {
- console.debug('propertyTokenCounts', propertyTokenCounts);
- }
-
- // Sum the number of tokens in all properties and add `tokensPerMessage` for metadata
- return propertyTokenCounts.reduce((a, b) => a + b, tokensPerMessage);
+ }
+ return numTokens;
}

async sendPayload(payload, opts = {}) {
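Both clients now implement the counting rule from the OpenAI cookbook. For readers who want to sanity-check it outside the client classes, here is a minimal standalone sketch — it assumes the `tiktoken` npm package added by this PR and its `cl100k_base` encoding, and `numTokensFromMessages` is an illustrative name, not code from this diff:

```js
// Standalone sketch of the cookbook counting rule (illustrative, not PR code).
const { get_encoding: getEncoding } = require('tiktoken');

function numTokensFromMessages(messages, model = 'gpt-3.5-turbo') {
  const enc = getEncoding('cl100k_base');
  // Defaults cover gpt-3.5-turbo, gpt-4, and unknown models.
  let tokensPerMessage = 3;
  let tokensPerName = 1;
  if (model === 'gpt-3.5-turbo-0301') {
    tokensPerMessage = 4; // every message follows <|start|>{role/name}\n{content}<|end|>\n
    tokensPerName = -1; // if there's a name, the role is omitted
  }
  let numTokens = 0;
  for (const message of messages) {
    numTokens += tokensPerMessage;
    for (const [key, value] of Object.entries(message)) {
      numTokens += enc.encode(value).length;
      if (key === 'name') {
        numTokens += tokensPerName;
      }
    }
  }
  enc.free(); // the encoder wraps WASM memory and should be freed
  return numTokens + 3; // every reply is primed with <|start|>assistant<|message|>
}
```

Run against the six example messages in the test file below, this should reproduce the 129-token default count (127 for gpt-3.5-turbo-0301).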
45 changes: 19 additions & 26 deletions api/app/clients/ChatGPTClient.js
@@ -1,9 +1,6 @@
const crypto = require('crypto');
const Keyv = require('keyv');
- const {
- encoding_for_model: encodingForModel,
- get_encoding: getEncoding,
- } = require('@dqbd/tiktoken');
+ const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
const { fetchEventSource } = require('@waylaidwanderer/fetch-event-source');
const { Agent, ProxyAgent } = require('undici');
const BaseClient = require('./BaseClient');
@@ -526,8 +523,8 @@ ${botMessage.message}
const prompt = `${promptBody}${promptSuffix}`;
if (isChatGptModel) {
messagePayload.content = prompt;
- // Add 2 tokens for metadata after all messages have been counted.
- currentTokenCount += 2;
+ // Add 3 tokens for Assistant Label priming after all messages have been counted.
+ currentTokenCount += 3;
}

// Use up to `this.maxContextTokens` tokens (prompt + response), but try to leave `this.maxTokens` tokens for the response.
@@ -554,33 +551,29 @@ ${botMessage.message}
* Algorithm adapted from "6. Counting tokens for chat API calls" of
* https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
*
- * An additional 2 tokens need to be added for metadata after all messages have been counted.
+ * An additional 3 tokens need to be added for assistant label priming after all messages have been counted.
*
- * @param {*} message
+ * @param {Object} message
*/
getTokenCountForMessage(message) {
- let tokensPerMessage;
- let nameAdjustment;
- if (this.modelOptions.model.startsWith('gpt-4')) {
- tokensPerMessage = 3;
- nameAdjustment = 1;
- } else {
- tokensPerMessage = 4;
- nameAdjustment = -1;
- }
+ // Note: gpt-3.5-turbo and gpt-4 may update over time. Use default for these as well as for unknown models
+ let tokensPerMessage = 3;
+ let tokensPerName = 1;
+
+ if (this.modelOptions.model === 'gpt-3.5-turbo-0301') {
+ tokensPerMessage = 4;
+ tokensPerName = -1;
}

- // Map each property of the message to the number of tokens it contains
- const propertyTokenCounts = Object.entries(message).map(([key, value]) => {
- // Count the number of tokens in the property value
- const numTokens = this.getTokenCount(value);
-
- // Adjust by `nameAdjustment` tokens if the property key is 'name'
- const adjustment = key === 'name' ? nameAdjustment : 0;
- return numTokens + adjustment;
- });
+ let numTokens = tokensPerMessage;
+ for (let [key, value] of Object.entries(message)) {
+ numTokens += this.getTokenCount(value);
+ if (key === 'name') {
+ numTokens += tokensPerName;
+ }
+ }

- // Sum the number of tokens in all properties and add `tokensPerMessage` for metadata
- return propertyTokenCounts.reduce((a, b) => a + b, tokensPerMessage);
+ return numTokens;
}
}

5 changes: 1 addition & 4 deletions api/app/clients/GoogleClient.js
@@ -1,10 +1,7 @@
const BaseClient = require('./BaseClient');
const { google } = require('googleapis');
const { Agent, ProxyAgent } = require('undici');
- const {
- encoding_for_model: encodingForModel,
- get_encoding: getEncoding,
- } = require('@dqbd/tiktoken');
+ const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');

const tokenizersCache = {};

5 changes: 1 addition & 4 deletions api/app/clients/OpenAIClient.js
@@ -1,9 +1,6 @@
const BaseClient = require('./BaseClient');
const ChatGPTClient = require('./ChatGPTClient');
- const {
- encoding_for_model: encodingForModel,
- get_encoding: getEncoding,
- } = require('@dqbd/tiktoken');
+ const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
const { maxTokensMap, genAzureChatCompletion } = require('../../utils');
const { runTitleChain } = require('./chains');
const { createLLM } = require('./llm');
7 changes: 5 additions & 2 deletions api/app/clients/specs/BaseClient.test.js
@@ -138,7 +138,8 @@ describe('BaseClient', () => {
{ role: 'assistant', content: 'How can I help you?', tokenCount: 19 },
{ role: 'user', content: 'I have a question.', tokenCount: 18 },
];
- const expectedRemainingContextTokens = 58; // 100 - 5 - 19 - 18
+ // Subtract 3 tokens for Assistant Label priming after all messages have been counted.
+ const expectedRemainingContextTokens = 58 - 3; // (100 - 5 - 19 - 18) - 3
const expectedMessagesToRefine = [];

const result = await TestClient.getMessagesWithinTokenLimit(messages);
@@ -168,7 +169,9 @@ describe('BaseClient', () => {
{ role: 'assistant', content: 'How can I help you?', tokenCount: 19 },
{ role: 'user', content: 'I have a question.', tokenCount: 18 },
];
- const expectedRemainingContextTokens = 8; // 50 - 18 - 19 - 5
+
+ // Subtract 3 tokens for Assistant Label priming after all messages have been counted.
+ const expectedRemainingContextTokens = 8 - 3; // (50 - 18 - 19 - 5) - 3
const expectedMessagesToRefine = [
{ role: 'user', content: 'I need a coffee, stat!', tokenCount: 30 },
{ role: 'assistant', content: 'Sure, I can help with that.', tokenCount: 30 },
59 changes: 59 additions & 0 deletions api/app/clients/specs/OpenAIClient.test.js
@@ -213,4 +213,63 @@ describe('OpenAIClient', () => {
expect(result.prompt).toEqual([]);
});
});

+ describe('getTokenCountForMessage', () => {
+ const example_messages = [
+ {
+ role: 'system',
+ content:
+ 'You are a helpful, pattern-following assistant that translates corporate jargon into plain English.',
+ },
+ {
+ role: 'system',
+ name: 'example_user',
+ content: 'New synergies will help drive top-line growth.',
+ },
+ {
+ role: 'system',
+ name: 'example_assistant',
+ content: 'Things working well together will increase revenue.',
+ },
+ {
+ role: 'system',
+ name: 'example_user',
+ content:
+ 'Let\'s circle back when we have more bandwidth to touch base on opportunities for increased leverage.',
+ },
+ {
+ role: 'system',
+ name: 'example_assistant',
+ content: 'Let\'s talk later when we\'re less busy about how to do better.',
+ },
+ {
+ role: 'user',
+ content:
+ 'This late pivot means we don\'t have time to boil the ocean for the client deliverable.',
+ },
+ ];
+
+ const testCases = [
+ { model: 'gpt-3.5-turbo-0301', expected: 127 },
+ { model: 'gpt-3.5-turbo-0613', expected: 129 },
+ { model: 'gpt-3.5-turbo', expected: 129 },
+ { model: 'gpt-4-0314', expected: 129 },
+ { model: 'gpt-4-0613', expected: 129 },
+ { model: 'gpt-4', expected: 129 },
+ { model: 'unknown', expected: 129 },
+ ];
+
+ testCases.forEach((testCase) => {
+ it(`should return ${testCase.expected} tokens for model ${testCase.model}`, () => {
+ client.modelOptions.model = testCase.model;
+ client.selectTokenizer();
+ // 3 tokens for assistant label
+ let totalTokens = 3;
+ for (let message of example_messages) {
+ totalTokens += client.getTokenCountForMessage(message);
+ }
+ expect(totalTokens).toBe(testCase.expected);
+ });
+ });
+ });
});
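The two expected values above follow directly from the counting parameters: under the defaults, each of the six messages carries 3 tokens of overhead (+18), each of the four `name` fields adds 1 (+4), and assistant-label priming adds 3, for 129 total once content tokens are included. For gpt-3.5-turbo-0301, the per-message overhead rises to 4 (+6 across six messages) while each `name` costs -1 instead of +1 (-8), so the total drops to 129 + 6 - 8 = 127.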
2 changes: 1 addition & 1 deletion api/package.json
@@ -23,7 +23,6 @@
"dependencies": {
"@anthropic-ai/sdk": "^0.5.4",
"@azure/search-documents": "^11.3.2",
"@dqbd/tiktoken": "^1.0.7",
"@keyv/mongo": "^2.1.8",
"@waylaidwanderer/chatgpt-api": "^1.37.2",
"axios": "^1.3.4",
@@ -60,6 +59,7 @@
"passport-local": "^1.0.0",
"pino": "^8.12.1",
"sharp": "^0.32.5",
"tiktoken": "^1.0.10",
"ua-parser-js": "^1.0.36",
"zod": "^3.22.2"
},
8 changes: 4 additions & 4 deletions api/server/routes/tokenizer.js
@@ -1,9 +1,9 @@
const express = require('express');
const router = express.Router();
- const { Tiktoken } = require('@dqbd/tiktoken/lite');
- const { load } = require('@dqbd/tiktoken/load');
- const registry = require('@dqbd/tiktoken/registry.json');
- const models = require('@dqbd/tiktoken/model_to_encoding.json');
+ const { Tiktoken } = require('tiktoken/lite');
+ const { load } = require('tiktoken/load');
+ const registry = require('tiktoken/registry.json');
+ const models = require('tiktoken/model_to_encoding.json');
const requireJwtAuth = require('../middleware/requireJwtAuth');

router.post('/', requireJwtAuth, async (req, res) => {
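Only the require paths change here; the lite build's loader API is assumed to carry over from `@dqbd/tiktoken` unchanged. A hedged sketch of how these four imports fit together — the function below is illustrative, since the route's actual handler body is collapsed in the diff:

```js
// Illustrative sketch, not the PR's handler: map a model name to its
// encoding, load the BPE data, count tokens, then free the encoder.
async function countTokens(text, modelName = 'gpt-3.5-turbo') {
  const model = await load(registry[models[modelName]]);
  const encoder = new Tiktoken(model.bpe_ranks, model.special_tokens, model.pat_str);
  const count = encoder.encode(text).length;
  encoder.free(); // lite encoders hold WASM memory and must be freed manually
  return count;
}
```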
1 change: 1 addition & 0 deletions api/utils/tokens.js
@@ -41,6 +41,7 @@ const maxTokensMap = {
'gpt-4': 8191,
'gpt-4-0613': 8191,
'gpt-4-32k': 32767,
+ 'gpt-4-32k-0314': 32767,
'gpt-4-32k-0613': 32767,
'gpt-3.5-turbo': 4095,
'gpt-3.5-turbo-0613': 4095,
7 changes: 6 additions & 1 deletion package-lock.json

Some generated files are not rendered by default.