
Commit b964b52

fix: Match OpenAI Token Counting Strategy 🪙 (danny-avila#945)
* wip token fix
* fix: complete token count refactor to match OpenAI example
* chore: add back sendPayload method (accidentally deleted)
* chore: revise JSDoc for getTokenCountForMessage
1 parent 98d6481 commit b964b52

10 files changed: +109 additions, -75 deletions


app/clients/AnthropicClient.js

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
// const { Agent, ProxyAgent } = require('undici');
22
const BaseClient = require('./BaseClient');
3-
const {
4-
encoding_for_model: encodingForModel,
5-
get_encoding: getEncoding,
6-
} = require('@dqbd/tiktoken');
3+
const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
74
const Anthropic = require('@anthropic-ai/sdk');
85

96
const HUMAN_PROMPT = '\n\nHuman:';
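The same one-line swap recurs in ChatGPTClient.js, GoogleClient.js, and OpenAIClient.js below: the @dqbd/tiktoken package is now published on npm as plain tiktoken with the same named exports, so only the require specifier changes. A minimal sanity check of the renamed package, assuming the WASM API documented in its README:

const { encoding_for_model: encodingForModel } = require('tiktoken');

const encoder = encodingForModel('gpt-3.5-turbo');
console.log(encoder.encode('Hello, world!').length); // number of BPE tokens
encoder.free(); // WASM-backed encoders must be freed explicitly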

app/clients/BaseClient.js

Lines changed: 17 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,9 @@ class BaseClient {
272272
* @returns {Object} An object with three properties: `context`, `remainingContextTokens`, and `messagesToRefine`. `context` is an array of messages that fit within the token limit. `remainingContextTokens` is the number of tokens remaining within the limit after adding the messages to the context. `messagesToRefine` is an array of messages that were not added to the context because they would have exceeded the token limit.
273273
*/
274274
async getMessagesWithinTokenLimit(messages) {
275-
let currentTokenCount = 0;
275+
// Every reply is primed with <|start|>assistant<|message|>, so we
276+
// start with 3 tokens for the label after all messages have been counted.
277+
let currentTokenCount = 3;
276278
let context = [];
277279
let messagesToRefine = [];
278280
let refineIndex = -1;
@@ -562,44 +564,29 @@ class BaseClient {
562564
* Algorithm adapted from "6. Counting tokens for chat API calls" of
563565
* https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
564566
*
565-
* An additional 2 tokens need to be added for metadata after all messages have been counted.
567+
* An additional 3 tokens need to be added for assistant label priming after all messages have been counted.
566568
*
567-
* @param {*} message
569+
* @param {Object} message
568570
*/
569571
getTokenCountForMessage(message) {
570-
let tokensPerMessage;
571-
let nameAdjustment;
572-
if (this.modelOptions.model.startsWith('gpt-4')) {
573-
tokensPerMessage = 3;
574-
nameAdjustment = 1;
575-
} else {
576-
tokensPerMessage = 4;
577-
nameAdjustment = -1;
578-
}
572+
// Note: gpt-3.5-turbo and gpt-4 may update over time. Use default for these as well as for unknown models
573+
let tokensPerMessage = 3;
574+
let tokensPerName = 1;
579575

580-
if (this.options.debug) {
581-
console.debug('getTokenCountForMessage', message);
576+
if (this.modelOptions.model === 'gpt-3.5-turbo-0301') {
577+
tokensPerMessage = 4;
578+
tokensPerName = -1;
582579
}
583580

584-
// Map each property of the message to the number of tokens it contains
585-
const propertyTokenCounts = Object.entries(message).map(([key, value]) => {
586-
if (key === 'tokenCount' || typeof value !== 'string') {
587-
return 0;
581+
let numTokens = tokensPerMessage;
582+
for (let [key, value] of Object.entries(message)) {
583+
numTokens += this.getTokenCount(value);
584+
if (key === 'name') {
585+
numTokens += tokensPerName;
588586
}
589-
// Count the number of tokens in the property value
590-
const numTokens = this.getTokenCount(value);
591-
592-
// Adjust by `nameAdjustment` tokens if the property key is 'name'
593-
const adjustment = key === 'name' ? nameAdjustment : 0;
594-
return numTokens + adjustment;
595-
});
596-
597-
if (this.options.debug) {
598-
console.debug('propertyTokenCounts', propertyTokenCounts);
599587
}
600588

601-
// Sum the number of tokens in all properties and add `tokensPerMessage` for metadata
602-
return propertyTokenCounts.reduce((a, b) => a + b, tokensPerMessage);
589+
return numTokens;
603590
}
604591

605592
async sendPayload(payload, opts = {}) {
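The new getTokenCountForMessage mirrors the cookbook's num_tokens_from_messages line for line, with the final 3 priming tokens moved into getMessagesWithinTokenLimit. As a self-contained sketch of the whole strategy (the function name and the direct tiktoken usage are illustrative, not part of this codebase):

const { encoding_for_model: encodingForModel } = require('tiktoken');

function numTokensFromMessages(messages, model = 'gpt-3.5-turbo') {
  const encoder = encodingForModel(model);
  // gpt-3.5-turbo-0301 charges 4 tokens per message and -1 per name;
  // later snapshots and unknown models use the 3 / +1 defaults.
  const isLegacy = model === 'gpt-3.5-turbo-0301';
  const tokensPerMessage = isLegacy ? 4 : 3;
  const tokensPerName = isLegacy ? -1 : 1;

  let numTokens = 0;
  for (const message of messages) {
    numTokens += tokensPerMessage;
    for (const [key, value] of Object.entries(message)) {
      numTokens += encoder.encode(value).length;
      if (key === 'name') {
        numTokens += tokensPerName;
      }
    }
  }
  numTokens += 3; // every reply is primed with <|start|>assistant<|message|>
  encoder.free();
  return numTokens;
}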

app/clients/ChatGPTClient.js

Lines changed: 19 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
const crypto = require('crypto');
22
const Keyv = require('keyv');
3-
const {
4-
encoding_for_model: encodingForModel,
5-
get_encoding: getEncoding,
6-
} = require('@dqbd/tiktoken');
3+
const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
74
const { fetchEventSource } = require('@waylaidwanderer/fetch-event-source');
85
const { Agent, ProxyAgent } = require('undici');
96
const BaseClient = require('./BaseClient');
@@ -526,8 +523,8 @@ ${botMessage.message}
526523
const prompt = `${promptBody}${promptSuffix}`;
527524
if (isChatGptModel) {
528525
messagePayload.content = prompt;
529-
// Add 2 tokens for metadata after all messages have been counted.
530-
currentTokenCount += 2;
526+
// Add 3 tokens for Assistant Label priming after all messages have been counted.
527+
currentTokenCount += 3;
531528
}
532529

533530
// Use up to `this.maxContextTokens` tokens (prompt + response), but try to leave `this.maxTokens` tokens for the response.
@@ -554,33 +551,29 @@ ${botMessage.message}
554551
* Algorithm adapted from "6. Counting tokens for chat API calls" of
555552
* https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
556553
*
557-
* An additional 2 tokens need to be added for metadata after all messages have been counted.
554+
* An additional 3 tokens need to be added for assistant label priming after all messages have been counted.
558555
*
559-
* @param {*} message
556+
* @param {Object} message
560557
*/
561558
getTokenCountForMessage(message) {
562-
let tokensPerMessage;
563-
let nameAdjustment;
564-
if (this.modelOptions.model.startsWith('gpt-4')) {
565-
tokensPerMessage = 3;
566-
nameAdjustment = 1;
567-
} else {
559+
// Note: gpt-3.5-turbo and gpt-4 may update over time. Use default for these as well as for unknown models
560+
let tokensPerMessage = 3;
561+
let tokensPerName = 1;
562+
563+
if (this.modelOptions.model === 'gpt-3.5-turbo-0301') {
568564
tokensPerMessage = 4;
569-
nameAdjustment = -1;
565+
tokensPerName = -1;
570566
}
571567

572-
// Map each property of the message to the number of tokens it contains
573-
const propertyTokenCounts = Object.entries(message).map(([key, value]) => {
574-
// Count the number of tokens in the property value
575-
const numTokens = this.getTokenCount(value);
576-
577-
// Adjust by `nameAdjustment` tokens if the property key is 'name'
578-
const adjustment = key === 'name' ? nameAdjustment : 0;
579-
return numTokens + adjustment;
580-
});
568+
let numTokens = tokensPerMessage;
569+
for (let [key, value] of Object.entries(message)) {
570+
numTokens += this.getTokenCount(value);
571+
if (key === 'name') {
572+
numTokens += tokensPerName;
573+
}
574+
}
581575

582-
// Sum the number of tokens in all properties and add `tokensPerMessage` for metadata
583-
return propertyTokenCounts.reduce((a, b) => a + b, tokensPerMessage);
576+
return numTokens;
584577
}
585578
}
586579

app/clients/GoogleClient.js

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
const BaseClient = require('./BaseClient');
22
const { google } = require('googleapis');
33
const { Agent, ProxyAgent } = require('undici');
4-
const {
5-
encoding_for_model: encodingForModel,
6-
get_encoding: getEncoding,
7-
} = require('@dqbd/tiktoken');
4+
const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
85

96
const tokenizersCache = {};
107

app/clients/OpenAIClient.js

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,6 @@
11
const BaseClient = require('./BaseClient');
22
const ChatGPTClient = require('./ChatGPTClient');
3-
const {
4-
encoding_for_model: encodingForModel,
5-
get_encoding: getEncoding,
6-
} = require('@dqbd/tiktoken');
3+
const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
74
const { maxTokensMap, genAzureChatCompletion } = require('../../utils');
85
const { runTitleChain } = require('./chains');
96
const { createLLM } = require('./llm');

app/clients/specs/BaseClient.test.js

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,8 @@ describe('BaseClient', () => {
138138
{ role: 'assistant', content: 'How can I help you?', tokenCount: 19 },
139139
{ role: 'user', content: 'I have a question.', tokenCount: 18 },
140140
];
141-
const expectedRemainingContextTokens = 58; // 100 - 5 - 19 - 18
141+
// Subtract 3 tokens for Assistant Label priming after all messages have been counted.
142+
const expectedRemainingContextTokens = 58 - 3; // (100 - 5 - 19 - 18) - 3
142143
const expectedMessagesToRefine = [];
143144

144145
const result = await TestClient.getMessagesWithinTokenLimit(messages);
@@ -168,7 +169,9 @@ describe('BaseClient', () => {
168169
{ role: 'assistant', content: 'How can I help you?', tokenCount: 19 },
169170
{ role: 'user', content: 'I have a question.', tokenCount: 18 },
170171
];
171-
const expectedRemainingContextTokens = 8; // 50 - 18 - 19 - 5
172+
173+
// Subtract 3 tokens for Assistant Label priming after all messages have been counted.
174+
const expectedRemainingContextTokens = 8 - 3; // (50 - 18 - 19 - 5) - 3
172175
const expectedMessagesToRefine = [
173176
{ role: 'user', content: 'I need a coffee, stat!', tokenCount: 30 },
174177
{ role: 'assistant', content: 'Sure, I can help with that.', tokenCount: 30 },

app/clients/specs/OpenAIClient.test.js

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,4 +213,63 @@ describe('OpenAIClient', () => {
213213
expect(result.prompt).toEqual([]);
214214
});
215215
});
216+
217+
describe('getTokenCountForMessage', () => {
218+
const example_messages = [
219+
{
220+
role: 'system',
221+
content:
222+
'You are a helpful, pattern-following assistant that translates corporate jargon into plain English.',
223+
},
224+
{
225+
role: 'system',
226+
name: 'example_user',
227+
content: 'New synergies will help drive top-line growth.',
228+
},
229+
{
230+
role: 'system',
231+
name: 'example_assistant',
232+
content: 'Things working well together will increase revenue.',
233+
},
234+
{
235+
role: 'system',
236+
name: 'example_user',
237+
content:
238+
'Let\'s circle back when we have more bandwidth to touch base on opportunities for increased leverage.',
239+
},
240+
{
241+
role: 'system',
242+
name: 'example_assistant',
243+
content: 'Let\'s talk later when we\'re less busy about how to do better.',
244+
},
245+
{
246+
role: 'user',
247+
content:
248+
'This late pivot means we don\'t have time to boil the ocean for the client deliverable.',
249+
},
250+
];
251+
252+
const testCases = [
253+
{ model: 'gpt-3.5-turbo-0301', expected: 127 },
254+
{ model: 'gpt-3.5-turbo-0613', expected: 129 },
255+
{ model: 'gpt-3.5-turbo', expected: 129 },
256+
{ model: 'gpt-4-0314', expected: 129 },
257+
{ model: 'gpt-4-0613', expected: 129 },
258+
{ model: 'gpt-4', expected: 129 },
259+
{ model: 'unknown', expected: 129 },
260+
];
261+
262+
testCases.forEach((testCase) => {
263+
it(`should return ${testCase.expected} tokens for model ${testCase.model}`, () => {
264+
client.modelOptions.model = testCase.model;
265+
client.selectTokenizer();
266+
// 3 tokens for assistant label
267+
let totalTokens = 3;
268+
for (let message of example_messages) {
269+
totalTokens += client.getTokenCountForMessage(message);
270+
}
271+
expect(totalTokens).toBe(testCase.expected);
272+
});
273+
});
274+
});
216275
});
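These expectations follow from the counting rules above: under the default rates the six messages cost 6 x 3 = 18 fixed tokens plus 4 x (+1) = 4 for the four name fields, so 129 = 104 content tokens + 18 + 4 + 3 priming tokens. Under gpt-3.5-turbo-0301 the fixed cost is 6 x 4 - 4 = 20, two tokens less, which yields 127.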

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
"dependencies": {
2424
"@anthropic-ai/sdk": "^0.5.4",
2525
"@azure/search-documents": "^11.3.2",
26-
"@dqbd/tiktoken": "^1.0.7",
2726
"@keyv/mongo": "^2.1.8",
2827
"@waylaidwanderer/chatgpt-api": "^1.37.2",
2928
"axios": "^1.3.4",
@@ -60,6 +59,7 @@
6059
"passport-local": "^1.0.0",
6160
"pino": "^8.12.1",
6261
"sharp": "^0.32.5",
62+
"tiktoken": "^1.0.10",
6363
"ua-parser-js": "^1.0.36",
6464
"zod": "^3.22.2"
6565
},

server/routes/tokenizer.js

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
const express = require('express');
22
const router = express.Router();
3-
const { Tiktoken } = require('@dqbd/tiktoken/lite');
4-
const { load } = require('@dqbd/tiktoken/load');
5-
const registry = require('@dqbd/tiktoken/registry.json');
6-
const models = require('@dqbd/tiktoken/model_to_encoding.json');
3+
const { Tiktoken } = require('tiktoken/lite');
4+
const { load } = require('tiktoken/load');
5+
const registry = require('tiktoken/registry.json');
6+
const models = require('tiktoken/model_to_encoding.json');
77
const requireJwtAuth = require('../middleware/requireJwtAuth');
88

99
router.post('/', requireJwtAuth, async (req, res) => {
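The route body is unchanged; only the require specifiers move. For reference, the lite build is typically wired up as in this sketch (following the tiktoken README; countTokens is an illustrative helper, not the actual route code):

const { Tiktoken } = require('tiktoken/lite');
const { load } = require('tiktoken/load');
const registry = require('tiktoken/registry.json');
const models = require('tiktoken/model_to_encoding.json');

async function countTokens(text, modelName = 'gpt-3.5-turbo') {
  // Resolve the model to its encoding (e.g. cl100k_base) and load the BPE ranks.
  const spec = await load(registry[models[modelName]]);
  const encoder = new Tiktoken(spec.bpe_ranks, spec.special_tokens, spec.pat_str);
  const count = encoder.encode(text).length;
  encoder.free(); // lite encoders are WASM-backed and must be freed
  return count;
}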

utils/tokens.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ const maxTokensMap = {
4141
'gpt-4': 8191,
4242
'gpt-4-0613': 8191,
4343
'gpt-4-32k': 32767,
44+
'gpt-4-32k-0314': 32767,
4445
'gpt-4-32k-0613': 32767,
4546
'gpt-3.5-turbo': 4095,
4647
'gpt-3.5-turbo-0613': 4095,
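Without this entry, a gpt-4-32k-0314 lookup would miss and fall back to the caller's default context size. A hypothetical illustration of the lookup (the require path and fallback value here are assumed, not taken from this diff):

const { maxTokensMap } = require('../../utils');

const maxContextTokens = maxTokensMap['gpt-4-32k-0314'] ?? 4095; // now 32767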
