
Commit 74c8bf7

🚀 Feat: Streamline File Strategies & GPT-4-Vision Settings (danny-avila#1535)
* chore: fix `endpoint` TypeScript issues and typo in console info message
* feat(api): files GET endpoint and save only file_id references to messages
* refactor(client): `useGetFiles` query hook, update file types, optimistic update of filesQuery on file upload
* refactor(buildTree): update to use params object and accept fileMap
* feat: map files to messages; refactor(ChatView): messages only available after files are fetched
* fix: fetch files only when authenticated
* feat(api): AppService: rename app.locals.configs to app.locals.paths; load custom config; use fileStrategy from yaml config in app.locals
* refactor: separate Firebase and Local strategies, call based on config
* refactor: modularize file strategies and employ them with DALL-E
* refactor(librechat.yaml): add fileStrategy field
* feat: add source to MongoFile schema, as well as BatchFile and ExtendedFile types
* feat: employ file strategies for upload/delete files
* refactor(deleteFirebaseFile): add user id validation for firebase file deletion
* chore(deleteFirebaseFile): update jsdocs
* feat: employ strategies for vision requests
* fix(client): handle messages with deleted files
* fix(client): ensure `filesToDelete` always saves/sends `file.source`
* feat(openAI): configurable `resendImages` and `imageDetail`
* refactor(getTokenCountForMessage): process recursively only for arrays of objects, and only their values (not keys), aside from `image_url` types
* feat(OpenAIClient): calculateImageTokenCost
* chore: remove comment
* refactor(uploadAvatar): employ fileStrategy for avatars, from social logins or user upload
* docs: update docs on how to configure fileStrategy
* fix(ci): mock winston and winston-related modules, update DALLE3.spec.js with changes made
* refactor(redis): change terminal message to reflect current development state
* fix(DALL-E-2): pass fileStrategy to dall-e
1 parent 3ca465d commit 74c8bf7

49 files changed: +1418 -806 lines
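
The unifying idea across these changes is a pluggable file-storage strategy: at startup, AppService reads `fileStrategy` from the `librechat.yaml` config into `app.locals`, and the upload, delete, avatar, and vision code paths dispatch on that value instead of hard-coding local-disk or Firebase logic. A minimal sketch of that dispatch, with hypothetical handler names (the commit's actual module layout may differ):

// Hypothetical sketch of config-driven strategy dispatch; handler names
// are illustrative stand-ins, not the commit's actual exports.
const localStrategy = {
  saveFile: async (file) => `/uploads/${file.filename}`, // write to local disk
  deleteFile: async (file) => console.log('unlink', file.filepath),
};

const firebaseStrategy = {
  saveFile: async (file) => `firebase://${file.filename}`, // upload to Firebase Storage
  deleteFile: async (file) => console.log('delete from Firebase', file.filepath),
};

// `fileStrategy` is loaded from librechat.yaml into app.locals at startup
function getFileStrategy(fileStrategy) {
  return fileStrategy === 'firebase' ? firebaseStrategy : localStrategy;
}

// Usage: const { saveFile } = getFileStrategy(req.app.locals.fileStrategy);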

app/clients/BaseClient.js

Lines changed: 27 additions & 9 deletions
@@ -46,6 +46,10 @@ class BaseClient {
     logger.debug('`[BaseClient] recordTokenUsage` not implemented.', response);
   }
 
+  async addPreviousAttachments(messages) {
+    return messages;
+  }
+
   async recordTokenUsage({ promptTokens, completionTokens }) {
     logger.debug('`[BaseClient] recordTokenUsage` not implemented.', {
       promptTokens,

@@ -484,20 +488,22 @@ class BaseClient {
       mapMethod = this.getMessageMapMethod();
     }
 
-    const orderedMessages = this.constructor.getMessagesForConversation({
+    let _messages = this.constructor.getMessagesForConversation({
       messages,
       parentMessageId,
       mapMethod,
     });
 
+    _messages = await this.addPreviousAttachments(_messages);
+
     if (!this.shouldSummarize) {
-      return orderedMessages;
+      return _messages;
     }
 
     // Find the latest message with a 'summary' property
-    for (let i = orderedMessages.length - 1; i >= 0; i--) {
-      if (orderedMessages[i]?.summary) {
-        this.previous_summary = orderedMessages[i];
+    for (let i = _messages.length - 1; i >= 0; i--) {
+      if (_messages[i]?.summary) {
+        this.previous_summary = _messages[i];
         break;
       }
     }

@@ -512,7 +518,7 @@ class BaseClient {
       });
     }
 
-    return orderedMessages;
+    return _messages;
   }
 
   async saveMessageToDatabase(message, endpointOptions, user = null) {

@@ -618,6 +624,11 @@ class BaseClient {
    * An additional 3 tokens need to be added for assistant label priming after all messages have been counted.
    * In our implementation, this is accounted for in the getMessagesWithinTokenLimit method.
    *
+   * The content parts example was adapted from the following example:
+   * https://github.com/openai/openai-cookbook/pull/881/files
+   *
+   * Note: image token calculation is to be done elsewhere where we have access to the image metadata
+   *
    * @param {Object} message
    */
   getTokenCountForMessage(message) {

@@ -631,11 +642,18 @@
     }
 
     const processValue = (value) => {
-      if (typeof value === 'object' && value !== null) {
-        for (let [nestedKey, nestedValue] of Object.entries(value)) {
-          if (nestedKey === 'image_url' || nestedValue === 'image_url') {
+      if (Array.isArray(value)) {
+        for (let item of value) {
+          if (!item || !item.type || item.type === 'image_url') {
             continue;
           }
+
+          const nestedValue = item[item.type];
+
+          if (!nestedValue) {
+            continue;
+          }
+
           processValue(nestedValue);
         }
       } else {
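
The rewritten `processValue` narrows the recursion from arbitrary objects to arrays of typed content parts: each `{ type, [type]: value }` part is unwrapped via `item[item.type]`, and `image_url` parts are skipped because their cost is computed from image dimensions elsewhere. A standalone sketch of the traversal (the tokenizer is a crude stand-in, not the client's tiktoken-based counter):

// Standalone sketch of the content-parts traversal above.
const countTokens = (text) => text.split(/\s+/).length; // crude stand-in tokenizer

function tokensForContent(content) {
  let total = 0;
  const processValue = (value) => {
    if (Array.isArray(value)) {
      for (const item of value) {
        // skip malformed parts and image parts; image cost is added elsewhere
        if (!item || !item.type || item.type === 'image_url') {
          continue;
        }
        processValue(item[item.type]); // e.g. item.text for { type: 'text' }
      }
    } else if (typeof value === 'string') {
      total += countTokens(value);
    }
  };
  processValue(content);
  return total;
}

// tokensForContent([{ type: 'text', text: 'describe this image' },
//                   { type: 'image_url', image_url: { url: '...' } }])
// counts only the text part.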

app/clients/OpenAIClient.js

Lines changed: 149 additions & 20 deletions
@@ -1,15 +1,16 @@
 const OpenAI = require('openai');
 const { HttpsProxyAgent } = require('https-proxy-agent');
-const { getResponseSender } = require('librechat-data-provider');
+const { getResponseSender, ImageDetailCost, ImageDetail } = require('librechat-data-provider');
 const { encoding_for_model: encodingForModel, get_encoding: getEncoding } = require('tiktoken');
 const { encodeAndFormat, validateVisionModel } = require('~/server/services/Files/images');
 const { getModelMaxTokens, genAzureChatCompletion, extractBaseURL } = require('~/utils');
 const { truncateText, formatMessage, CUT_OFF_PROMPT } = require('./prompts');
 const { handleOpenAIErrors } = require('./tools/util');
 const spendTokens = require('~/models/spendTokens');
 const { createLLM, RunManager } = require('./llm');
-const { isEnabled } = require('~/server/utils');
 const ChatGPTClient = require('./ChatGPTClient');
+const { isEnabled } = require('~/server/utils');
+const { getFiles } = require('~/models/File');
 const { summaryBuffer } = require('./memory');
 const { runTitleChain } = require('./chains');
 const { tokenSplit } = require('./document');

@@ -76,16 +77,7 @@ class OpenAIClient extends BaseClient {
       };
     }
 
-    this.isVisionModel = validateVisionModel(this.modelOptions.model);
-
-    if (this.options.attachments && !this.isVisionModel) {
-      this.modelOptions.model = 'gpt-4-vision-preview';
-      this.isVisionModel = true;
-    }
-
-    if (this.isVisionModel) {
-      delete this.modelOptions.stop;
-    }
+    this.checkVisionRequest(this.options.attachments);
 
     const { OPENROUTER_API_KEY, OPENAI_FORCE_PROMPT } = process.env ?? {};
     if (OPENROUTER_API_KEY && !this.azure) {

@@ -204,6 +196,27 @@
     return this;
   }
 
+  /**
+   *
+   * Checks if the model is a vision model based on request attachments and sets the appropriate options:
+   * - Sets `this.modelOptions.model` to `gpt-4-vision-preview` if the request is a vision request.
+   * - Sets `this.isVisionModel` to `true` if vision request.
+   * - Deletes `this.modelOptions.stop` if vision request.
+   * @param {Array<Promise<MongoFile[]> | MongoFile[]> | Record<string, MongoFile[]>} attachments
+   */
+  checkVisionRequest(attachments) {
+    this.isVisionModel = validateVisionModel(this.modelOptions.model);
+
+    if (attachments && !this.isVisionModel) {
+      this.modelOptions.model = 'gpt-4-vision-preview';
+      this.isVisionModel = true;
+    }
+
+    if (this.isVisionModel) {
+      delete this.modelOptions.stop;
+    }
+  }
+
   setupTokens() {
     if (this.isChatCompletion) {
       this.startToken = '||>';

@@ -288,7 +301,11 @@
     tokenizerCallsCount++;
   }
 
-  // Returns the token count of a given text. It also checks and resets the tokenizers if necessary.
+  /**
+   * Returns the token count of a given text. It also checks and resets the tokenizers if necessary.
+   * @param {string} text - The text to get the token count for.
+   * @returns {number} The token count of the given text.
+   */
   getTokenCount(text) {
     this.resetTokenizersIfNecessary();
     try {

@@ -301,10 +318,33 @@
     }
   }
 
+  /**
+   * Calculate the token cost for an image based on its dimensions and detail level.
+   *
+   * @param {Object} image - The image object.
+   * @param {number} image.width - The width of the image.
+   * @param {number} image.height - The height of the image.
+   * @param {'low'|'high'|string|undefined} [image.detail] - The detail level ('low', 'high', or other).
+   * @returns {number} The calculated token cost.
+   */
+  calculateImageTokenCost({ width, height, detail }) {
+    if (detail === 'low') {
+      return ImageDetailCost.LOW;
+    }
+
+    // Calculate the number of 512px squares
+    const numSquares = Math.ceil(width / 512) * Math.ceil(height / 512);
+
+    // Default to high detail cost calculation
+    return numSquares * ImageDetailCost.HIGH + ImageDetailCost.ADDITIONAL;
+  }
+
   getSaveOptions() {
     return {
       chatGptLabel: this.options.chatGptLabel,
       promptPrefix: this.options.promptPrefix,
+      resendImages: this.options.resendImages,
+      imageDetail: this.options.imageDetail,
       ...this.modelOptions,
     };
   }
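
Assuming `ImageDetailCost` in librechat-data-provider mirrors OpenAI's published vision pricing (85 tokens flat for low detail; 170 tokens per 512px tile plus an 85-token base for high detail; an assumption, since the constants are defined outside this diff), a worked example of the calculation:

// Worked example under the assumed constants LOW = 85, HIGH = 170, ADDITIONAL = 85.
const ImageDetailCost = { LOW: 85, HIGH: 170, ADDITIONAL: 85 };

function calculateImageTokenCost({ width, height, detail }) {
  if (detail === 'low') {
    return ImageDetailCost.LOW; // flat cost regardless of size
  }
  const numSquares = Math.ceil(width / 512) * Math.ceil(height / 512);
  return numSquares * ImageDetailCost.HIGH + ImageDetailCost.ADDITIONAL;
}

console.log(calculateImageTokenCost({ width: 1024, height: 1024, detail: 'high' }));
// ceil(1024/512) * ceil(1024/512) = 4 tiles -> 4 * 170 + 85 = 765 tokens
console.log(calculateImageTokenCost({ width: 1024, height: 1024, detail: 'low' }));
// 85 tokens
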
@@ -317,6 +357,69 @@
     };
   }
 
+  /**
+   *
+   * @param {TMessage[]} _messages
+   * @returns {TMessage[]}
+   */
+  async addPreviousAttachments(_messages) {
+    if (!this.options.resendImages) {
+      return _messages;
+    }
+
+    /**
+     *
+     * @param {TMessage} message
+     */
+    const processMessage = async (message) => {
+      if (!this.message_file_map) {
+        /** @type {Record<string, MongoFile[]>} */
+        this.message_file_map = {};
+      }
+
+      const fileIds = message.files.map((file) => file.file_id);
+      const files = await getFiles({
+        file_id: { $in: fileIds },
+      });
+
+      await this.addImageURLs(message, files);
+
+      this.message_file_map[message.messageId] = files;
+      return message;
+    };
+
+    const promises = [];
+
+    for (const message of _messages) {
+      if (!message.files) {
+        promises.push(message);
+        continue;
+      }
+
+      promises.push(processMessage(message));
+    }
+
+    const messages = await Promise.all(promises);
+
+    this.checkVisionRequest(this.message_file_map);
+    return messages;
+  }
+
+  /**
+   *
+   * Adds image URLs to the message object and returns the files
+   *
+   * @param {TMessage} message
+   * @param {MongoFile[]} attachments
+   * @returns {Promise<MongoFile[]>}
+   */
+  async addImageURLs(message, attachments) {
+    const { files, image_urls } = await encodeAndFormat(this.options.req, attachments);
+
+    message.image_urls = image_urls;
+    return files;
+  }
+
   async buildMessages(
     messages,
     parentMessageId,
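
With `resendImages` enabled, `addPreviousAttachments` repopulates `message_file_map` keyed by `messageId`, which `buildMessages` later uses to add per-image token costs. Roughly, the map looks like this (all values are made-up placeholders):

// Illustrative shape of message_file_map; field values are placeholders.
const message_file_map = {
  'e3b0c442-message-id': [
    {
      file_id: 'file-abc123',
      type: 'image/png',
      width: 1024,
      height: 768,
      source: 'local', // or 'firebase', per the new MongoFile `source` field
    },
  ],
};
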
@@ -355,13 +458,23 @@
     }
 
     if (this.options.attachments) {
-      const attachments = await this.options.attachments;
-      const { files, image_urls } = await encodeAndFormat(
-        this.options.req,
-        attachments.filter((file) => file.type.includes('image')),
+      const attachments = (await this.options.attachments).filter((file) =>
+        file.type.includes('image'),
+      );
+
+      if (this.message_file_map) {
+        this.message_file_map[orderedMessages[orderedMessages.length - 1].messageId] = attachments;
+      } else {
+        this.message_file_map = {
+          [orderedMessages[orderedMessages.length - 1].messageId]: attachments,
+        };
+      }
+
+      const files = await this.addImageURLs(
+        orderedMessages[orderedMessages.length - 1],
+        attachments,
       );
 
-      orderedMessages[orderedMessages.length - 1].image_urls = image_urls;
       this.options.attachments = files;
     }
 

@@ -372,10 +485,25 @@
         assistantName: this.options?.chatGptLabel,
       });
 
-      if (this.contextStrategy && !orderedMessages[i].tokenCount) {
+      const needsTokenCount = this.contextStrategy && !orderedMessages[i].tokenCount;
+
+      /* If tokens were never counted, or, is a Vision request and the message has files, count again */
+      if (needsTokenCount || (this.isVisionModel && (message.image_urls || message.files))) {
         orderedMessages[i].tokenCount = this.getTokenCountForMessage(formattedMessage);
       }
 
+      /* If message has files, calculate image token cost */
+      if (this.message_file_map && this.message_file_map[message.messageId]) {
+        const attachments = this.message_file_map[message.messageId];
+        for (const file of attachments) {
+          orderedMessages[i].tokenCount += this.calculateImageTokenCost({
+            width: file.width,
+            height: file.height,
+            detail: this.options.imageDetail ?? ImageDetail.auto,
+          });
+        }
+      }
+
       return formattedMessage;
     });
 

@@ -780,7 +908,6 @@ ${convo}
     if (this.isChatCompletion) {
       modelOptions.messages = payload;
     } else {
-      // TODO: unreachable code. Need to implement completions call for non-chat models
      modelOptions.prompt = payload;
     }
 

@@ -916,6 +1043,8 @@ ${convo}
         clientOptions.addMetadata({ finish_reason });
       }
 
+      logger.debug('[OpenAIClient] chatCompletion response', chatCompletion);
+
      return message.content;
     } catch (err) {
       if (

app/clients/PluginsClient.js

Lines changed: 1 addition & 1 deletion
@@ -112,7 +112,7 @@ class PluginsClient extends OpenAIClient {
         signal: this.abortController.signal,
         openAIApiKey: this.openAIApiKey,
         conversationId: this.conversationId,
-        debug: this.options?.debug,
+        fileStrategy: this.options.req.app.locals.fileStrategy,
         message,
       },
     });
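
Forwarding `fileStrategy` through tool options lets image-generating tools such as DALL-E persist output through the same strategy layer. A hypothetical sketch of the consuming side (names are illustrative, not the actual tool code):

// Hypothetical consumer of the forwarded option; not the commit's DALL-E tool.
class ImageTool {
  constructor({ fileStrategy }) {
    this.fileStrategy = fileStrategy; // 'local' or 'firebase', from app.locals
  }

  async saveGeneratedImage(filename) {
    if (this.fileStrategy === 'firebase') {
      return `firebase://${filename}`; // would upload the image to Firebase Storage
    }
    return `/images/${filename}`; // would write the image to local disk
  }
}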

app/clients/specs/OpenAIClient.test.js

Lines changed: 33 additions & 0 deletions
@@ -546,6 +546,39 @@ describe('OpenAIClient', () => {
       expect(totalTokens).toBe(testCase.expected);
     });
   });
+
+  const vision_request = [
+    {
+      role: 'user',
+      content: [
+        {
+          type: 'text',
+          text: 'describe what is in this image?',
+        },
+        {
+          type: 'image_url',
+          image_url: {
+            url: 'https://venturebeat.com/wp-content/uploads/2019/03/openai-1.png',
+            detail: 'high',
+          },
+        },
+      ],
+    },
+  ];
+
+  const expectedTokens = 14;
+  const visionModel = 'gpt-4-vision-preview';
+
+  it(`should return ${expectedTokens} tokens for model ${visionModel} (Vision Request)`, () => {
+    client.modelOptions.model = visionModel;
+    client.selectTokenizer();
+    // 3 tokens for assistant label
+    let totalTokens = 3;
+    for (let message of vision_request) {
+      totalTokens += client.getTokenCountForMessage(message);
+    }
+    expect(totalTokens).toBe(expectedTokens);
+  });
 });
 
 describe('sendMessage/getCompletion/chatCompletion', () => {
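
The expected count of 14 is consistent with the counting rules documented in BaseClient, assuming cl100k_base splits the prompt into 7 tokens ('describe', ' what', ' is', ' in', ' this', ' image', '?'); the `image_url` part contributes nothing here because image cost is added separately from dimensions:

// Assumed breakdown of the 14-token expectation for the vision request above.
const perMessageOverhead = 3; // <|start|>{role}<|message|>{content}<|end|>
const roleTokens = 1; // 'user'
const textTokens = 7; // assumed cl100k_base split of the text part
const assistantPriming = 3; // added once after all messages are counted
console.log(perMessageOverhead + roleTokens + textTokens + assistantPriming); // 14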
