Skip to content

Commit 1f0fb49

Browse files
authored
🎉 feat: Optimizations and Anthropic Title Generation (#2184)
* feat: add claude-3-haiku-20240307 to default anthropic list * refactor: optimize `saveMessage` calls mid-stream via throttling * chore: remove addMetadata operations and consolidate in BaseClient * fix(listAssistantsForAzure): attempt to specify correct model mapping as accurately as possible (#2177) * refactor(client): update last conversation setup with current assistant model, call newConvo again when assistants load to allow fast initial load and ensure assistant model is always the default, not the last selected model * refactor(cache): explicitly add TTL of 2 minutes when setting titleCache and add default TTL of 10 minutes to abortKeys cache * feat(AnthropicClient): conversation titling using Anthropic Function Calling * chore: remove extraneous token usage logging * fix(convos): unhandled edge case for conversation grouping (undefined conversation) * style: Improved style of Search Bar after recent UI update * chore: remove unused code, content part helpers * feat: always show code option
1 parent 8e78164 commit 1f0fb49

File tree

31 files changed

+426
-188
lines changed

31 files changed

+426
-188
lines changed

api/app/clients/AnthropicClient.js

Lines changed: 96 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,13 @@ const {
66
validateVisionModel,
77
} = require('librechat-data-provider');
88
const { encodeAndFormat } = require('~/server/services/Files/images/encode');
9-
const { formatMessage, createContextHandlers } = require('./prompts');
9+
const {
10+
titleFunctionPrompt,
11+
parseTitleFromPrompt,
12+
truncateText,
13+
formatMessage,
14+
createContextHandlers,
15+
} = require('./prompts');
1016
const spendTokens = require('~/models/spendTokens');
1117
const { getModelMaxTokens } = require('~/utils');
1218
const BaseClient = require('./BaseClient');
@@ -108,7 +114,12 @@ class AnthropicClient extends BaseClient {
108114
return this;
109115
}
110116

117+
/**
118+
* Get the initialized Anthropic client.
119+
* @returns {Anthropic} The Anthropic client instance.
120+
*/
111121
getClient() {
122+
/** @type {Anthropic.default.RequestOptions} */
112123
const options = {
113124
apiKey: this.apiKey,
114125
};
@@ -176,14 +187,13 @@ class AnthropicClient extends BaseClient {
176187
return files;
177188
}
178189

179-
async recordTokenUsage({ promptTokens, completionTokens }) {
180-
logger.debug('[AnthropicClient] recordTokenUsage:', { promptTokens, completionTokens });
190+
async recordTokenUsage({ promptTokens, completionTokens, model, context = 'message' }) {
181191
await spendTokens(
182192
{
193+
context,
183194
user: this.user,
184-
model: this.modelOptions.model,
185-
context: 'message',
186195
conversationId: this.conversationId,
196+
model: model ?? this.modelOptions.model,
187197
endpointTokenConfig: this.options.endpointTokenConfig,
188198
},
189199
{ promptTokens, completionTokens },
@@ -512,8 +522,15 @@ class AnthropicClient extends BaseClient {
512522
logger.debug('AnthropicClient doesn\'t use getCompletion (all handled in sendCompletion)');
513523
}
514524

515-
async createResponse(client, options) {
516-
return this.useMessages
525+
/**
526+
* Creates a message or completion response using the Anthropic client.
527+
* @param {Anthropic} client - The Anthropic client instance.
528+
* @param {Anthropic.default.MessageCreateParams | Anthropic.default.CompletionCreateParams} options - The options for the message or completion.
529+
* @param {boolean} useMessages - Whether to use messages or completions. Defaults to `this.useMessages`.
530+
* @returns {Promise<Anthropic.default.Message | Anthropic.default.Completion>} The response from the Anthropic client.
531+
*/
532+
async createResponse(client, options, useMessages) {
533+
return useMessages ?? this.useMessages
517534
? await client.messages.create(options)
518535
: await client.completions.create(options);
519536
}
@@ -663,6 +680,78 @@ class AnthropicClient extends BaseClient {
663680
getTokenCount(text) {
664681
return this.gptEncoder.encode(text, 'all').length;
665682
}
683+
684+
/**
685+
* Generates a concise title for a conversation based on the user's input text and response.
686+
* Involves sending a chat completion request with specific instructions for title generation.
687+
*
688+
* This function capitalizes on [Anthropic's function calling training](https://docs.anthropic.com/claude/docs/functions-external-tools).
689+
*
690+
* @param {Object} params - The parameters for the conversation title generation.
691+
* @param {string} params.text - The user's input.
692+
* @param {string} [params.responseText=''] - The AI's immediate response to the user.
693+
*
694+
* @returns {Promise<string | 'New Chat'>} A promise that resolves to the generated conversation title.
695+
* In case of failure, it will return the default title, "New Chat".
696+
*/
697+
async titleConvo({ text, responseText = '' }) {
698+
let title = 'New Chat';
699+
const convo = `<initial_message>
700+
${truncateText(text)}
701+
</initial_message>
702+
<response>
703+
${JSON.stringify(truncateText(responseText))}
704+
</response>`;
705+
706+
const { ANTHROPIC_TITLE_MODEL } = process.env ?? {};
707+
const model = this.options.titleModel ?? ANTHROPIC_TITLE_MODEL ?? 'claude-3-haiku-20240307';
708+
const system = titleFunctionPrompt;
709+
710+
const titleChatCompletion = async () => {
711+
const content = `<conversation_context>
712+
${convo}
713+
</conversation_context>
714+
715+
Please generate a title for this conversation.`;
716+
717+
const titleMessage = { role: 'user', content };
718+
const requestOptions = {
719+
model,
720+
temperature: 0.3,
721+
max_tokens: 1024,
722+
system,
723+
stop_sequences: ['\n\nHuman:', '\n\nAssistant', '</function_calls>'],
724+
messages: [titleMessage],
725+
};
726+
727+
try {
728+
const response = await this.createResponse(this.getClient(), requestOptions, true);
729+
let promptTokens = response?.usage?.input_tokens;
730+
let completionTokens = response?.usage?.output_tokens;
731+
if (!promptTokens) {
732+
promptTokens = this.getTokenCountForMessage(titleMessage);
733+
promptTokens += this.getTokenCountForMessage({ role: 'system', content: system });
734+
}
735+
if (!completionTokens) {
736+
completionTokens = this.getTokenCountForMessage(response.content[0]);
737+
}
738+
await this.recordTokenUsage({
739+
model,
740+
promptTokens,
741+
completionTokens,
742+
context: 'title',
743+
});
744+
const text = response.content[0].text;
745+
title = parseTitleFromPrompt(text);
746+
} catch (e) {
747+
logger.error('[AnthropicClient] There was an issue generating the title', e);
748+
}
749+
};
750+
751+
await titleChatCompletion();
752+
logger.debug('[AnthropicClient] Convo Title: ' + title);
753+
return title;
754+
}
666755
}
667756

668757
module.exports = AnthropicClient;

api/app/clients/BaseClient.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,7 @@ class BaseClient {
456456
sender: this.sender,
457457
text: addSpaceIfNeeded(generation) + completion,
458458
promptTokens,
459+
...(this.metadata ?? {}),
459460
};
460461

461462
if (

api/app/clients/OpenAIClient.js

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ class OpenAIClient extends BaseClient {
4747
/** @type {AzureOptions} */
4848
this.azure = options.azure || false;
4949
this.setOptions(options);
50+
this.metadata = {};
5051
}
5152

5253
// TODO: PluginsClient calls this 3x, unneeded
@@ -574,7 +575,6 @@ class OpenAIClient extends BaseClient {
574575
} else if (typeof opts.onProgress === 'function' || this.options.useChatCompletion) {
575576
reply = await this.chatCompletion({
576577
payload,
577-
clientOptions: opts,
578578
onProgress: opts.onProgress,
579579
abortController: opts.abortController,
580580
});
@@ -594,9 +594,9 @@ class OpenAIClient extends BaseClient {
594594
}
595595
}
596596

597-
if (streamResult && typeof opts.addMetadata === 'function') {
597+
if (streamResult) {
598598
const { finish_reason } = streamResult.choices[0];
599-
opts.addMetadata({ finish_reason });
599+
this.metadata = { finish_reason };
600600
}
601601
return (reply ?? '').trim();
602602
}
@@ -921,7 +921,6 @@ ${convo}
921921
}
922922

923923
async recordTokenUsage({ promptTokens, completionTokens }) {
924-
logger.debug('[OpenAIClient] recordTokenUsage:', { promptTokens, completionTokens });
925924
await spendTokens(
926925
{
927926
user: this.user,
@@ -941,7 +940,7 @@ ${convo}
941940
});
942941
}
943942

944-
async chatCompletion({ payload, onProgress, clientOptions, abortController = null }) {
943+
async chatCompletion({ payload, onProgress, abortController = null }) {
945944
let error = null;
946945
const errorCallback = (err) => (error = err);
947946
let intermediateReply = '';
@@ -962,15 +961,6 @@ ${convo}
962961
}
963962

964963
const baseURL = extractBaseURL(this.completionsUrl);
965-
// let { messages: _msgsToLog, ...modelOptionsToLog } = modelOptions;
966-
// if (modelOptionsToLog.messages) {
967-
// _msgsToLog = modelOptionsToLog.messages.map((msg) => {
968-
// let { content, ...rest } = msg;
969-
970-
// if (content)
971-
// return { ...rest, content: truncateText(content) };
972-
// });
973-
// }
974964
logger.debug('[OpenAIClient] chatCompletion', { baseURL, modelOptions });
975965
const opts = {
976966
baseURL,
@@ -1163,8 +1153,8 @@ ${convo}
11631153
}
11641154

11651155
const { message, finish_reason } = chatCompletion.choices[0];
1166-
if (chatCompletion && typeof clientOptions.addMetadata === 'function') {
1167-
clientOptions.addMetadata({ finish_reason });
1156+
if (chatCompletion) {
1157+
this.metadata = { finish_reason };
11681158
}
11691159

11701160
logger.debug('[OpenAIClient] chatCompletion response', chatCompletion);

api/app/clients/prompts/titlePrompts.js

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,60 @@ ${convo}`,
2727
return titlePrompt;
2828
};
2929

30+
const titleFunctionPrompt = `In this environment you have access to a set of tools you can use to generate the conversation title.
31+
32+
You may call them like this:
33+
<function_calls>
34+
<invoke>
35+
<tool_name>$TOOL_NAME</tool_name>
36+
<parameters>
37+
<$PARAMETER_NAME>$PARAMETER_VALUE</$PARAMETER_NAME>
38+
...
39+
</parameters>
40+
</invoke>
41+
</function_calls>
42+
43+
Here are the tools available:
44+
<tools>
45+
<tool_description>
46+
<tool_name>submit_title</tool_name>
47+
<description>
48+
Submit a brief title in the conversation's language, following the parameter description closely.
49+
</description>
50+
<parameters>
51+
<parameter>
52+
<name>title</name>
53+
<type>string</type>
54+
<description>A concise, 5-word-or-less title for the conversation, using its same language, with no punctuation. Apply title case conventions appropriate for the language. For English, use AP Stylebook Title Case. Never directly mention the language name or the word "title"</description>
55+
</parameter>
56+
</parameters>
57+
</tool_description>
58+
</tools>`;
59+
60+
/**
61+
* Parses titles from title functions based on the provided prompt.
62+
* @param {string} prompt - The prompt containing the title function.
63+
* @returns {string} The parsed title. "New Chat" if no title is found.
64+
*/
65+
function parseTitleFromPrompt(prompt) {
66+
const titleRegex = /<title>(.+?)<\/title>/;
67+
const titleMatch = prompt.match(titleRegex);
68+
69+
if (titleMatch && titleMatch[1]) {
70+
const title = titleMatch[1].trim();
71+
72+
// // Capitalize the first letter of each word; Note: unnecessary due to title case prompting
73+
// const capitalizedTitle = title.replace(/\b\w/g, (char) => char.toUpperCase());
74+
75+
return title;
76+
}
77+
78+
return 'New Chat';
79+
}
80+
3081
module.exports = {
3182
langPrompt,
3283
createTitlePrompt,
84+
titleFunctionPrompt,
85+
parseTitleFromPrompt,
3386
};

api/cache/getLogStores.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ const modelQueries = isEnabled(process.env.USE_REDIS)
3737

3838
const abortKeys = isEnabled(USE_REDIS)
3939
? new Keyv({ store: keyvRedis })
40-
: new Keyv({ namespace: CacheKeys.ABORT_KEYS });
40+
: new Keyv({ namespace: CacheKeys.ABORT_KEYS, ttl: 600000 });
4141

4242
const namespaces = {
4343
[CacheKeys.CONFIG_STORE]: config,

api/models/spendTokens.js

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,15 @@ const { logger } = require('~/config');
2121
*/
2222
const spendTokens = async (txData, tokenUsage) => {
2323
const { promptTokens, completionTokens } = tokenUsage;
24-
logger.debug(`[spendTokens] conversationId: ${txData.conversationId} | Token usage: `, {
25-
promptTokens,
26-
completionTokens,
27-
});
24+
logger.debug(
25+
`[spendTokens] conversationId: ${txData.conversationId}${
26+
txData?.context ? ` | Context: ${txData?.context}` : ''
27+
} | Token usage: `,
28+
{
29+
promptTokens,
30+
completionTokens,
31+
},
32+
);
2833
let prompt, completion;
2934
try {
3035
if (promptTokens >= 0) {

api/server/controllers/AskController.js

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
const throttle = require('lodash/throttle');
12
const { getResponseSender, Constants } = require('librechat-data-provider');
23
const { createAbortController, handleAbortError } = require('~/server/middleware');
34
const { sendMessage, createOnProgress } = require('~/server/utils');
@@ -16,13 +17,10 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
1617

1718
logger.debug('[AskController]', { text, conversationId, ...endpointOption });
1819

19-
let metadata;
2020
let userMessage;
2121
let promptTokens;
2222
let userMessageId;
2323
let responseMessageId;
24-
let lastSavedTimestamp = 0;
25-
let saveDelay = 100;
2624
const sender = getResponseSender({
2725
...endpointOption,
2826
model: endpointOption.modelOptions.model,
@@ -31,8 +29,6 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
3129
const newConvo = !conversationId;
3230
const user = req.user.id;
3331

34-
const addMetadata = (data) => (metadata = data);
35-
3632
const getReqData = (data = {}) => {
3733
for (let key in data) {
3834
if (key === 'userMessage') {
@@ -54,11 +50,8 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
5450
const { client } = await initializeClient({ req, res, endpointOption });
5551

5652
const { onProgress: progressCallback, getPartialText } = createOnProgress({
57-
onProgress: ({ text: partialText }) => {
58-
const currentTimestamp = Date.now();
59-
60-
if (currentTimestamp - lastSavedTimestamp > saveDelay) {
61-
lastSavedTimestamp = currentTimestamp;
53+
onProgress: throttle(
54+
({ text: partialText }) => {
6255
saveMessage({
6356
messageId: responseMessageId,
6457
sender,
@@ -70,12 +63,10 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
7063
error: false,
7164
user,
7265
});
73-
}
74-
75-
if (saveDelay < 500) {
76-
saveDelay = 500;
77-
}
78-
},
66+
},
67+
3000,
68+
{ trailing: false },
69+
),
7970
});
8071

8172
getText = getPartialText;
@@ -113,7 +104,6 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
113104
overrideParentMessageId,
114105
getReqData,
115106
onStart,
116-
addMetadata,
117107
abortController,
118108
onProgress: progressCallback.call(null, {
119109
res,
@@ -128,10 +118,6 @@ const AskController = async (req, res, next, initializeClient, addTitle) => {
128118
response.parentMessageId = overrideParentMessageId;
129119
}
130120

131-
if (metadata) {
132-
response = { ...response, ...metadata };
133-
}
134-
135121
response.endpoint = endpointOption.endpoint;
136122

137123
const conversation = await getConvo(user, conversationId);

0 commit comments

Comments
 (0)