
Commit 076f99f

🚀 feat: Agent Cache Tokens & Anthropic Reasoning Support (#6098)
* fix: handling of top_k and top_p parameters for Claude-3.7 models (allowed without reasoning)
* feat: bump @librechat/agents for Anthropic Reasoning support
* fix: update reasoning handling for OpenRouter integration
* fix: enhance agent token spending logic to include cache creation and read details
* fix: update logic for thinking status in ContentParts component
* refactor: improve agent title handling
* chore: bump @librechat/agents to version 2.1.7 for parallel tool calling for Google models
1 parent 5cba139 commit 076f99f


11 files changed: +187 -40 lines changed


api/app/clients/AnthropicClient.js

Lines changed: 8 additions & 9 deletions
@@ -746,15 +746,6 @@ class AnthropicClient extends BaseClient {
       metadata,
     };

-    if (!/claude-3[-.]7/.test(model)) {
-      if (top_p !== undefined) {
-        requestOptions.top_p = top_p;
-      }
-      if (top_k !== undefined) {
-        requestOptions.top_k = top_k;
-      }
-    }
-
     if (this.useMessages) {
       requestOptions.messages = payload;
       requestOptions.max_tokens =
@@ -769,6 +760,14 @@
       thinkingBudget: this.options.thinkingBudget,
     });

+    if (!/claude-3[-.]7/.test(model)) {
+      requestOptions.top_p = top_p;
+      requestOptions.top_k = top_k;
+    } else if (requestOptions.thinking == null) {
+      requestOptions.topP = top_p;
+      requestOptions.topK = top_k;
+    }
+
     if (this.systemMessage && this.supportsCacheControl === true) {
       requestOptions.system = [
         {
api/app/clients/OpenAIClient.js

Lines changed: 6 additions & 0 deletions
@@ -1309,6 +1309,12 @@ ${convo}
       modelOptions.include_reasoning = true;
       reasoningKey = 'reasoning';
     }
+    if (this.useOpenRouter && modelOptions.reasoning_effort != null) {
+      modelOptions.reasoning = {
+        effort: modelOptions.reasoning_effort,
+      };
+      delete modelOptions.reasoning_effort;
+    }

     this.streamHandler = new SplitStreamHandler({
       reasoningKey,
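Note: OpenRouter expects reasoning options nested under a reasoning object rather than OpenAI's flat reasoning_effort field, so the effort value is re-keyed before the request is built. A rough before/after sketch (the model id is only an example):

// Before (OpenAI-style): { model: 'openai/o3-mini', reasoning_effort: 'high' }
const modelOptions = { model: 'openai/o3-mini', reasoning_effort: 'high' };

if (modelOptions.reasoning_effort != null) {
  modelOptions.reasoning = { effort: modelOptions.reasoning_effort };
  delete modelOptions.reasoning_effort;
}

// After (OpenRouter-style): { model: 'openai/o3-mini', reasoning: { effort: 'high' } }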

api/app/clients/specs/AnthropicClient.test.js

Lines changed: 49 additions & 0 deletions
@@ -680,4 +680,53 @@ describe('AnthropicClient', () => {
       expect(capturedOptions).not.toHaveProperty('top_p');
     });
   });
+
+  it('should include top_k and top_p parameters for Claude-3.7 models when thinking is explicitly disabled', async () => {
+    const client = new AnthropicClient('test-api-key', {
+      modelOptions: {
+        model: 'claude-3-7-sonnet',
+        temperature: 0.7,
+        topK: 10,
+        topP: 0.9,
+      },
+      thinking: false,
+    });
+
+    async function* mockAsyncGenerator() {
+      yield { type: 'message_start', message: { usage: {} } };
+      yield { delta: { text: 'Test response' } };
+      yield { type: 'message_delta', usage: {} };
+    }
+
+    jest.spyOn(client, 'createResponse').mockImplementation(() => {
+      return mockAsyncGenerator();
+    });
+
+    let capturedOptions = null;
+    jest.spyOn(client, 'getClient').mockImplementation((options) => {
+      capturedOptions = options;
+      return {};
+    });
+
+    const payload = [{ role: 'user', content: 'Test message' }];
+    await client.sendCompletion(payload, {});
+
+    expect(capturedOptions).toHaveProperty('topK', 10);
+    expect(capturedOptions).toHaveProperty('topP', 0.9);
+
+    client.setOptions({
+      modelOptions: {
+        model: 'claude-3.7-sonnet',
+        temperature: 0.7,
+        topK: 10,
+        topP: 0.9,
+      },
+      thinking: false,
+    });
+
+    await client.sendCompletion(payload, {});
+
+    expect(capturedOptions).toHaveProperty('topK', 10);
+    expect(capturedOptions).toHaveProperty('topP', 0.9);
+  });
 });

api/package.json

Lines changed: 1 addition & 1 deletion
@@ -46,7 +46,7 @@
     "@langchain/google-genai": "^0.1.9",
     "@langchain/google-vertexai": "^0.2.0",
     "@langchain/textsplitters": "^0.1.0",
-    "@librechat/agents": "^2.1.3",
+    "@librechat/agents": "^2.1.7",
     "@waylaidwanderer/fetch-event-source": "^3.0.1",
     "axios": "1.7.8",
     "bcryptjs": "^2.4.3",

api/server/controllers/agents/client.js

Lines changed: 47 additions & 15 deletions
@@ -27,10 +27,10 @@ const {
   formatContentStrings,
   createContextHandlers,
 } = require('~/app/clients/prompts');
-const { encodeAndFormat } = require('~/server/services/Files/images/encode');
+const { spendTokens, spendStructuredTokens } = require('~/models/spendTokens');
 const { getBufferString, HumanMessage } = require('@langchain/core/messages');
+const { encodeAndFormat } = require('~/server/services/Files/images/encode');
 const Tokenizer = require('~/server/services/Tokenizer');
-const { spendTokens } = require('~/models/spendTokens');
 const BaseClient = require('~/app/clients/BaseClient');
 const { getCurrentDateTime } = require('~/utils');
 const { createRun } = require('./run');
@@ -380,32 +380,61 @@ class AgentClient extends BaseClient {
     if (!collectedUsage || !collectedUsage.length) {
       return;
     }
-    const input_tokens = collectedUsage[0]?.input_tokens || 0;
+    const input_tokens =
+      (collectedUsage[0]?.input_tokens || 0) +
+      (Number(collectedUsage[0]?.input_token_details?.cache_creation) || 0) +
+      (Number(collectedUsage[0]?.input_token_details?.cache_read) || 0);

     let output_tokens = 0;
     let previousTokens = input_tokens; // Start with original input
     for (let i = 0; i < collectedUsage.length; i++) {
       const usage = collectedUsage[i];
+      if (!usage) {
+        continue;
+      }
+
+      const cache_creation = Number(usage.input_token_details?.cache_creation) || 0;
+      const cache_read = Number(usage.input_token_details?.cache_read) || 0;
+
+      const txMetadata = {
+        context,
+        conversationId: this.conversationId,
+        user: this.user ?? this.options.req.user?.id,
+        endpointTokenConfig: this.options.endpointTokenConfig,
+        model: usage.model ?? model ?? this.model ?? this.options.agent.model_parameters.model,
+      };
+
       if (i > 0) {
         // Count new tokens generated (input_tokens minus previous accumulated tokens)
-        output_tokens += (Number(usage.input_tokens) || 0) - previousTokens;
+        output_tokens +=
+          (Number(usage.input_tokens) || 0) + cache_creation + cache_read - previousTokens;
       }

       // Add this message's output tokens
       output_tokens += Number(usage.output_tokens) || 0;

       // Update previousTokens to include this message's output
       previousTokens += Number(usage.output_tokens) || 0;
-      spendTokens(
-        {
-          context,
-          conversationId: this.conversationId,
-          user: this.user ?? this.options.req.user?.id,
-          endpointTokenConfig: this.options.endpointTokenConfig,
-          model: usage.model ?? model ?? this.model ?? this.options.agent.model_parameters.model,
-        },
-        { promptTokens: usage.input_tokens, completionTokens: usage.output_tokens },
-      ).catch((err) => {
+
+      if (cache_creation > 0 || cache_read > 0) {
+        spendStructuredTokens(txMetadata, {
+          promptTokens: {
+            input: usage.input_tokens,
+            write: cache_creation,
+            read: cache_read,
+          },
+          completionTokens: usage.output_tokens,
+        }).catch((err) => {
+          logger.error(
+            '[api/server/controllers/agents/client.js #recordCollectedUsage] Error spending structured tokens',
+            err,
+          );
+        });
+      }
+      spendTokens(txMetadata, {
+        promptTokens: usage.input_tokens,
+        completionTokens: usage.output_tokens,
+      }).catch((err) => {
         logger.error(
           '[api/server/controllers/agents/client.js #recordCollectedUsage] Error spending tokens',
           err,
@@ -794,7 +823,10 @@
       throw new Error('Run not initialized');
     }
     const { handleLLMEnd, collected: collectedMetadata } = createMetadataAggregator();
-    const clientOptions = {};
+    /** @type {import('@librechat/agents').ClientOptions} */
+    const clientOptions = {
+      maxTokens: 75,
+    };
     const providerConfig = this.options.req.app.locals[this.options.agent.provider];
     if (
       providerConfig &&
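Note: recordCollectedUsage now folds prompt-cache writes and reads into the running input total and, when cache details are present, records the split via spendStructuredTokens alongside the plain spendTokens transaction. A worked example with invented numbers:

// Hypothetical usage entry (values invented for illustration):
const usage = {
  input_tokens: 200,
  output_tokens: 150,
  input_token_details: { cache_creation: 1000, cache_read: 3000 },
};

const cache_creation = Number(usage.input_token_details?.cache_creation) || 0; // 1000
const cache_read = Number(usage.input_token_details?.cache_read) || 0; // 3000

// Effective prompt size for the running total now includes cached tokens:
const effectiveInput = (Number(usage.input_tokens) || 0) + cache_creation + cache_read; // 4200

// Because cache details are present, spendStructuredTokens receives the split:
// { promptTokens: { input: 200, write: 1000, read: 3000 }, completionTokens: 150 }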

api/server/services/Endpoints/agents/title.js

Lines changed: 10 additions & 1 deletion
@@ -20,10 +20,19 @@ const addTitle = async (req, { text, response, client }) => {

   const titleCache = getLogStores(CacheKeys.GEN_TITLE);
   const key = `${req.user.id}-${response.conversationId}`;
+  const responseText =
+    response?.content && Array.isArray(response?.content)
+      ? response.content.reduce((acc, block) => {
+          if (block?.type === 'text') {
+            return acc + block.text;
+          }
+          return acc;
+        }, '')
+      : (response?.content ?? response?.text ?? '');

   const title = await client.titleConvo({
     text,
-    responseText: response?.text ?? '',
+    responseText,
     conversationId: response.conversationId,
   });
   await titleCache.set(key, title, 120000);
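Note: agent responses can carry an array of content blocks rather than a plain text field, so the title prompt now concatenates only the text blocks. A small illustration (sample blocks are invented):

// Invented response shape for illustration:
const response = {
  conversationId: 'abc123',
  content: [
    { type: 'think', think: 'internal reasoning…' },
    { type: 'text', text: 'Hello' },
    { type: 'text', text: ' world' },
  ],
};

const responseText = Array.isArray(response?.content)
  ? response.content.reduce((acc, block) => (block?.type === 'text' ? acc + block.text : acc), '')
  : (response?.content ?? response?.text ?? '');

// responseText === 'Hello world' — reasoning blocks are skipped for title generation.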

api/server/services/Endpoints/anthropic/llm.js

Lines changed: 8 additions & 7 deletions
@@ -1,6 +1,6 @@
 const { HttpsProxyAgent } = require('https-proxy-agent');
 const { anthropicSettings, removeNullishValues } = require('librechat-data-provider');
-const { checkPromptCacheSupport, getClaudeHeaders } = require('./helpers');
+const { checkPromptCacheSupport, getClaudeHeaders, configureReasoning } = require('./helpers');

 /**
  * Generates configuration options for creating an Anthropic language model (LLM) instance.
@@ -49,13 +49,14 @@ function getLLMConfig(apiKey, options = {}) {
     clientOptions: {},
   };

+  requestOptions = configureReasoning(requestOptions, systemOptions);
+
   if (!/claude-3[-.]7/.test(mergedOptions.model)) {
-    if (mergedOptions.topP !== undefined) {
-      requestOptions.topP = mergedOptions.topP;
-    }
-    if (mergedOptions.topK !== undefined) {
-      requestOptions.topK = mergedOptions.topK;
-    }
+    requestOptions.topP = mergedOptions.topP;
+    requestOptions.topK = mergedOptions.topK;
+  } else if (requestOptions.thinking == null) {
+    requestOptions.topP = mergedOptions.topP;
+    requestOptions.topK = mergedOptions.topK;
   }

   const supportsCacheControl =
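Note: configureReasoning (newly exported from ./helpers) owns the thinking setup, and the topP/topK assignments now run after it so the thinking check sees the final value. A sketch of the shape it is assumed to leave on requestOptions when thinking is enabled, following Anthropic's extended-thinking API (the helper's body is not shown in this diff):

// Assumed output shape when reasoning is requested (values are illustrative):
const requestOptions = {
  model: 'claude-3-7-sonnet-20250219',
  thinking: {
    type: 'enabled',
    budget_tokens: 2000,
  },
};
// With requestOptions.thinking set, the else-if branch above skips topP/topK for Claude 3.7.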

api/server/services/Endpoints/anthropic/llm.spec.js

Lines changed: 41 additions & 0 deletions
@@ -109,4 +109,45 @@ describe('getLLMConfig', () => {
     // Just verifying that the promptCache setting is processed
     expect(result.llmConfig).toBeDefined();
   });
+
+  it('should include topK and topP for Claude-3.7 models when thinking is not enabled', () => {
+    // Test with thinking explicitly set to null/undefined
+    const result = getLLMConfig('test-api-key', {
+      modelOptions: {
+        model: 'claude-3-7-sonnet',
+        topK: 10,
+        topP: 0.9,
+        thinking: false,
+      },
+    });
+
+    expect(result.llmConfig).toHaveProperty('topK', 10);
+    expect(result.llmConfig).toHaveProperty('topP', 0.9);
+
+    // Test with thinking explicitly set to false
+    const result2 = getLLMConfig('test-api-key', {
+      modelOptions: {
+        model: 'claude-3-7-sonnet',
+        topK: 10,
+        topP: 0.9,
+        thinking: false,
+      },
+    });
+
+    expect(result2.llmConfig).toHaveProperty('topK', 10);
+    expect(result2.llmConfig).toHaveProperty('topP', 0.9);
+
+    // Test with decimal notation as well
+    const result3 = getLLMConfig('test-api-key', {
+      modelOptions: {
+        model: 'claude-3.7-sonnet',
+        topK: 10,
+        topP: 0.9,
+        thinking: false,
+      },
+    });
+
+    expect(result3.llmConfig).toHaveProperty('topK', 10);
+    expect(result3.llmConfig).toHaveProperty('topP', 0.9);
+  });
 });

api/server/services/Endpoints/openAI/llm.js

Lines changed: 10 additions & 2 deletions
@@ -29,7 +29,6 @@ function getLLMConfig(apiKey, options = {}) {
   const {
     modelOptions = {},
     reverseProxyUrl,
-    useOpenRouter,
    defaultQuery,
     headers,
     proxy,
@@ -56,9 +55,11 @@ function getLLMConfig(apiKey, options = {}) {
     });
   }

+  let useOpenRouter;
   /** @type {OpenAIClientOptions['configuration']} */
   const configOptions = {};
-  if (useOpenRouter || (reverseProxyUrl && reverseProxyUrl.includes(KnownEndpoints.openrouter))) {
+  if (reverseProxyUrl && reverseProxyUrl.includes(KnownEndpoints.openrouter)) {
+    useOpenRouter = true;
     llmConfig.include_reasoning = true;
     configOptions.baseURL = reverseProxyUrl;
     configOptions.defaultHeaders = Object.assign(
@@ -118,6 +119,13 @@ function getLLMConfig(apiKey, options = {}) {
     llmConfig.organization = process.env.OPENAI_ORGANIZATION;
   }

+  if (useOpenRouter && llmConfig.reasoning_effort != null) {
+    llmConfig.reasoning = {
+      effort: llmConfig.reasoning_effort,
+    };
+    delete llmConfig.reasoning_effort;
+  }
+
   return {
     /** @type {OpenAIClientOptions} */
     llmConfig,

client/src/components/Chat/Messages/Content/ContentParts.tsx

Lines changed: 3 additions & 1 deletion
@@ -109,7 +109,9 @@ const ContentParts = memo(
                 return val;
               })
             }
-            label={isSubmitting ? localize('com_ui_thinking') : localize('com_ui_thoughts')}
+            label={
+              isSubmitting && isLast ? localize('com_ui_thinking') : localize('com_ui_thoughts')
+            }
           />
         </div>
       )}
