8 changes: 7 additions & 1 deletion libs/langchain-openai/src/chat_models.ts
@@ -107,6 +107,7 @@ import {
  ResponsesTool,
  ResponsesToolChoice,
} from "./utils/tools.js";
+import { handleMultiModalOutput } from "./utils/output.js";

const _FUNCTION_CALL_IDS_MAP_KEY = "__openai_function_call_ids__";

@@ -2721,8 +2722,13 @@ export class ChatOpenAICompletions<
      additional_kwargs.audio = message.audio;
    }

+   const content = handleMultiModalOutput(
+     message.content || "",
+     rawResponse.choices?.[0]?.message
+   );
+
    return new AIMessage({
-     content: message.content || "",
+     content,
      tool_calls: toolCalls,
      invalid_tool_calls: invalidToolCalls,
      additional_kwargs,
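For context, a minimal sketch of what this change means for callers, assuming OpenRouter is reached through its OpenAI-compatible endpoint. The base URL, model name, and environment variable below are illustrative assumptions, not part of this diff:

import { ChatOpenAI } from "@langchain/openai";

// Assumed setup: point the OpenAI-compatible client at OpenRouter
// (illustrative; this PR does not change how the client is configured).
const model = new ChatOpenAI({
  model: "google/gemini-2.5-flash-image-preview",
  apiKey: process.env.OPENROUTER_API_KEY,
  configuration: { baseURL: "https://openrouter.ai/api/v1" },
});

const response = await model.invoke("Draw a cute cat");

// With this change, `content` may be an array of structured blocks rather
// than a plain string when the provider attached generated images.
if (Array.isArray(response.content)) {
  for (const block of response.content) {
    if (typeof block === "object" && block.type === "image" && "url" in block) {
      console.log("image data URL:", (block as { url: string }).url);
    }
  }
}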
143 changes: 143 additions & 0 deletions libs/langchain-openai/src/tests/chat_models.test.ts
@@ -370,6 +370,149 @@ describe("ChatOpenAI", () => {
    ]);
  });

describe("OpenRouter image response handling", () => {
it("Should correctly parse OpenRouter-style image responses", () => {
// Create a minimal ChatOpenAI instance to test the method
const model = new ChatOpenAI({
model: "test-model",
apiKey: "test-key",
});

// Access the completions object to test the method
const { completions } = model as any;

// Mock message with images from OpenRouter
const mockMessage = {
role: "assistant" as const,
content: "Here is your image of a cute cat:",
};

const mockRawResponse = {
id: "chatcmpl-12345",
object: "chat.completion",
created: 1234567890,
model: "google/gemini-2.5-flash-image-preview",
choices: [
{
index: 0,
message: {
...mockMessage,
// OpenRouter includes images in a separate array
images: [
{
type: "image_url",
image_url: {
url: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==",
},
},
],
},
finish_reason: "stop",
},
],
usage: {
prompt_tokens: 10,
completion_tokens: 20,
total_tokens: 30,
},
};

// Test the _convertCompletionsMessageToBaseMessage method
const result = completions._convertCompletionsMessageToBaseMessage(
mockMessage,
mockRawResponse
);

// Verify the result is an AIMessage with structured content
expect(result.constructor.name).toBe("AIMessage");
expect(result.content).toEqual([
{
source_type: "text",
type: "text",
text: "Here is your image of a cute cat:",
},
{
source_type: "url",
type: "image",
url: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8/5+hHgAHggJ/PchI7wAAAABJRU5ErkJggg==",
},
]);
});

it("Should handle OpenRouter responses with multiple images", () => {
const model = new ChatOpenAI({
model: "test-model",
apiKey: "test-key",
});

const { completions } = model as any;

const mockMessage = {
role: "assistant" as const,
content: "Here are multiple images:",
};

const mockRawResponse = {
id: "chatcmpl-12345",
object: "chat.completion",
created: 1234567890,
model: "google/gemini-2.5-flash-image-preview",
choices: [
{
index: 0,
message: {
...mockMessage,
images: [
{
type: "image_url",
image_url: {
url: "data:image/png;base64,image1",
},
},
{
type: "image_url",
image_url: {
url: "data:image/png;base64,image2",
},
},
],
},
finish_reason: "stop",
},
],
usage: {
prompt_tokens: 10,
completion_tokens: 20,
total_tokens: 30,
},
};

const result = completions._convertCompletionsMessageToBaseMessage(
mockMessage,
mockRawResponse
);

// Verify the response contains structured content with multiple image_urls
expect(result.content).toEqual([
{
source_type: "text",
type: "text",
text: "Here are multiple images:",
},
{
source_type: "url",
type: "image",
url: "data:image/png;base64,image1",
},
{
source_type: "url",
type: "image",
url: "data:image/png;base64,image2",
},
]);
});
});

test("can be constructed with reasoningEffort", async () => {
const model = new ChatOpenAI({
model: "gpt-4o-2024-08-06",
41 changes: 41 additions & 0 deletions libs/langchain-openai/src/utils/output.ts
@@ -0,0 +1,41 @@
import {
  StandardImageBlock,
  StandardTextBlock,
} from "@langchain/core/messages";

/**
 * Handle multimodal response content.
 *
 * @param content The text content of the message.
 * @param messages The raw assistant message from the completions response,
 *   which may carry a provider-specific `images` array.
 * @returns Structured content blocks when images are present, otherwise the
 *   original string content.
 */
export function handleMultiModalOutput(
  content: string,
  messages: unknown
): (StandardImageBlock | StandardTextBlock)[] | string {
  /**
   * Handle OpenRouter image responses
   * @see https://openrouter.ai/docs/features/multimodal/image-generation#api-usage
   */
  if (
    messages &&
    typeof messages === "object" &&
    "images" in messages &&
    Array.isArray(messages.images)
  ) {
    const images = messages.images
      .filter((image) => typeof image?.image_url?.url === "string")
      .map(
        (image) =>
          ({
            type: "image",
            url: image.image_url.url as string,
            source_type: "url",
          } as const)
      );
    return [{ type: "text", text: content, source_type: "text" }, ...images];
  }

  return content;
}
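The helper's behavior in isolation, as a quick sketch built from the shapes exercised in the tests above:

import { handleMultiModalOutput } from "./output.js";

// An assistant message carrying an OpenRouter-style `images` array.
const message = {
  role: "assistant",
  content: "Here is your image:",
  images: [
    { type: "image_url", image_url: { url: "data:image/png;base64,image1" } },
  ],
};

// With images present, the plain string is promoted to structured blocks:
// [
//   { type: "text", text: "Here is your image:", source_type: "text" },
//   { type: "image", url: "data:image/png;base64,image1", source_type: "url" },
// ]
console.log(handleMultiModalOutput(message.content, message));

// Without an `images` array, the original string passes through unchanged.
console.log(handleMultiModalOutput("No images here", { role: "assistant" }));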