Skip to content

Commit 5bcf7d0

Browse files
mdrxy and ccurme authored
refactor(core): data block handling, normalize message formats, strip IDs from messages (#32572)
> [!WARNING] > **BREAKING:** Simplifies message normalization to a single consistent path, requiring partner package updates **Key Changes:** - Consistent multimodal handling: - OpenAI `image_url` blocks pass through unchanged (broad compatibility) - OpenAI `input_audio` and `file` blocks convert to v1 standard equivalents - Legacy v0 multimodal blocks convert to v1 standard - Everything else passes through unchanged - Partner packages must update content block parsing logic **Partner Updates** `output_version` affects how messages are serialized into `.content`. `_normalize_messages()` will now upgrade v0 content to v1, so all partners now receive v1 format input regardless of `output_version`. Migration: - Partner packages must update to handle v1 input content blocks - `output_version` still controls serialization format of responses (unchanged) --------- Co-authored-by: Chester Curme <[email protected]>
1 parent 3c8edbe commit 5bcf7d0

File tree

8 files changed

+1005
-269
lines changed

8 files changed

+1005
-269
lines changed

libs/core/langchain_core/language_models/_utils.py

Lines changed: 219 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,30 @@
11
import re
22
from collections.abc import Sequence
3-
from typing import Optional, TypeVar
3+
from typing import (
4+
TYPE_CHECKING,
5+
Literal,
6+
Optional,
7+
TypedDict,
8+
TypeVar,
9+
Union,
10+
)
411

5-
from langchain_core.messages import BaseMessage
12+
if TYPE_CHECKING:
13+
from langchain_core.messages import BaseMessage
14+
from langchain_core.messages.content import (
15+
ContentBlock,
16+
)
617

718

819
def _is_openai_data_block(block: dict) -> bool:
9-
"""Check if the block contains multimodal data in OpenAI Chat Completions format."""
20+
"""Check if the block contains multimodal data in OpenAI Chat Completions format.
21+
22+
Supports both data and ID-style blocks (e.g. ``'file_data'`` and ``'file_id'``)
23+
24+
If additional keys are present, they are ignored / will not affect outcome as long
25+
as the required keys are present and valid.
26+
27+
"""
1028
if block.get("type") == "image_url":
1129
if (
1230
(set(block.keys()) <= {"type", "image_url", "detail"})
@@ -15,29 +33,43 @@ def _is_openai_data_block(block: dict) -> bool:
1533
):
1634
url = image_url.get("url")
1735
if isinstance(url, str):
36+
# Required per OpenAI spec
37+
return True
38+
# Ignore `'detail'` since it's optional and specific to OpenAI
39+
40+
elif block.get("type") == "input_audio":
41+
if (audio := block.get("input_audio")) and isinstance(audio, dict):
42+
audio_data = audio.get("data")
43+
audio_format = audio.get("format")
44+
# Both required per OpenAI spec
45+
if isinstance(audio_data, str) and isinstance(audio_format, str):
1846
return True
1947

2048
elif block.get("type") == "file":
2149
if (file := block.get("file")) and isinstance(file, dict):
2250
file_data = file.get("file_data")
23-
if isinstance(file_data, str):
24-
return True
25-
26-
elif block.get("type") == "input_audio":
27-
if (input_audio := block.get("input_audio")) and isinstance(input_audio, dict):
28-
audio_data = input_audio.get("data")
29-
audio_format = input_audio.get("format")
30-
if isinstance(audio_data, str) and isinstance(audio_format, str):
51+
file_id = file.get("file_id")
52+
# Files can be either base64-encoded or pre-uploaded with an ID
53+
if isinstance(file_data, str) or isinstance(file_id, str):
3154
return True
3255

3356
else:
3457
return False
3558

59+
# Has no `'type'` key
3660
return False
3761

3862

39-
def _parse_data_uri(uri: str) -> Optional[dict]:
40-
"""Parse a data URI into its components. If parsing fails, return None.
63+
class ParsedDataUri(TypedDict):
    """Components of a data URI, as returned by ``_parse_data_uri``.

    All three keys are required; ``_parse_data_uri`` returns ``None`` instead of
    a partially filled dict when the MIME type or the data is missing.
    """

    # Always the literal ``"base64"``; no other source type is produced here.
    source_type: Literal["base64"]
    # Payload portion of the URI (the ``data`` group of the parsing regex).
    data: str
    # MIME type portion of the URI (the ``mime_type`` group of the parsing regex).
    mime_type: str
67+
68+
69+
def _parse_data_uri(uri: str) -> Optional[ParsedDataUri]:
70+
"""Parse a data URI into its components.
71+
72+
If parsing fails, return None. If either MIME type or data is missing, return None.
4173
4274
Example:
4375
@@ -57,90 +89,208 @@ def _parse_data_uri(uri: str) -> Optional[dict]:
5789
match = re.match(regex, uri)
5890
if match is None:
5991
return None
92+
93+
mime_type = match.group("mime_type")
94+
data = match.group("data")
95+
if not mime_type or not data:
96+
return None
97+
6098
return {
6199
"source_type": "base64",
62-
"data": match.group("data"),
63-
"mime_type": match.group("mime_type"),
100+
"data": data,
101+
"mime_type": mime_type,
64102
}
65103

66104

67-
def _convert_openai_format_to_data_block(block: dict) -> dict:
68-
"""Convert OpenAI image content block to standard data content block.
105+
def _normalize_messages(
106+
messages: Sequence["BaseMessage"],
107+
) -> list["BaseMessage"]:
108+
"""Normalize message formats to LangChain v1 standard content blocks.
69109
70-
If parsing fails, pass-through.
110+
Chat models already implement support for:
111+
- Images in OpenAI Chat Completions format
112+
These will be passed through unchanged
113+
- LangChain v1 standard content blocks
71114
72-
Args:
73-
block: The OpenAI image content block to convert.
115+
This function extends support to:
116+
- `Audio <https://platform.openai.com/docs/api-reference/chat/create>`__ and
117+
`file <https://platform.openai.com/docs/api-reference/files>`__ data in OpenAI
118+
Chat Completions format
119+
- Images are technically supported but we expect chat models to handle them
120+
directly; this may change in the future
121+
- LangChain v0 standard content blocks for backward compatibility
74122
75-
Returns:
76-
The converted standard data content block.
77-
"""
78-
if block["type"] == "image_url":
79-
parsed = _parse_data_uri(block["image_url"]["url"])
80-
if parsed is not None:
81-
parsed["type"] = "image"
82-
return parsed
83-
return block
84-
85-
if block["type"] == "file":
86-
parsed = _parse_data_uri(block["file"]["file_data"])
87-
if parsed is not None:
88-
parsed["type"] = "file"
89-
if filename := block["file"].get("filename"):
90-
parsed["filename"] = filename
91-
return parsed
92-
return block
93-
94-
if block["type"] == "input_audio":
95-
data = block["input_audio"].get("data")
96-
audio_format = block["input_audio"].get("format")
97-
if data and audio_format:
98-
return {
99-
"type": "audio",
100-
"source_type": "base64",
101-
"data": data,
102-
"mime_type": f"audio/{audio_format}",
123+
.. versionchanged:: 1.0.0
124+
In previous versions, this function returned messages in LangChain v0 format.
125+
Now, it returns messages in LangChain v1 format, which upgraded chat models now
126+
expect to receive when passing back in message history. For backward
127+
compatibility, this function will convert v0 message content to v1 format.
128+
129+
.. dropdown:: v0 Content Block Schemas
130+
131+
``URLContentBlock``:
132+
133+
.. code-block::
134+
135+
{
136+
mime_type: NotRequired[str]
137+
type: Literal['image', 'audio', 'file'],
138+
source_type: Literal['url'],
139+
url: str,
103140
}
104-
return block
105141
106-
return block
142+
``Base64ContentBlock``:
143+
144+
.. code-block::
145+
146+
{
147+
mime_type: NotRequired[str]
148+
type: Literal['image', 'audio', 'file'],
149+
source_type: Literal['base64'],
150+
data: str,
151+
}
107152
153+
``IDContentBlock``:
108154
109-
def _normalize_messages(messages: Sequence[BaseMessage]) -> list[BaseMessage]:
110-
"""Extend support for message formats.
155+
(In practice, this was never used)
156+
157+
.. code-block::
158+
159+
{
160+
type: Literal['image', 'audio', 'file'],
161+
source_type: Literal['id'],
162+
id: str,
163+
}
164+
165+
``PlainTextContentBlock``:
166+
167+
.. code-block::
168+
169+
{
170+
mime_type: NotRequired[str]
171+
type: Literal['file'],
172+
source_type: Literal['text'],
173+
url: str,
174+
}
175+
176+
If a v1 message is passed in, it will be returned as-is, meaning it is safe to
177+
always pass in v1 messages to this function for assurance.
178+
179+
For posterity, here are the OpenAI Chat Completions schemas we expect:
180+
181+
Chat Completions image. Can be URL-based or base64-encoded. Supports MIME types
182+
png, jpeg/jpg, webp, static gif:
183+
{
184+
"type": Literal['image_url'],
185+
"image_url": {
186+
"url": Union["data:$MIME_TYPE;base64,$BASE64_ENCODED_IMAGE", "$IMAGE_URL"],
187+
"detail": Literal['low', 'high', 'auto'] = 'auto', # Supported by OpenAI
188+
}
189+
}
190+
191+
Chat Completions audio:
192+
{
193+
"type": Literal['input_audio'],
194+
"input_audio": {
195+
"format": Literal['wav', 'mp3'],
196+
"data": str = "$BASE64_ENCODED_AUDIO",
197+
},
198+
}
199+
200+
Chat Completions files: either base64 or pre-uploaded file ID
201+
{
202+
"type": Literal['file'],
203+
"file": Union[
204+
{
205+
"filename": Optional[str] = "$FILENAME",
206+
"file_data": str = "$BASE64_ENCODED_FILE",
207+
},
208+
{
209+
"file_id": str = "$FILE_ID", # For pre-uploaded files to OpenAI
210+
},
211+
],
212+
}
111213
112-
Chat models implement support for images in OpenAI Chat Completions format, as well
113-
as other multimodal data as standard data blocks. This function extends support to
114-
audio and file data in OpenAI Chat Completions format by converting them to standard
115-
data blocks.
116214
"""
215+
from langchain_core.messages.block_translators.langchain import (
216+
_convert_legacy_v0_content_block_to_v1,
217+
_convert_openai_format_to_data_block,
218+
)
219+
117220
formatted_messages = []
118221
for message in messages:
222+
# We preserve input messages - the caller may reuse them elsewhere and expects
223+
# them to remain unchanged. We only create a copy if we need to translate.
119224
formatted_message = message
225+
120226
if isinstance(message.content, list):
121227
for idx, block in enumerate(message.content):
228+
# OpenAI Chat Completions multimodal data blocks to v1 standard
122229
if (
123230
isinstance(block, dict)
124-
# Subset to (PDF) files and audio, as most relevant chat models
125-
# support images in OAI format (and some may not yet support the
126-
# standard data block format)
127-
and block.get("type") in {"file", "input_audio"}
231+
and block.get("type") in {"input_audio", "file"}
232+
# Discriminate between OpenAI/LC format since they share `'type'`
128233
and _is_openai_data_block(block)
129234
):
130-
if formatted_message is message:
131-
formatted_message = message.model_copy()
132-
# Also shallow-copy content
133-
formatted_message.content = list(formatted_message.content)
134-
135-
formatted_message.content[idx] = ( # type: ignore[index] # mypy confused by .model_copy
136-
_convert_openai_format_to_data_block(block)
137-
)
235+
formatted_message = _ensure_message_copy(message, formatted_message)
236+
237+
converted_block = _convert_openai_format_to_data_block(block)
238+
_update_content_block(formatted_message, idx, converted_block)
239+
240+
# Convert multimodal LangChain v0 to v1 standard content blocks
241+
elif (
242+
isinstance(block, dict)
243+
and block.get("type")
244+
in {
245+
"image",
246+
"audio",
247+
"file",
248+
}
249+
and block.get("source_type") # v1 doesn't have `source_type`
250+
in {
251+
"url",
252+
"base64",
253+
"id",
254+
"text",
255+
}
256+
):
257+
formatted_message = _ensure_message_copy(message, formatted_message)
258+
259+
converted_block = _convert_legacy_v0_content_block_to_v1(block)
260+
_update_content_block(formatted_message, idx, converted_block)
261+
continue
262+
263+
# else, pass through blocks that look like they have v1 format unchanged
264+
138265
formatted_messages.append(formatted_message)
139266

140267
return formatted_messages
141268

142269

143-
T = TypeVar("T", bound=BaseMessage)
270+
T = TypeVar("T", bound="BaseMessage")


def _ensure_message_copy(message: T, formatted_message: T) -> T:
    """Return a message that is safe to edit without touching the caller's input.

    The first time this is called for a given ``message`` (that is, while
    ``formatted_message`` is still the very same object), a copy is made via
    ``message.model_copy()`` and its content list is replaced by a new list so
    that index assignments do not leak into the original message. On later
    calls the already-copied ``formatted_message`` is returned untouched.

    Args:
        message: The original input message; never modified here.
        formatted_message: Either ``message`` itself (no copy made yet) or the
            copy produced by an earlier call.

    Returns:
        A message object distinct from ``message`` whose content list, if any,
        is a separate list instance.
    """
    if formatted_message is message:
        # NOTE(review): presumably pydantic's shallow ``model_copy`` - the content
        # list would then still be shared with ``message``, hence the re-wrap below.
        formatted_message = message.model_copy()
        # Shallow-copy the content list to allow modifications. Only lists are
        # re-wrapped: ``list("some text")`` would split string content into
        # single characters and silently corrupt the message.
        if isinstance(formatted_message.content, list):
            formatted_message.content = list(formatted_message.content)
    return formatted_message
280+
281+
282+
def _update_content_block(
    formatted_message: "BaseMessage",
    idx: int,
    new_block: "Union[ContentBlock, dict]",
) -> None:
    """Replace the content block at ``idx`` of ``formatted_message`` in place.

    The annotations are written as strings so that neither ``BaseMessage`` nor
    ``ContentBlock`` has to be importable when this function is defined.

    Args:
        formatted_message: Message whose ``content`` is indexed and assigned to.
            Callers are expected to pass a copy with list content, not the
            original input message.
        idx: Position of the block to overwrite.
        new_block: The converted block to store at ``idx``.
    """
    # Type ignore needed because:
    # - `BaseMessage.content` is typed as `Union[str, list[Union[str, dict]]]`
    # - When content is str, item assignment is not supported
    # - When content is list, the items are `Union[str, dict]` but we're assigning
    #   `Union[ContentBlock, dict]` where ContentBlock is richer than dict
    # - This is safe because we only call this when we've verified content is a
    #   list and we're doing content block conversions
    formatted_message.content[idx] = new_block  # type: ignore[index, assignment]
144294

145295

146296
def _update_message_content_to_blocks(message: T, output_version: str) -> T:

0 commit comments

Comments
 (0)