1
1
import re
2
2
from collections .abc import Sequence
3
- from typing import Optional , TypeVar
3
+ from typing import (
4
+ TYPE_CHECKING ,
5
+ Literal ,
6
+ Optional ,
7
+ TypedDict ,
8
+ TypeVar ,
9
+ Union ,
10
+ )
4
11
5
- from langchain_core .messages import BaseMessage
12
+ if TYPE_CHECKING :
13
+ from langchain_core .messages import BaseMessage
14
+ from langchain_core .messages .content import (
15
+ ContentBlock ,
16
+ )
6
17
7
18
8
19
def _is_openai_data_block (block : dict ) -> bool :
9
- """Check if the block contains multimodal data in OpenAI Chat Completions format."""
20
+ """Check if the block contains multimodal data in OpenAI Chat Completions format.
21
+
22
+ Supports both data and ID-style blocks (e.g. ``'file_data'`` and ``'file_id'``)
23
+
24
+ If additional keys are present, they are ignored / will not affect outcome as long
25
+ as the required keys are present and valid.
26
+
27
+ """
10
28
if block .get ("type" ) == "image_url" :
11
29
if (
12
30
(set (block .keys ()) <= {"type" , "image_url" , "detail" })
@@ -15,29 +33,43 @@ def _is_openai_data_block(block: dict) -> bool:
15
33
):
16
34
url = image_url .get ("url" )
17
35
if isinstance (url , str ):
36
+ # Required per OpenAI spec
37
+ return True
38
+ # Ignore `'detail'` since it's optional and specific to OpenAI
39
+
40
+ elif block .get ("type" ) == "input_audio" :
41
+ if (audio := block .get ("input_audio" )) and isinstance (audio , dict ):
42
+ audio_data = audio .get ("data" )
43
+ audio_format = audio .get ("format" )
44
+ # Both required per OpenAI spec
45
+ if isinstance (audio_data , str ) and isinstance (audio_format , str ):
18
46
return True
19
47
20
48
elif block .get ("type" ) == "file" :
21
49
if (file := block .get ("file" )) and isinstance (file , dict ):
22
50
file_data = file .get ("file_data" )
23
- if isinstance (file_data , str ):
24
- return True
25
-
26
- elif block .get ("type" ) == "input_audio" :
27
- if (input_audio := block .get ("input_audio" )) and isinstance (input_audio , dict ):
28
- audio_data = input_audio .get ("data" )
29
- audio_format = input_audio .get ("format" )
30
- if isinstance (audio_data , str ) and isinstance (audio_format , str ):
51
+ file_id = file .get ("file_id" )
52
+ # Files can be either base64-encoded or pre-uploaded with an ID
53
+ if isinstance (file_data , str ) or isinstance (file_id , str ):
31
54
return True
32
55
33
56
else :
34
57
return False
35
58
59
+ # Recognized `'type'`, but the required keys are missing or invalid
36
60
return False
37
61
38
62
39
- def _parse_data_uri (uri : str ) -> Optional [dict ]:
40
- """Parse a data URI into its components. If parsing fails, return None.
63
class ParsedDataUri(TypedDict):
    """Components of a base64 data URI, as returned by ``_parse_data_uri``."""

    # Always the literal "base64"; the only encoding ``_parse_data_uri`` emits.
    source_type: Literal["base64"]
    # The payload captured by the ``data`` group of the data-URI regex.
    data: str
    # The MIME type captured by the ``mime_type`` group of the data-URI regex.
    mime_type: str
67
+
68
+
69
+ def _parse_data_uri (uri : str ) -> Optional [ParsedDataUri ]:
70
+ """Parse a data URI into its components.
71
+
72
+ If parsing fails, return None. If either MIME type or data is missing, return None.
41
73
42
74
Example:
43
75
@@ -57,90 +89,208 @@ def _parse_data_uri(uri: str) -> Optional[dict]:
57
89
match = re .match (regex , uri )
58
90
if match is None :
59
91
return None
92
+
93
+ mime_type = match .group ("mime_type" )
94
+ data = match .group ("data" )
95
+ if not mime_type or not data :
96
+ return None
97
+
60
98
return {
61
99
"source_type" : "base64" ,
62
- "data" : match . group ( " data" ) ,
63
- "mime_type" : match . group ( " mime_type" ) ,
100
+ "data" : data ,
101
+ "mime_type" : mime_type ,
64
102
}
65
103
66
104
67
- def _convert_openai_format_to_data_block (block : dict ) -> dict :
68
- """Convert OpenAI image content block to standard data content block.
105
def _normalize_messages(
    messages: Sequence["BaseMessage"],
) -> list["BaseMessage"]:
    """Normalize message formats to LangChain v1 standard content blocks.

    Chat models already implement support for:

    - Images in OpenAI Chat Completions format. These will be passed through
      unchanged.
    - LangChain v1 standard content blocks.

    This function extends support to:

    - `Audio <https://platform.openai.com/docs/api-reference/chat/create>`__ and
      `file <https://platform.openai.com/docs/api-reference/files>`__ data in OpenAI
      Chat Completions format.

      - Images are technically supported but we expect chat models to handle them
        directly; this may change in the future.

    - LangChain v0 standard content blocks for backward compatibility.

    .. versionchanged:: 1.0.0
        In previous versions, this function returned messages in LangChain v0 format.
        Now, it returns messages in LangChain v1 format, which upgraded chat models now
        expect to receive when passing back in message history. For backward
        compatibility, this function will convert v0 message content to v1 format.

    .. dropdown:: v0 Content Block Schemas

        ``URLContentBlock``:

        .. code-block::

            {
                mime_type: NotRequired[str],
                type: Literal['image', 'audio', 'file'],
                source_type: Literal['url'],
                url: str,
            }

        ``Base64ContentBlock``:

        .. code-block::

            {
                mime_type: NotRequired[str],
                type: Literal['image', 'audio', 'file'],
                source_type: Literal['base64'],
                data: str,
            }

        ``IDContentBlock``:

        (In practice, this was never used)

        .. code-block::

            {
                type: Literal['image', 'audio', 'file'],
                source_type: Literal['id'],
                id: str,
            }

        ``PlainTextContentBlock``:

        .. code-block::

            {
                mime_type: NotRequired[str],
                type: Literal['file'],
                source_type: Literal['text'],
                text: str,
            }

    If a v1 message is passed in, it will be returned as-is, meaning it is safe to
    always pass in v1 messages to this function for assurance.

    For posterity, here are the OpenAI Chat Completions schemas we expect:

    Chat Completions image. Can be URL-based or base64-encoded. Supports MIME types
    png, jpeg/jpg, webp, static gif:

    .. code-block::

        {
            "type": Literal['image_url'],
            "image_url": {
                "url": Union["data:$MIME_TYPE;base64,$BASE64_ENCODED_IMAGE", "$IMAGE_URL"],
                "detail": Literal['low', 'high', 'auto'] = 'auto',  # Supported by OpenAI
            }
        }

    Chat Completions audio:

    .. code-block::

        {
            "type": Literal['input_audio'],
            "input_audio": {
                "format": Literal['wav', 'mp3'],
                "data": str = "$BASE64_ENCODED_AUDIO",
            },
        }

    Chat Completions files: either base64 or pre-uploaded file ID:

    .. code-block::

        {
            "type": Literal['file'],
            "file": Union[
                {
                    "filename": Optional[str] = "$FILENAME",
                    "file_data": str = "$BASE64_ENCODED_FILE",
                },
                {
                    "file_id": str = "$FILE_ID",  # For pre-uploaded files to OpenAI
                },
            ],
        }

    Args:
        messages: The messages to normalize. They are never mutated.

    Returns:
        A new list with one entry per input message, in the same order. A message
        that needed no conversion is the very same object that was passed in; a
        message with at least one converted block is a copy with its own content
        list.

    """
    # Imported at call time rather than module level; presumably this avoids an
    # import cycle with ``langchain_core.messages`` - confirm before hoisting.
    from langchain_core.messages.block_translators.langchain import (
        _convert_legacy_v0_content_block_to_v1,
        _convert_openai_format_to_data_block,
    )

    openai_block_types = {"input_audio", "file"}
    v0_block_types = {"image", "audio", "file"}
    # v1 blocks do not carry ``source_type``, so its presence marks a v0 block.
    v0_source_types = {"url", "base64", "id", "text"}

    formatted_messages = []
    for message in messages:
        # We preserve input messages - the caller may reuse them elsewhere and
        # expects them to remain unchanged. We only create a copy if we need to
        # translate.
        formatted_message = message

        if isinstance(message.content, list):
            for idx, block in enumerate(message.content):
                if not isinstance(block, dict):
                    continue

                block_type = block.get("type")
                if (
                    block_type in openai_block_types
                    # Discriminate between OpenAI/LC format since they share
                    # `'type'`
                    and _is_openai_data_block(block)
                ):
                    # OpenAI Chat Completions multimodal data -> v1 standard
                    converted_block = _convert_openai_format_to_data_block(block)
                elif (
                    block_type in v0_block_types
                    and block.get("source_type") in v0_source_types
                ):
                    # Multimodal LangChain v0 -> v1 standard content block
                    converted_block = _convert_legacy_v0_content_block_to_v1(block)
                else:
                    # Pass through blocks that look like they already have v1
                    # format unchanged.
                    continue

                formatted_message = _ensure_message_copy(message, formatted_message)
                _update_content_block(formatted_message, idx, converted_block)

        formatted_messages.append(formatted_message)

    return formatted_messages
141
268
142
269
143
- T = TypeVar ("T" , bound = BaseMessage )
270
+ T = TypeVar ("T" , bound = "BaseMessage" )
271
+
272
+
273
+ def _ensure_message_copy (message : T , formatted_message : T ) -> T :
274
+ """Create a copy of the message if it hasn't been copied yet."""
275
+ if formatted_message is message :
276
+ formatted_message = message .model_copy ()
277
+ # Shallow-copy content list to allow modifications
278
+ formatted_message .content = list (formatted_message .content )
279
+ return formatted_message
280
+
281
+
282
+ def _update_content_block (
283
+ formatted_message : "BaseMessage" , idx : int , new_block : Union [ContentBlock , dict ]
284
+ ) -> None :
285
+ """Update a content block at the given index, handling type issues."""
286
+ # Type ignore needed because:
287
+ # - `BaseMessage.content` is typed as `Union[str, list[Union[str, dict]]]`
288
+ # - When content is str, indexing fails (index error)
289
+ # - When content is list, the items are `Union[str, dict]` but we're assigning
290
+ # `Union[ContentBlock, dict]` where ContentBlock is richer than dict
291
+ # - This is safe because we only call this when we've verified content is a list and
292
+ # we're doing content block conversions
293
+ formatted_message .content [idx ] = new_block # type: ignore[index, assignment]
144
294
145
295
146
296
def _update_message_content_to_blocks (message : T , output_version : str ) -> T :
0 commit comments