Skip to content

Commit 1dd0eba

Browse files
committed
Re-order args so that pydantic doesn't parse nested dict to an empty extraction result
1 parent 6f1de75 commit 1dd0eba

File tree

3 files changed

+174
-4
lines changed

3 files changed

+174
-4
lines changed

py/llama_cloud_services/beta/agent_data/schema.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ class ExtractedFieldMetadata(BaseModel):
203203

204204

205205
ExtractedFieldMetaDataDict = Dict[
206-
str, Union[ExtractedFieldMetadata, Dict[str, Any], list[Any]]
206+
str, Union[Dict[str, Any], ExtractedFieldMetadata, list[Any]]
207207
]
208208

209209

@@ -223,7 +223,7 @@ def parse_extracted_field_metadata(
223223
def _parse_extracted_field_metadata_recursive(
224224
field_value: Any,
225225
additional_fields: dict[str, Any] = {},
226-
) -> Union[ExtractedFieldMetadata, Dict[str, Any], list[Any]]:
226+
) -> Union[Dict[str, Any], ExtractedFieldMetadata, list[Any]]:
227227
"""
228228
Parse the extracted field metadata into a dictionary of field names to field metadata.
229229
"""

py/unit_tests/beta/agent/test_agent_data_schema.py

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
from datetime import datetime
2-
from typing import Any, Dict
2+
import json
3+
from pathlib import Path
4+
from typing import Any, Dict, Optional
35

46
import pytest
57
from llama_cloud import ExtractRun, File
68
from llama_cloud.types.agent_data import AgentData
79
from llama_cloud.types.aggregate_group import AggregateGroup
8-
from pydantic import BaseModel, ValidationError
10+
from pydantic import BaseModel, Field, ValidationError
911

1012
from llama_cloud_services.beta.agent_data.schema import (
1113
ExtractedData,
@@ -523,3 +525,55 @@ def test_extracted_data_from_extraction_result_invalid_data():
523525
assert isinstance(invalid_data.field_metadata["name"], ExtractedFieldMetadata)
524526
assert invalid_data.field_metadata["name"].confidence == 0.9
525527
assert invalid_data.overall_confidence == 0.9
528+
529+
530+
class Dimensions(BaseModel):
    # Test schema for the nested "dimensions" object. Every field is an
    # optional string that defaults to None; the descriptions are part of
    # the schema and are kept verbatim.
    length: Optional[str] = Field(
        default=None,
        description="Length in mm (Size, Longest Side, L)",
    )
    width: Optional[str] = Field(
        default=None,
        description="Width in mm (Breadth, Side Width, W)",
    )
    height: Optional[str] = Field(
        default=None,
        description="Height in mm (Thickness, Vertical Size, H)",
    )
    diameter: Optional[str] = Field(
        default=None,
        description="Diameter in mm (for radial or cylindrical types) (Outer Diameter, dt, OD, D, d<sub>t</sub>)",
    )
    lead_spacing: Optional[str] = Field(
        default=None,
        description="Lead spacing in mm (Pin Pitch, Terminal Gap, LS)",
    )
547+
548+
549+
class Capacitor(BaseModel):
    # Top-level test schema: a single optional nested Dimensions object,
    # absent (None) by default.
    dimensions: Optional[Dimensions] = Field(default=None)
551+
552+
553+
def test_full_parse_nested_dimensions():
    """Check field metadata parsing for a recorded run with a nested object.

    Loads the ``capacitor.json`` fixture, builds ``ExtractedData`` from it
    against the ``Capacitor`` schema, and asserts that ``field_metadata``
    holds a plain dict under ``"dimensions"`` whose values are per-field
    ``ExtractedFieldMetadata`` instances.
    """
    # Expected per-field metadata; each nested field is expected to carry
    # the "VERBATIM EXTRACTION" reasoning string.
    expected_dimensions = {
        "diameter": ExtractedFieldMetadata(
            reasoning="VERBATIM EXTRACTION",
            confidence=1.0,
            extraction_confidence=1.0,
        ),
        "lead_spacing": ExtractedFieldMetadata(
            reasoning="VERBATIM EXTRACTION",
            confidence=0.9999999031936799,
            extraction_confidence=0.9999999031936799,
            page_number=None,
            matching_text=None,
        ),
        "length": ExtractedFieldMetadata(
            reasoning="VERBATIM EXTRACTION",
            confidence=0.9999968039036192,
            extraction_confidence=0.9999968039036192,
            page_number=None,
            matching_text=None,
        ),
    }

    # The fixture lives in the "data" directory three levels above this file.
    fixture_path = Path(__file__).parent.parent.parent / "data" / "capacitor.json"
    with open(fixture_path) as fixture_file:
        raw_run = json.load(fixture_file)

    extract_run = ExtractRun.parse_obj(raw_run)
    result = ExtractedData.from_extraction_result(extract_run, Capacitor)

    assert result.field_metadata == {"dimensions": expected_dimensions}

py/unit_tests/data/capacitor.json

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
{
2+
"id": "de058dda-6ca7-4eea-a426-da802f84f971",
3+
"created_at": "2025-08-13T15:45:39.286921Z",
4+
"updated_at": "2025-08-13T15:47:04.878069Z",
5+
"extraction_agent_id": "e834e99f-1f35-4748-b82f-03de4bd07ca6",
6+
"data_schema": {
7+
"additionalProperties": false,
8+
"properties": {
9+
"dimensions": {
10+
"anyOf": [
11+
{
12+
"additionalProperties": false,
13+
"properties": {
14+
"length": {
15+
"anyOf": [{ "type": "string" }, { "type": "null" }],
16+
"description": "Length in mm (Size, Longest Side, L)"
17+
},
18+
"width": {
19+
"anyOf": [{ "type": "string" }, { "type": "null" }],
20+
"description": "Width in mm (Breadth, Side Width, W)"
21+
},
22+
"height": {
23+
"anyOf": [{ "type": "string" }, { "type": "null" }],
24+
"description": "Height in mm (Thickness, Vertical Size, H)"
25+
},
26+
"diameter": {
27+
"anyOf": [{ "type": "string" }, { "type": "null" }],
28+
"description": "Diameter in mm (for radial or cylindrical types) (Outer Diameter, OD, D)"
29+
},
30+
"lead_spacing": {
31+
"anyOf": [{ "type": "string" }, { "type": "null" }],
32+
"description": "Lead spacing in mm (Pin Pitch, Terminal Gap, LS)"
33+
}
34+
},
35+
"required": [
36+
"length",
37+
"width",
38+
"height",
39+
"diameter",
40+
"lead_spacing"
41+
],
42+
"type": "object"
43+
},
44+
{ "type": "null" }
45+
]
46+
}
47+
},
48+
"required": ["dimensions"],
49+
"type": "object"
50+
},
51+
"config": {
52+
"priority": null,
53+
"extraction_target": "PER_DOC",
54+
"extraction_mode": "PREMIUM",
55+
"multimodal_fast_mode": false,
56+
"system_prompt": "",
57+
"use_reasoning": true,
58+
"cite_sources": false,
59+
"confidence_scores": true,
60+
"chunk_mode": "PAGE",
61+
"high_resolution_mode": false,
62+
"invalidate_cache": false,
63+
"page_range": null
64+
},
65+
"file": {
66+
"id": "9233cc2b-00e3-4ddd-b426-b8b59357d4cb",
67+
"created_at": "2025-08-12T18:31:54.269440Z",
68+
"updated_at": "2025-08-13T15:45:39.064906Z",
69+
"name": "document (2).pdf.txt",
70+
"external_file_id": "document (2).pdf.txt",
71+
"file_size": 2562,
72+
"file_type": "txt",
73+
"project_id": "77bdc79f-fb69-49ae-a783-fcc573eec7ce",
74+
"last_modified_at": "2025-08-13T15:45:39Z",
75+
"resource_info": {
76+
"file_size": 2562,
77+
"last_modified_at": "2025-08-13T15:45:39"
78+
},
79+
"permission_info": null,
80+
"data_source_id": null
81+
},
82+
"status": "SUCCESS",
83+
"error": null,
84+
"job_id": "5bb8a583-366c-416c-ba55-4f5724fef9a9",
85+
"data": {
86+
"dimensions": {
87+
"length": "82 mm",
88+
"width": null,
89+
"height": null,
90+
"diameter": "35 mm",
91+
"lead_spacing": "6.0 mm"
92+
}
93+
},
94+
"extraction_metadata": {
95+
"field_metadata": {
96+
"dimensions": {
97+
"length": {
98+
"extraction_confidence": 0.9999968039036192,
99+
"confidence": 0.9999968039036192
100+
},
101+
"diameter": { "extraction_confidence": 1.0, "confidence": 1.0 },
102+
"lead_spacing": {
103+
"extraction_confidence": 0.9999999031936799,
104+
"confidence": 0.9999999031936799
105+
},
106+
"reasoning": "VERBATIM EXTRACTION"
107+
}
108+
},
109+
"usage": {
110+
"num_pages_extracted": 2,
111+
"num_document_tokens": 1034,
112+
"num_output_tokens": 3440
113+
}
114+
},
115+
"from_ui": false
116+
}

0 commit comments

Comments
 (0)