Skip to content

Commit 1333f71

Browse files
authored
fix: referenced paths relative to zip root (#289)
Signed-off-by: Michele Dolfi <[email protected]>
1 parent ec594d8 commit 1333f71

File tree

4 files changed

+52
-7
lines changed

4 files changed

+52
-7
lines changed

docling_serve/response_preparation.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ def _export_documents_as_files(
9191
# Default failure in case results is empty
9292
conv_result = ConversionStatus.FAILURE
9393

94+
artifacts_dir = Path("artifacts/") # will be relative to the fname
95+
9496
for conv_res in conv_results:
9597
conv_result = conv_res.status
9698
if conv_res.status == ConversionStatus.SUCCESS:
@@ -102,15 +104,19 @@ def _export_documents_as_files(
102104
fname = output_dir / f"{doc_filename}.json"
103105
_log.info(f"writing JSON output to {fname}")
104106
conv_res.document.save_as_json(
105-
filename=fname, image_mode=image_export_mode
107+
filename=fname,
108+
image_mode=image_export_mode,
109+
artifacts_dir=artifacts_dir,
106110
)
107111

108112
# Export HTML format:
109113
if export_html:
110114
fname = output_dir / f"{doc_filename}.html"
111115
_log.info(f"writing HTML output to {fname}")
112116
conv_res.document.save_as_html(
113-
filename=fname, image_mode=image_export_mode
117+
filename=fname,
118+
image_mode=image_export_mode,
119+
artifacts_dir=artifacts_dir,
114120
)
115121

116122
# Export Text format:
@@ -129,6 +135,7 @@ def _export_documents_as_files(
129135
_log.info(f"writing Markdown output to {fname}")
130136
conv_res.document.save_as_markdown(
131137
filename=fname,
138+
artifacts_dir=artifacts_dir,
132139
image_mode=image_export_mode,
133140
page_break_placeholder=md_page_break_placeholder or None,
134141
)

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ classifiers = [
3535
requires-python = ">=3.10"
3636
dependencies = [
3737
"docling~=2.38",
38-
"docling-core>=2.32.0",
38+
"docling-core>=2.44.1",
3939
"docling-jobkit[kfp,vlm]~=1.2",
4040
"fastapi[standard]~=0.115",
4141
"httpx~=0.28",

tests/test_fastapi_endpoints.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
import asyncio
2+
import io
23
import json
34
import os
5+
import zipfile
46

57
import pytest
68
import pytest_asyncio
79
from asgi_lifespan import LifespanManager
810
from httpx import ASGITransport, AsyncClient
911
from pytest_check import check
1012

13+
from docling_core.types.doc import DoclingDocument, PictureItem
14+
1115
from docling_serve.app import create_app
1216

1317

@@ -153,3 +157,37 @@ def safe_slice(value, length=100):
153157
data["document"]["doctags_content"],
154158
msg=f"DocTags document should contain '<doctag><page_header>'. Received: {safe_slice(data['document']['doctags_content'])}",
155159
)
160+
161+
162+
@pytest.mark.asyncio
async def test_referenced_artifacts(client: AsyncClient):
    """Test that paths in the zip file are relative to the zip file root.

    Converts the sample PDF to JSON with ``image_export_mode="referenced"``
    and ``target_type="zip"``, then checks that every picture URI stored in
    the exported document names an entry that actually exists in the
    returned archive.
    """

    endpoint = "/v1/convert/file"
    options = {
        "to_formats": ["json"],
        "image_export_mode": "referenced",
        "target_type": "zip",
        "ocr": False,
    }

    current_dir = os.path.dirname(__file__)
    file_path = os.path.join(current_dir, "2206.01062v1.pdf")

    # Keep the upload handle inside a context manager so it is closed even
    # when the request raises; a bare open() here leaks the file descriptor.
    with open(file_path, "rb") as pdf_file:
        files = {
            "files": ("2206.01062v1.pdf", pdf_file, "application/pdf"),
        }
        response = await client.post(endpoint, files=files, data=options)

    assert response.status_code == 200, "Response should be 200 OK"

    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
        namelist = zip_file.namelist()
        json_names = [name for name in namelist if name.endswith(".json")]
        # Without this guard the loop below would pass vacuously on an
        # archive that contains no exported document at all.
        assert json_names, "Zip archive should contain a JSON document"

        for name in json_names:
            doc = DoclingDocument.model_validate(json.loads(zip_file.read(name)))
            for item, _level in doc.iterate_items():
                if isinstance(item, PictureItem):
                    assert item.image is not None
                    # Self-documenting f-string: prints "item.image.uri=<value>".
                    print(f"{item.image.uri=}")
                    # Archive member names are relative to the zip root, so
                    # the referenced URI must match one of them verbatim.
                    assert str(item.image.uri) in namelist

uv.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)