Skip to content

Commit b598872

Browse files
feat(UI): change UI to use async endpoints (#131)
Signed-off-by: Tiago Santana <[email protected]> Signed-off-by: Michele Dolfi <[email protected]> Co-authored-by: Michele Dolfi <[email protected]>
1 parent 087417e commit b598872

File tree

1 file changed

+139
-44
lines changed

1 file changed

+139
-44
lines changed

docling_serve/gradio_ui.py

Lines changed: 139 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1+
import base64
12
import importlib
23
import json
34
import logging
45
import ssl
56
import tempfile
7+
import time
68
from pathlib import Path
79

810
import certifi
@@ -149,6 +151,11 @@ def set_outputs_visibility_direct(x, y):
149151
return content, file
150152

151153

154+
def set_task_id_visibility(x):
    """Return a ``gr.Row`` whose ``visible`` flag is set to ``x``.

    Used as an event callback: the returned row is the new state for
    whichever row component is listed in the event's ``outputs``.
    """
    return gr.Row(visible=x)
157+
158+
152159
def set_outputs_visibility_process(x):
153160
content = gr.Row(visible=not x)
154161
file = gr.Row(visible=x)
@@ -160,6 +167,7 @@ def set_download_button_label(label_text: gr.State):
160167

161168

162169
def clear_outputs():
170+
task_id_rendered = ""
163171
markdown_content = ""
164172
json_content = ""
165173
json_rendered_content = ""
@@ -168,6 +176,7 @@ def clear_outputs():
168176
doctags_content = ""
169177

170178
return (
179+
task_id_rendered,
171180
markdown_content,
172181
markdown_content,
173182
json_content,
@@ -210,6 +219,51 @@ def change_ocr_lang(ocr_engine):
210219
return "english,chinese"
211220

212221

222+
def wait_task_finish(task_id: str, return_as_file: bool):
    """Poll the status of an async conversion task and return its output.

    The status endpoint is queried repeatedly until the task reports
    ``success``, ``failure`` or ``revoked``.  On success the result endpoint
    is fetched and the response is handed to ``response_to_output``.

    Args:
        task_id: Identifier returned by the async convert endpoint.
        return_as_file: Forwarded unchanged to ``response_to_output``.

    Returns:
        Whatever ``response_to_output`` returns for the result response.

    Raises:
        gr.Error: If polling fails, the task ends with ``failure`` or
            ``revoked``, or the result cannot be fetched or rendered.
    """
    ssl_ctx = get_ssl_context()
    task_status = ""

    while True:
        try:
            # ``wait=5`` is passed to the poll endpoint; presumably the server
            # holds the request for up to that many seconds -- confirm.
            response = httpx.get(
                f"{get_api_endpoint()}/v1alpha/status/poll/{task_id}?wait=5",
                verify=ssl_ctx,
                timeout=15,
            )
            task_status = response.json()["task_status"]
            if task_status in ("failure", "revoked"):
                raise RuntimeError(f"Task failed with status {task_status!r}")
        except Exception as e:
            logger.error(f"Error processing file(s): {e}")
            raise gr.Error(f"Error processing file(s): {e}", print_exception=False)

        if task_status == "success":
            # Leave immediately: sleeping after a terminal status only delays
            # the result fetch without any benefit.
            break

        # Task still pending/running: pause before the next poll.
        time.sleep(5)

    try:
        response = httpx.get(
            f"{get_api_endpoint()}/v1alpha/result/{task_id}",
            timeout=15,
            verify=ssl_ctx,
        )
        return response_to_output(response, return_as_file)
    except Exception as e:
        logger.error(f"Error getting task result: {e}")
        # Chain the cause so the underlying failure stays visible in tracebacks.
        raise gr.Error(
            f"Error getting task result, conversion finished with status: {task_status}"
        ) from e
265+
266+
213267
def process_url(
214268
input_sources,
215269
to_formats,
@@ -256,7 +310,7 @@ def process_url(
256310
try:
257311
ssl_ctx = get_ssl_context()
258312
response = httpx.post(
259-
f"{get_api_endpoint()}/v1alpha/convert/source",
313+
f"{get_api_endpoint()}/v1alpha/convert/source/async",
260314
json=parameters,
261315
verify=ssl_ctx,
262316
timeout=60,
@@ -269,12 +323,19 @@ def process_url(
269323
error_message = data.get("detail", "An unknown error occurred.")
270324
logger.error(f"Error processing file: {error_message}")
271325
raise gr.Error(f"Error processing file: {error_message}", print_exception=False)
272-
output = response_to_output(response, return_as_file)
273-
return output
326+
327+
task_id_rendered = response.json()["task_id"]
328+
return task_id_rendered
329+
330+
331+
def file_to_base64(file):
    """Return the content of the file at ``file.name`` as a base64 string.

    ``file`` only needs a ``name`` attribute holding a path that ``open``
    accepts; the file is read in binary mode and the base64 bytes are
    decoded as UTF-8 text.
    """
    with open(file.name, "rb") as handle:
        raw_bytes = handle.read()
    return base64.b64encode(raw_bytes).decode("utf-8")
274335

275336

276337
def process_file(
277-
files,
338+
file,
278339
to_formats,
279340
image_export_mode,
280341
ocr,
@@ -290,12 +351,13 @@ def process_file(
290351
do_picture_classification,
291352
do_picture_description,
292353
):
293-
if not files or len(files) == 0 or files[0] == "":
354+
if not file or file == "":
294355
logger.error("No files provided.")
295356
raise gr.Error("No files provided.", print_exception=False)
296-
files_data = [("files", (file.name, open(file.name, "rb"))) for file in files]
357+
files_data = [{"base64_string": file_to_base64(file), "filename": file.name}]
297358

298359
parameters = {
360+
"file_sources": files_data,
299361
"to_formats": to_formats,
300362
"image_export_mode": image_export_mode,
301363
"ocr": str(ocr).lower(),
@@ -315,9 +377,8 @@ def process_file(
315377
try:
316378
ssl_ctx = get_ssl_context()
317379
response = httpx.post(
318-
f"{get_api_endpoint()}/v1alpha/convert/file",
319-
files=files_data,
320-
data=parameters,
380+
f"{get_api_endpoint()}/v1alpha/convert/source/async",
381+
json=parameters,
321382
verify=ssl_ctx,
322383
timeout=60,
323384
)
@@ -329,8 +390,9 @@ def process_file(
329390
error_message = data.get("detail", "An unknown error occurred.")
330391
logger.error(f"Error processing file: {error_message}")
331392
raise gr.Error(f"Error processing file: {error_message}", print_exception=False)
332-
output = response_to_output(response, return_as_file)
333-
return output
393+
394+
task_id_rendered = response.json()["task_id"]
395+
return task_id_rendered
334396

335397

336398
def response_to_output(response, return_as_file):
@@ -444,24 +506,24 @@ def response_to_output(response, return_as_file):
444506
)
445507

446508
# URL Processing Tab
447-
with gr.Tab("Convert URL(s)"):
509+
with gr.Tab("Convert URL"):
448510
with gr.Row():
449511
with gr.Column(scale=4):
450512
url_input = gr.Textbox(
451-
label="Input Sources (comma-separated URLs)",
513+
label="URL Input Source",
452514
placeholder="https://arxiv.org/pdf/2206.01062",
453515
)
454516
with gr.Column(scale=1):
455-
url_process_btn = gr.Button("Process URL(s)", scale=1)
517+
url_process_btn = gr.Button("Process URL", scale=1)
456518
url_reset_btn = gr.Button("Reset", scale=1)
457519

458520
# File Processing Tab
459-
with gr.Tab("Convert File(s)"):
521+
with gr.Tab("Convert File"):
460522
with gr.Row():
461523
with gr.Column(scale=4):
462524
file_input = gr.File(
463525
elem_id="file_input_zone",
464-
label="Upload Files",
526+
label="Upload File",
465527
file_types=[
466528
".pdf",
467529
".docx",
@@ -476,11 +538,11 @@ def response_to_output(response, return_as_file):
476538
".png",
477539
".gif",
478540
],
479-
file_count="multiple",
541+
file_count="single",
480542
scale=4,
481543
)
482544
with gr.Column(scale=1):
483-
file_process_btn = gr.Button("Process File(s)", scale=1)
545+
file_process_btn = gr.Button("Process File", scale=1)
484546
file_reset_btn = gr.Button("Reset", scale=1)
485547

486548
# Options
@@ -540,7 +602,9 @@ def response_to_output(response, return_as_file):
540602
)
541603
with gr.Column(scale=1):
542604
abort_on_error = gr.Checkbox(label="Abort on Error", value=False)
543-
return_as_file = gr.Checkbox(label="Return as File", value=False)
605+
return_as_file = gr.Checkbox(
606+
label="Return as File", visible=False, value=False
607+
) # Disable until async handle output as file
544608
with gr.Row():
545609
with gr.Column():
546610
do_code_enrichment = gr.Checkbox(
@@ -557,6 +621,10 @@ def response_to_output(response, return_as_file):
557621
label="Enable picture description", value=False
558622
)
559623

624+
# Task id output
625+
with gr.Row(visible=False) as task_id_output:
626+
task_id_rendered = gr.Textbox(label="Task id", interactive=False)
627+
560628
# Document output
561629
with gr.Row(visible=False) as content_output:
562630
with gr.Tab("Markdown"):
@@ -586,36 +654,34 @@ def response_to_output(response, return_as_file):
586654
# UI Actions #
587655
##############
588656

657+
# Disabled until the async flow can handle output as a file
589658
# Handle Return as File
590-
url_input.change(
591-
auto_set_return_as_file,
592-
inputs=[url_input, file_input, image_export_mode],
593-
outputs=[return_as_file],
594-
)
595-
file_input.change(
596-
auto_set_return_as_file,
597-
inputs=[url_input, file_input, image_export_mode],
598-
outputs=[return_as_file],
599-
)
600-
image_export_mode.change(
601-
auto_set_return_as_file,
602-
inputs=[url_input, file_input, image_export_mode],
603-
outputs=[return_as_file],
604-
)
659+
# url_input.change(
660+
# auto_set_return_as_file,
661+
# inputs=[url_input, file_input, image_export_mode],
662+
# outputs=[return_as_file],
663+
# )
664+
# file_input.change(
665+
# auto_set_return_as_file,
666+
# inputs=[url_input, file_input, image_export_mode],
667+
# outputs=[return_as_file],
668+
# )
669+
# image_export_mode.change(
670+
# auto_set_return_as_file,
671+
# inputs=[url_input, file_input, image_export_mode],
672+
# outputs=[return_as_file],
673+
# )
605674

606675
# URL processing
607676
url_process_btn.click(
608677
set_options_visibility, inputs=[false_bool], outputs=[options]
609678
).then(
610679
set_download_button_label, inputs=[processing_text], outputs=[download_file_btn]
611-
).then(
612-
set_outputs_visibility_process,
613-
inputs=[return_as_file],
614-
outputs=[content_output, file_output],
615680
).then(
616681
clear_outputs,
617682
inputs=None,
618683
outputs=[
684+
task_id_rendered,
619685
output_markdown,
620686
output_markdown_rendered,
621687
output_json,
@@ -625,6 +691,10 @@ def response_to_output(response, return_as_file):
625691
output_text,
626692
output_doctags,
627693
],
694+
).then(
695+
set_task_id_visibility,
696+
inputs=[true_bool],
697+
outputs=[task_id_output],
628698
).then(
629699
process_url,
630700
inputs=[
@@ -644,6 +714,16 @@ def response_to_output(response, return_as_file):
644714
do_picture_classification,
645715
do_picture_description,
646716
],
717+
outputs=[
718+
task_id_rendered,
719+
],
720+
).then(
721+
set_outputs_visibility_process,
722+
inputs=[return_as_file],
723+
outputs=[content_output, file_output],
724+
).then(
725+
wait_task_finish,
726+
inputs=[task_id_rendered, return_as_file],
647727
outputs=[
648728
output_markdown,
649729
output_markdown_rendered,
@@ -674,21 +754,20 @@ def response_to_output(response, return_as_file):
674754
set_outputs_visibility_direct,
675755
inputs=[false_bool, false_bool],
676756
outputs=[content_output, file_output],
677-
).then(clear_url_input, inputs=None, outputs=[url_input])
757+
).then(set_task_id_visibility, inputs=[false_bool], outputs=[task_id_output]).then(
758+
clear_url_input, inputs=None, outputs=[url_input]
759+
)
678760

679761
# File processing
680762
file_process_btn.click(
681763
set_options_visibility, inputs=[false_bool], outputs=[options]
682764
).then(
683765
set_download_button_label, inputs=[processing_text], outputs=[download_file_btn]
684-
).then(
685-
set_outputs_visibility_process,
686-
inputs=[return_as_file],
687-
outputs=[content_output, file_output],
688766
).then(
689767
clear_outputs,
690768
inputs=None,
691769
outputs=[
770+
task_id_rendered,
692771
output_markdown,
693772
output_markdown_rendered,
694773
output_json,
@@ -698,6 +777,10 @@ def response_to_output(response, return_as_file):
698777
output_text,
699778
output_doctags,
700779
],
780+
).then(
781+
set_task_id_visibility,
782+
inputs=[true_bool],
783+
outputs=[task_id_output],
701784
).then(
702785
process_file,
703786
inputs=[
@@ -717,6 +800,16 @@ def response_to_output(response, return_as_file):
717800
do_picture_classification,
718801
do_picture_description,
719802
],
803+
outputs=[
804+
task_id_rendered,
805+
],
806+
).then(
807+
set_outputs_visibility_process,
808+
inputs=[return_as_file],
809+
outputs=[content_output, file_output],
810+
).then(
811+
wait_task_finish,
812+
inputs=[task_id_rendered, return_as_file],
720813
outputs=[
721814
output_markdown,
722815
output_markdown_rendered,
@@ -747,4 +840,6 @@ def response_to_output(response, return_as_file):
747840
set_outputs_visibility_direct,
748841
inputs=[false_bool, false_bool],
749842
outputs=[content_output, file_output],
750-
).then(clear_file_input, inputs=None, outputs=[file_input])
843+
).then(set_task_id_visibility, inputs=[false_bool], outputs=[task_id_output]).then(
844+
clear_file_input, inputs=None, outputs=[file_input]
845+
)

0 commit comments

Comments
 (0)