28
28
from scalar_fastapi import get_scalar_api_reference
29
29
30
30
from docling .datamodel .base_models import DocumentStream
31
-
32
- from docling_serve .datamodel .callback import (
31
+ from docling_jobkit .datamodel .callback import (
33
32
ProgressCallbackRequest ,
34
33
ProgressCallbackResponse ,
35
34
)
36
- from docling_serve .datamodel .convert import ConvertDocumentsOptions
35
+ from docling_jobkit .datamodel .task import Task , TaskSource
36
+ from docling_jobkit .orchestrators .base_orchestrator import (
37
+ BaseOrchestrator ,
38
+ ProgressInvalid ,
39
+ TaskNotFoundError ,
40
+ )
41
+
42
+ from docling_serve .datamodel .convert import ConvertDocumentsRequestOptions
37
43
from docling_serve .datamodel .requests import (
38
44
ConvertDocumentFileSourcesRequest ,
39
45
ConvertDocumentHttpSourcesRequest ,
47
53
TaskStatusResponse ,
48
54
WebsocketMessage ,
49
55
)
50
- from docling_serve .datamodel .task import Task , TaskSource
51
- from docling_serve .docling_conversion import _get_converter_from_hash
52
- from docling_serve .engines .async_orchestrator import (
53
- BaseAsyncOrchestrator ,
54
- ProgressInvalid ,
55
- )
56
- from docling_serve .engines .async_orchestrator_factory import get_async_orchestrator
57
- from docling_serve .engines .base_orchestrator import TaskNotFoundError
58
56
from docling_serve .helper_functions import FormDepends
57
+ from docling_serve .orchestrator_factory import get_async_orchestrator
58
+ from docling_serve .response_preparation import prepare_response
59
59
from docling_serve .settings import docling_serve_settings
60
60
from docling_serve .storage import get_scratch
61
+ from docling_serve .websocker_notifier import WebsocketNotifier
61
62
62
63
63
64
# Set up custom logging as we'll be intermixed with FastAPI/Uvicorn's logging
@@ -95,9 +96,12 @@ def format(self, record):
95
96
# Context manager to initialize and clean up the lifespan of the FastAPI app
96
97
@asynccontextmanager
97
98
async def lifespan (app : FastAPI ):
98
- orchestrator = get_async_orchestrator ()
99
99
scratch_dir = get_scratch ()
100
100
101
+ orchestrator = get_async_orchestrator ()
102
+ notifier = WebsocketNotifier (orchestrator )
103
+ orchestrator .bind_notifier (notifier )
104
+
101
105
# Warm up processing cache
102
106
if docling_serve_settings .load_models_at_boot :
103
107
await orchestrator .warm_up_caches ()
@@ -230,7 +234,7 @@ async def scalar_html():
230
234
########################
231
235
232
236
async def _enque_source (
233
- orchestrator : BaseAsyncOrchestrator , conversion_request : ConvertDocumentsRequest
237
+ orchestrator : BaseOrchestrator , conversion_request : ConvertDocumentsRequest
234
238
) -> Task :
235
239
sources : list [TaskSource ] = []
236
240
if isinstance (conversion_request , ConvertDocumentFileSourcesRequest ):
@@ -244,9 +248,9 @@ async def _enque_source(
244
248
return task
245
249
246
250
async def _enque_file (
247
- orchestrator : BaseAsyncOrchestrator ,
251
+ orchestrator : BaseOrchestrator ,
248
252
files : list [UploadFile ],
249
- options : ConvertDocumentsOptions ,
253
+ options : ConvertDocumentsRequestOptions ,
250
254
) -> Task :
251
255
_log .info (f"Received { len (files )} files for processing." )
252
256
@@ -261,9 +265,7 @@ async def _enque_file(
261
265
task = await orchestrator .enqueue (sources = file_sources , options = options )
262
266
return task
263
267
264
- async def _wait_task_complete (
265
- orchestrator : BaseAsyncOrchestrator , task_id : str
266
- ) -> bool :
268
+ async def _wait_task_complete (orchestrator : BaseOrchestrator , task_id : str ) -> bool :
267
269
start_time = time .monotonic ()
268
270
while True :
269
271
task = await orchestrator .task_status (task_id = task_id )
@@ -309,32 +311,28 @@ def api_check() -> HealthCheckResponse:
309
311
)
310
312
async def process_url(
    background_tasks: BackgroundTasks,
    orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
    conversion_request: ConvertDocumentsRequest,
):
    # Synchronous conversion endpoint for URL/source requests: enqueue the
    # request on the orchestrator, wait for the task, then build the response.
    # (Kept as comments rather than a docstring so the generated OpenAPI
    # description of the route is not altered.)
    task = await _enque_source(
        orchestrator=orchestrator, conversion_request=conversion_request
    )
    completed = await _wait_task_complete(
        orchestrator=orchestrator, task_id=task.task_id
    )

    if not completed:
        # TODO: abort task!
        # The exception must be raised, not returned: a returned HTTPException
        # is treated as an ordinary response payload, so the client would never
        # see the 504 status code.
        raise HTTPException(
            status_code=504,
            detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
        )

    # Re-fetch the task from the orchestrator after waiting and hand it to
    # prepare_response, which also receives the request's background tasks.
    task = await orchestrator.get_raw_task(task_id=task.task_id)
    response = await prepare_response(
        task=task, orchestrator=orchestrator, background_tasks=background_tasks
    )
    return response
338
336
339
337
# Convert a document from file(s)
340
338
@app .post (
@@ -348,43 +346,39 @@ async def process_url(
348
346
)
349
347
async def process_file(
    background_tasks: BackgroundTasks,
    orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
    files: list[UploadFile],
    options: Annotated[
        ConvertDocumentsRequestOptions, FormDepends(ConvertDocumentsRequestOptions)
    ],
):
    # Synchronous conversion endpoint for uploaded files: enqueue the files
    # with the form-supplied options, wait for the task, then build the
    # response. (Kept as comments rather than a docstring so the generated
    # OpenAPI description of the route is not altered.)
    task = await _enque_file(
        orchestrator=orchestrator, files=files, options=options
    )
    completed = await _wait_task_complete(
        orchestrator=orchestrator, task_id=task.task_id
    )

    if not completed:
        # TODO: abort task!
        # The exception must be raised, not returned: a returned HTTPException
        # is treated as an ordinary response payload, so the client would never
        # see the 504 status code.
        raise HTTPException(
            status_code=504,
            detail=f"Conversion is taking too long. The maximum wait time is configure as DOCLING_SERVE_MAX_SYNC_WAIT={docling_serve_settings.max_sync_wait}.",
        )

    # Re-fetch the task from the orchestrator after waiting and hand it to
    # prepare_response, which also receives the request's background tasks.
    task = await orchestrator.get_raw_task(task_id=task.task_id)
    response = await prepare_response(
        task=task, orchestrator=orchestrator, background_tasks=background_tasks
    )
    return response
380
374
381
375
# Convert a document from URL(s) using the async api
382
376
@app .post (
383
377
"/v1alpha/convert/source/async" ,
384
378
response_model = TaskStatusResponse ,
385
379
)
386
380
async def process_url_async (
387
- orchestrator : Annotated [BaseAsyncOrchestrator , Depends (get_async_orchestrator )],
381
+ orchestrator : Annotated [BaseOrchestrator , Depends (get_async_orchestrator )],
388
382
conversion_request : ConvertDocumentsRequest ,
389
383
):
390
384
task = await _enque_source (
@@ -406,11 +400,11 @@ async def process_url_async(
406
400
response_model = TaskStatusResponse ,
407
401
)
408
402
async def process_file_async (
409
- orchestrator : Annotated [BaseAsyncOrchestrator , Depends (get_async_orchestrator )],
403
+ orchestrator : Annotated [BaseOrchestrator , Depends (get_async_orchestrator )],
410
404
background_tasks : BackgroundTasks ,
411
405
files : list [UploadFile ],
412
406
options : Annotated [
413
- ConvertDocumentsOptions , FormDepends (ConvertDocumentsOptions )
407
+ ConvertDocumentsRequestOptions , FormDepends (ConvertDocumentsRequestOptions )
414
408
],
415
409
):
416
410
task = await _enque_file (
@@ -432,7 +426,7 @@ async def process_file_async(
432
426
response_model = TaskStatusResponse ,
433
427
)
434
428
async def task_status_poll (
435
- orchestrator : Annotated [BaseAsyncOrchestrator , Depends (get_async_orchestrator )],
429
+ orchestrator : Annotated [BaseOrchestrator , Depends (get_async_orchestrator )],
436
430
task_id : str ,
437
431
wait : Annotated [
438
432
float , Query (help = "Number of seconds to wait for a completed status." )
@@ -456,9 +450,10 @@ async def task_status_poll(
456
450
)
457
451
async def task_status_ws (
458
452
websocket : WebSocket ,
459
- orchestrator : Annotated [BaseAsyncOrchestrator , Depends (get_async_orchestrator )],
453
+ orchestrator : Annotated [BaseOrchestrator , Depends (get_async_orchestrator )],
460
454
task_id : str ,
461
455
):
456
+ assert isinstance (orchestrator .notifier , WebsocketNotifier )
462
457
await websocket .accept ()
463
458
464
459
if task_id not in orchestrator .tasks :
@@ -473,7 +468,7 @@ async def task_status_ws(
473
468
task = orchestrator .tasks [task_id ]
474
469
475
470
# Track active WebSocket connections for this job
476
- orchestrator .task_subscribers [task_id ].add (websocket )
471
+ orchestrator .notifier . task_subscribers [task_id ].add (websocket )
477
472
478
473
try :
479
474
task_queue_position = await orchestrator .get_queue_position (task_id = task_id )
@@ -511,7 +506,7 @@ async def task_status_ws(
511
506
_log .info (f"WebSocket disconnected for job { task_id } " )
512
507
513
508
finally :
514
- orchestrator .task_subscribers [task_id ].remove (websocket )
509
+ orchestrator .notifier . task_subscribers [task_id ].remove (websocket )
515
510
516
511
# Task result
517
512
@app .get (
@@ -524,27 +519,26 @@ async def task_status_ws(
524
519
},
525
520
)
526
521
async def task_result(
    orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
    background_tasks: BackgroundTasks,
    task_id: str,
):
    # Look up the task by id and return the response built by
    # prepare_response. A TaskNotFoundError raised while doing so is
    # reported to the client as HTTP 404.
    try:
        raw_task = await orchestrator.get_raw_task(task_id=task_id)
        return await prepare_response(
            task=raw_task,
            orchestrator=orchestrator,
            background_tasks=background_tasks,
        )
    except TaskNotFoundError:
        raise HTTPException(status_code=404, detail="Task not found.")
540
534
541
535
# Update task progress
542
536
@app .post (
543
537
"/v1alpha/callback/task/progress" ,
544
538
response_model = ProgressCallbackResponse ,
545
539
)
546
540
async def callback_task_progress (
547
- orchestrator : Annotated [BaseAsyncOrchestrator , Depends (get_async_orchestrator )],
541
+ orchestrator : Annotated [BaseOrchestrator , Depends (get_async_orchestrator )],
548
542
request : ProgressCallbackRequest ,
549
543
):
550
544
try :
@@ -564,8 +558,10 @@ async def callback_task_progress(
564
558
"/v1alpha/clear/converters" ,
565
559
response_model = ClearResponse ,
566
560
)
567
async def clear_converters(
    orchestrator: Annotated[BaseOrchestrator, Depends(get_async_orchestrator)],
):
    # Delegate the clearing of converters to the injected orchestrator, then
    # acknowledge with a default ClearResponse.
    await orchestrator.clear_converters()
    acknowledgement = ClearResponse()
    return acknowledgement
570
566
571
567
# Clean results
@@ -574,7 +570,7 @@ async def clear_converters():
574
570
response_model = ClearResponse ,
575
571
)
576
572
async def clear_results (
577
- orchestrator : Annotated [BaseAsyncOrchestrator , Depends (get_async_orchestrator )],
573
+ orchestrator : Annotated [BaseOrchestrator , Depends (get_async_orchestrator )],
578
574
older_then : float = 3600 ,
579
575
):
580
576
await orchestrator .clear_results (older_than = older_then )
0 commit comments