@@ -62,7 +62,7 @@ def image_to_base64_data_url(image):
     return f"data:image/png;base64,{img_str}"
 
 
-def load_pdf_prompts(num_samples: int = 100, seed: int = 42, max_length: int = 2048) -> List[Dict[str, str]]:
+async def load_pdf_prompts(num_samples: int = 100, seed: int = 42, max_length: int = 2048) -> List[Dict[str, str]]:
     """Load prompts and images from olmOCR-mix-0225-benchmarkset dataset with fixed random seed."""
     print(f"Loading olmOCR-mix-0225-benchmarkset dataset with {num_samples} samples and seed {seed}")
 
@@ -101,12 +101,12 @@ def load_pdf_prompts(num_samples: int = 100, seed: int = 42, max_length: int = 2
     for pdf_path in sampled_pdfs:
         try:
             # Build page query for page 1 of each PDF
-            query = asyncio.run(build_page_query(
+            query = await build_page_query(
                 local_pdf_path=pdf_path,
                 page=1,
                 target_longest_image_dim=1280,
                 image_rotation=0
-            ))
+            )
             queries.append(query)
         except Exception as e:
             print(f"Error processing {os.path.basename(pdf_path)}: {e}")
@@ -316,7 +316,7 @@ async def async_main():
     model_path = await download_model(args.model)
 
     # Load prompts and images
-    samples = load_pdf_prompts(num_samples=args.num_prompts, seed=args.seed)
+    samples = await load_pdf_prompts(num_samples=args.num_prompts, seed=args.seed)
 
     # Create vLLM engine
     print("\n=== Creating vLLM Engine ===")