Skip to content

Commit f2c4d64

Browse files
author
PranavB-11
committed
Marker API Implemented + Updated Llama code if ever needed
1 parent 2ea729c commit f2c4d64

File tree

3 files changed

+177
-41
lines changed

3 files changed

+177
-41
lines changed

fastchat/serve/gradio_block_arena_vision.py

Lines changed: 71 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
from gradio.data_classes import FileData
1717
import numpy as np
1818

19+
from io import BytesIO
20+
import base64
21+
1922
from fastchat.constants import (
2023
TEXT_MODERATION_MSG,
2124
IMAGE_MODERATION_MSG,
@@ -217,29 +220,38 @@ def wrap_pdfchat_query(query, document):
217220

218221
# def parse_pdf(file_path):
219222
# from llama_parse import LlamaParse
223+
# from llama_index.core.schema import ImageDocument, TextNode
224+
225+
# from PIL import Image
220226

221-
# assert (
222-
# "LLAMA_CLOUD_API_KEY" in os.environ
223-
# ), "Make sure to specify LlamaParse API key."
224-
225-
# for _ in range(LLAMA_PARSE_MAX_RETRY):
226-
# try:
227-
# documents = LlamaParse(
228-
# result_type="markdown",
229-
# verbose=True,
230-
# languages=list(LLAMAPARSE_SUPPORTED_LANGS.values()),
231-
# accurate_mode=True,
232-
# ).load_data(file_path)
233-
# assert len(documents) > 0
234-
# break
235-
# except AssertionError as e:
236-
# continue
237-
238-
# output = "\n".join(
239-
# [f"Page {i+1}:\n{doc.text}\n" for i, doc in enumerate(documents)]
227+
# parser = LlamaParse(
228+
# api_key=os.getenv("LLAMA_CLOUD_API_KEY"),
229+
# result_type="markdown",
240230
# )
241231

242-
# return output
232+
# def get_image_nodes(json_objs: List[dict], download_path: str):
233+
# image_dicts = parser.get_images(json_objs, download_path=download_path)
234+
# return [ImageDocument(image_path=image_dict["path"]) for image_dict in image_dicts]
235+
236+
# json_objs = parser.get_json_result(file_path)
237+
# json_list = json_objs[0]["pages"]
238+
239+
# text = ""
240+
# for page in json_list:
241+
# text += f"Page {page['page']}:\n{page['md']}\n"
242+
# if (page['images']):
243+
# for i, image in enumerate(page['images']):
244+
# text += f"page{page['page']}_figure{i + 1}\n"
245+
246+
# image_documents = get_image_nodes(json_objs, ".")
247+
# images = []
248+
249+
# for image_doc in image_documents:
250+
# image_path = image_doc.image_path
251+
# image = Image.open(image_path)
252+
# images.append(image)
253+
254+
# return text, images
243255

244256

245257
PDFPARSE_MAX_RETRY = 2
@@ -259,29 +271,48 @@ def wrap_pdfchat_query(query, document):
259271
"languages": ",".join(PDFPARSE_SUPPORTED_LANGS.values()),
260272
}
261273

274+
def convert_base64_to_pil_image(b64_string):
    """Turn a base64-encoded image payload into a PIL image.

    Args:
        b64_string: Base64 data accepted by ``base64.b64decode`` that holds
            the bytes of an encoded image file.

    Returns:
        The object returned by ``PIL.Image.open`` over an in-memory buffer
        containing the decoded bytes.
    """
    # Function-scope import: Pillow is only needed when this helper is called.
    from PIL import Image

    raw_bytes = base64.b64decode(b64_string)
    return Image.open(BytesIO(raw_bytes))
262282

263283
def parse_pdf(file_path, *, max_polls=300, poll_interval=2, request_timeout=60):
    """Convert a PDF to markdown plus images via the hosted Datalab Marker API.

    The PDF is uploaded as a multipart form, then the ``request_check_url``
    returned by the service is polled until the job reports
    ``status == "complete"``. Images in the result are base64 strings and are
    decoded with ``convert_base64_to_pil_image``.

    Args:
        file_path: Path of the PDF file to upload.
        max_polls: Maximum number of status checks before giving up.
        poll_interval: Seconds to sleep before each status check.
        request_timeout: Timeout in seconds applied to every HTTP request.

    Returns:
        A ``(output_md, output_images)`` tuple: the ``"markdown"`` field of
        the final response and a list of decoded images.

    Raises:
        RuntimeError: If the API key is not configured, the service does not
            return a ``request_check_url``, or the finished job is flagged as
            unsuccessful.
        TimeoutError: If the job is not complete after ``max_polls`` checks.
        requests.HTTPError: If any HTTP call returns an error status.
    """
    import time

    import requests

    url = "https://www.datalab.to/api/v1/marker"

    api_key = os.getenv("X-Api-Key")
    if not api_key:
        # requests drops headers whose value is None, so a missing key would
        # otherwise go out unauthenticated and fail later with an opaque error.
        raise RuntimeError(
            "Marker API key is not set; export it as the 'X-Api-Key' "
            "environment variable."
        )
    headers = {"X-Api-Key": api_key}

    # Keep the upload inside the context manager so the handle is always closed.
    with open(file_path, "rb") as pdf_file:
        form_data = {
            "file": ("test.pdf", pdf_file, "application/pdf"),
            "langs": (None, "English"),
            "force_ocr": (None, False),
            "paginate": (None, False),
            "output_format": (None, "markdown"),
            "use_llm": (None, True),
            "strip_existing_ocr": (None, False),
            "disable_image_extraction": (None, False),
        }
        response = requests.post(
            url, files=form_data, headers=headers, timeout=request_timeout
        )

    response.raise_for_status()
    data = response.json()

    check_url = data.get("request_check_url")
    if not check_url:
        raise RuntimeError(f"Marker API did not accept the request: {data}")

    for _ in range(max_polls):
        time.sleep(poll_interval)
        response = requests.get(check_url, headers=headers, timeout=request_timeout)
        response.raise_for_status()
        data = response.json()

        if data.get("status") == "complete":
            break
    else:
        # Never saw "complete": do not read results from an unfinished job.
        raise TimeoutError(
            f"Marker API job did not complete after {max_polls} polls "
            f"({poll_interval}s apart)."
        )

    # NOTE(review): the "success"/"error" fields follow the Datalab response
    # format as documented; only an explicit False is treated as failure.
    if data.get("success") is False:
        raise RuntimeError(
            f"Marker API failed to convert the PDF: {data.get('error')}"
        )

    output_md = data["markdown"]
    # "images" may be absent or null when the document has no figures.
    encoded_images = data.get("images") or {}
    output_images = [
        convert_base64_to_pil_image(b64_image)
        for b64_image in encoded_images.values()
    ]

    return output_md, output_images
287318

fastchat/serve/gradio_block_arena_vision_anony.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,8 @@
9595

9696
# TODO(chris): fix sampling weights
9797
VISION_SAMPLING_WEIGHTS = {}
98-
PDFCHAT_SAMPLING_WEIGHTS = {}
98+
PDFCHAT_SAMPLING_WEIGHTS = {
99+
"gpt-4o-2024-05-13": 1, "gpt-4o-mini-2024-07-18": 1}
99100

100101
# TODO(chris): Find battle targets that make sense
101102
VISION_BATTLE_TARGETS = {}

fastchat/serve/test.ipynb

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 8,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import requests\n",
10+
"\n",
11+
"url = \"https://www.datalab.to/api/v1/marker\"\n",
12+
"import os\n",
13+
"form_data = {\n",
14+
" 'file': ('test.pdf', open(\"ddsppaper (1).pdf\", 'rb'), 'application/pdf'),\n",
15+
" 'langs': (None, \"English\"),\n",
16+
" \"force_ocr\": (None, False),\n",
17+
" \"paginate\": (None, False),\n",
18+
" 'output_format': (None, 'markdown'),\n",
19+
" \"use_llm\": (None, True),\n",
20+
" \"strip_existing_ocr\": (None, False),\n",
21+
" \"disable_image_extraction\": (None, False)\n",
22+
"}\n",
23+
"\n",
24+
"headers = {\"X-Api-Key\": os.getenv(\"X-Api-Key\")}\n",
25+
"response = requests.post(url, files=form_data, headers=headers)\n",
26+
"data = response.json()"
27+
]
28+
},
29+
{
30+
"cell_type": "code",
31+
"execution_count": 9,
32+
"metadata": {},
33+
"outputs": [
34+
{
35+
"name": "stdout",
36+
"output_type": "stream",
37+
"text": [
38+
"{'detail': 'Not authenticated'}\n"
39+
]
40+
}
41+
],
42+
"source": [
43+
"print(data)"
44+
]
45+
},
46+
{
47+
"cell_type": "code",
48+
"execution_count": 4,
49+
"metadata": {},
50+
"outputs": [
51+
{
52+
"ename": "KeyError",
53+
"evalue": "'request_check_url'",
54+
"output_type": "error",
55+
"traceback": [
56+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
57+
"\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)",
58+
"Cell \u001b[0;32mIn[4], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m max_polls \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m300\u001b[39m\n\u001b[0;32m----> 2\u001b[0m check_url \u001b[38;5;241m=\u001b[39m data[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrequest_check_url\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtime\u001b[39;00m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(max_polls):\n",
59+
"\u001b[0;31mKeyError\u001b[0m: 'request_check_url'"
60+
]
61+
}
62+
],
63+
"source": [
64+
"max_polls = 300\n",
65+
"check_url = data[\"request_check_url\"]\n",
66+
"import time\n",
67+
"for i in range(max_polls):\n",
68+
" time.sleep(2)\n",
69+
" response = requests.get(check_url, headers=headers)\n",
70+
" data = response.json()\n",
71+
"\n",
72+
" if data[\"status\"] == \"complete\":\n",
73+
" break"
74+
]
75+
}
76+
],
77+
"metadata": {
78+
"kernelspec": {
79+
"display_name": "Python 3.12.8 ('myenv')",
80+
"language": "python",
81+
"name": "python3"
82+
},
83+
"language_info": {
84+
"codemirror_mode": {
85+
"name": "ipython",
86+
"version": 3
87+
},
88+
"file_extension": ".py",
89+
"mimetype": "text/x-python",
90+
"name": "python",
91+
"nbconvert_exporter": "python",
92+
"pygments_lexer": "ipython3",
93+
"version": "3.12.8"
94+
},
95+
"orig_nbformat": 4,
96+
"vscode": {
97+
"interpreter": {
98+
"hash": "03c457903a5d26c69a3bb8be9c56ac1ee96fb7ba834b2e69a22fb0607b146481"
99+
}
100+
}
101+
},
102+
"nbformat": 4,
103+
"nbformat_minor": 2
104+
}

0 commit comments

Comments
 (0)