Implement content moderation and a couple of latency improvements

PranavB-11 · PranavB-11 · commit cf9b4084c63c · 2025-02-11T06:08:35.000Z
diff --git a/fastchat/serve/gradio_block_arena_vision.py b/fastchat/serve/gradio_block_arena_vision.py
@@ -273,21 +273,26 @@ def wrap_pdfchat_query(query, document):
 
 def convert_base64_to_pil_image(b64_string):
     from PIL import Image
+    import numpy as np
 
-    image_data = base64.b64decode(b64_string)
+    image_data = np.frombuffer(base64.b64decode(b64_string), dtype=np.uint8)
     image_bytes = BytesIO(image_data)
     image = Image.open(image_bytes)
     
     return image
 
+def batch_convert_base64_to_images(base64_dict):
+    import concurrent.futures
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        return list(executor.map(convert_base64_to_pil_image, base64_dict.values()))
+
 def parse_pdf(file_path):
     import requests
 
     url = "https://www.datalab.to/api/v1/marker"
 
     form_data = {
         'file': ('test.pdf', open(file_path, 'rb'), 'application/pdf'),
-        'langs': (None, "English"),
         "force_ocr": (None, False),
         "paginate": (None, False),
         'output_format': (None, 'markdown'),
@@ -296,7 +301,7 @@ def parse_pdf(file_path):
         "disable_image_extraction": (None, False)
     }
 
-    headers = {"X-Api-Key": os.getenv("X-Api-Key")} 
+    headers = {"X-Api-Key": str(os.getenv("MARKER_API_KEY"))} 
     response = requests.post(url, files=form_data, headers=headers)
     data = response.json()
 
@@ -312,7 +317,7 @@ def parse_pdf(file_path):
             break
     
     output_md = data["markdown"]
-    output_images = [convert_base64_to_pil_image(b64_image) for b64_image in data["images"].values()]
+    output_images = batch_convert_base64_to_images(data["images"])
 
     return output_md, output_images
 
diff --git a/fastchat/serve/gradio_block_arena_vision_anony.py b/fastchat/serve/gradio_block_arena_vision_anony.py
@@ -312,10 +312,6 @@ def add_text(
                 State(model_left, is_vision=False, pdf_id=unique_id),
                 State(model_right, is_vision=False, pdf_id=unique_id),
             ]
-            upload_pdf_file_to_gcs(
-                pdf_file_path=pdfs[0],
-                filename=unique_id,
-            )
         else:
             model_left, model_right = get_battle_pair(
                 context.all_text_models,
@@ -366,10 +362,17 @@ def add_text(
 
     images = convert_images_to_conversation_format(images)
 
-    # TODO: add PDF moderator
-    text, image_flagged, csam_flag = moderate_input(
-        state0, text, text, model_list, images, ip
-    )
+    post_processed_text = _prepare_text_with_pdf(text[:BLIND_MODE_INPUT_CHAR_LEN_LIMIT], pdfs)
+    if type(post_processed_text) is tuple:
+        text += post_processed_text[0]
+        text, image_flagged, csam_flag = moderate_input(
+            state0, text, text, model_list, images + post_processed_text[1], ip
+        )
+    else:
+        text += post_processed_text
+        text, image_flagged, csam_flag = moderate_input(
+            state0, text, text, model_list, images, ip
+        )
 
     conv = states[0].conv
     if (len(conv.messages) - conv.offset) // 2 >= CONVERSATION_TURN_LIMIT:
@@ -408,7 +411,11 @@ def add_text(
         )
 
     text = text[:BLIND_MODE_INPUT_CHAR_LEN_LIMIT]  # Hard cut-off
-    post_processed_text = _prepare_text_with_pdf(text, pdfs)
+
+    upload_pdf_file_to_gcs(
+        pdf_file_path=pdfs[0],
+        filename=unique_id,
+    )
 
     for i in range(num_sides):
         post_processed_text = _prepare_text_with_image(