deepjavalibrary · sindhuvahinis · Jul 5, 2024 · Jul 11, 2024
@@ -114,6 +114,8 @@ jobs:
             instance: g6
           - test: TestVllmLora
             instance: g6
+          - test: TestVLMs
+            instance: g6
           - test: TestLmiDistLora
             instance: g6
           - test: TestNeuronx1
@@ -142,7 +144,7 @@ jobs:
           # of aarch64 with ubuntu-20.04 not supported by the actions/setup-python
           sudo apt-get install python3 python-is-python3 python3-pip -y
       - name: Install pip dependencies
-        run: pip3 install pytest requests "numpy<2" pillow huggingface_hub
+        run: pip3 install pytest requests "numpy<2" pillow huggingface_hub openai
       - name: Install awscurl
         working-directory: tests/integration
         run: |

@@ -25,7 +25,6 @@ def get_image_text_prompt(prompt_text: str) -> str:
     # TODO: image token str must be decoded from image_token_id in serving.properties. Change it after refactor PR.
     image_token_str = '<image>'
 
-    # TODO: Remove image_token_str*1176 after vllm next release, as the image placeholder is not needed.
     return f"{image_token_str}\n{prompt_text}"
 
 

@@ -0,0 +1,64 @@
+import unittest
+
+from openai import OpenAI
+from transformers import AutoTokenizer
+
+from djl_python.chat_completions.chat_utils import parse_chat_completions_request
+from djl_python.multimodal.utils import encode_image_base64_from_url
+
+# Placeholder key; it is handed to the client explicitly below rather than
+# being left for the client to look up on its own.
+OPENAI_API_KEY = "EMPTY"
+OPENAI_API_BASE = "http://localhost:8000/v1"
+
+# NOTE(review): `client` is not referenced by the test case visible in this
+# file — confirm whether it is still needed.
+client = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_API_BASE)
+
+
+class TestMultiModalUtils(unittest.TestCase):
+    """Checks parsing of OpenAI-style chat requests that carry an image."""
+
+    def test_open_ai_format_parse(self):
+        """Parse a text + base64-image message; verify prompt and params."""
+        image_url = "https://resources.djl.ai/images/dog_bike_car.jpg"
+        encoded = encode_image_base64_from_url(image_url=image_url)
+
+        # One user turn made of a text part followed by an image part.
+        text_part = {"type": "text", "text": "What’s in this image?"}
+        image_part = {
+            "type": "image_url",
+            "image_url": {
+                "url": f"data:image/jpeg;base64,{encoded}"
+            },
+        }
+        user_turn = {"role": "user", "content": [text_part, image_part]}
+        request = {'messages': [user_turn], 'model': ""}
+
+        tokenizer = AutoTokenizer.from_pretrained(
+            "llava-hf/llava-v1.6-34b-hf", use_fast=False)
+        inputs, params = parse_chat_completions_request(
+            request, is_rolling_batch=True, tokenizer=tokenizer)
+        print(inputs)
+
+        # The image placeholder is expected ahead of the user's text.
+        image_token = "<image>"
+        expected_prompt = (
+            f"<|im_start|>user\n{image_token}\nWhat’s in this image?<|im_end|>\n"
+        )
+        self.assertEqual(expected_prompt, inputs)
+
+        # Take the images out of params before comparing what remains.
+        images = params.pop("images", None)
+        for image in images:
+            print(image)
+
+        expected_params = {
+            'frequency_penalty': 0.0,
+            'presence_penalty': 0.0,
+            'stream': False,
+            'temperature': 1.0,
+            'top_p': 1.0,
+            'do_sample': True,
+            'details': True,
+            'output_formatter': 'json_chat'
+        }
+        self.assertEqual(expected_params, params)
@@ -0,0 +1,82 @@
+import argparse
+import base64
+import sys
+
+import requests
+from openai import OpenAI
+
+# Placeholder key; it is handed to the client explicitly below rather than
+# being left for the client to look up on its own.
+OPENAI_API_KEY = "EMPTY"
+# Base URL given to the OpenAI client for every request it makes.
+OPENAI_API_BASE = "http://localhost:8080/invocations"
+
+# Shared client used by call_chat_completion_api.
+client = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_API_BASE)
+
+
+def call_chat_completion_api(image: str):
+    """Ask the chat completions API what is in the given image.
+
+    Args:
+        image: Value placed in the ``image_url.url`` field of the message.
+
+    Returns:
+        Whatever ``client.chat.completions.create`` returns.
+    """
+    question = {"type": "text", "text": "What’s in this image?"}
+    picture = {"type": "image_url", "image_url": {"url": f"{image}"}}
+    user_turn = {"role": "user", "content": [question, picture]}
+
+    # The request is sent with an empty model name.
+    return client.chat.completions.create(messages=[user_turn], model="")
+
+
+def get_image_url(image_url_type: str, image: str) -> str:
+    """Return the value to send as the ``image_url.url`` of a chat message.
+
+    Args:
+        image_url_type: ``"base64"`` to inline the image as a data URL; any
+            other value returns ``image`` unchanged.
+        image: An http(s) URL or a local file path.
+
+    Returns:
+        A ``data:image/jpeg;base64,...`` URL when ``image_url_type`` is
+        ``"base64"``, otherwise ``image`` itself.
+
+    Raises:
+        requests.HTTPError: If downloading ``image`` gives an error status.
+        OSError: If the local file cannot be read.
+    """
+    if image_url_type != "base64":
+        return image
+
+    if image.startswith("http"):
+        # Download from the ``image`` parameter; the previous code referenced
+        # an undefined name here and raised NameError.
+        with requests.get(image) as response:
+            response.raise_for_status()
+            raw = response.content
+    else:
+        with open(image, "rb") as image_file:
+            raw = image_file.read()
+
+    # b64encode returns bytes; decode so the f-string embeds the payload
+    # itself rather than its ``b'...'`` repr.
+    image_base64 = base64.b64encode(raw).decode('utf-8')
+    # NOTE(review): the MIME type is always reported as JPEG, whatever the
+    # actual image format is.
+    return f"data:image/jpeg;base64,{image_base64}"
+
+
+def run(raw_args):
+    """Parse CLI arguments, call the chat completion API and print the result.
+
+    Args:
+        raw_args: Argument strings, e.g. ``sys.argv[1:]``. Both positionals
+            are optional: ``image_url_type`` falls back to ``"url"`` and
+            ``image`` to the sample image URL.
+    """
+    parser = argparse.ArgumentParser(description="OpenAI VLM API client")
+    # nargs="?" is what lets argparse apply ``default`` to a positional;
+    # without it the argument is required and the default is never used.
+    parser.add_argument("image_url_type",
+                        nargs="?",
+                        type=str,
+                        choices=["url", "base64"],
+                        default="url",
+                        help="image url type")
+    parser.add_argument(
+        "image",
+        nargs="?",
+        type=str,
+        default="https://resources.djl.ai/images/dog_bike_car.jpg",
+        help="image http url or local path")
+
+    # The parsed namespace is also published as a module-level global; kept
+    # as-is in case other code reads it.
+    global args
+    args = parser.parse_args(args=raw_args)
+
+    image_url = get_image_url(args.image_url_type, args.image)
+    result = call_chat_completion_api(image_url)
+    print(f"OpenAI vision client result {result}")
+
+
+if __name__ == "__main__":
+    run(sys.argv[1:])
@@ -749,6 +749,13 @@
         "option.dtype": "fp16",
         "option.tensor_parallel_degree": 4,
         "option.max_rolling_batch_size": 4,
+    },
+    "llava-next-chat": {
+        "option.model_id": "llava-hf/llava-v1.6-34b-hf",
+        "option.image_token_id": 64003,
+        "option.image_input_type": "pixel_values",
+        "option.image_input_shape": "1,3,336,336",
+        "option.image_feature_size": 1176
     }
 }
 

@@ -7,6 +7,7 @@
 import llm.client as client
 import rb_client as rb_client
 import test_client
+import llm.openai_vision_client as openai_vision_client
 
 djl_version = os.environ.get('TEST_DJL_VERSION', '').strip()
 
@@ -535,6 +536,16 @@ def test_lora_llama3_8b(self):
             client.run("vllm_adapters llama3-8b-unmerged-lora".split())
 
 
+class TestVLMs:
+    """Integration tests for vision language models."""
+
+    def test_llava_next_chat(self):
+        """Launch the llava-next-chat model and send it one image request."""
+        client_args = (
+            "url https://resources.djl.ai/images/dog_bike_car.jpg".split())
+        with Runner('lmi', 'llava-next-chat') as runner:
+            prepare.build_vllm_model("llava-next-chat")
+            runner.launch()
+            openai_vision_client.run(client_args)
+
+
 class TestLmiDistLora:
     # Runs on g5.12xl