2 changes: 2 additions & 0 deletions .github/workflows/integration.yml
@@ -122,6 +122,8 @@ jobs:
           instance: inf2
         - test: TestNeuronxRollingBatch
           instance: inf2
+        - test: TestMultiModal
+          instance: g6
     steps:
       - uses: actions/checkout@v4
       - name: Clean env
50 changes: 50 additions & 0 deletions tests/integration/llm/client.py
@@ -760,6 +760,18 @@ def get_model_name():
     }
 }
 
+multi_modal_spec = {
+    "llava_v1.6-mistral": {
+        "batch_size": [1, 4]
+    },
+    "paligemma-3b-mix-448": {
+        "batch_size": [1, 4],
+    },
+    "phi-3-vision-128k-instruct": {
+        "batch_size": [1, 4],
+    }
+}
+
 
 def add_file_handler_to_logger(file_path: str):
     handler = logging.FileHandler(file_path, mode='w')
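Note on the spec shape above: test_multimodal() (added further down in this file) also honors an optional "tokenizer" key via spec.get("tokenizer", None), which awscurl_run can use for token counting. A minimal sketch of an entry carrying that key; the tokenizer id below is illustrative, not part of this PR:

    # Hypothetical spec entry: only "batch_size" appears in the PR;
    # "tokenizer" illustrates the optional key test_multimodal() reads.
    multi_modal_spec = {
        "llava_v1.6-mistral": {
            "batch_size": [1, 4],
            "tokenizer": "llava-hf/llava-v1.6-mistral-7b-hf",  # assumed id
        },
    }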
@@ -1430,6 +1442,42 @@ def test_correctness(model, model_spec):
     validate_correctness(dataset, data, score)
 
 
+def get_multimodal_prompt():
+    messages = [{
+        "role":
+        "user",
+        "content": [{
+            "type": "text",
+            "text": "What is this an image of?",
+        }, {
+            "type": "image_url",
+            "image_url": {
+                "url": "https://resources.djl.ai/images/dog_bike_car.jpg",
+            }
+        }]
+    }]
+    return {
+        "messages": messages,
+        "temperature": 0.9,
+        "top_p": 0.6,
+        "max_new_tokens": 512,
+    }
+
+
+def test_multimodal(model, model_spec):
+    if model not in model_spec:
+        raise ValueError(
+            f"{model} is not currently supported: {list(model_spec.keys())}")
+    spec = model_spec[model]
+    payload = get_multimodal_prompt()
+    for batch_size in spec["batch_size"]:
+        awscurl_run(payload,
+                    spec.get("tokenizer", None),
+                    batch_size,
+                    num_run=5,
+                    output=True)
+
+
 def run(raw_args):
     parser = argparse.ArgumentParser(description="Build the LLM configs")
     parser.add_argument("handler", help="the handler used in the model")
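The payload built by get_multimodal_prompt() follows the OpenAI-style chat schema: a messages list whose single user turn mixes a "text" part with an "image_url" part. A minimal sketch of posting that payload directly, assuming the container launched by the test listens on a local invocations endpoint (the URL and port are assumptions, not pinned down by this diff):

    import requests

    from client import get_multimodal_prompt

    # Assumed endpoint of a locally launched LMI container; the real
    # address used by awscurl_run is not visible in this diff.
    ENDPOINT = "http://127.0.0.1:8080/invocations"

    resp = requests.post(ENDPOINT, json=get_multimodal_prompt(), timeout=120)
    resp.raise_for_status()
    print(resp.json())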
@@ -1507,6 +1555,8 @@ def run(raw_args):
         test_handler_rolling_batch(args.model, no_code_rolling_batch_spec)
     elif args.handler == "correctness":
         test_correctness(args.model, correctness_model_spec)
+    elif args.handler == "multimodal":
+        test_multimodal(args.model, multi_modal_spec)
 
     else:
         raise ValueError(
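The new "multimodal" branch makes the handler reachable from the test driver the same way the other handlers are: the first positional argument selects the handler, the second the model key. A sketch of invoking it programmatically, assuming client.py is importable as client from the integration test directory:

    # Dispatches through run() to test_multimodal() with the
    # "llava_v1.6-mistral" entry of multi_modal_spec, exactly as
    # tests.py does below.
    import client

    client.run("multimodal llava_v1.6-mistral".split())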
12 changes: 11 additions & 1 deletion tests/integration/llm/prepare.py
@@ -574,6 +574,16 @@
"option.model_id": "s3://djl-llm/llama-2-tiny/",
"option.quantize": "awq",
"option.tensor_parallel_degree": 4
},
"llava_v1.6-mistral": {
"option.model_id": "s3://djl-llm/llava-v1.6-mistral-7b-hf/",
},
"paligemma-3b-mix-448": {
"option.model_id": "s3://djl-llm/paligemma-3b-mix-448/"
},
"phi-3-vision-128k-instruct": {
"option.model_id": "s3://djl-llm/phi-3-vision-128k-instruct/",
"option.trust_remote_code": True,
}
}

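Each entry in this prepare.py dict is a flat map of DJL LMI options (option.model_id, option.trust_remote_code, ...). A sketch of how such a map could be serialized to a serving.properties file; this is an assumption about what prepare.py does with these dicts, not code from the PR:

    # Hypothetical writer for the "phi-3-vision-128k-instruct" entry;
    # the real prepare.py may add an engine line and other defaults.
    options = {
        "option.model_id": "s3://djl-llm/phi-3-vision-128k-instruct/",
        "option.trust_remote_code": True,
    }

    with open("serving.properties", "w") as f:
        for key, value in options.items():
            # properties files are untyped text: booleans become true/false
            if isinstance(value, bool):
                value = str(value).lower()
            f.write(f"{key}={value}\n")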
@@ -784,7 +794,7 @@
"option.dtype": "fp16",
"option.tensor_parallel_degree": 4,
"option.max_rolling_batch_size": 4,
}
},
}

lmi_dist_aiccl_model_list = {
21 changes: 21 additions & 0 deletions tests/integration/tests.py
@@ -883,3 +883,24 @@ def test_llama3_1_8b(self):
             prepare.build_correctness_model("neuronx-llama3-1-8b")
             r.launch(container='pytorch-inf2-2')
             client.run("correctness neuronx-llama3-1-8b".split())
+
+
+class TestMultiModalLmiDist:
+
+    def test_llava_next(self):
+        with Runner('lmi', 'llava_v1.6-mistral') as r:
+            prepare.build_lmi_dist_model('llava_v1.6-mistral')
+            r.launch()
+            client.run("multimodal llava_v1.6-mistral".split())
+
+    def test_paligemma(self):
+        with Runner('lmi', 'paligemma-3b-mix-448') as r:
+            prepare.build_lmi_dist_model('paligemma-3b-mix-448')
+            r.launch()
+            client.run("multimodal paligemma-3b-mix-448".split())
+
+    def test_phi3_v(self):
+        with Runner('lmi', 'phi-3-vision-128k-instruct') as r:
+            prepare.build_lmi_dist_model('phi-3-vision-128k-instruct')
+            r.launch()
+            client.run("multimodal phi-3-vision-128k-instruct".split())