
Commit 12c29a8

[Bugfix] Further clean up LoRA test (#14422)
Signed-off-by: Jee Jee Li <[email protected]>
1 parent 70da0c0 commit 12c29a8

File tree

3 files changed: +2 -48 lines changed

tests/lora/conftest.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -185,11 +185,6 @@ def mixtral_lora_files():
185185
return snapshot_download(repo_id="SangBinCho/mixtral-lora")
186186

187187

188-
@pytest.fixture(scope="session")
189-
def mixtral_lora_files_all_target_modules():
190-
return snapshot_download(repo_id="dyang415/mixtral-lora-v0")
191-
192-
193188
@pytest.fixture(scope="session")
194189
def gemma_lora_files():
195190
return snapshot_download(repo_id="wskwon/gemma-7b-test-lora")

tests/lora/test_mixtral.py

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -69,45 +69,3 @@ def test_mixtral_lora(mixtral_lora_files, tp_size):
6969
prompts=prompts) == expected_lora_output
7070
assert do_sample(llm, mixtral_lora_files, lora_id=2,
7171
prompts=prompts) == expected_lora_output
72-
73-
74-
@pytest.mark.parametrize("tp_size", [4])
75-
@pytest.mark.parametrize("fully_shard", [True, False])
76-
def test_mixtral_lora_all_target_modules(mixtral_lora_files_all_target_modules,
77-
tp_size, fully_shard):
78-
"""This LoRA model has all supported Mixtral target modules"""
79-
80-
if torch.cuda.device_count() < tp_size:
81-
pytest.skip(f"Not enough GPUs for tensor parallelism {tp_size}")
82-
83-
prompts = [
84-
"Question: What happens if you touch the eyes of a blind man?\nChoices:\nB: The man's sight will be restored.\nA: Nothing happens if you touch the eyes of a blind man.\nC: The man will get his eyesight back.\nD: The man will no longer be blind.\nAnswer:", # noqa: E501
85-
"Question: Which will increase the energy of motion of water molecules?\nChoices:\nA: add heat\nB: add ice\nC: remove some of the water\nD: place the water in the freezer\nAnswer:", # noqa: E501
86-
"Since Craig threw aluminum cans in the trash and Benjamin recycled, _ was environmentally irresponsible.\nChoices:\n1: Craig\n2: Benjamin\nAnswer:", # noqa: E501
87-
]
88-
89-
llm = vllm.LLM(
90-
MODEL_PATH,
91-
enable_lora=True,
92-
max_num_seqs=16,
93-
max_loras=4,
94-
distributed_executor_backend="ray",
95-
tensor_parallel_size=tp_size,
96-
fully_sharded_loras=fully_shard,
97-
max_lora_rank=32,
98-
)
99-
100-
expected_lora_output = [
101-
"A: Nothing happens if you touch the eyes of a blind man.",
102-
"A: add heat",
103-
"1: Craig",
104-
]
105-
106-
assert do_sample(llm,
107-
mixtral_lora_files_all_target_modules,
108-
lora_id=1,
109-
prompts=prompts) == expected_lora_output
110-
assert do_sample(llm,
111-
mixtral_lora_files_all_target_modules,
112-
lora_id=2,
113-
prompts=prompts) == expected_lora_output
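The do_sample helper used by the remaining test_mixtral_lora test is defined elsewhere in the suite; below is a hedged sketch of what such a helper commonly looks like with vLLM's LoRA API. The exact signature, sampling settings, and the collect/strip logic are assumptions for illustration, not the repository's implementation.

# Illustrative sketch of a do_sample-style helper; the real helper lives
# elsewhere in tests/lora and may differ. It relies on vLLM's public
# generate()/LoRARequest API.
import vllm
from vllm.lora.request import LoRARequest


def do_sample(llm: vllm.LLM, lora_path: str, lora_id: int,
              prompts: list[str]) -> list[str]:
    # Greedy decoding keeps outputs deterministic enough to compare against
    # the expected strings in the test assertions.
    sampling_params = vllm.SamplingParams(temperature=0, max_tokens=256)
    outputs = llm.generate(
        prompts,
        sampling_params,
        # lora_id selects the adapter slot; lora_path points at the files
        # returned by the snapshot_download fixture.
        lora_request=LoRARequest(str(lora_id), lora_id, lora_path)
        if lora_id else None)
    # Return only the generated text so tests can compare lists directly.
    return [output.outputs[0].text.strip() for output in outputs]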

tests/lora/test_quant_model.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,8 @@ def test_quant_model_tp_equality(tinyllama_lora_files, num_gpus_available,
178178
model):
179179
if num_gpus_available < 2:
180180
pytest.skip(f"Not enough GPUs for tensor parallelism {2}")
181-
181+
if model.quantization == "GPTQ":
182+
pytest.skip("GPTQ lora outputs are just incredibly unstable")
182183
llm_tp1 = vllm.LLM(
183184
model=model.model_path,
184185
enable_lora=True,
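For quick reference, a standalone sketch of the early-skip pattern this hunk adds: a parametrized GPTQ case bails out with pytest.skip before any expensive vllm.LLM construction. The ModelWithQuantization container and the placeholder model paths are assumptions made for illustration; only the model_path and quantization attributes appear in the diff above.

# Standalone sketch of the early-skip pattern; not code from this commit.
# ModelWithQuantization and the placeholder model paths are assumed for
# illustration, while the skip condition mirrors the added hunk.
from dataclasses import dataclass

import pytest


@dataclass
class ModelWithQuantization:
    model_path: str
    quantization: str


@pytest.mark.parametrize("model", [
    ModelWithQuantization("some-org/tinyllama-awq", "AWQ"),
    ModelWithQuantization("some-org/tinyllama-gptq", "GPTQ"),
])
def test_skip_pattern(model):
    if model.quantization == "GPTQ":
        pytest.skip("GPTQ lora outputs are just incredibly unstable")
    # The real test builds tp_size=1 and tp_size=2 vllm.LLM instances here
    # and asserts that their LoRA outputs match.
    assert model.quantization == "AWQ"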
