Commit 019c6bb

cleanup
Signed-off-by: Brian Dellabetta <[email protected]>
1 parent 04cfb78 commit 019c6bb

9 files changed: +25 -31 lines changed

tests/e2e/vLLM/recipes/INT8/recipe_int8_channel_weight_dynamic_per_token.yaml

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@ quant_stage:
     SmoothQuantModifier:
       smoothing_strength: 0.8
     GPTQModifier:
-      ignore: ["lm_head", "re:vision_tower.*", "re:multi_modal_projector.*", "re:visual.*", "re:vision_model.*"]
+      ignore: ["lm_head"]
       config_groups:
         group_0:
           weights: {num_bits: 8, type: int, symmetric: true, strategy: channel}

tests/e2e/vLLM/recipes/INT8/recipe_int8_tensor_weight_static_per_tensor_act.yaml

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@ quant_stage:
     SmoothQuantModifier:
       smoothing_strength: 0.8
     QuantizationModifier:
-      ignore: [lm_head]
+      ignore: ["lm_head", "re:vision_tower.*", "re:multi_modal_projector.*", "re:visual.*", "re:vision_model.*"]
       config_groups:
         group_0:
           weights: {num_bits: 8, type: int, symmetric: true, strategy: tensor}
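
The two recipe edits above move the multimodal ignore patterns from the GPTQ recipe to the static per-tensor recipe. For context, here is a minimal sketch (not part of this commit) of how such a recipe file is typically applied with llm-compressor's oneshot entrypoint; the model id, calibration dataset, and sample count are illustrative assumptions:

from llmcompressor import oneshot  # in older releases: llmcompressor.transformers

# Apply the INT8 recipe above in one-shot (post-training) mode.
# All argument values below are placeholders for illustration.
oneshot(
    model="Qwen/Qwen2.5-VL-7B-Instruct",  # assumed example model
    recipe="tests/e2e/vLLM/recipes/INT8/recipe_int8_tensor_weight_static_per_tensor_act.yaml",
    dataset="ultrachat_200k",             # calibration data for SmoothQuant
    num_calibration_samples=512,          # illustrative sample count
    output_dir="model-INT8",
)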

tests/lmeval/configs/int8_w8a8_dynamic_per_token.yaml

Lines changed: 0 additions & 1 deletion
@@ -1,6 +1,5 @@
 cadence: "weekly"
 model: meta-llama/Meta-Llama-3-8B-Instruct
-scheme: INT8_dyn_per_token
 recipe: tests/e2e/vLLM/recipes/INT8/recipe_int8_channel_weight_dynamic_per_token.yaml
 dataset_id: HuggingFaceH4/ultrachat_200k
 dataset_split: train_sft

tests/lmeval/configs/vl_int8_w8a8_dynamic_per_token.yaml

Lines changed: 0 additions & 22 deletions
This file was deleted.

(new file)

Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,19 @@
+cadence: "weekly"
+model: Qwen/Qwen2.5-VL-7B-Instruct
+model_class: TraceableQwen2_5_VLForConditionalGeneration
+recipe: tests/e2e/vLLM/recipes/INT8/recipe_int8_tensor_weight_static_per_tensor_act.yaml
+dataset_id: lmms-lab/flickr30k
+dataset_split: "test[:512]"
+lmeval:
+  model: "hf-multimodal"
+  model_args:
+    dtype: bfloat16
+    add_bos_token: True
+    convert_img_format: True
+  task: mmmu_val_literature
+  num_fewshot: 0
+  use_stderr_atol: True
+  batch_size: 8
+  # dense model achieves 0.9 accuracy
+  metrics:
+    acc,none: 0.667
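
The new config above wires a quantized Qwen2.5-VL checkpoint into lm-evaluation-harness. A rough sketch, under the assumption that lm_eval's simple_evaluate API is driven with the same arguments the config lists (the base model id stands in for the quantized checkpoint path):

import lm_eval

# Mirror the lmeval block of the config above; values copied from it,
# with the base model id standing in for the quantized checkpoint.
results = lm_eval.simple_evaluate(
    model="hf-multimodal",
    model_args={
        "pretrained": "Qwen/Qwen2.5-VL-7B-Instruct",
        "dtype": "bfloat16",
        "add_bos_token": True,
        "convert_img_format": True,
    },
    tasks=["mmmu_val_literature"],
    num_fewshot=0,
    batch_size=8,
)
print(results["results"]["mmmu_val_literature"].get("acc,none"))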

tests/lmeval/configs/vl_w4a16_actorder_weight.yaml

Lines changed: 1 addition & 2 deletions
@@ -3,8 +3,7 @@ model: Qwen/Qwen2.5-VL-7B-Instruct
 model_class: TraceableQwen2_5_VLForConditionalGeneration
 recipe: tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_weight_dampfrac1e-1.yaml
 dataset_id: lmms-lab/flickr30k
-dataset_split: "test[:256]"
-scheme: W4A16_actorder_group
+dataset_split: "test[:512]"
 lmeval:
   model: "hf-multimodal"
   model_args:
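
The quoted dataset_split values such as "test[:512]" use the Hugging Face datasets slicing syntax; as a quick illustration (assuming the datasets library):

from datasets import load_dataset

# "test[:512]" selects only the first 512 examples of the test split,
# matching the calibration budget set in the configs above.
ds = load_dataset("lmms-lab/flickr30k", split="test[:512]")
print(len(ds))  # 512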

tests/lmeval/configs/w4a16_actorder_group.yaml

Lines changed: 0 additions & 1 deletion
@@ -1,6 +1,5 @@
 cadence: "weekly"
 model: meta-llama/Meta-Llama-3-8B-Instruct
-scheme: W4A16_actorder_group
 recipe: tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_group.yaml
 dataset_id: HuggingFaceH4/ultrachat_200k
 dataset_split: train_sft

tests/lmeval/configs/w4a16_actorder_weight.yaml

Lines changed: 0 additions & 1 deletion
@@ -1,6 +1,5 @@
 cadence: "weekly"
 model: meta-llama/Meta-Llama-3-8B-Instruct
-scheme: W4A16_actorder_weight
 recipe: tests/e2e/vLLM/recipes/actorder/recipe_w4a16_actorder_weight.yaml
 dataset_id: HuggingFaceH4/ultrachat_200k
 dataset_split: train_sft

tests/lmeval/test_lmeval.py

Lines changed: 3 additions & 2 deletions
@@ -59,8 +59,9 @@ class TestLMEval:
     W4N16 with channel quantization). To add a new test case, a new config has to be
     added to the lm_eval_configs folder. The tests run on a cadence defined by the
     `cadence` field. Each config defines the model to quantize. Optionally, a dataset
-    id and split can be provided for calibration. Finally, all config files must list
-    a scheme. The scheme can be a preset scheme from
+    id and split can be provided for calibration.
+    Either a recipe or a scheme should be provided. If a recipe is not provided, the
+    config file must list a scheme. The scheme can be a preset scheme from
     https://github.com/neuralmagic/compressed-tensors/blob/main/src/compressed_tensors/quantization/quant_scheme.py
     or another identifier which can be used for the particular test case. If a recipe
     is not provided, it is assumed that the scheme provided is a preset scheme and will
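
The revised docstring makes the recipe/scheme contract explicit: a recipe takes precedence, and the scheme is only required as a fallback. A minimal sketch of that resolution rule, assuming a hypothetical resolve helper and the preset lookup that compressed-tensors exposes:

from compressed_tensors.quantization import preset_name_to_scheme  # assumed import path

def resolve_quantization(config: dict):
    """Hypothetical helper mirroring the docstring: recipe wins, else scheme."""
    if config.get("recipe"):
        # A recipe YAML fully specifies the modifiers; scheme becomes optional.
        return config["recipe"]
    # No recipe: the scheme must name a preset such as W4A16 or W8A8.
    return preset_name_to_scheme(config["scheme"], targets=["Linear"])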
