
Update e2e/lm-eval test infrastructure #1323

Merged
merged 3 commits on Apr 8, 2025
4 changes: 1 addition & 3 deletions tests/e2e/vLLM/run_tests.sh
@@ -4,7 +4,7 @@ SUCCESS=0

 while getopts "c:t:" OPT; do
   case ${OPT} in
-  c )
+    c )
       CONFIG="$OPTARG"
       ;;
     t )
@@ -25,9 +25,7 @@ do

     export TEST_DATA_FILE="$MODEL_CONFIG"
     pytest \
-        -r a \
-        --capture=tee-sys \
         --junitxml="test-results/e2e-$(date +%s).xml" \
         "$TEST" || LOCAL_SUCCESS=$?

     if [[ $LOCAL_SUCCESS == 0 ]]; then
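With the -r a and --capture=tee-sys flags dropped, per-run results are captured in the --junitxml report. Each loop iteration still exports the current config path as TEST_DATA_FILE before invoking pytest; a minimal sketch, assuming the test modules read that variable at import time (the TEST_DATA_FILE constant is referenced in the diffs below but defined outside them):

import os

# Assumption: run_tests.sh exports TEST_DATA_FILE once per config file, and
# each test module reads it at collection time to parametrize itself.
TEST_DATA_FILE = os.environ.get("TEST_DATA_FILE", "")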
15 changes: 7 additions & 8 deletions tests/e2e/vLLM/test_vllm.py
@@ -8,7 +8,6 @@
 import yaml
 from huggingface_hub import HfApi
 from loguru import logger
-from parameterized import parameterized_class

 from llmcompressor.core import active_session
 from tests.e2e.e2e_utils import run_oneshot_for_e2e_testing
@@ -43,7 +42,9 @@
 # Will run each test case in its own process through run_tests.sh
 # emulating vLLM CI testing
 @requires_gpu_count(1)
-@parameterized_class("test_data_file", [(TEST_DATA_FILE,)])
+@pytest.mark.parametrize(
+    "test_data_file", [pytest.param(TEST_DATA_FILE, id=TEST_DATA_FILE)]
+)
 @pytest.mark.skipif(not vllm_installed, reason="vLLM is not installed, skipping test")
 class TestvLLM:
     """
@@ -62,10 +63,8 @@ class TestvLLM:
     be used for quantization. Otherwise, the recipe will always be used if given.
     """  # noqa: E501

-    def set_up(self):
-        eval_config = yaml.safe_load(
-            Path(self.test_data_file).read_text(encoding="utf-8")
-        )
+    def set_up(self, test_data_file: str):
+        eval_config = yaml.safe_load(Path(test_data_file).read_text(encoding="utf-8"))

         if os.environ.get("CADENCE", "commit") != eval_config.get("cadence"):
             pytest.skip("Skipping test; cadence mismatch")
@@ -97,10 +96,10 @@ def set_up(self):
         ]
         self.api = HfApi()

-    def test_vllm(self):
+    def test_vllm(self, test_data_file: str):
         # Run vLLM with saved model

-        self.set_up()
+        self.set_up(test_data_file)
         if not self.save_dir:
             self.save_dir = self.model.split("/")[1] + f"-{self.scheme}"
         oneshot_model, tokenizer = run_oneshot_for_e2e_testing(
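parameterized_class attached test_data_file to the class, so methods read self.test_data_file; pytest.mark.parametrize applied at class level instead injects the value as an argument into every test method, which is why set_up and test_vllm now accept test_data_file explicitly. A standalone sketch of the pattern, using toy names rather than the real test class:

import pytest

CONFIG = "configs/example.yaml"  # hypothetical stand-in for TEST_DATA_FILE


@pytest.mark.parametrize("test_data_file", [pytest.param(CONFIG, id=CONFIG)])
class TestSketch:
    def test_reads_config(self, test_data_file: str):
        # pytest injects the parametrized value per test method, so helpers
        # must take it as a parameter instead of reading self.test_data_file.
        assert test_data_file == CONFIG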
11 changes: 7 additions & 4 deletions tests/lmeval/test_lmeval.py
@@ -39,6 +39,9 @@ class LmEvalConfig(BaseModel):
 # Will run each test case in its own process through run_tests.sh
 # emulating vLLM CI testing
 @requires_gpu_count(1)
+@pytest.mark.parametrize(
+    "test_data_file", [pytest.param(TEST_DATA_FILE, id=TEST_DATA_FILE)]
+)
 @pytest.mark.skipif(
     not lm_eval_installed, reason="lm eval is not installed, skipping test"
 )
@@ -58,8 +61,8 @@ class TestLMEval:
     be used for quantization. Otherwise, the recipe will always be used if given.
     """  # noqa: E501

-    def set_up(self):
-        eval_config = yaml.safe_load(Path(TEST_DATA_FILE).read_text(encoding="utf-8"))
+    def set_up(self, test_data_file: str):
+        eval_config = yaml.safe_load(Path(test_data_file).read_text(encoding="utf-8"))

         if os.environ.get("CADENCE", "commit") != eval_config.get("cadence"):
             pytest.skip("Skipping test; cadence mismatch")
@@ -88,9 +91,9 @@ def set_up(self):
         self.num_calibration_samples = 512
         self.max_seq_length = 2048

-    def test_lm_eval(self):
+    def test_lm_eval(self, test_data_file: str):
         # Run vLLM with saved model
-        self.set_up()
+        self.set_up(test_data_file)
         if not self.save_dir:
             self.save_dir = self.model.split("/")[1] + f"-{self.scheme}"
         oneshot_model, processor = run_oneshot_for_e2e_testing(
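Passing id=TEST_DATA_FILE to pytest.param puts the config path into the collected node ID, so each single-config run launched by run_tests.sh produces a self-describing entry in the JUnit XML. A small illustration, with a hypothetical config path:

import pytest

CONFIG = "tests/lmeval/configs/example.yaml"  # hypothetical path


@pytest.mark.parametrize("test_data_file", [pytest.param(CONFIG, id=CONFIG)])
def test_id_demo(test_data_file: str):
    # Collected as: test_id_demo[tests/lmeval/configs/example.yaml]
    assert test_data_file.endswith(".yaml")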