
Update e2e/lm-eval test infrastructure #1323

Merged
merged 3 commits on Apr 8, 2025
4 changes: 1 addition & 3 deletions tests/e2e/vLLM/run_tests.sh
@@ -4,7 +4,7 @@ SUCCESS=0

 while getopts "c:t:" OPT; do
   case ${OPT} in
-  c )
+    c )
       CONFIG="$OPTARG"
       ;;
     t )
@@ -25,9 +25,7 @@ do

     export TEST_DATA_FILE="$MODEL_CONFIG"
     pytest \
-        -r a \
-        --capture=tee-sys \
         --junitxml="test-results/e2e-$(date +%s).xml" \
         "$TEST" || LOCAL_SUCCESS=$?

     if [[ $LOCAL_SUCCESS == 0 ]]; then
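With the -r a and --capture=tee-sys flags dropped, per-run results are captured in the --junitxml report. Each loop iteration still exports the current config path as TEST_DATA_FILE before invoking pytest; a minimal sketch, assuming the test modules read that variable at import time (the TEST_DATA_FILE constant is referenced in the diffs below but defined outside them):

import os

# Assumption: run_tests.sh exports TEST_DATA_FILE once per config file, and
# each test module reads it at collection time to parametrize itself.
TEST_DATA_FILE = os.environ.get("TEST_DATA_FILE", "")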
15 changes: 7 additions & 8 deletions tests/e2e/vLLM/test_vllm.py
@@ -8,7 +8,6 @@
 import yaml
 from huggingface_hub import HfApi
 from loguru import logger
-from parameterized import parameterized_class

 from llmcompressor.core import active_session
 from tests.e2e.e2e_utils import run_oneshot_for_e2e_testing
@@ -43,7 +42,9 @@
 # Will run each test case in its own process through run_tests.sh
 # emulating vLLM CI testing
 @requires_gpu_count(1)
-@parameterized_class("test_data_file", [(TEST_DATA_FILE,)])
+@pytest.mark.parametrize(
+    "test_data_file", [pytest.param(TEST_DATA_FILE, id=TEST_DATA_FILE)]
+)
 @pytest.mark.skipif(not vllm_installed, reason="vLLM is not installed, skipping test")
 class TestvLLM:
     """
@@ -62,10 +63,8 @@ class TestvLLM:
     be used for quantization. Otherwise, the recipe will always be used if given.
     """  # noqa: E501

-    def set_up(self):
-        eval_config = yaml.safe_load(
-            Path(self.test_data_file).read_text(encoding="utf-8")
-        )
+    def set_up(self, test_data_file: str):
+        eval_config = yaml.safe_load(Path(test_data_file).read_text(encoding="utf-8"))

         if os.environ.get("CADENCE", "commit") != eval_config.get("cadence"):
             pytest.skip("Skipping test; cadence mismatch")
@@ -97,10 +96,10 @@ def set_up(self):
         ]
         self.api = HfApi()

-    def test_vllm(self):
+    def test_vllm(self, test_data_file: str):
         # Run vLLM with saved model

-        self.set_up()
+        self.set_up(test_data_file)
         if not self.save_dir:
             self.save_dir = self.model.split("/")[1] + f"-{self.scheme}"
         oneshot_model, tokenizer = run_oneshot_for_e2e_testing(
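parameterized_class attached test_data_file to the class, so methods read self.test_data_file; pytest.mark.parametrize applied at class level instead injects the value as an argument into every test method, which is why set_up and test_vllm now accept test_data_file explicitly. A standalone sketch of the pattern, using toy names rather than the real test class:

import pytest

CONFIG = "configs/example.yaml"  # hypothetical stand-in for TEST_DATA_FILE


@pytest.mark.parametrize("test_data_file", [pytest.param(CONFIG, id=CONFIG)])
class TestSketch:
    def test_reads_config(self, test_data_file: str):
        # pytest injects the parametrized value per test method, so helpers
        # must take it as a parameter instead of reading self.test_data_file.
        assert test_data_file == CONFIG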
11 changes: 7 additions & 4 deletions tests/lmeval/test_lmeval.py
@@ -39,6 +39,9 @@ class LmEvalConfig(BaseModel):
 # Will run each test case in its own process through run_tests.sh
 # emulating vLLM CI testing
 @requires_gpu_count(1)
+@pytest.mark.parametrize(
+    "test_data_file", [pytest.param(TEST_DATA_FILE, id=TEST_DATA_FILE)]
+)
 @pytest.mark.skipif(
     not lm_eval_installed, reason="lm eval is not installed, skipping test"
 )
@@ -58,8 +61,8 @@ class TestLMEval:
     be used for quantization. Otherwise, the recipe will always be used if given.
     """  # noqa: E501

-    def set_up(self):
-        eval_config = yaml.safe_load(Path(TEST_DATA_FILE).read_text(encoding="utf-8"))
+    def set_up(self, test_data_file: str):
+        eval_config = yaml.safe_load(Path(test_data_file).read_text(encoding="utf-8"))

         if os.environ.get("CADENCE", "commit") != eval_config.get("cadence"):
             pytest.skip("Skipping test; cadence mismatch")
@@ -88,9 +91,9 @@ def set_up(self):
         self.num_calibration_samples = 512
         self.max_seq_length = 2048

-    def test_lm_eval(self):
+    def test_lm_eval(self, test_data_file: str):
         # Run vLLM with saved model
-        self.set_up()
+        self.set_up(test_data_file)
         if not self.save_dir:
             self.save_dir = self.model.split("/")[1] + f"-{self.scheme}"
         oneshot_model, processor = run_oneshot_for_e2e_testing(
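Passing id=TEST_DATA_FILE to pytest.param puts the config path into the collected node ID, so each single-config run launched by run_tests.sh produces a self-describing entry in the JUnit XML. A small illustration, with a hypothetical config path:

import pytest

CONFIG = "tests/lmeval/configs/example.yaml"  # hypothetical path


@pytest.mark.parametrize("test_data_file", [pytest.param(CONFIG, id=CONFIG)])
def test_id_demo(test_data_file: str):
    # Collected as: test_id_demo[tests/lmeval/configs/example.yaml]
    assert test_data_file.endswith(".yaml")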