Upstream compatibility tests (#343)

maxdebayser · web-flow · commit 88ca01e6f307 · 2025-07-31T21:23:50.000Z
Since we test against "main" and "default" vLLM versions in our tests,
we often add code to make "main" work ahead of our official support. But
it's easy to forget this compatibility code and it can become technical
debt later. In this PR I'm adding a new test file that verifies
"main" and "default" imports from vLLM so that the "default" tests
start failing once a piece of compatibility code is no longer needed.

---------

Signed-off-by: Max de Bayser <mbayser@br.ibm.com>
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -50,6 +50,19 @@ jobs:
           - name: "worker and utils"
             markers: "not e2e"
             flags: "--timeout=300"
+          - name: "compatibility"
+            markers: "compat"
+            flags: "--timeout=300"
+        include:
+          - vllm_version:
+              name: "vLLM:lowest"
+              repo: "git+https://github.com/vllm-project/vllm --tag v0.9.2"
+            test_suite:
+              name: "backward compat"
+              markers: "compat or (cpu and basic)"
+              flags: "--timeout=300"
+            os: "ubuntu-latest"
+            python_version: "3.12"
 
     name: "${{ matrix.test_suite.name }} (${{ matrix.vllm_version.name }})"
 
@@ -90,6 +103,7 @@ jobs:
         if: (steps.changed-src-files.outputs.any_changed == 'true' && matrix.vllm_version.repo)
         run: |
           uv add ${{ matrix.vllm_version.repo }}
+          echo "TEST_VLLM_VERSION=${{ matrix.vllm_version.name }}" >> "$GITHUB_ENV"
 
       - name: "Install vLLM with Spyre plugin"
         if: steps.changed-src-files.outputs.any_changed == 'true'
diff --git a/pyproject.toml b/pyproject.toml
@@ -122,8 +122,10 @@ asyncio_default_fixture_loop_scope = "function"
 markers = [
     "skip_global_cleanup",
     "e2e: Tests using end-to-end engine spin-up",
+    "basic: Basic correctness tests",
     "cb: Continuous batching tests",
     "cpu: Tests using CPU (i.e. eager) backend",
+    "compat: backward compatibility tests",
     "spyre: Tests using Spyre hardware backend",
     "decoder: Tests for decoder models",
     "embedding: Tests for embedding models",
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -18,7 +18,7 @@
 def pytest_collection_modifyitems(config, items):
     """ Mark all tests in e2e directory"""
     for item in items:
-        if "tests/e2e" in str(item.nodeid):
+        if "e2e" in str(item.nodeid):
             item.add_marker(pytest.mark.e2e)
 
 
diff --git a/tests/e2e/test_spyre_embeddings.py b/tests/e2e/test_spyre_embeddings.py
@@ -14,9 +14,14 @@
 
 
 @pytest.mark.parametrize("model", get_spyre_model_list(isEmbeddings=True))
-@pytest.mark.parametrize("warmup_shape",
-                         [(64, 4), (64, 8), (128, 4),
-                          (128, 8)])  # (prompt_length/batch_size)
+@pytest.mark.parametrize(
+    "warmup_shape",
+    [  # (prompt_length/batch_size)
+        pytest.param((64, 4), marks=pytest.mark.basic),
+        pytest.param((64, 8)),
+        pytest.param((128, 4)),
+        pytest.param((128, 8))
+    ])
 @pytest.mark.parametrize("backend", get_spyre_backend_list())
 def test_output(
     model: str,
diff --git a/tests/e2e/test_spyre_online.py b/tests/e2e/test_spyre_online.py
@@ -5,7 +5,7 @@
 
 @pytest.mark.parametrize("model", get_spyre_model_list())
 @pytest.mark.parametrize("tp_size", [
-    pytest.param(1),
+    pytest.param(1, marks=pytest.mark.basic),
     pytest.param(2, marks=pytest.mark.multi),
     pytest.param(4, marks=pytest.mark.multi),
     pytest.param(8, marks=pytest.mark.multi),
@@ -82,6 +82,7 @@ def test_openai_serving_gptq(remote_openai_server, model, backend,
     assert len(completion.choices[0].text) > 0
 
 
+@pytest.mark.basic
 @pytest.mark.parametrize("model", get_spyre_model_list())
 @pytest.mark.parametrize("cb",
                          [pytest.param(1, marks=pytest.mark.cb, id="cb")])
diff --git a/tests/utils/test_spyre_backend_list.py b/tests/utils/test_spyre_backend_list.py
@@ -3,6 +3,7 @@
 
 
 @pytest.mark.utils
+@pytest.mark.cpu
 def test_get_spyre_backend_list(monkeypatch):
     '''
     Ensure we return the backend list correctly
diff --git a/tests/utils/test_spyre_model_list.py b/tests/utils/test_spyre_model_list.py
@@ -3,6 +3,7 @@
 
 
 @pytest.mark.utils
+@pytest.mark.cpu
 def test_get_spyre_model_list(monkeypatch):
     '''
     Tests returning the expected models
diff --git a/tests/utils/test_upstream_compatibility.py b/tests/utils/test_upstream_compatibility.py
@@ -0,0 +1,85 @@
+import os
+
+import pytest
+from spyre_util import get_spyre_model_list
+
+pytestmark = pytest.mark.compat
+
+VLLM_VERSION = os.getenv("TEST_VLLM_VERSION", "default")
+
+
+@pytest.mark.cpu
+def test_vllm_bert_support():
+    '''
+    Check the `supports_v0_only` attribute of BertEmbeddingModel: expected
+    truthy on "vLLM:lowest" and falsy or missing on "vLLM:main". Any other
+    VLLM_VERSION value asserts nothing.
+    '''
+    from vllm.model_executor.models.bert import BertEmbeddingModel
+    # A missing attribute is treated the same as "not V0-only".
+    bert_supports_v0_only = getattr(BertEmbeddingModel, "supports_v0_only",
+                                    False)
+    if VLLM_VERSION == "vLLM:main":
+        assert not bert_supports_v0_only
+    elif VLLM_VERSION == "vLLM:lowest":
+        assert bert_supports_v0_only, (
+            "The lowest supported vLLM version already "
+            "supports Bert in V1. Remove the compatibility workarounds.")
+        # When this assertion fails, the compat code from the PR below can go:
+        # https://github.com/vllm-project/vllm-spyre/pull/277
+
+
+@pytest.mark.cpu
+@pytest.mark.parametrize("model", get_spyre_model_list())
+def test_model_config_task(model: str):
+    '''
+    Check whether the model config built for `model` still has a `task`
+    value: expected on "vLLM:lowest", missing or None on "vLLM:main".
+    '''
+    from vllm.engine.arg_utils import EngineArgs
+    vllm_config = EngineArgs(model=model).create_engine_config()
+    # A missing attribute counts the same as an explicit None.
+    task = getattr(vllm_config.model_config, "task", None)
+    if VLLM_VERSION == "vLLM:main":
+        assert task is None
+    elif VLLM_VERSION == "vLLM:lowest":
+        assert task is not None, (
+            "The lowest supported vLLM version already "
+            "switched to the new definition of runners and task.")
+        # When this assertion fails, the compat code from the PR below can go:
+        # https://github.com/vllm-project/vllm-spyre/pull/341
+
+
+@pytest.mark.cpu
+def test_has_tasks():
+    '''`vllm.tasks` must import on "vLLM:main" and not on "vLLM:lowest".'''
+    try:
+        from vllm import tasks  # noqa
+        has_tasks = True
+    except ImportError:
+        # Only a failed import means "absent"; other errors should surface.
+        has_tasks = False
+    if VLLM_VERSION == "vLLM:main":
+        assert has_tasks
+    elif VLLM_VERSION == "vLLM:lowest":
+        assert not has_tasks, (
+            "The lowest supported vLLM version already "
+            "switched to the new definition of runners and task.")
+        # When this assertion fails, the compat code from the PR below can go:
+        # https://github.com/vllm-project/vllm-spyre/pull/338
+
+
+@pytest.mark.cpu
+def test_pooler_from_config():
+    '''Check that `Pooler.from_config_with_defaults` exists on
+    "vLLM:lowest" and is gone on "vLLM:main".'''
+    from vllm.model_executor.layers.pooler import Pooler
+    has_from_config = hasattr(Pooler, "from_config_with_defaults")
+    if VLLM_VERSION == "vLLM:main":
+        assert not has_from_config
+    elif VLLM_VERSION == "vLLM:lowest":
+        assert has_from_config, (
+            "The lowest supported vLLM version already "
+            "switched to the new definition of runners and task.")
+        # When this assertion fails, the compat code from the PR below can go:
+        # https://github.com/vllm-project/vllm-spyre/pull/338
diff --git a/tests/v1/worker/test_spyre_input_batch.py b/tests/v1/worker/test_spyre_input_batch.py
@@ -211,6 +211,7 @@ def same(t1: Optional[torch.Tensor], t2: Optional[torch.Tensor]) -> bool:
         sampling_metadata.bad_words_token_ids
 
 
+@pytest.mark.cpu
 @pytest.mark.worker
 @pytest.mark.parametrize("batch_size", [1, 2, 32, 64])
 def test_sampling_metadata_in_input_batch(batch_size: int):