Test that ModelRegistry queries do not initialize CUDA

DarkLight1337 · DarkLight1337 · commit 7280766dfd82 · 2024-10-01T15:01:26.000Z
diff --git a/tests/models/test_registry.py b/tests/models/test_registry.py
@@ -1,25 +1,54 @@
+import warnings
+
 import pytest
+import torch.cuda
 
 from vllm.model_executor.models import _MODELS, ModelRegistry
 
+from ..utils import fork_new_process_for_each_test
+
 
-@pytest.mark.parametrize("model_cls", _MODELS)
-def test_registry_imports(model_cls):
+@pytest.mark.parametrize("model_arch", _MODELS)
+def test_registry_imports(model_arch):
     # Ensure all model classes can be imported successfully
-    ModelRegistry.resolve_model_cls([model_cls])
+    ModelRegistry.resolve_model_cls(model_arch)
 
 
-@pytest.mark.parametrize("model_cls,is_mm", [
-    ("LlamaForCausalLM", False),
-    ("MllamaForConditionalGeneration", True),
+@fork_new_process_for_each_test
+@pytest.mark.parametrize("model_arch,is_mm,init_cuda", [
+    ("LlamaForCausalLM", False, False),
+    ("MllamaForConditionalGeneration", True, False),
+    ("LlavaForConditionalGeneration", True, True),
 ])
-def test_registry_is_multimodal(model_cls, is_mm):
-    assert ModelRegistry.is_multimodal_model(model_cls) is is_mm
+def test_registry_is_multimodal(model_arch, is_mm, init_cuda):
+    assert ModelRegistry.is_multimodal_model(model_arch) is is_mm
+
+    if init_cuda:
+        assert not torch.cuda.is_initialized()
 
+        ModelRegistry.resolve_model_cls(model_arch)
+        if not torch.cuda.is_initialized():
+            warnings.warn(
+                "This model no longer initializes CUDA on import. "
+                "Please test using a different model.",
+                stacklevel=2)
 
-@pytest.mark.parametrize("model_cls,is_pp", [
-    ("MLPSpeculatorPreTrainedModel", False),
-    ("DeepseekV2ForCausalLM", True),
+
+@fork_new_process_for_each_test
+@pytest.mark.parametrize("model_arch,is_pp,init_cuda", [
+    ("MLPSpeculatorPreTrainedModel", False, False),
+    ("DeepseekV2ForCausalLM", True, False),
+    ("Qwen2VLForConditionalGeneration", True, True),
 ])
-def test_registry_is_pp(model_cls, is_pp):
-    assert ModelRegistry.is_pp_supported_model(model_cls) is is_pp
+def test_registry_is_pp(model_arch, is_pp, init_cuda):
+    assert ModelRegistry.is_pp_supported_model(model_arch) is is_pp
+
+    if init_cuda:
+        assert not torch.cuda.is_initialized()
+
+        ModelRegistry.resolve_model_cls(model_arch)
+        if not torch.cuda.is_initialized():
+            warnings.warn(
+                "This model no longer initializes CUDA on import. "
+                "Please test using a different model.",
+                stacklevel=2)
diff --git a/vllm/model_executor/models/__init__.py b/vllm/model_executor/models/__init__.py
@@ -221,7 +221,12 @@ def _try_load_model_cls(model_arch: str) -> Optional[Type[nn.Module]]:
 
     @staticmethod
     def resolve_model_cls(
-            architectures: List[str]) -> Tuple[Type[nn.Module], str]:
+        architectures: Union[str, List[str]], ) -> Tuple[Type[nn.Module], str]:
+        if isinstance(architectures, str):
+            architectures = [architectures]
+        if not architectures:
+            logger.warning("No model architectures are specified")
+
         for arch in architectures:
             model_cls = ModelRegistry._try_load_model_cls(arch)
             if model_cls is not None: