
Commit 117a2ed

jikunshang authored and zou3519 committed
fix torch.compile issues (vllm-project#204)
* use 2025.1.1 instead (vllm-project#196)
  Signed-off-by: Kunshang Ji <[email protected]>
* Use standalone_compile by default in torch >= 2.8.0 (vllm-project#18846)
  Signed-off-by: rzou <[email protected]>
* fix xpu compile issue
---------
Signed-off-by: Kunshang Ji <[email protected]>
Signed-off-by: rzou <[email protected]>
Co-authored-by: Richard Zou <[email protected]>
1 parent 8c250c9 commit 117a2ed

File tree: 4 files changed, +13 −12 lines


vllm/compilation/backends.py

Lines changed: 4 additions & 2 deletions
@@ -15,7 +15,7 @@
 from vllm.config import CompilationConfig, VllmConfig
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils import resolve_obj_by_qualname
+from vllm.utils import is_torch_equal_or_newer, resolve_obj_by_qualname

 from .compiler_interface import (CompilerInterface, EagerAdaptor,
                                  InductorAdaptor, InductorStandaloneAdaptor)
@@ -28,7 +28,9 @@

 def make_compiler(compilation_config: CompilationConfig) -> CompilerInterface:
     if compilation_config.use_inductor:
-        if envs.VLLM_TEST_STANDALONE_COMPILE:
+        # For XPU 2.8.0.dev wheel, it's lower than 2.8.0 so we change to 2.7.9 here
+        if envs.VLLM_USE_STANDALONE_COMPILE and is_torch_equal_or_newer(
+                "2.7.9"):
             logger.info("Using InductorStandaloneAdaptor")
             return InductorStandaloneAdaptor()
         else:
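The version gate above explains the otherwise odd-looking "2.7.9" threshold: XPU development wheels report a pre-release of 2.8.0, which sorts below the final 2.8.0 release but above any 2.7.x. A minimal sketch of that comparison follows, assuming PEP 440 ordering via the packaging library; the two-argument signature is for illustration only, since vLLM's actual is_torch_equal_or_newer reads the installed torch version itself and may be implemented differently.

# Minimal sketch of the version gate, assuming PEP 440 ordering via `packaging`.
# The two-argument helper is illustrative, not vLLM's real signature.
from packaging import version


def is_torch_equal_or_newer(current: str, target: str) -> bool:
    """Return True if `current` is at least `target` under PEP 440 ordering."""
    return version.parse(current) >= version.parse(target)


# Why the diff gates on "2.7.9" instead of "2.8.0": a 2.8.0 dev wheel is a
# pre-release, so it orders below the final 2.8.0 but above any 2.7.x.
print(is_torch_equal_or_newer("2.8.0.dev20250101+xpu", "2.8.0"))  # False
print(is_torch_equal_or_newer("2.8.0.dev20250101+xpu", "2.7.9"))  # True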

vllm/compilation/compiler_interface.py

Lines changed: 1 addition & 1 deletion
@@ -154,7 +154,7 @@ class InductorStandaloneAdaptor(CompilerInterface):
     This is not on by default yet, but we plan to turn it on by default for
     PyTorch 2.8.

-    Use VLLM_TEST_STANDALONE_COMPILE to toggle this on or off.
+    Use VLLM_USE_STANDALONE_COMPILE to toggle this on or off.
     """
     name = "inductor_standalone"

vllm/envs.py

Lines changed: 8 additions & 6 deletions
@@ -143,10 +143,10 @@ def maybe_convert_int(value: Optional[str]) -> Optional[int]:

 def get_vllm_port() -> Optional[int]:
     """Get the port from VLLM_PORT environment variable.
-
+
     Returns:
         The port number as an integer if VLLM_PORT is set, None otherwise.
-
+
     Raises:
         ValueError: If VLLM_PORT is a URI, suggest k8s service discovery issue.
     """
@@ -301,9 +301,11 @@ def get_vllm_port() -> Optional[int]:
     lambda: bool(
         os.environ.get("VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1") != "0"),

-    # Internal flag to enable/disable Inductor standalone compile
-    "VLLM_TEST_STANDALONE_COMPILE":
-    lambda: os.environ.get("VLLM_TEST_STANDALONE_COMPILE", "0") != "0",
+    # Feature flag to enable/disable Inductor standalone compile.
+    # In torch <= 2.7 we ignore this flag; in torch >= 2.8 this is
+    # enabled by default.
+    "VLLM_USE_STANDALONE_COMPILE":
+    lambda: os.environ.get("VLLM_USE_STANDALONE_COMPILE", "1") == "1",

     # local rank of the process in the distributed setting, used to determine
     # the GPU device id
@@ -889,7 +891,7 @@ def factorize(name: str):
         "VLLM_USE_TRITON_AWQ",
         "VLLM_DP_RANK",
         "VLLM_DP_SIZE",
-        "VLLM_TEST_STANDALONE_COMPILE",
+        "VLLM_USE_STANDALONE_COMPILE",
     ]
     for key in environment_variables_to_hash:
         if key in environment_variables:
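Note that the default flips along with the rename: the old lambda treated any value other than "0" as on but defaulted to off, while the new one defaults to on and only the literal string "1" keeps it enabled. A small, self-contained illustration of that parsing follows; the helper name and dict-based environ argument are just for the example.

import os


def standalone_compile_enabled(environ=os.environ) -> bool:
    # Mirrors the lambda in the diff: on by default, and only the exact
    # string "1" counts as enabled.
    return environ.get("VLLM_USE_STANDALONE_COMPILE", "1") == "1"


print(standalone_compile_enabled({}))                                       # True: default on
print(standalone_compile_enabled({"VLLM_USE_STANDALONE_COMPILE": "0"}))     # False
print(standalone_compile_enabled({"VLLM_USE_STANDALONE_COMPILE": "true"}))  # False: only "1" enables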

vllm/v1/worker/xpu_worker.py

Lines changed: 0 additions & 3 deletions
@@ -53,9 +53,6 @@ def __init__(
         else:
             self.profiler = None

-    def compile_or_warm_up_model(self) -> None:
-        pass
-
     # we provide this function due to `torch.xpu.mem_get_info()` doesn't
     # return correct free_gpu_memory on intel client GPU. We need to
     # calculate/estimate it.
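The xpu_worker.py change works purely through inheritance: deleting the no-op override means the XPU worker falls back to the base worker's compile/warm-up path instead of silently skipping it. A hypothetical, minimal sketch of that effect follows; the class names are stand-ins, not the real vLLM classes.

class BaseWorker:
    def compile_or_warm_up_model(self) -> None:
        print("running compile / warm-up")  # stand-in for the real work


class XPUWorkerBefore(BaseWorker):
    def compile_or_warm_up_model(self) -> None:
        pass  # the removed override: warm-up was silently skipped on XPU


class XPUWorkerAfter(BaseWorker):
    pass  # no override, so the base implementation runs


XPUWorkerBefore().compile_or_warm_up_model()  # does nothing
XPUWorkerAfter().compile_or_warm_up_model()   # prints "running compile / warm-up"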
