
Commit 117a2ed

jikunshang authored and zou3519 committed
fix torch.compile issues (vllm-project#204)
* use 2025.1.1 instead (vllm-project#196)
  Signed-off-by: Kunshang Ji <[email protected]>
* Use standalone_compile by default in torch >= 2.8.0 (vllm-project#18846)
  Signed-off-by: rzou <[email protected]>
* fix xpu compile issue
---------
Signed-off-by: Kunshang Ji <[email protected]>
Signed-off-by: rzou <[email protected]>
Co-authored-by: Richard Zou <[email protected]>
1 parent 8c250c9 commit 117a2ed

File tree: 4 files changed, +13 −12 lines


vllm/compilation/backends.py

Lines changed: 4 additions & 2 deletions
@@ -15,7 +15,7 @@
 from vllm.config import CompilationConfig, VllmConfig
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils import resolve_obj_by_qualname
+from vllm.utils import is_torch_equal_or_newer, resolve_obj_by_qualname

 from .compiler_interface import (CompilerInterface, EagerAdaptor,
                                  InductorAdaptor, InductorStandaloneAdaptor)
@@ -28,7 +28,9 @@

 def make_compiler(compilation_config: CompilationConfig) -> CompilerInterface:
     if compilation_config.use_inductor:
-        if envs.VLLM_TEST_STANDALONE_COMPILE:
+        # For XPU 2.8.0.dev wheel, it's lower than 2.8.0 so we change to 2.7.9 here
+        if envs.VLLM_USE_STANDALONE_COMPILE and is_torch_equal_or_newer(
+                "2.7.9"):
             logger.info("Using InductorStandaloneAdaptor")
             return InductorStandaloneAdaptor()
         else:
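The version gate above explains the otherwise odd-looking "2.7.9" threshold: XPU development wheels report a pre-release of 2.8.0, which sorts below the final 2.8.0 release but above any 2.7.x. A minimal sketch of that comparison follows, assuming PEP 440 ordering via the packaging library; the two-argument signature is for illustration only, since vLLM's actual is_torch_equal_or_newer reads the installed torch version itself and may be implemented differently.

# Minimal sketch of the version gate, assuming PEP 440 ordering via `packaging`.
# The two-argument helper is illustrative, not vLLM's real signature.
from packaging import version


def is_torch_equal_or_newer(current: str, target: str) -> bool:
    """Return True if `current` is at least `target` under PEP 440 ordering."""
    return version.parse(current) >= version.parse(target)


# Why the diff gates on "2.7.9" instead of "2.8.0": a 2.8.0 dev wheel is a
# pre-release, so it orders below the final 2.8.0 but above any 2.7.x.
print(is_torch_equal_or_newer("2.8.0.dev20250101+xpu", "2.8.0"))  # False
print(is_torch_equal_or_newer("2.8.0.dev20250101+xpu", "2.7.9"))  # True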

vllm/compilation/compiler_interface.py

Lines changed: 1 addition & 1 deletion
@@ -154,7 +154,7 @@ class InductorStandaloneAdaptor(CompilerInterface):
     This is not on by default yet, but we plan to turn it on by default for
     PyTorch 2.8.

-    Use VLLM_TEST_STANDALONE_COMPILE to toggle this on or off.
+    Use VLLM_USE_STANDALONE_COMPILE to toggle this on or off.
     """
     name = "inductor_standalone"

vllm/envs.py

Lines changed: 8 additions & 6 deletions
@@ -143,10 +143,10 @@ def maybe_convert_int(value: Optional[str]) -> Optional[int]:

 def get_vllm_port() -> Optional[int]:
     """Get the port from VLLM_PORT environment variable.
-
+
     Returns:
         The port number as an integer if VLLM_PORT is set, None otherwise.
-
+
     Raises:
         ValueError: If VLLM_PORT is a URI, suggest k8s service discovery issue.
     """
@@ -301,9 +301,11 @@ def get_vllm_port() -> Optional[int]:
     lambda: bool(
         os.environ.get("VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1") != "0"),

-    # Internal flag to enable/disable Inductor standalone compile
-    "VLLM_TEST_STANDALONE_COMPILE":
-    lambda: os.environ.get("VLLM_TEST_STANDALONE_COMPILE", "0") != "0",
+    # Feature flag to enable/disable Inductor standalone compile.
+    # In torch <= 2.7 we ignore this flag; in torch >= 2.8 this is
+    # enabled by default.
+    "VLLM_USE_STANDALONE_COMPILE":
+    lambda: os.environ.get("VLLM_USE_STANDALONE_COMPILE", "1") == "1",

     # local rank of the process in the distributed setting, used to determine
     # the GPU device id
@@ -889,7 +891,7 @@ def factorize(name: str):
         "VLLM_USE_TRITON_AWQ",
         "VLLM_DP_RANK",
         "VLLM_DP_SIZE",
-        "VLLM_TEST_STANDALONE_COMPILE",
+        "VLLM_USE_STANDALONE_COMPILE",
     ]
     for key in environment_variables_to_hash:
         if key in environment_variables:
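Note that the default flips along with the rename: the old lambda treated any value other than "0" as on but defaulted to off, while the new one defaults to on and only the literal string "1" keeps it enabled. A small, self-contained illustration of that parsing follows; the helper name and dict-based environ argument are just for the example.

import os


def standalone_compile_enabled(environ=os.environ) -> bool:
    # Mirrors the lambda in the diff: on by default, and only the exact
    # string "1" counts as enabled.
    return environ.get("VLLM_USE_STANDALONE_COMPILE", "1") == "1"


print(standalone_compile_enabled({}))                                       # True: default on
print(standalone_compile_enabled({"VLLM_USE_STANDALONE_COMPILE": "0"}))     # False
print(standalone_compile_enabled({"VLLM_USE_STANDALONE_COMPILE": "true"}))  # False: only "1" enables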

vllm/v1/worker/xpu_worker.py

Lines changed: 0 additions & 3 deletions
@@ -53,9 +53,6 @@ def __init__(
         else:
             self.profiler = None

-    def compile_or_warm_up_model(self) -> None:
-        pass
-
     # we provide this function due to `torch.xpu.mem_get_info()` doesn't
     # return correct free_gpu_memory on intel client GPU. We need to
     # calculate/estimate it.
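The xpu_worker.py change works purely through inheritance: deleting the no-op override means the XPU worker falls back to the base worker's compile/warm-up path instead of silently skipping it. A hypothetical, minimal sketch of that effect follows; the class names are stand-ins, not the real vLLM classes.

class BaseWorker:
    def compile_or_warm_up_model(self) -> None:
        print("running compile / warm-up")  # stand-in for the real work


class XPUWorkerBefore(BaseWorker):
    def compile_or_warm_up_model(self) -> None:
        pass  # the removed override: warm-up was silently skipped on XPU


class XPUWorkerAfter(BaseWorker):
    pass  # no override, so the base implementation runs


XPUWorkerBefore().compile_or_warm_up_model()  # does nothing
XPUWorkerAfter().compile_or_warm_up_model()   # prints "running compile / warm-up"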
