
Commit ec26191

Use standalone_compile by default in torch >= 2.8.0
This includes the current PyTorch nightlies. It also renames the
VLLM_TEST_STANDALONE_COMPILE env var to VLLM_USE_STANDALONE_COMPILE to make it
clearer.

Test Plan:
- In #17057, I verified that running
  https://gist.github.com/zou3519/aebb622714e80f4cd4c369472f2372cd with or
  without VLLM_TEST_STANDALONE_COMPILE resulted in Inductor producing exactly
  the same output code (via tlparse). I did this for both the cold-start and
  the warm-start case.
- There are vllm x torch nightly tests in CI that I will trigger on this PR.

Signed-off-by: rzou <[email protected]>
Parent: b169d5f
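
For orientation, the rename also flips the flag's default: the old variable was
opt-in, the new one is opt-out (and still gated on the torch version, per the
vllm/compilation/backends.py diff below). A minimal, self-contained sketch of
the two readers, with hypothetical helper names; the real lambdas are in the
vllm/envs.py diff:

# Illustrative comparison of old vs. new env-var semantics (sketch only).
def old_flag(env: dict) -> bool:
    # VLLM_TEST_STANDALONE_COMPILE: off unless set to something other than "0".
    return env.get("VLLM_TEST_STANDALONE_COMPILE", "0") != "0"


def new_flag(env: dict) -> bool:
    # VLLM_USE_STANDALONE_COMPILE: on unless set to something other than "1".
    return env.get("VLLM_USE_STANDALONE_COMPILE", "1") == "1"


assert old_flag({}) is False  # previously opt-in
assert new_flag({}) is True   # now opt-out
assert new_flag({"VLLM_USE_STANDALONE_COMPILE": "0"}) is False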

File tree: 3 files changed (+12, -9 lines)

vllm/compilation/backends.py

Lines changed: 3 additions & 2 deletions
@@ -16,7 +16,7 @@
 from vllm.config import CompilationConfig, VllmConfig
 from vllm.logger import init_logger
 from vllm.platforms import current_platform
-from vllm.utils import resolve_obj_by_qualname
+from vllm.utils import is_torch_equal_or_newer, resolve_obj_by_qualname
 
 from .compiler_interface import (CompilerInterface, EagerAdaptor,
                                  InductorAdaptor, InductorStandaloneAdaptor)
@@ -29,7 +29,8 @@
 
 def make_compiler(compilation_config: CompilationConfig) -> CompilerInterface:
     if compilation_config.use_inductor:
-        if envs.VLLM_TEST_STANDALONE_COMPILE:
+        if envs.VLLM_USE_STANDALONE_COMPILE and is_torch_equal_or_newer(
+                "2.8.0"):
             logger.info("Using InductorStandaloneAdaptor")
             return InductorStandaloneAdaptor()
         else:

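The gate above only selects the standalone adaptor on a new enough torch. As a
rough idea of what a check like is_torch_equal_or_newer("2.8.0") does, here is a
hypothetical stand-in (the real vllm.utils helper may parse versions
differently); comparing release tuples also counts current 2.8 nightlies and dev
builds as new enough, which matches the commit message:

# Hypothetical sketch only; not the actual vllm.utils implementation.
from packaging.version import Version

import torch


def is_torch_equal_or_newer_sketch(target: str) -> bool:
    # Compare release tuples so that pre-release builds of the target version
    # (e.g. a "2.8.0.devYYYYMMDD" nightly) count as equal-or-newer.
    return Version(torch.__version__).release >= Version(target).release


print(is_torch_equal_or_newer_sketch("2.8.0"))  # True on 2.8 nightlies or later
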
vllm/compilation/compiler_interface.py

Lines changed: 1 addition & 1 deletion
@@ -155,7 +155,7 @@ class InductorStandaloneAdaptor(CompilerInterface):
     This is not on by default yet, but we plan to turn it on by default for
     PyTorch 2.8.
 
-    Use VLLM_TEST_STANDALONE_COMPILE to toggle this on or off.
+    Use VLLM_USE_STANDALONE_COMPILE to toggle this on or off.
     """
     name = "inductor_standalone"
 

vllm/envs.py

Lines changed: 8 additions & 6 deletions
@@ -142,10 +142,10 @@ def maybe_convert_int(value: Optional[str]) -> Optional[int]:
 
 def get_vllm_port() -> Optional[int]:
     """Get the port from VLLM_PORT environment variable.
-
+
     Returns:
         The port number as an integer if VLLM_PORT is set, None otherwise.
-
+
     Raises:
         ValueError: If VLLM_PORT is a URI, suggest k8s service discovery issue.
     """
@@ -300,9 +300,11 @@ def get_vllm_port() -> Optional[int]:
     lambda: bool(
         os.environ.get("VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1") != "0"),
 
-    # Internal flag to enable/disable Inductor standalone compile
-    "VLLM_TEST_STANDALONE_COMPILE":
-    lambda: os.environ.get("VLLM_TEST_STANDALONE_COMPILE", "0") != "0",
+    # Feature flag to enable/disable Inductor standalone compile.
+    # In torch <= 2.7 we ignore this flag; in torch >= 2.8 this is
+    # enabled by default.
+    "VLLM_USE_STANDALONE_COMPILE":
+    lambda: os.environ.get("VLLM_USE_STANDALONE_COMPILE", "1") == "1",
 
     # local rank of the process in the distributed setting, used to determine
     # the GPU device id
@@ -884,7 +886,7 @@ def factorize(name: str):
         "VLLM_USE_TRITON_AWQ",
         "VLLM_DP_RANK",
         "VLLM_DP_SIZE",
-        "VLLM_TEST_STANDALONE_COMPILE",
+        "VLLM_USE_STANDALONE_COMPILE",
     ]
     for key in environment_variables_to_hash:
         if key in environment_variables:

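With the new default, opting out is explicit. A usage sketch, assuming (as with
the other entries in vllm/envs.py) that the value is read lazily from os.environ
when the attribute is accessed:

# Usage sketch: disabling standalone compile. Per the lambda above, any value
# other than "1" turns the flag off; left unset, it defaults to enabled.
import os

os.environ["VLLM_USE_STANDALONE_COMPILE"] = "0"

import vllm.envs as envs  # noqa: E402

# make_compiler() will now fall back to the non-standalone InductorAdaptor,
# even on torch >= 2.8.0.
assert envs.VLLM_USE_STANDALONE_COMPILE is False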