Skip to content

Commit d1af8b7

Browse files
authored
enable Docker-aware precompiled wheel setup (vllm-project#22106)
Signed-off-by: dougbtv <[email protected]>
1 parent 68b254d commit d1af8b7

File tree

3 files changed

+116
-95
lines changed

3 files changed

+116
-95
lines changed

docker/Dockerfile

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -210,16 +210,7 @@ ARG SCCACHE_REGION_NAME=us-west-2
210210
ARG SCCACHE_S3_NO_CREDENTIALS=0
211211

212212
# Flag to control whether to use pre-built vLLM wheels
213-
ARG VLLM_USE_PRECOMPILED
214-
# TODO: in setup.py VLLM_USE_PRECOMPILED is sensitive to truthiness, it will take =0 as "true", this should be fixed
215-
ENV VLLM_USE_PRECOMPILED=""
216-
RUN if [ "${VLLM_USE_PRECOMPILED}" = "1" ]; then \
217-
export VLLM_USE_PRECOMPILED=1 && \
218-
echo "Using precompiled wheels"; \
219-
else \
220-
unset VLLM_USE_PRECOMPILED && \
221-
echo "Leaving VLLM_USE_PRECOMPILED unset to build wheels from source"; \
222-
fi
213+
ARG VLLM_USE_PRECOMPILED=""
223214

224215
# if USE_SCCACHE is set, use sccache to speed up compilation
225216
RUN --mount=type=cache,target=/root/.cache/uv \
@@ -236,6 +227,8 @@ RUN --mount=type=cache,target=/root/.cache/uv \
236227
&& export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \
237228
&& export SCCACHE_IDLE_TIMEOUT=0 \
238229
&& export CMAKE_BUILD_TYPE=Release \
230+
&& export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" \
231+
&& export VLLM_DOCKER_BUILD_CONTEXT=1 \
239232
&& sccache --show-stats \
240233
&& python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 \
241234
&& sccache --show-stats; \
@@ -249,6 +242,8 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
249242
# Clean any existing CMake artifacts
250243
rm -rf .deps && \
251244
mkdir -p .deps && \
245+
export VLLM_USE_PRECOMPILED="${VLLM_USE_PRECOMPILED}" && \
246+
export VLLM_DOCKER_BUILD_CONTEXT=1 && \
252247
python3 setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38; \
253248
fi
254249

setup.py

Lines changed: 102 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import logging
88
import os
99
import re
10+
import shutil
1011
import subprocess
1112
import sys
1213
from pathlib import Path
@@ -281,10 +282,81 @@ def run(self):
281282
self.copy_file(file, dst_file)
282283

283284

284-
class repackage_wheel(build_ext):
285+
class precompiled_build_ext(build_ext):
    """Disables extension building when using precompiled binaries.

    With VLLM_USE_PRECOMPILED the compiled libraries are taken from an
    existing wheel, so this command only validates the target platform and
    never compiles anything itself.
    """

    def run(self) -> None:
        """Validate that this is a CUDA build; perform no build work.

        Raises:
            AssertionError: If the target device is not CUDA.
        """
        # Raise explicitly instead of using a bare `assert`: assertions are
        # stripped under `python -O`, which would silently skip this check.
        if not _is_cuda():
            raise AssertionError(
                "VLLM_USE_PRECOMPILED is only supported for CUDA builds")

    def build_extensions(self) -> None:
        """Skip building extensions and only report that it was skipped."""
        print("Skipping build_ext: using precompiled extensions.")
295+
296+
297+
class precompiled_wheel_utils:
285298
"""Extracts libraries and other files from an existing wheel."""
286299

287-
def get_base_commit_in_main_branch(self) -> str:
300+
@staticmethod
def extract_precompiled_and_patch_package(
        wheel_url_or_path: str) -> dict[str, list[str]]:
    """Copy precompiled libraries and flash-attn sources out of a wheel.

    Args:
        wheel_url_or_path: Path of an existing wheel file. Anything that is
            not an existing file is treated as a URL and downloaded into a
            temporary directory first.

    Returns:
        Mapping from dotted package name to the base names of the files
        that were extracted for that package. Files are written relative to
        the current working directory, mirroring their path in the wheel.
    """
    import tempfile
    import zipfile

    # Compiled libraries copied verbatim when present in the wheel.
    files_to_copy = {
        "vllm/_C.abi3.so",
        "vllm/_moe_C.abi3.so",
        "vllm/_flashmla_C.abi3.so",
        "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
        "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
        "vllm/cumem_allocator.abi3.so",
    }
    # Python sources under vllm/vllm_flash_attn/, skipping dot-prefixed
    # directories and files.
    compiled_regex = re.compile(
        r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")

    temp_dir = None
    try:
        if not os.path.isfile(wheel_url_or_path):
            # Assume the wheel filename is the last component of the URL.
            wheel_filename = wheel_url_or_path.split("/")[-1]
            temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
            wheel_path = os.path.join(temp_dir, wheel_filename)
            print(f"Downloading wheel from {wheel_url_or_path} "
                  f"to {wheel_path}")
            from urllib.request import urlretrieve
            urlretrieve(wheel_url_or_path, filename=wheel_path)
        else:
            wheel_path = wheel_url_or_path
            print(f"Using existing wheel at {wheel_path}")

        package_data_patch: dict[str, list[str]] = {}

        with zipfile.ZipFile(wheel_path) as wheel:
            file_members = [
                member for member in wheel.filelist
                if member.filename in files_to_copy
            ]
            # fullmatch (not match): the pattern has no end anchor, so a
            # prefix match would also pick up names such as "foo.pyc" or
            # "foo.py.bak" that merely start with a valid "*.py" path.
            file_members += [
                member for member in wheel.filelist
                if compiled_regex.fullmatch(member.filename)
            ]

            for file in file_members:
                print(f"[extract] {file.filename}")
                target_path = os.path.join(".", file.filename)
                os.makedirs(os.path.dirname(target_path), exist_ok=True)
                with wheel.open(file.filename) as src, open(
                        target_path, "wb") as dst:
                    shutil.copyfileobj(src, dst)

                pkg = os.path.dirname(file.filename).replace("/", ".")
                package_data_patch.setdefault(pkg, []).append(
                    os.path.basename(file.filename))

        return package_data_patch
    finally:
        # Only a downloaded wheel lives in a temp dir; a caller-supplied
        # local wheel is left untouched.
        if temp_dir is not None:
            print(f"Removing temporary directory {temp_dir}")
            shutil.rmtree(temp_dir)
357+
358+
@staticmethod
359+
def get_base_commit_in_main_branch() -> str:
288360
# Force to use the nightly wheel. This is mainly used for CI testing.
289361
if envs.VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL:
290362
return "nightly"
@@ -297,6 +369,10 @@ def get_base_commit_in_main_branch(self) -> str:
297369
]).decode("utf-8")
298370
upstream_main_commit = json.loads(resp_json)["sha"]
299371

372+
# In Docker build context, .git may be immutable or missing.
373+
if envs.VLLM_DOCKER_BUILD_CONTEXT:
374+
return upstream_main_commit
375+
300376
# Check if the upstream_main_commit exists in the local repo
301377
try:
302378
subprocess.check_output(
@@ -329,86 +405,6 @@ def get_base_commit_in_main_branch(self) -> str:
329405
"wheel may not be compatible with your dev branch: %s", err)
330406
return "nightly"
331407

332-
def run(self) -> None:
333-
assert _is_cuda(
334-
), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
335-
336-
wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
337-
if wheel_location is None:
338-
base_commit = self.get_base_commit_in_main_branch()
339-
wheel_location = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
340-
# Fallback to nightly wheel if latest commit wheel is unavailable,
341-
# in this rare case, the nightly release CI hasn't finished on main.
342-
if not is_url_available(wheel_location):
343-
wheel_location = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
344-
345-
import zipfile
346-
347-
if os.path.isfile(wheel_location):
348-
wheel_path = wheel_location
349-
print(f"Using existing wheel={wheel_path}")
350-
else:
351-
# Download the wheel from a given URL, assume
352-
# the filename is the last part of the URL
353-
wheel_filename = wheel_location.split("/")[-1]
354-
355-
import tempfile
356-
357-
# create a temporary directory to store the wheel
358-
temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
359-
wheel_path = os.path.join(temp_dir, wheel_filename)
360-
361-
print(f"Downloading wheel from {wheel_location} to {wheel_path}")
362-
363-
from urllib.request import urlretrieve
364-
365-
try:
366-
urlretrieve(wheel_location, filename=wheel_path)
367-
except Exception as e:
368-
from setuptools.errors import SetupError
369-
370-
raise SetupError(
371-
f"Failed to get vLLM wheel from {wheel_location}") from e
372-
373-
with zipfile.ZipFile(wheel_path) as wheel:
374-
files_to_copy = [
375-
"vllm/_C.abi3.so",
376-
"vllm/_moe_C.abi3.so",
377-
"vllm/_flashmla_C.abi3.so",
378-
"vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
379-
"vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
380-
"vllm/cumem_allocator.abi3.so",
381-
# "vllm/_version.py", # not available in nightly wheels yet
382-
]
383-
384-
file_members = list(
385-
filter(lambda x: x.filename in files_to_copy, wheel.filelist))
386-
387-
# vllm_flash_attn python code:
388-
# Regex from
389-
# `glob.translate('vllm/vllm_flash_attn/**/*.py', recursive=True)`
390-
compiled_regex = re.compile(
391-
r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")
392-
file_members += list(
393-
filter(lambda x: compiled_regex.match(x.filename),
394-
wheel.filelist))
395-
396-
for file in file_members:
397-
print(f"Extracting and including {file.filename} "
398-
"from existing wheel")
399-
package_name = os.path.dirname(file.filename).replace("/", ".")
400-
file_name = os.path.basename(file.filename)
401-
402-
if package_name not in package_data:
403-
package_data[package_name] = []
404-
405-
wheel.extract(file)
406-
if file_name.endswith(".py"):
407-
# python files shouldn't be added to package_data
408-
continue
409-
410-
package_data[package_name].append(file_name)
411-
412408

413409
def _no_device() -> bool:
414410
return VLLM_TARGET_DEVICE == "empty"
@@ -639,6 +635,29 @@ def _read_requirements(filename: str) -> list[str]:
639635
]
640636
}
641637

638+
# If using precompiled, extract and patch package_data (in advance of setup)
639+
if envs.VLLM_USE_PRECOMPILED:
640+
assert _is_cuda(), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
641+
wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
642+
if wheel_location is not None:
643+
wheel_url = wheel_location
644+
else:
645+
base_commit = precompiled_wheel_utils.get_base_commit_in_main_branch()
646+
wheel_url = f"https://wheels.vllm.ai/{base_commit}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
647+
from urllib.request import urlopen
648+
try:
649+
with urlopen(wheel_url) as resp:
650+
if resp.status != 200:
651+
wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
652+
except Exception as e:
653+
print(f"[warn] Falling back to nightly wheel: {e}")
654+
wheel_url = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
655+
656+
patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(
657+
wheel_url)
658+
for pkg, files in patch.items():
659+
package_data.setdefault(pkg, []).extend(files)
660+
642661
if _no_device():
643662
ext_modules = []
644663

@@ -647,7 +666,7 @@ def _read_requirements(filename: str) -> list[str]:
647666
else:
648667
cmdclass = {
649668
"build_ext":
650-
repackage_wheel if envs.VLLM_USE_PRECOMPILED else cmake_build_ext
669+
precompiled_build_ext if envs.VLLM_USE_PRECOMPILED else cmake_build_ext
651670
}
652671

653672
setup(

vllm/envs.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
MAX_JOBS: Optional[str] = None
7171
NVCC_THREADS: Optional[str] = None
7272
VLLM_USE_PRECOMPILED: bool = False
73+
VLLM_DOCKER_BUILD_CONTEXT: bool = False
7374
VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL: bool = False
7475
VLLM_KEEP_ALIVE_ON_ENGINE_DEATH: bool = False
7576
CMAKE_BUILD_TYPE: Optional[str] = None
@@ -234,8 +235,14 @@ def get_vllm_port() -> Optional[int]:
234235

235236
# If set, vllm will use precompiled binaries (*.so)
236237
"VLLM_USE_PRECOMPILED":
237-
lambda: bool(os.environ.get("VLLM_USE_PRECOMPILED")) or bool(
238-
os.environ.get("VLLM_PRECOMPILED_WHEEL_LOCATION")),
238+
lambda: os.environ.get("VLLM_USE_PRECOMPILED", "").strip().lower() in
239+
("1", "true") or bool(os.environ.get("VLLM_PRECOMPILED_WHEEL_LOCATION")),
240+
241+
# Used to mark that setup.py is running in a Docker build context (where
242+
# .git may be immutable or missing) while using precompiled binaries.
243+
"VLLM_DOCKER_BUILD_CONTEXT":
244+
lambda: os.environ.get("VLLM_DOCKER_BUILD_CONTEXT", "").strip().lower() in
245+
("1", "true"),
239246

240247
# Whether to force using nightly wheel in python build.
241248
# This is used for testing the nightly wheel in python build.

0 commit comments

Comments
 (0)