7
7
import logging
8
8
import os
9
9
import re
10
+ import shutil
10
11
import subprocess
11
12
import sys
12
13
from pathlib import Path
@@ -281,10 +282,81 @@ def run(self):
281
282
self .copy_file (file , dst_file )
282
283
283
284
284
- class repackage_wheel (build_ext ):
285
class precompiled_build_ext(build_ext):
    """Disables extension building when using precompiled binaries."""

    def run(self) -> None:
        """Validate the target platform and deliberately build nothing.

        The parent ``run`` is not invoked here, so no extension is
        configured or compiled by this command.

        Raises:
            AssertionError: If the build is not a CUDA build.
        """
        # Raise explicitly instead of using `assert`: assert statements are
        # removed when Python runs with -O, which would silently skip the
        # platform check.
        if not _is_cuda():
            raise AssertionError(
                "VLLM_USE_PRECOMPILED is only supported for CUDA builds")

    def build_extensions(self) -> None:
        """Report that compilation is skipped; nothing is built."""
        print("Skipping build_ext: using precompiled extensions.")
295
+
296
+
297
+ class precompiled_wheel_utils :
285
298
"""Extracts libraries and other files from an existing wheel."""
286
299
287
- def get_base_commit_in_main_branch (self ) -> str :
300
@staticmethod
def extract_precompiled_and_patch_package(wheel_url_or_path: str) -> dict:
    """Copy precompiled artifacts from a wheel into the current directory.

    If ``wheel_url_or_path`` is not an existing file it is treated as a URL
    and downloaded into a temporary directory, which is removed again before
    returning (also on failure). The selected wheel members are written
    relative to the current working directory, mirroring their path inside
    the wheel.

    Selected members are the fixed list of ``.so`` extension modules below
    plus every ``.py`` file under ``vllm/vllm_flash_attn/``.

    Args:
        wheel_url_or_path: Path of a local wheel, or a URL to download it
            from. For URLs, the text after the last ``/`` is used as the
            local file name.

    Returns:
        A mapping from dotted package name (the member's directory with
        ``/`` replaced by ``.``) to the list of extracted file base names,
        suitable for merging into ``package_data``.
    """
    import tempfile
    import zipfile

    temp_dir = None
    try:
        if not os.path.isfile(wheel_url_or_path):
            wheel_filename = wheel_url_or_path.split("/")[-1]
            temp_dir = tempfile.mkdtemp(prefix="vllm-wheels")
            wheel_path = os.path.join(temp_dir, wheel_filename)
            print(f"Downloading wheel from {wheel_url_or_path} "
                  f"to {wheel_path}")
            from urllib.request import urlretrieve
            urlretrieve(wheel_url_or_path, filename=wheel_path)
        else:
            wheel_path = wheel_url_or_path
            print(f"Using existing wheel at {wheel_path}")

        package_data_patch = {}

        with zipfile.ZipFile(wheel_path) as wheel:
            # Exact member names of the compiled extension modules.
            files_to_copy = {
                "vllm/_C.abi3.so",
                "vllm/_moe_C.abi3.so",
                "vllm/_flashmla_C.abi3.so",
                "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so",
                "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so",
                "vllm/cumem_allocator.abi3.so",
            }

            # Python sources of vllm_flash_attn; no path component may
            # start with a dot.
            compiled_regex = re.compile(
                r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py")

            file_members = [
                member for member in wheel.filelist
                if member.filename in files_to_copy
            ]
            # fullmatch (not match): the pattern has no end anchor, so a
            # prefix match would also select names such as "x.pyc" or
            # "a.py/../../target". Members are written with a plain open()
            # below, so such names must never be accepted.
            file_members += [
                member for member in wheel.filelist
                if compiled_regex.fullmatch(member.filename)
            ]

            for file in file_members:
                print(f"[extract] {file.filename}")
                target_path = os.path.join(".", file.filename)
                os.makedirs(os.path.dirname(target_path), exist_ok=True)
                with wheel.open(file.filename) as src, open(
                        target_path, "wb") as dst:
                    shutil.copyfileobj(src, dst)

                pkg = os.path.dirname(file.filename).replace("/", ".")
                package_data_patch.setdefault(pkg, []).append(
                    os.path.basename(file.filename))

        return package_data_patch
    finally:
        # Only a wheel downloaded by this call lives in temp_dir; a
        # caller-provided local wheel is left untouched.
        if temp_dir is not None:
            print(f"Removing temporary directory {temp_dir}")
            shutil.rmtree(temp_dir)
357
+
358
+ @staticmethod
359
+ def get_base_commit_in_main_branch () -> str :
288
360
# Force to use the nightly wheel. This is mainly used for CI testing.
289
361
if envs .VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL :
290
362
return "nightly"
@@ -297,6 +369,10 @@ def get_base_commit_in_main_branch(self) -> str:
297
369
]).decode ("utf-8" )
298
370
upstream_main_commit = json .loads (resp_json )["sha" ]
299
371
372
+ # In Docker build context, .git may be immutable or missing.
373
+ if envs .VLLM_DOCKER_BUILD_CONTEXT :
374
+ return upstream_main_commit
375
+
300
376
# Check if the upstream_main_commit exists in the local repo
301
377
try :
302
378
subprocess .check_output (
@@ -329,86 +405,6 @@ def get_base_commit_in_main_branch(self) -> str:
329
405
"wheel may not be compatible with your dev branch: %s" , err )
330
406
return "nightly"
331
407
332
- def run (self ) -> None :
333
- assert _is_cuda (
334
- ), "VLLM_USE_PRECOMPILED is only supported for CUDA builds"
335
-
336
- wheel_location = os .getenv ("VLLM_PRECOMPILED_WHEEL_LOCATION" , None )
337
- if wheel_location is None :
338
- base_commit = self .get_base_commit_in_main_branch ()
339
- wheel_location = f"https://wheels.vllm.ai/{ base_commit } /vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
340
- # Fallback to nightly wheel if latest commit wheel is unavailable,
341
- # in this rare case, the nightly release CI hasn't finished on main.
342
- if not is_url_available (wheel_location ):
343
- wheel_location = "https://wheels.vllm.ai/nightly/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
344
-
345
- import zipfile
346
-
347
- if os .path .isfile (wheel_location ):
348
- wheel_path = wheel_location
349
- print (f"Using existing wheel={ wheel_path } " )
350
- else :
351
- # Download the wheel from a given URL, assume
352
- # the filename is the last part of the URL
353
- wheel_filename = wheel_location .split ("/" )[- 1 ]
354
-
355
- import tempfile
356
-
357
- # create a temporary directory to store the wheel
358
- temp_dir = tempfile .mkdtemp (prefix = "vllm-wheels" )
359
- wheel_path = os .path .join (temp_dir , wheel_filename )
360
-
361
- print (f"Downloading wheel from { wheel_location } to { wheel_path } " )
362
-
363
- from urllib .request import urlretrieve
364
-
365
- try :
366
- urlretrieve (wheel_location , filename = wheel_path )
367
- except Exception as e :
368
- from setuptools .errors import SetupError
369
-
370
- raise SetupError (
371
- f"Failed to get vLLM wheel from { wheel_location } " ) from e
372
-
373
- with zipfile .ZipFile (wheel_path ) as wheel :
374
- files_to_copy = [
375
- "vllm/_C.abi3.so" ,
376
- "vllm/_moe_C.abi3.so" ,
377
- "vllm/_flashmla_C.abi3.so" ,
378
- "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so" ,
379
- "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so" ,
380
- "vllm/cumem_allocator.abi3.so" ,
381
- # "vllm/_version.py", # not available in nightly wheels yet
382
- ]
383
-
384
- file_members = list (
385
- filter (lambda x : x .filename in files_to_copy , wheel .filelist ))
386
-
387
- # vllm_flash_attn python code:
388
- # Regex from
389
- # `glob.translate('vllm/vllm_flash_attn/**/*.py', recursive=True)`
390
- compiled_regex = re .compile (
391
- r"vllm/vllm_flash_attn/(?:[^/.][^/]*/)*(?!\.)[^/]*\.py" )
392
- file_members += list (
393
- filter (lambda x : compiled_regex .match (x .filename ),
394
- wheel .filelist ))
395
-
396
- for file in file_members :
397
- print (f"Extracting and including { file .filename } "
398
- "from existing wheel" )
399
- package_name = os .path .dirname (file .filename ).replace ("/" , "." )
400
- file_name = os .path .basename (file .filename )
401
-
402
- if package_name not in package_data :
403
- package_data [package_name ] = []
404
-
405
- wheel .extract (file )
406
- if file_name .endswith (".py" ):
407
- # python files shouldn't be added to package_data
408
- continue
409
-
410
- package_data [package_name ].append (file_name )
411
-
412
408
413
409
def _no_device() -> bool:
    """Return True when VLLM_TARGET_DEVICE is set to "empty"."""
    targets_empty_device = VLLM_TARGET_DEVICE == "empty"
    return targets_empty_device
@@ -639,6 +635,29 @@ def _read_requirements(filename: str) -> list[str]:
639
635
]
640
636
}
641
637
638
# If using precompiled, extract and patch package_data (in advance of setup)
if envs.VLLM_USE_PRECOMPILED:
    # Raise explicitly instead of using `assert`: assert statements are
    # removed under `python -O`, which would silently skip this check.
    if not _is_cuda():
        raise AssertionError(
            "VLLM_USE_PRECOMPILED is only supported for CUDA builds")

    wheel_location = os.getenv("VLLM_PRECOMPILED_WHEEL_LOCATION", None)
    if wheel_location is not None:
        # An explicitly configured wheel (path or URL) is used as-is.
        wheel_url = wheel_location
    else:
        from urllib.request import urlopen

        wheel_filename = "vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl"
        nightly_wheel_url = f"https://wheels.vllm.ai/nightly/{wheel_filename}"

        base_commit = precompiled_wheel_utils.get_base_commit_in_main_branch()
        wheel_url = f"https://wheels.vllm.ai/{base_commit}/{wheel_filename}"
        try:
            # Probe the per-commit wheel. The timeout keeps an unresponsive
            # server from hanging the build; a timeout takes the same
            # fallback path as any other probe failure.
            with urlopen(wheel_url, timeout=30) as resp:
                if resp.status != 200:
                    wheel_url = nightly_wheel_url
        except Exception as e:
            # Best effort: any failure to reach the per-commit wheel falls
            # back to the nightly wheel rather than aborting the build.
            print(f"[warn] Falling back to nightly wheel: {e}")
            wheel_url = nightly_wheel_url

    patch = precompiled_wheel_utils.extract_precompiled_and_patch_package(
        wheel_url)
    # Merge the extracted file names into the existing package_data entries.
    for pkg, files in patch.items():
        package_data.setdefault(pkg, []).extend(files)
660
+
642
661
if _no_device ():
643
662
ext_modules = []
644
663
@@ -647,7 +666,7 @@ def _read_requirements(filename: str) -> list[str]:
647
666
else :
648
667
cmdclass = {
649
668
"build_ext" :
650
- repackage_wheel if envs .VLLM_USE_PRECOMPILED else cmake_build_ext
669
+ precompiled_build_ext if envs .VLLM_USE_PRECOMPILED else cmake_build_ext
651
670
}
652
671
653
672
setup (
0 commit comments