
Commit 87c8a20

[lmi] Upgrade lmi dockerfile for 0.29.0 release

1 parent 0ff540b

File tree: 3 files changed, +22 / -21 lines

engines/python/setup/djl_python/properties_manager/lmi_dist_rb_properties.py

Lines changed: 5 additions & 1 deletion
@@ -20,9 +20,13 @@
 
 class LmiDistQuantizeMethods(str, Enum):
     awq = 'awq'
+    deepspeedfp = 'deepspeedfp'
+    fp8 = 'fp8'
     gptq = 'gptq'
+    gptq_marlin = 'gptq_marlin'
+    gptq_marlin_24 = 'gptq_marlin_24'
+    marlin = 'marlin'
     squeezellm = 'squeezellm'
-    deepspeedfp = 'deepspeedfp'
 
 
 class LmiDistRbProperties(Properties):
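
Both properties files make the same change: the quantization enum gains fp8, gptq_marlin, gptq_marlin_24, and marlin, and the previously trailing deepspeedfp entry moves into alphabetical order. As a minimal sketch of how such a str-valued enum gates the quantize property (assumptions: a plain pydantic BaseModel stands in for the repo's Properties base class, and the Optional field with a None default is illustrative):

# Sketch only: BaseModel stands in for the repo's Properties base class.
from enum import Enum
from typing import Optional

from pydantic import BaseModel, ValidationError


class LmiDistQuantizeMethods(str, Enum):
    awq = 'awq'
    deepspeedfp = 'deepspeedfp'
    fp8 = 'fp8'
    gptq = 'gptq'
    gptq_marlin = 'gptq_marlin'
    gptq_marlin_24 = 'gptq_marlin_24'
    marlin = 'marlin'
    squeezellm = 'squeezellm'


class LmiDistRbProperties(BaseModel):
    # None means "no quantization requested"; any string value is
    # checked against the enum members at validation time.
    quantize: Optional[LmiDistQuantizeMethods] = None


print(LmiDistRbProperties(quantize='gptq_marlin').quantize)  # accepted after this commit

try:
    LmiDistRbProperties(quantize='eetq')  # not a member of the enum
except ValidationError as err:
    print(err.errors()[0]['loc'])  # ('quantize',)

The point of validating through the enum is that an unrecognized quantize value fails fast at configuration parsing rather than surfacing later inside the engine.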

engines/python/setup/djl_python/properties_manager/vllm_rb_properties.py

Lines changed: 5 additions & 1 deletion
@@ -20,9 +20,13 @@
 
 class VllmQuantizeMethods(str, Enum):
     awq = 'awq'
+    deepspeedfp = 'deepspeedfp'
+    fp8 = 'fp8'
     gptq = 'gptq'
+    gptq_marlin = 'gptq_marlin'
+    gptq_marlin_24 = 'gptq_marlin_24'
+    marlin = 'marlin'
     squeezellm = 'squeezellm'
-    deepspeedfp = 'deepspeedfp'
 
 
 class VllmRbProperties(Properties):
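
In deployment these enums back the option.quantize setting parsed from a model's serving.properties. An illustrative configuration using one of the newly allowed values (the model id is a placeholder; the keys follow the usual LMI conventions):

engine=Python
option.rolling_batch=vllm
option.model_id=<your-hf-model-id-or-s3-uri>
option.quantize=gptq_marlin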

serving/docker/lmi.Dockerfile

Lines changed: 12 additions & 19 deletions
@@ -19,22 +19,22 @@ ARG torch_version=2.3.0
 ARG torch_vision_version=0.18.0
 ARG onnx_version=1.18.0
 ARG onnxruntime_wheel="https://publish.djl.ai/onnxruntime/1.18.0/onnxruntime_gpu-1.18.0-cp310-cp310-linux_x86_64.whl"
-ARG pydantic_version=2.7.1
+ARG pydantic_version=2.8.2
 ARG djl_converter_wheel="https://publish.djl.ai/djl_converter/djl_converter-0.28.0-py3-none-any.whl"
-ARG vllm_cuda_name="cu12"
-ARG vllm_nccl_version=2.18.1
 # HF Deps
 ARG protobuf_version=3.20.3
-ARG transformers_version=4.41.1
-ARG accelerate_version=0.30.1
+ARG transformers_version=4.42.3
+ARG accelerate_version=0.32.1
 ARG bitsandbytes_version=0.43.1
-ARG optimum_version=1.20.0
+ARG optimum_version=1.21.2
 ARG auto_gptq_version=0.7.1
-ARG datasets_version=2.19.1
+ARG datasets_version=2.20.0
 ARG autoawq_version=0.2.5
+ARG tokenizers_version=0.19.1
 # LMI-Dist Deps
-ARG vllm_wheel="https://publish.djl.ai/vllm/cu124-pt230/vllm-0.4.2%2Bcu124-cp310-cp310-linux_x86_64.whl"
-ARG flash_attn_2_wheel="https://publish.djl.ai/flash_attn/cu124-pt230/flash_attn-2.5.8-cp310-cp310-linux_x86_64.whl"
+ARG vllm_wheel="https://github.com/vllm-project/vllm/releases/download/v0.5.0.post1/vllm-0.5.0.post1-cp310-cp310-manylinux1_x86_64.whl"
+ARG flash_attn_2_wheel="https://github.com/vllm-project/flash-attention/releases/download/v2.5.9/vllm_flash_attn-2.5.9-cp310-cp310-manylinux1_x86_64.whl"
+ARG flash_infer_wheel="https://github.com/flashinfer-ai/flashinfer/releases/download/v0.0.8/flashinfer-0.0.8+cu121torch2.3-cp310-cp310-linux_x86_64.whl"
 # %2B is the url escape for the '+' character
 ARG lmi_dist_wheel="https://publish.djl.ai/lmi_dist/lmi_dist-11.0.0%2Bnightly-py3-none-any.whl"
 ARG seq_scheduler_wheel="https://publish.djl.ai/seq_scheduler/seq_scheduler-0.1.0-py3-none-any.whl"
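
The retained "# %2B is the url escape for the '+' character" comment applies to the lmi_dist wheel URL, whose version carries a local identifier after the '+'. In Python terms (standard library only, shown purely for illustration):

from urllib.parse import quote, unquote

print(quote('lmi_dist-11.0.0+nightly'))      # lmi_dist-11.0.0%2Bnightly
print(unquote('lmi_dist-11.0.0%2Bnightly'))  # round-trips back to '+'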
@@ -57,7 +57,6 @@ ENV PYTORCH_PRECXX11=true
 ENV PYTORCH_VERSION=${torch_version}
 ENV PYTORCH_FLAVOR=cu121-precxx11
 ENV VLLM_NO_USAGE_STATS=1
-ENV VLLM_CONFIG_ROOT=/opt/djl/vllm/.config
 
 
 ENV HF_HOME=/tmp/.cache/huggingface
@@ -102,23 +101,17 @@ RUN pip3 install torch==${torch_version} torchvision==${torch_vision_version} --
     transformers==${transformers_version} hf-transfer zstandard datasets==${datasets_version} \
     mpi4py sentencepiece tiktoken blobfile einops accelerate==${accelerate_version} bitsandbytes==${bitsandbytes_version} \
     optimum==${optimum_version} auto-gptq==${auto_gptq_version} pandas pyarrow jinja2 \
-    opencv-contrib-python-headless safetensors scipy onnx sentence_transformers ${onnxruntime_wheel} autoawq==${autoawq_version} && \
+    opencv-contrib-python-headless safetensors scipy onnx sentence_transformers ${onnxruntime_wheel} autoawq==${autoawq_version} \
+    tokenizers==${tokenizers_version} pydantic==${pydantic_version} && \
     pip3 install ${djl_converter_wheel} --no-deps && \
     pip3 cache purge
 
-RUN pip3 install ${flash_attn_2_wheel} ${lmi_dist_wheel} ${vllm_wheel} pydantic==${pydantic_version} && \
+RUN pip3 install ${flash_attn_2_wheel} ${lmi_dist_wheel} ${vllm_wheel} ${flash_infer_wheel} && \
    pip3 cache purge
 
 # Add CUDA-Compat
 RUN apt-get update && apt-get install -y cuda-compat-12-4 && apt-get clean -y && rm -rf /var/lib/apt/lists/*
 
-# We use the same NCCL version as vLLM for lmi-dist https://github.com/vllm-project/vllm/blob/v0.4.2/vllm/utils.py#L641-L646
-# This is due to https://github.com/vllm-project/vllm/blob/v0.4.2/vllm/distributed/device_communicators/pynccl.py#L1-L9
-RUN mkdir -p ${VLLM_CONFIG_ROOT}/vllm/nccl/$vllm_cuda_name/ && curl -L -o ${VLLM_CONFIG_ROOT}/vllm/nccl/$vllm_cuda_name/libnccl.so.$vllm_nccl_version \
-    https://github.com/vllm-project/vllm-nccl/releases/download/v0.1.0/$vllm_cuda_name-libnccl.so.$vllm_nccl_version && \
-    # The following is done only so that we can run the CI with `-u djl`. Sagemaker wouldn't require this.
-    chmod -R a+w ${VLLM_CONFIG_ROOT}
-
 RUN scripts/patch_oss_dlc.sh python && \
     scripts/security_patch.sh lmi && \
     useradd -m -d /home/djl djl && \