
Commit bc59bd3

Revert "Hack up de build to use as base-image 🚀"
This reverts commit baeec70.

Signed-off-by: Jefferson Fialho <[email protected]>
1 parent 1297cc8 commit bc59bd3

File tree

1 file changed: +200 -5 lines changed


Dockerfile.ubi

Lines changed: 200 additions & 5 deletions
@@ -1,10 +1,205 @@
-# Start from released image
-FROM quay.io/opendatahub/vllm:cuda-pr-182 as vllm-grpc-adapter
+## Global Args #################################################################
+ARG BASE_UBI_IMAGE_TAG=9.4
+ARG PYTHON_VERSION=3.12
 
-USER root
+ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
+ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
+
+## Base Layer ##################################################################
+FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as base
+ARG PYTHON_VERSION
+ENV PYTHON_VERSION=${PYTHON_VERSION}
+RUN microdnf -y update && microdnf install -y \
+    python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel \
+    && microdnf clean all
+
+WORKDIR /workspace
+
+ENV LANG=C.UTF-8 \
+    LC_ALL=C.UTF-8
+
+# Some utils for dev purposes - tar required for kubectl cp
+RUN microdnf install -y \
+    which procps findutils tar vim git \
+    && microdnf clean all
+
+
+## Python Installer ############################################################
+FROM base as python-install
+ARG PYTHON_VERSION
+
+ENV VIRTUAL_ENV=/opt/vllm
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+ENV PYTHON_VERSION=${PYTHON_VERSION}
+RUN microdnf install -y \
+    python${PYTHON_VERSION}-devel && \
+    python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && pip install --no-cache -U pip wheel uv && microdnf clean all
+
+
+## CUDA Base ###################################################################
+FROM python-install as cuda-base
+
+RUN curl -Lo /etc/yum.repos.d/cuda-rhel9.repo \
+    https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo
+
+RUN microdnf install -y \
+    cuda-nvcc-12-4 cuda-nvtx-12-4 cuda-libraries-devel-12-4 && \
+    microdnf clean all
+
+ENV CUDA_HOME="/usr/local/cuda" \
+    PATH="${CUDA_HOME}/bin:${PATH}" \
+    LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"
+
+## Python cuda base ############################################################
+FROM cuda-base AS python-cuda-base
+
+ENV VIRTUAL_ENV=/opt/vllm
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+
+# install cuda and common dependencies
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
+    --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
+    uv pip install \
+        -r requirements-cuda.txt
+
+
+## Development #################################################################
+FROM python-cuda-base AS dev
+
+# install build and runtime dependencies
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
+    --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
+    --mount=type=bind,source=requirements-dev.txt,target=requirements-dev.txt \
+    --mount=type=bind,source=requirements-lint.txt,target=requirements-lint.txt \
+    --mount=type=bind,source=requirements-test.txt,target=requirements-test.txt \
+    uv pip install \
+        -r requirements-cuda.txt \
+        -r requirements-dev.txt
+
+## Builder #####################################################################
+FROM dev AS build
+
+# install build dependencies
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=requirements-build.txt,target=requirements-build.txt \
+    uv pip install -r requirements-build.txt
+
+# install compiler cache to speed up compilation leveraging local or remote caching
+# git is required for the cutlass kernels
+RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && rpm -ql epel-release && microdnf install -y git ccache && microdnf clean all
+
+COPY . .
+
+ARG TORCH_CUDA_ARCH_LIST
+ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
+ARG vllm_fa_cmake_gpu_arches
+ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches}
 
-# Copy source code changes into the installed location to overwrite the installed python code
-COPY vllm /opt/vllm/lib64/python3.12/site-packages/vllm
+# max jobs used by Ninja to build extensions
+ARG max_jobs=2
+ENV MAX_JOBS=${max_jobs}
+# number of threads used by nvcc
+ARG nvcc_threads=8
+ENV NVCC_THREADS=$nvcc_threads
+# make sure punica kernels are built (for LoRA)
+ENV VLLM_INSTALL_PUNICA_KERNELS=1
+
+# Make sure the cuda environment is in the PATH
+ENV PATH=/usr/local/cuda/bin:$PATH
+
+ENV CCACHE_DIR=/root/.cache/ccache
+RUN --mount=type=cache,target=/root/.cache/ccache \
+    --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,src=.git,target=/workspace/.git \
+    env CFLAGS="-march=haswell" \
+        CXXFLAGS="$CFLAGS $CXXFLAGS" \
+        CMAKE_BUILD_TYPE=Release \
+        python3 setup.py bdist_wheel --dist-dir=dist
+
+#################### libsodium Build IMAGE ####################
+FROM base as libsodium-builder
+
+RUN microdnf install -y gcc gzip \
+    && microdnf clean all
+
+WORKDIR /usr/src/libsodium
+
+ARG LIBSODIUM_VERSION=1.0.20
+RUN curl -LO https://github.com/jedisct1/libsodium/releases/download/${LIBSODIUM_VERSION}-RELEASE/libsodium-${LIBSODIUM_VERSION}.tar.gz \
+    && tar -xzvf libsodium*.tar.gz \
+    && rm -f libsodium*.tar.gz \
+    && mv libsodium*/* ./
+
+RUN CFLAGS="-O3 -Wall -Werror=format-security -Wno-unused-function -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection" \
+    ./configure --prefix="/usr/" && make -j $MAX_JOBS && make check
+
+## Release #####################################################################
+FROM python-install AS vllm-openai
+ARG PYTHON_VERSION
+
+WORKDIR /workspace
+
+ENV VIRTUAL_ENV=/opt/vllm
+ENV PATH=$VIRTUAL_ENV/bin/:$PATH
+
+# force using the python venv's cuda runtime libraries
+ENV LD_LIBRARY_PATH="${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/nvidia/cuda_nvrtc/lib:${LD_LIBRARY_PATH}"
+ENV LD_LIBRARY_PATH="${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/nvidia/cuda_runtime/lib:${LD_LIBRARY_PATH}"
+ENV LD_LIBRARY_PATH="${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/nvidia/nvtx/lib:${LD_LIBRARY_PATH}"
+
+# Triton needs a CC compiler
+RUN microdnf install -y gcc \
+    && microdnf clean all
+
+# install vllm wheel first, so that torch etc will be installed
+RUN --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
+    --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=cache,target=/root/.cache/uv \
+    uv pip install $(echo dist/*.whl)'[tensorizer]' --verbose
+
+# Install libsodium for Tensorizer encryption
+RUN --mount=type=bind,from=libsodium-builder,src=/usr/src/libsodium,target=/usr/src/libsodium \
+    cd /usr/src/libsodium \
+    && make install
+
+RUN --mount=type=cache,target=/root/.cache/pip \
+    --mount=type=cache,target=/root/.cache/uv \
+    uv pip install \
+        "https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.6/flashinfer-0.1.6+cu124torch2.4-cp312-cp312-linux_x86_64.whl"
+
+ENV HF_HUB_OFFLINE=1 \
+    HOME=/home/vllm \
+    # Allow requested max length to exceed what is extracted from the
+    # config.json
+    # see: https://github.com/vllm-project/vllm/pull/7080
+    VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
+    VLLM_USAGE_SOURCE=production-docker-image \
+    VLLM_WORKER_MULTIPROC_METHOD=fork \
+    VLLM_NO_USAGE_STATS=1
+
+# setup non-root user for OpenShift
+RUN umask 002 \
+    && useradd --uid 2000 --gid 0 vllm \
+    && chmod g+rwx $HOME /usr/src /workspace
+
+COPY LICENSE /licenses/vllm.md
+
+# Copy only .jinja files from example directory to template directory
+COPY examples/*.jinja /app/data/template/
+
+USER 2000
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
+
+
+FROM vllm-openai as vllm-grpc-adapter
+
+USER root
 
 # RUN --mount=type=cache,target=/root/.cache/pip \
 #     pip install vllm-tgis-adapter==0.5.1
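
Because the restored Dockerfile.ubi relies on BuildKit-only features (RUN --mount=type=cache and bind mounts), it has to be built with BuildKit enabled. A minimal build sketch, run from the vLLM source tree; the image tag vllm-ubi:dev is hypothetical, and the --build-arg values shown are just the defaults declared at the top of the file:

# BuildKit is required for the --mount=type=cache/bind instructions.
DOCKER_BUILDKIT=1 docker build \
    --file Dockerfile.ubi \
    --target vllm-grpc-adapter \
    --build-arg BASE_UBI_IMAGE_TAG=9.4 \
    --build-arg PYTHON_VERSION=3.12 \
    --build-arg max_jobs=2 \
    --build-arg nvcc_threads=8 \
    --tag vllm-ubi:dev \
    .

Passing --target vllm-openai instead stops at the OpenAI-server stage; without --target, the final vllm-grpc-adapter stage is built.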

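The vllm-openai stage runs the OpenAI-compatible API server as its entrypoint, so arguments after the image name are passed to vllm.entrypoints.openai.api_server. A minimal usage sketch (image tag and model path are hypothetical; because the image sets HF_HUB_OFFLINE=1, model weights have to be mounted in, or that variable overridden, rather than fetched at startup):

# Serve a locally mounted model on port 8000; the container runs as
# the non-root user created above (UID 2000).
docker run --rm --gpus all -p 8000:8000 \
    -v /path/to/model:/mnt/model \
    vllm-ubi:dev \
    --model /mnt/model --port 8000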