Skip to content

Commit baeec70

Browse files
committed
Hack up the build to use as base-image 🚀
Signed-off-by: Jefferson Fialho <[email protected]>
1 parent 6b500af commit baeec70

File tree

1 file changed

+5
-200
lines changed

1 file changed

+5
-200
lines changed

Dockerfile.ubi

Lines changed: 5 additions & 200 deletions
Original file line numberDiff line numberDiff line change
@@ -1,206 +1,11 @@
1-
## Global Args #################################################################
2-
ARG BASE_UBI_IMAGE_TAG=9.4
3-
ARG PYTHON_VERSION=3.12
4-
5-
ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
6-
ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
7-
8-
## Base Layer ##################################################################
9-
FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as base
10-
ARG PYTHON_VERSION
11-
ENV PYTHON_VERSION=${PYTHON_VERSION}
12-
RUN microdnf -y update && microdnf install -y \
13-
python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel \
14-
&& microdnf clean all
15-
16-
WORKDIR /workspace
17-
18-
ENV LANG=C.UTF-8 \
19-
LC_ALL=C.UTF-8
20-
21-
# Some utils for dev purposes - tar required for kubectl cp
22-
RUN microdnf install -y \
23-
which procps findutils tar vim git\
24-
&& microdnf clean all
25-
26-
27-
## Python Installer ############################################################
28-
FROM base as python-install
29-
ARG PYTHON_VERSION
30-
31-
ENV VIRTUAL_ENV=/opt/vllm
32-
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
33-
ENV PYTHON_VERSION=${PYTHON_VERSION}
34-
RUN microdnf install -y \
35-
python${PYTHON_VERSION}-devel && \
36-
python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && pip install --no-cache -U pip wheel uv && microdnf clean all
37-
38-
39-
## CUDA Base ###################################################################
40-
FROM python-install as cuda-base
41-
42-
RUN curl -Lo /etc/yum.repos.d/cuda-rhel9.repo \
43-
https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo
44-
45-
RUN microdnf install -y \
46-
cuda-nvcc-12-4 cuda-nvtx-12-4 cuda-libraries-devel-12-4 && \
47-
microdnf clean all
48-
49-
ENV CUDA_HOME="/usr/local/cuda" \
50-
PATH="${CUDA_HOME}/bin:${PATH}" \
51-
LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"
52-
53-
## Python cuda base #################################################################
54-
FROM cuda-base AS python-cuda-base
55-
56-
ENV VIRTUAL_ENV=/opt/vllm
57-
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
58-
59-
# install cuda and common dependencies
60-
RUN --mount=type=cache,target=/root/.cache/pip \
61-
--mount=type=cache,target=/root/.cache/uv \
62-
--mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
63-
--mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
64-
uv pip install \
65-
-r requirements-cuda.txt
66-
67-
68-
## Development #################################################################
69-
FROM python-cuda-base AS dev
70-
71-
# install build and runtime dependencies
72-
RUN --mount=type=cache,target=/root/.cache/pip \
73-
--mount=type=cache,target=/root/.cache/uv \
74-
--mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
75-
--mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
76-
--mount=type=bind,source=requirements-dev.txt,target=requirements-dev.txt \
77-
--mount=type=bind,source=requirements-lint.txt,target=requirements-lint.txt \
78-
--mount=type=bind,source=requirements-test.txt,target=requirements-test.txt \
79-
uv pip install \
80-
-r requirements-cuda.txt \
81-
-r requirements-dev.txt
82-
83-
## Builder #####################################################################
84-
FROM dev AS build
85-
86-
# install build dependencies
87-
RUN --mount=type=cache,target=/root/.cache/pip \
88-
--mount=type=cache,target=/root/.cache/uv \
89-
--mount=type=bind,source=requirements-build.txt,target=requirements-build.txt \
90-
uv pip install -r requirements-build.txt
91-
92-
# install compiler cache to speed up compilation leveraging local or remote caching
93-
# git is required for the cutlass kernels
94-
RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && rpm -ql epel-release && microdnf install -y git ccache && microdnf clean all
95-
96-
COPY . .
97-
98-
ARG TORCH_CUDA_ARCH_LIST
99-
ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
100-
ARG vllm_fa_cmake_gpu_arches
101-
ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches}
102-
103-
# max jobs used by Ninja to build extensions
104-
ARG max_jobs=2
105-
ENV MAX_JOBS=${max_jobs}
106-
# number of threads used by nvcc
107-
ARG nvcc_threads=8
108-
ENV NVCC_THREADS=$nvcc_threads
109-
# make sure punica kernels are built (for LoRA)
110-
ENV VLLM_INSTALL_PUNICA_KERNELS=1
111-
112-
# Make sure the cuda environment is in the PATH
113-
ENV PATH=/usr/local/cuda/bin:$PATH
114-
115-
ENV CCACHE_DIR=/root/.cache/ccache
116-
RUN --mount=type=cache,target=/root/.cache/ccache \
117-
--mount=type=cache,target=/root/.cache/pip \
118-
--mount=type=cache,target=/root/.cache/uv \
119-
--mount=type=bind,src=.git,target=/workspace/.git \
120-
env CFLAGS="-march=haswell" \
121-
CXXFLAGS="$CFLAGS $CXXFLAGS" \
122-
CMAKE_BUILD_TYPE=Release \
123-
python3 setup.py bdist_wheel --dist-dir=dist
124-
125-
#################### libsodium Build IMAGE ####################
126-
FROM base as libsodium-builder
127-
128-
RUN microdnf install -y gcc gzip \
129-
&& microdnf clean all
130-
131-
WORKDIR /usr/src/libsodium
132-
133-
ARG LIBSODIUM_VERSION=1.0.20
134-
RUN curl -LO https://github.com/jedisct1/libsodium/releases/download/${LIBSODIUM_VERSION}-RELEASE/libsodium-${LIBSODIUM_VERSION}.tar.gz \
135-
&& tar -xzvf libsodium*.tar.gz \
136-
&& rm -f libsodium*.tar.gz \
137-
&& mv libsodium*/* ./
138-
139-
RUN CFLAGS="-O3 -Wall -Werror=format-security -Wno-unused-function -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection"\
140-
./configure --prefix="/usr/" && make -j $MAX_JOBS && make check
141-
142-
## Release #####################################################################
143-
FROM python-install AS vllm-openai
144-
ARG PYTHON_VERSION
145-
146-
WORKDIR /workspace
147-
148-
ENV VIRTUAL_ENV=/opt/vllm
149-
ENV PATH=$VIRTUAL_ENV/bin/:$PATH
150-
151-
# force using the python venv's cuda runtime libraries
152-
ENV LD_LIBRARY_PATH="${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/nvidia/cuda_nvrtc/lib:${LD_LIBRARY_PATH}"
153-
ENV LD_LIBRARY_PATH="${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/nvidia/cuda_runtime/lib:${LD_LIBRARY_PATH}"
154-
ENV LD_LIBRARY_PATH="${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/nvidia/nvtx/lib:${LD_LIBRARY_PATH}"
155-
156-
# Triton needs a CC compiler
157-
RUN microdnf install -y gcc \
158-
&& microdnf clean all
159-
160-
# install vllm wheel first, so that torch etc will be installed
161-
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
162-
--mount=type=cache,target=/root/.cache/pip \
163-
--mount=type=cache,target=/root/.cache/uv \
164-
uv pip install $(echo dist/*.whl)'[tensorizer]' --verbose
165-
166-
# Install libsodium for Tensorizer encryption
167-
RUN --mount=type=bind,from=libsodium-builder,src=/usr/src/libsodium,target=/usr/src/libsodium \
168-
cd /usr/src/libsodium \
169-
&& make install
170-
171-
RUN --mount=type=cache,target=/root/.cache/pip \
172-
--mount=type=cache,target=/root/.cache/uv \
173-
uv pip install \
174-
"https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.6/flashinfer-0.1.6+cu124torch2.4-cp312-cp312-linux_x86_64.whl"
175-
176-
ENV HF_HUB_OFFLINE=1 \
177-
HOME=/home/vllm \
178-
# Allow requested max length to exceed what is extracted from the
179-
# config.json
180-
# see: https://github.com/vllm-project/vllm/pull/7080
181-
VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
182-
VLLM_USAGE_SOURCE=production-docker-image \
183-
VLLM_WORKER_MULTIPROC_METHOD=fork \
184-
VLLM_NO_USAGE_STATS=1
185-
186-
# setup non-root user for OpenShift
187-
RUN umask 002 \
188-
&& useradd --uid 2000 --gid 0 vllm \
189-
&& chmod g+rwx $HOME /usr/src /workspace
190-
191-
COPY LICENSE /licenses/vllm.md
192-
193-
# Copy only .jinja files from example directory to template directory
194-
COPY examples/*.jinja /app/data/template/
195-
196-
USER 2000
197-
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
198-
199-
200-
FROM vllm-openai as vllm-grpc-adapter
1+
# Start from released image
2+
FROM quay.io/opendatahub/vllm:cuda-pr-182 as vllm-grpc-adapter
2013

2024
USER root
2035

6+
# Copy source code changes into the installed location to overwrite the installed python code
7+
COPY vllm /opt/vllm/lib64/python3.12/site-packages/vllm
8+
2049
# RUN --mount=type=cache,target=/root/.cache/pip \
20510
# pip install vllm-tgis-adapter==0.5.1
20611
RUN --mount=type=cache,target=/root/.cache/pip \

0 commit comments

Comments
 (0)