-## Global Args #################################################################
-ARG BASE_UBI_IMAGE_TAG=9.4
-ARG PYTHON_VERSION=3.12
-
-ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
-ARG vllm_fa_cmake_gpu_arches='80-real;90-real'
-
-## Base Layer ##################################################################
-FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} as base
-ARG PYTHON_VERSION
-ENV PYTHON_VERSION=${PYTHON_VERSION}
-RUN microdnf -y update && microdnf install -y \
-    python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel \
-    && microdnf clean all
-
-WORKDIR /workspace
-
-ENV LANG=C.UTF-8 \
-    LC_ALL=C.UTF-8
-
-# Some utils for dev purposes - tar required for kubectl cp
-RUN microdnf install -y \
-        which procps findutils tar vim git\
-    && microdnf clean all
-
-
-## Python Installer ############################################################
-FROM base as python-install
-ARG PYTHON_VERSION
-
-ENV VIRTUAL_ENV=/opt/vllm
-ENV PATH="$VIRTUAL_ENV/bin:$PATH"
-ENV PYTHON_VERSION=${PYTHON_VERSION}
-RUN microdnf install -y \
-    python${PYTHON_VERSION}-devel && \
-    python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && pip install --no-cache -U pip wheel uv && microdnf clean all
-
-
-## CUDA Base ###################################################################
-FROM python-install as cuda-base
-
-RUN curl -Lo /etc/yum.repos.d/cuda-rhel9.repo \
-    https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo
-
-RUN microdnf install -y \
-    cuda-nvcc-12-4 cuda-nvtx-12-4 cuda-libraries-devel-12-4 && \
-    microdnf clean all
-
-ENV CUDA_HOME="/usr/local/cuda" \
-    PATH="${CUDA_HOME}/bin:${PATH}" \
-    LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"
-
-## Python cuda base #############################################################
-FROM cuda-base AS python-cuda-base
-
-ENV VIRTUAL_ENV=/opt/vllm
-ENV PATH="$VIRTUAL_ENV/bin:$PATH"
-
-# install cuda and common dependencies
-RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/root/.cache/uv \
-    --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
-    --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
-    uv pip install \
-        -r requirements-cuda.txt
-
-
-## Development #################################################################
-FROM python-cuda-base AS dev
-
-# install build and runtime dependencies
-RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/root/.cache/uv \
-    --mount=type=bind,source=requirements-common.txt,target=requirements-common.txt \
-    --mount=type=bind,source=requirements-cuda.txt,target=requirements-cuda.txt \
-    --mount=type=bind,source=requirements-dev.txt,target=requirements-dev.txt \
-    --mount=type=bind,source=requirements-lint.txt,target=requirements-lint.txt \
-    --mount=type=bind,source=requirements-test.txt,target=requirements-test.txt \
-    uv pip install \
-        -r requirements-cuda.txt \
-        -r requirements-dev.txt
-
-## Builder #####################################################################
-FROM dev AS build
-
-# install build dependencies
-RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/root/.cache/uv \
-    --mount=type=bind,source=requirements-build.txt,target=requirements-build.txt \
-    uv pip install -r requirements-build.txt
-
-# install compiler cache to speed up compilation leveraging local or remote caching
-# git is required for the cutlass kernels
-RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && rpm -ql epel-release && microdnf install -y git ccache && microdnf clean all
-
-COPY . .
-
-ARG TORCH_CUDA_ARCH_LIST
-ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
-ARG vllm_fa_cmake_gpu_arches
-ENV VLLM_FA_CMAKE_GPU_ARCHES=${vllm_fa_cmake_gpu_arches}
-
-# max jobs used by Ninja to build extensions
-ARG max_jobs=2
-ENV MAX_JOBS=${max_jobs}
-# number of threads used by nvcc
-ARG nvcc_threads=8
-ENV NVCC_THREADS=$nvcc_threads
-# make sure punica kernels are built (for LoRA)
-ENV VLLM_INSTALL_PUNICA_KERNELS=1
-
-# Make sure the cuda environment is in the PATH
-ENV PATH=/usr/local/cuda/bin:$PATH
-
-ENV CCACHE_DIR=/root/.cache/ccache
-RUN --mount=type=cache,target=/root/.cache/ccache \
-    --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/root/.cache/uv \
-    --mount=type=bind,src=.git,target=/workspace/.git \
-    env CFLAGS="-march=haswell" \
-        CXXFLAGS="$CFLAGS $CXXFLAGS" \
-        CMAKE_BUILD_TYPE=Release \
-        python3 setup.py bdist_wheel --dist-dir=dist
-
-#################### libsodium Build IMAGE ####################
-FROM base as libsodium-builder
-
-RUN microdnf install -y gcc gzip \
-    && microdnf clean all
-
-WORKDIR /usr/src/libsodium
-
-ARG LIBSODIUM_VERSION=1.0.20
-RUN curl -LO https://github.com/jedisct1/libsodium/releases/download/${LIBSODIUM_VERSION}-RELEASE/libsodium-${LIBSODIUM_VERSION}.tar.gz \
-    && tar -xzvf libsodium*.tar.gz \
-    && rm -f libsodium*.tar.gz \
-    && mv libsodium*/* ./
-
-RUN CFLAGS="-O3 -Wall -Werror=format-security -Wno-unused-function -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection"\
-    ./configure --prefix="/usr/" && make -j $MAX_JOBS && make check
-
-## Release #####################################################################
-FROM python-install AS vllm-openai
-ARG PYTHON_VERSION
-
-WORKDIR /workspace
-
-ENV VIRTUAL_ENV=/opt/vllm
-ENV PATH=$VIRTUAL_ENV/bin/:$PATH
-
-# force using the python venv's cuda runtime libraries
-ENV LD_LIBRARY_PATH="${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/nvidia/cuda_nvrtc/lib:${LD_LIBRARY_PATH}"
-ENV LD_LIBRARY_PATH="${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/nvidia/cuda_runtime/lib:${LD_LIBRARY_PATH}"
-ENV LD_LIBRARY_PATH="${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/nvidia/nvtx/lib:${LD_LIBRARY_PATH}"
-
-# Triton needs a CC compiler
-RUN microdnf install -y gcc \
-    && microdnf clean all
-
-# install vllm wheel first, so that torch etc will be installed
-RUN --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
-    --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/root/.cache/uv \
-    uv pip install $(echo dist/*.whl)'[tensorizer]' --verbose
-
-# Install libsodium for Tensorizer encryption
-RUN --mount=type=bind,from=libsodium-builder,src=/usr/src/libsodium,target=/usr/src/libsodium \
-    cd /usr/src/libsodium \
-    && make install
-
-RUN --mount=type=cache,target=/root/.cache/pip \
-    --mount=type=cache,target=/root/.cache/uv \
-    uv pip install \
-        "https://github.com/flashinfer-ai/flashinfer/releases/download/v0.1.6/flashinfer-0.1.6+cu124torch2.4-cp312-cp312-linux_x86_64.whl"
-
-ENV HF_HUB_OFFLINE=1 \
-    HOME=/home/vllm \
-    # Allow requested max length to exceed what is extracted from the
-    # config.json
-    # see: https://github.com/vllm-project/vllm/pull/7080
-    VLLM_ALLOW_LONG_MAX_MODEL_LEN=1 \
-    VLLM_USAGE_SOURCE=production-docker-image \
-    VLLM_WORKER_MULTIPROC_METHOD=fork \
-    VLLM_NO_USAGE_STATS=1
-
-# setup non-root user for OpenShift
-RUN umask 002 \
-    && useradd --uid 2000 --gid 0 vllm \
-    && chmod g+rwx $HOME /usr/src /workspace
-
-COPY LICENSE /licenses/vllm.md
-
-# Copy only .jinja files from example directory to template directory
-COPY examples/*.jinja /app/data/template/
-
-USER 2000
-ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
-
-
-FROM vllm-openai as vllm-grpc-adapter
+# Start from released image
+FROM quay.io/opendatahub/vllm:cuda-pr-182 as vllm-grpc-adapter

 USER root

+# Copy source code changes into the installed location to overwrite the installed python code
+COPY vllm /opt/vllm/lib64/python3.12/site-packages/vllm
+
 # RUN --mount=type=cache,target=/root/.cache/pip \
 #     pip install vllm-tgis-adapter==0.5.1
 RUN --mount=type=cache,target=/root/.cache/pip \
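With this change the adapter image no longer rebuilds vLLM from source: it starts from an already-released image and overlays the local vllm Python sources on top of the installed package, so only Python-level changes take effect (compiled kernels still come from the base image). A minimal sketch of the resulting iteration loop; the file name Dockerfile.ubi and the image tag are illustrative assumptions, not part of the commit:

    # Edit files under ./vllm, then rebuild; only the COPY layer and the
    # layers after it re-execute, so the rebuild is fast.
    docker build -f Dockerfile.ubi --target vllm-grpc-adapter -t vllm-grpc-adapter:dev .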