Commit 6da1e9c

Merge pull request #1741 from containers/vllm-cuda
CUDA vLLM variant
2 parents: 1466150 + c26c140 · commit 6da1e9c

5 files changed: +119, -38 lines changed
New file

Lines changed: 17 additions & 0 deletions

@@ -0,0 +1,17 @@
+ARG PARENT=quay.io/ramalama/cuda:latest
+FROM $PARENT
+
+ENV UV_PYTHON_INSTALL_DIR="/opt/uv/python"
+ENV VIRTUAL_ENV="/opt/venv"
+ENV PATH="$VIRTUAL_ENV/bin:/root/.local/bin:$PATH"
+
+ENV UV_HTTP_TIMEOUT=500
+
+ENV UV_INDEX_STRATEGY="unsafe-best-match"
+ENV UV_LINK_MODE="copy"
+
+COPY . /src/ramalama
+WORKDIR /src/ramalama
+RUN container-images/scripts/build-vllm.sh "cuda"
+WORKDIR /
+
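This new Containerfile layers the vLLM build on top of the published CUDA image via the PARENT build arg. A local build might look like the sketch below; the -f path and -t tag are assumptions (the new file's name is not shown in this view), inferred from how the other image variants are laid out.

    # Sketch: build the CUDA vLLM variant locally (hypothetical path and tag).
    podman build \
        --build-arg PARENT=quay.io/ramalama/cuda:latest \
        -f container-images/cuda-vllm/Containerfile \
        -t ramalama/cuda-vllm .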

container-images/cuda/Containerfile

Lines changed: 3 additions & 3 deletions

@@ -1,13 +1,13 @@
-ARG VERSION=12.8.1
+ARG CUDA_VERSION=12.8.1
 # Base image with CUDA for compilation
-FROM docker.io/nvidia/cuda:${VERSION}-devel-ubi9 AS builder
+FROM docker.io/nvidia/cuda:${CUDA_VERSION}-devel-ubi9 AS builder
 
 COPY . /src/ramalama
 WORKDIR /src/ramalama
 RUN container-images/scripts/build_llama_and_whisper.sh cuda
 
 # Final runtime image
-FROM docker.io/nvidia/cuda:${VERSION}-runtime-ubi9
+FROM docker.io/nvidia/cuda:${CUDA_VERSION}-runtime-ubi9
 
 # Copy the entire installation directory from the builder
 COPY --from=builder /tmp/install /usr
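Renaming VERSION to CUDA_VERSION makes the build arg's purpose explicit and lines it up with the CUDA_VERSION value that build-vllm.sh later reads (the nvidia/cuda base images export CUDA_VERSION in the environment). A minimal sketch of overriding it at build time; 12.6.3 is an illustrative tag, any nvidia/cuda release published with -devel-ubi9 and -runtime-ubi9 images should work:

    # Sketch: build the CUDA base image against a release other than the 12.8.1 default.
    podman build \
        --build-arg CUDA_VERSION=12.6.3 \
        -f container-images/cuda/Containerfile \
        -t ramalama/cuda .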

container-images/ramalama-vllm/Containerfile

Lines changed: 1 addition & 1 deletion

@@ -12,6 +12,6 @@ ENV UV_LINK_MODE="copy"
 
 COPY . /src/ramalama
 WORKDIR /src/ramalama
-RUN container-images/scripts/build-vllm.sh
+RUN container-images/scripts/build-vllm.sh "ramalama"
 WORKDIR /
 

container-images/scripts/build-vllm.sh

Lines changed: 97 additions & 33 deletions

@@ -4,13 +4,51 @@ available() {
   command -v "$1" >/dev/null
 }
 
-install_deps() {
-  set -eux -o pipefail
+is_rhel_based() { # doesn't include openEuler
+  # shellcheck disable=SC1091
+  source /etc/os-release
+  [ "$ID" = "rhel" ] || [ "$ID" = "redhat" ] || [ "$ID" == "centos" ]
+}
 
+dnf_install_epel() {
+  local rpm_exclude_list="selinux-policy,container-selinux"
+  local url="https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm"
+  dnf reinstall -y "$url" || dnf install -y "$url" --exclude "$rpm_exclude_list"
+  crb enable # this is in epel-release, can only install epel-release via url
+}
+
+add_stream_repo() {
+  local url="https://mirror.stream.centos.org/9-stream/$1/$uname_m/os/"
+  dnf config-manager --add-repo "$url"
+  url="http://mirror.centos.org/centos/RPM-GPG-KEY-CentOS-Official"
+  local file="/etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-Official"
+  if [ ! -e $file ]; then
+    curl --retry 8 --retry-all-errors -o $file "$url"
+    rpm --import $file
+  fi
+}
+
+rm_non_ubi_repos() {
+  local dir="/etc/yum.repos.d"
+  rm -rf $dir/mirror.stream.centos.org_9-stream_* $dir/epel*
+}
+
+install_deps() {
   if available dnf; then
-    dnf install -y git curl wget ca-certificates gcc gcc-c++ \
-      gperftools-libs numactl-devel ffmpeg libSM libXext mesa-libGL jq lsof \
-      vim numactl
+    dnf install -y git wget ca-certificates gcc gcc-c++ libSM libXext \
+      mesa-libGL jq lsof vim numactl
+    if is_rhel_based; then
+      add_stream_repo "AppStream"
+      dnf install -y numactl-devel
+      rm_non_ubi_repos
+
+      dnf_install_epel
+      dnf install -y gperftools-libs
+      rm_non_ubi_repos
+    else
+      dnf install -y numactl-devel gperftools-libs
+    fi
+
     dnf -y clean all
     rm -rf /var/cache/*dnf*
   elif available apt-get; then
@@ -25,25 +63,33 @@ install_deps() {
 }
 
 preload_and_ulimit() {
-  local ld_preload_file="libtcmalloc_minimal.so.4"
-  local ld_preload_file_1="/usr/lib/$arch-linux-gnu/$ld_preload_file"
-  local ld_preload_file_2="/usr/lib64/$ld_preload_file"
-  if [ -e "$ld_preload_file_1" ]; then
-    ld_preload_file="$ld_preload_file_1"
-  elif [ -e "$ld_preload_file_2" ]; then
-    ld_preload_file="$ld_preload_file_2"
+  if [ "$containerfile" = "ramalama" ]; then
+    local ld_preload_file="libtcmalloc_minimal.so.4"
+    local ld_preload_file_1="/usr/lib/$uname_m-linux-gnu/$ld_preload_file"
+    local ld_preload_file_2="/usr/lib64/$ld_preload_file"
+    if [ -e "$ld_preload_file_1" ]; then
+      ld_preload_file="$ld_preload_file_1"
+    elif [ -e "$ld_preload_file_2" ]; then
+      ld_preload_file="$ld_preload_file_2"
+    fi
+
+    if [ -e "$ld_preload_file" ]; then
+      echo "LD_PRELOAD=$ld_preload_file" >> /etc/environment
+    fi
+
+    echo 'ulimit -c 0' >> ~/.bashrc
   fi
-
-  if [ -e "$ld_preload_file" ]; then
-    echo "LD_PRELOAD=$ld_preload_file" >> /etc/environment
-  fi
-
-  echo 'ulimit -c 0' >> ~/.bashrc
 }
 
 pip_install() {
-  local url="https://download.pytorch.org/whl/cpu"
-  uv pip install -v -r "$1" --extra-index-url $url
+  local url="https://download.pytorch.org/whl"
+  if [ "$containerfile" = "ramalama" ]; then
+    url="$url/cpu"
+  elif [ "$containerfile" = "cuda" ]; then
+    url="$url/cu$(echo "$CUDA_VERSION" | cut -d. -f1,2 | tr -d '.')"
+  fi
+
+  uv pip install -v -r "$1" --extra-index-url "$url"
 }
 
 git_clone_specific_commit() {
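For the cuda variant, pip_install derives the PyTorch wheel index from CUDA_VERSION: it keeps major.minor and strips the dot, producing a cuXYZ suffix. Worked through for the default version:

    # CUDA_VERSION=12.8.1 -> "cu128", so the extra index resolves to
    # https://download.pytorch.org/whl/cu128
    CUDA_VERSION=12.8.1
    echo "cu$(echo "$CUDA_VERSION" | cut -d. -f1,2 | tr -d '.')"   # prints: cu128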
@@ -55,33 +101,51 @@ git_clone_specific_commit() {
   git reset --hard $commit
 }
 
+pip_install_all() {
+  if [ "$containerfile" = "ramalama" ]; then
+    pip_install requirements/cpu-build.txt
+    pip_install requirements/cpu.txt
+  elif [ "$containerfile" = "cuda" ]; then
+    pip_install requirements/cuda.txt
+  fi
+}
+
 main() {
   set -eux -o pipefail
 
-  install_deps
+  local containerfile=$1
+  if [ "$containerfile" != "ramalama" ] && [ "$containerfile" != "cuda" ]; then
+    echo "First argument must be 'ramalama' or 'cuda'. Got: '$containerfile'"
+    return 1
+  fi
 
-  local arch
-  arch=$(uname -m)
-  preload_and_ulimit
+  local uname_m
+  uname_m=$(uname -m)
 
+  install_deps
+  preload_and_ulimit
   uv venv --python 3.12 --seed "$VIRTUAL_ENV"
   uv pip install --upgrade pip
 
   local vllm_url="https://github.com/vllm-project/vllm"
   local commit="ac9fb732a5c0b8e671f8c91be8b40148282bb14a"
   git_clone_specific_commit
-  if [ "$arch" == "x86_64" ]; then
-    export VLLM_CPU_DISABLE_AVX512="0"
-    export VLLM_CPU_AVX512BF16="0"
-    export VLLM_CPU_AVX512VNNI="0"
-  elif [ "$arch" == "aarch64" ]; then
-    export VLLM_CPU_DISABLE_AVX512="true"
+  if [ "$containerfile" = "ramalama" ]; then
+    export VLLM_TARGET_DEVICE="cpu"
+    if [ "$uname_m" == "x86_64" ]; then
+      export VLLM_CPU_DISABLE_AVX512="0"
+      export VLLM_CPU_AVX512BF16="0"
+      export VLLM_CPU_AVX512VNNI="0"
+    elif [ "$uname_m" == "aarch64" ]; then
+      export VLLM_CPU_DISABLE_AVX512="true"
+    fi
+  elif [ "$containerfile" = "cuda" ]; then
+    export VLLM_TARGET_DEVICE="cuda"
   fi
 
-  pip_install requirements/cpu-build.txt
-  pip_install requirements/cpu.txt
+  pip_install_all
+  MAX_JOBS=2 python3 setup.py install
 
-  MAX_JOBS=2 VLLM_TARGET_DEVICE=cpu python3 setup.py install
   cd -
   rm -rf vllm /root/.cache
 }
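With the new required argument, main validates its mode up front and everything downstream (dependency set, LD_PRELOAD tuning, wheel index, VLLM_TARGET_DEVICE) branches on it. An invocation sketch, assuming the script runs inside the image build with VIRTUAL_ENV set by the Containerfile (and CUDA_VERSION present for the cuda path):

    container-images/scripts/build-vllm.sh "ramalama"   # CPU wheels, VLLM_TARGET_DEVICE=cpu
    container-images/scripts/build-vllm.sh "cuda"       # cuXYZ wheels, VLLM_TARGET_DEVICE=cuda
    container-images/scripts/build-vllm.sh "rocm"       # rejected: prints the error and returns 1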

container-images/scripts/build_llama_and_whisper.sh

Lines changed: 1 addition & 1 deletion

@@ -128,7 +128,7 @@ dnf_install_ffmpeg() {
    add_stream_repo "CRB"
  fi
 
-  if [[ "${ID}" == "openEuler" ]]; then
+  if [ "${ID}" = "openEuler" ]; then
    dnf install -y ffmpeg
  else
    dnf install -y ffmpeg-free
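The switch from the bash-only [[ ... == ... ]] to POSIX [ ... = ... ] keeps this test consistent with the single-bracket comparisons used elsewhere in these scripts. A one-line illustration:

    ID=openEuler
    [ "${ID}" = "openEuler" ] && echo "match"   # valid in any POSIX shell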
