
Commit fd8006c

Add vllm to cpu inferencing Containerfile
To be built upon the "ramalama" image.

Signed-off-by: Eric Curtin <[email protected]>
1 parent 53e38de commit fd8006c
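
As the message notes, this Containerfile layers vLLM on top of the published ramalama base image. A minimal build sketch, assuming podman, with the output tag and Containerfile path chosen here purely for illustration:

# Build the CPU vLLM layer on top of quay.io/ramalama/ramalama.
# Tag and file path are illustrative, not part of this commit.
podman build -t ramalama-vllm-cpu -f Containerfile .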

1 file changed: 101 additions & 0 deletions
@@ -0,0 +1,101 @@
FROM quay.io/ramalama/ramalama

WORKDIR /workspace/

# uv installs to /root/.local/bin; the vLLM build goes into a uv-managed venv.
ENV PATH="/root/.local/bin:$PATH"
ENV VIRTUAL_ENV="/opt/venv"
ENV UV_PYTHON_INSTALL_DIR="/opt/uv/python"
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

ENV UV_HTTP_TIMEOUT=500

ENV UV_INDEX_STRATEGY="unsafe-best-match"
ENV UV_LINK_MODE="copy"

# Install OS-level build and runtime dependencies, then the uv tool.
RUN <<EOF
#!/bin/bash

available() {
  command -v "$1" >/dev/null
}

main() {
  set -eux -o pipefail

  if available dnf; then
    dnf install -y git curl wget ca-certificates gcc gcc-c++ \
      gperftools-libs numactl-devel ffmpeg libSM libXext mesa-libGL jq lsof \
      vim numactl
    dnf -y clean all
    rm -rf /var/cache/*dnf*
  elif available apt-get; then
    apt-get update -y
    apt-get install -y --no-install-recommends git curl wget ca-certificates \
      gcc g++ libtcmalloc-minimal4 libnuma-dev ffmpeg libsm6 libxext6 libgl1 \
      jq lsof vim numactl
    rm -rf /var/lib/apt/lists/*
  fi

  curl -LsSf https://astral.sh/uv/0.7.21/install.sh | bash
}

main "$@"
EOF

# Build and install vLLM from source for CPU-only inference.
RUN <<EOF
#!/bin/bash

# Preload tcmalloc when present and disable core dumps.
# Relies on $arch being set by the caller (main).
preload_and_ulimit() {
  local ld_preload_file="libtcmalloc_minimal.so.4"
  local ld_preload_file_1="/usr/lib/$arch-linux-gnu/$ld_preload_file"
  local ld_preload_file_2="/usr/lib64/$ld_preload_file"
  if [ -e "$ld_preload_file_1" ]; then
    ld_preload_file="$ld_preload_file_1"
  elif [ -e "$ld_preload_file_2" ]; then
    ld_preload_file="$ld_preload_file_2"
  fi

  if [ -e "$ld_preload_file" ]; then
    echo "LD_PRELOAD=$ld_preload_file" >> /etc/environment
  fi

  echo 'ulimit -c 0' >> ~/.bashrc
}

# Install a requirements file, pulling CPU-only torch wheels.
pip_install() {
  local url="https://download.pytorch.org/whl/cpu"
  uv pip install -v -r "$1" --extra-index-url "$url"
}

main() {
  set -eux -o pipefail

  local arch=$(uname -m)
  preload_and_ulimit

  uv venv --python 3.12 --seed ${VIRTUAL_ENV}
  uv pip install --upgrade pip

  local v_vn="0.9.2"
  local vllm_url="https://github.com/vllm-project/vllm"
  curl -LsSf "$vllm_url/releases/download/v$v_vn/vllm-$v_vn.tar.gz" | tar xz
  cd "vllm-$v_vn"
  if [ "$arch" == "x86_64" ]; then
    export VLLM_CPU_DISABLE_AVX512="0"
    export VLLM_CPU_AVX512BF16="0"
    export VLLM_CPU_AVX512VNNI="0"
  elif [ "$arch" == "aarch64" ]; then
    export VLLM_CPU_DISABLE_AVX512="true"
  fi

  pip_install requirements/cpu-build.txt
  pip_install requirements/cpu.txt

  MAX_JOBS=2 VLLM_TARGET_DEVICE=cpu python3 setup.py install
  cd -
  rm -rf "vllm-$v_vn" /root/.cache
}

main "$@"
EOF
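
Once built, vllm is installed into /opt/venv, which the ENV lines put on PATH, so the image can serve models on CPU. A rough smoke test, assuming the illustrative tag above and an example model name (neither is pinned by this commit; depending on the base image's entrypoint, --entrypoint may also be needed):

# Start an OpenAI-compatible vLLM server on CPU (model name is an example).
podman run --rm -p 8000:8000 ramalama-vllm-cpu \
  vllm serve Qwen/Qwen2.5-0.5B-Instruct --host 0.0.0.0 --port 8000

# From the host, list the served models via the OpenAI-compatible API.
curl -s http://localhost:8000/v1/models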
