
Commit d9bb9be

Authored by mht-sharma and Narsil
Add llama4 (#3145)
* initial changes
* Add support for other vlm
* cleanup comment
* Improve attn_implementation
* Add comments for support of models
* add model
* add model
* fixes and improvements
* update docker
* Add cache position
* Add tests
* remove redundant changes
* remove tr version
* Upgrade doc + fix linting.
* Fixing the CI.

Co-authored-by: Nicolas Patry <[email protected]>
Parent commit: 3d059f9

19 files changed: +1893 −61 lines

Dockerfile

Lines changed: 4 additions & 1 deletion
```diff
@@ -65,7 +65,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-ins
 COPY --from=ghcr.io/astral-sh/uv:0.5.31 /uv /uvx /bin/
 ENV PATH="$PATH:/root/.local/bin"
 RUN uv python install ${PYTHON_VERSION}
-RUN uv venv --python ${PYTHON_VERSION} && uv pip install torch==${PYTORCH_VERSION} pip setuptools packaging
+RUN uv venv --python ${PYTHON_VERSION} && uv pip install torch==${PYTORCH_VERSION} torchvision pip setuptools packaging
 ENV VIRTUAL_ENV=/usr/src/.venv/
 ENV PATH="$PATH:/usr/src/.venv/bin/"
@@ -193,6 +193,9 @@ RUN cd server && \
     pwd && \
     text-generation-server --help
 
+
+# This shouldn't be necessary.
+# RUN uv pip install torchvision --no-deps
 # Copy build artifacts from flash attention builder
 COPY --from=flash-att-builder /usr/src/flash-attention/build/lib.linux-x86_64-cpython-311 /usr/src/.venv/lib/python3.11/site-packages
 COPY --from=flash-att-builder /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-311 /usr/src/.venv/lib/python3.11/site-packages
```
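The Dockerfile change above installs `torchvision` alongside `torch` in the uv-managed virtual environment. A minimal sketch of a sanity check that the package actually landed in the environment (generic Python, not specific to this image):

```python
import importlib.util

# find_spec returns None when a package is not installed,
# so this checks availability without importing torchvision itself.
present = importlib.util.find_spec("torchvision") is not None
print("torchvision installed:", present)
```

Running this inside the built image should print `True`; running it in an environment without torchvision prints `False`.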

docs/source/supported_models.md

Lines changed: 1 addition & 0 deletions
```diff
@@ -9,6 +9,7 @@ Text Generation Inference enables serving optimized models. The following sectio
 - [Idefics 3](https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3) (Multimodal)
 - [Llava Next (1.6)](https://huggingface.co/llava-hf/llava-v1.6-vicuna-13b-hf) (Multimodal)
 - [Llama](https://huggingface.co/collections/meta-llama/llama-31-669fc079a0c406a149a5738f)
+- [Llama4](https://huggingface.co/collections/meta-llama/llama-31-669fc079a0c406a149a5738f)
 - [Phi 3](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct)
 - [Granite](https://huggingface.co/ibm-granite/granite-3.0-8b-instruct)
 - [Gemma](https://huggingface.co/google/gemma-7b)
```
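With the new entry in the supported-models list, a Llama 4 checkpoint can be served through the standard TGI launcher. A hedged sketch of the invocation (the model id and shard count below are assumptions for illustration, not taken from this commit):

```shell
# Hypothetical launch; substitute a real Llama 4 model id and your GPU count.
text-generation-launcher \
    --model-id meta-llama/Llama-4-Scout-17B-16E-Instruct \
    --num-shard 8
```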
