Commit 1b44916
add konflux dockerfile
1 parent 9ee9522 commit 1b44916

File tree: 1 file changed (+211, −0)

Dockerfile.konflux.cuda
Lines changed: 211 additions & 0 deletions

@@ -0,0 +1,211 @@
## Global Args #################################################################
ARG BASE_UBI_IMAGE_TAG=9.5-1742914212
ARG PYTHON_VERSION=3.12
ARG VLLM_VERSION="v0.10.0.2"
ARG VLLM_TGIS_ADAPTER_VERSION="0.8.0"
ARG TORCH_CUDA_ARCH_LIST="7.5 8.0 8.6 8.9 9.0 10.0 12.0+PTX"
ARG max_jobs=2
ARG nvcc_threads=8
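
# Example build invocation (illustrative only; target and tag are not
# prescribed by this file):
#   docker build -f Dockerfile.konflux.cuda \
#     --build-arg CUDA_MAJOR=12 --build-arg CUDA_MINOR=8 \
#     --build-arg max_jobs=4 --build-arg nvcc_threads=8 \
#     --target vllm-openai -t vllm-openai:cuda .
# CUDA_MAJOR/CUDA_MINOR are declared without defaults in the python-install
# stage; 12/8 matches the cuda-*-12-8 packages and the cu128 wheel index below.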
## Base Layer ##################################################################
FROM registry.access.redhat.com/ubi9/ubi-minimal:${BASE_UBI_IMAGE_TAG} AS base
ARG PYTHON_VERSION
ENV PYTHON_VERSION=${PYTHON_VERSION}
RUN microdnf -y update && microdnf install -y --nodocs \
    python${PYTHON_VERSION}-pip python${PYTHON_VERSION}-wheel \
    && microdnf clean all

WORKDIR /workspace

ENV LANG=C.UTF-8 \
    LC_ALL=C.UTF-8

# Some utils for dev purposes - tar is required for `kubectl cp`
RUN microdnf install -y --nodocs \
    which procps findutils tar vim git \
    && microdnf clean all

## Python Installer ############################################################
FROM base AS python-install
ARG PYTHON_VERSION
ARG CUDA_MAJOR
ARG CUDA_MINOR

ENV CUDA_MAJOR=${CUDA_MAJOR}
ENV CUDA_MINOR=${CUDA_MINOR}
ENV UV_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu${CUDA_MAJOR}${CUDA_MINOR}
ENV UV_INDEX_STRATEGY=unsafe-best-match
ENV PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu${CUDA_MAJOR}${CUDA_MINOR}
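# e.g. with CUDA_MAJOR=12 and CUDA_MINOR=8, both index URLs resolve to
# https://download.pytorch.org/whl/cu128, matching the CUDA 12.8 toolchain
# installed in the cuda-base stage below.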

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"
ENV PYTHON_VERSION=${PYTHON_VERSION}
RUN microdnf install -y --nodocs \
    python${PYTHON_VERSION}-devel && \
    python${PYTHON_VERSION} -m venv $VIRTUAL_ENV && \
    pip install --no-cache -U pip wheel uv && \
    microdnf clean all

## CUDA Base ###################################################################
FROM python-install AS cuda-base

RUN curl -Lo /etc/yum.repos.d/cuda-rhel9.repo \
    https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64/cuda-rhel9.repo

ENV CUDA_HOME="/usr/local/cuda" \
    PATH="${CUDA_HOME}/bin:${PATH}"
ENV LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${CUDA_HOME}/lib64/stubs/:${CUDA_HOME}/extras/CUPTI/lib64:${LD_LIBRARY_PATH}"

RUN microdnf install -y \
    cuda-nvcc-12-8 cuda-nvtx-12-8 cuda-libraries-devel-12-8 && \
    microdnf clean all && \
    ln -s ${CUDA_HOME}/lib64/stubs/libcuda.so /usr/lib64/
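# Note: the stub libcuda.so only satisfies the linker at image build time on
# hosts without a GPU driver; at runtime the real libcuda.so is provided by
# the host's NVIDIA driver.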
## Python cuda base #################################################################
FROM cuda-base AS python-cuda-base

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH="$VIRTUAL_ENV/bin:$PATH"

# install cuda and common dependencies
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=requirements/common.txt,target=requirements/common.txt \
    --mount=type=bind,source=requirements/cuda.txt,target=requirements/cuda.txt \
    uv pip install \
        -r requirements/cuda.txt
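# The bind mounts expose the requirements files to this RUN step without
# copying them into an image layer; common.txt is mounted as well since
# requirements/cuda.txt is assumed to pull it in via `-r common.txt`.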
## Builder #####################################################################
FROM python-cuda-base AS build

# install build dependencies
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=requirements/build.txt,target=requirements/build.txt \
    uv pip install -r requirements/build.txt

# install compiler cache to speed up compilation, leveraging local or remote caching
# git is required for the cutlass kernels
RUN rpm -ivh https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
    rpm -ql epel-release && \
    microdnf install -y --nodocs git ccache && \
    microdnf clean all

COPY . .

ARG TORCH_CUDA_ARCH_LIST
ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST

# max jobs used by Ninja to build extensions
ARG max_jobs
ENV MAX_JOBS=${max_jobs}
# number of threads used by nvcc
ARG nvcc_threads
ENV NVCC_THREADS=$nvcc_threads
# make sure punica kernels are built (for LoRA)
ENV VLLM_INSTALL_PUNICA_KERNELS=1
ARG VLLM_VERSION

# Make sure the cuda environment is in the PATH
ENV PATH=/usr/local/cuda/bin:$PATH

ENV CCACHE_DIR=/root/.cache/ccache
RUN --mount=type=cache,target=/root/.cache/ccache \
    --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,src=.git,target=/workspace/.git \
    env CFLAGS="-march=haswell" \
        CXXFLAGS="$CFLAGS $CXXFLAGS" \
        CMAKE_BUILD_TYPE=Release \
        SETUPTOOLS_SCM_PRETEND_VERSION="${VLLM_VERSION}" \
        python3 setup.py bdist_wheel --dist-dir=dist
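# The cache mounts above persist ccache/pip/uv state across builds on the
# same builder, so incremental rebuilds skip unchanged compilation units.
# SETUPTOOLS_SCM_PRETEND_VERSION stamps the wheel with VLLM_VERSION instead
# of a version derived from the mounted .git metadata.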
## Release #####################################################################
FROM python-install AS vllm-openai
ARG PYTHON_VERSION

WORKDIR /workspace

ENV VIRTUAL_ENV=/opt/vllm
ENV PATH=$VIRTUAL_ENV/bin:$PATH

# force using the python venv's cuda runtime libraries
ENV LD_LIBRARY_PATH="${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/nvidia/cuda_nvrtc/lib:${LD_LIBRARY_PATH}"
ENV LD_LIBRARY_PATH="${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/nvidia/cuda_runtime/lib:${LD_LIBRARY_PATH}"
ENV LD_LIBRARY_PATH="${VIRTUAL_ENV}/lib/python${PYTHON_VERSION}/site-packages/nvidia/nvtx/lib:${LD_LIBRARY_PATH}"

# Triton needs a CC compiler
RUN microdnf install -y --nodocs gcc \
    rsync \
    && microdnf clean all

# install the vllm wheel first, so that torch and the other dependencies are pulled in with it
# the nccl install is temporary until torch is upgraded: https://github.com/vllm-project/vllm/issues/19166
RUN --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
    --mount=type=cache,target=/root/.cache/uv \
    uv pip install \
        --extra-index-url="https://download.pytorch.org/whl/cu128" --index-strategy='unsafe-best-match' \
        "$(echo dist/*.whl)[audio,video,tensorizer]" --verbose \
        "https://storage.googleapis.com/nm-public-pypi/dist/flashinfer_python-0.2.8-cp39-abi3-linux_x86_64.whl" \
    && uv pip install -U nvidia-nccl-cu12==2.26.5 blobfile
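# `$(echo dist/*.whl)` expands the built wheel's path so the
# [audio,video,tensorizer] extras can be appended; this assumes exactly one
# wheel is present in dist/.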
ENV HF_HUB_OFFLINE=1 \
    HOME=/home/vllm \
    # Allow the requested max length to exceed what is extracted from the
    # config.json
    # see: https://github.com/vllm-project/vllm/pull/7080
    VLLM_USAGE_SOURCE=production-docker-image \
    VLLM_WORKER_MULTIPROC_METHOD=fork \
    VLLM_NO_USAGE_STATS=1 \
    OUTLINES_CACHE_DIR=/tmp/outlines \
    NUMBA_CACHE_DIR=/tmp/numba \
    TRITON_CACHE_DIR=/tmp/triton \
    # Set up NCCL monitoring with torch
    # For tensor-parallel workloads, this monitors for NCCL deadlocks when
    # one rank dies, and tears down the NCCL process groups so that the driver
    # can cleanly exit.
    TORCH_NCCL_HEARTBEAT_TIMEOUT_SEC=15 \
    TORCH_NCCL_DUMP_ON_TIMEOUT=0
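# The outlines/numba/triton cache dirs point at /tmp so they stay writable
# for the non-root user set up below.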
# setup non-root user for OpenShift
RUN umask 002 && \
    useradd --uid 2000 --gid 0 vllm && \
    mkdir -p /home/vllm && \
    chown -R vllm /home/vllm && \
    chmod g+rwx /home/vllm

COPY LICENSE /licenses/vllm.md
COPY examples/*.jinja /app/data/template/
COPY examples/*.jinja /opt/app-root/template/
RUN chown -R vllm /opt/app-root/template && chmod -R g+r /opt/app-root/template

USER 2000
WORKDIR /home/vllm

ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
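
# Example run (illustrative; the image tag and model are deployment choices,
# and trailing args are passed to the api_server entrypoint):
#   docker run --gpus all -p 8000:8000 vllm-openai:cuda --model <model-id>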
FROM vllm-openai AS vllm-grpc-adapter

USER root

ARG VLLM_TGIS_ADAPTER_VERSION
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
    HOME=/root uv pip install \
        --extra-index-url="https://download.pytorch.org/whl/cu128" --index-strategy='unsafe-best-match' \
        "$(echo /workspace/dist/*.whl)[audio,video,tensorizer]" \
        vllm-tgis-adapter==${VLLM_TGIS_ADAPTER_VERSION}

# Upgrade NCCL back to the required version after the vllm-tgis-adapter installation
RUN --mount=type=cache,target=/root/.cache/uv \
    HOME=/root uv pip install -U nvidia-nccl-cu12==2.26.5

ENV GRPC_PORT=8033 \
    PORT=8000 \
    # As an optimization, vLLM disables logprobs when using spec decoding by
    # default, but this would be unexpected for users of a hosted model that
    # happens to use spec decoding
    # see: https://github.com/vllm-project/vllm/pull/6485
    DISABLE_LOGPROBS_DURING_SPEC_DECODING=false

USER 2000
ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]
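
# This variant is selected with `--target vllm-grpc-adapter` at build time;
# GRPC_PORT (8033) and PORT (8000) set above configure the adapter's listeners.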
