
Commit de832b3

Merge pull request #207 from vajain-rhods/fix_dockerfile
Fix dockerfile
2 parents: 12fe62a + 77d9234

1 file changed

Dockerfile.konflux.gaudi

Lines changed: 241 additions & 22 deletions
@@ -1,20 +1,231 @@
-## Global Args #################################################################
-ARG BASE_IMAGE=vault.habana.ai/gaudi-docker/1.21.3/rhel9.4/habanalabs/pytorch-installer-2.6.0:latest
-ARG VLLM_VERSION="v0.8.5"
-ARG VLLM_TGIS_ADAPTER_VERSION="0.7.1"
-ARG max_jobs=6
-ARG nvcc_threads=2
+# Copyright (c) 2025 Habana Labs, Ltd.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+######### cloned repo layer ########
+ARG BASE_IMAGE
+FROM ${BASE_IMAGE} as clone_repo
+ARG REPO=https://github.com/HabanaAI/Setup_and_Install.git
+ARG VERSION
+
+# Minimal deps to clone over HTTPS
+RUN dnf -y install git ca-certificates && update-ca-trust && dnf clean all
+WORKDIR /src/sai
+
+# This is to get the install script needed by the pytorch layer
+RUN git clone --branch r"${VERSION}" --single-branch --depth 1 "${REPO}" .
+
+# Done
+
+######### base layer ########
+ARG BASE_IMAGE
+FROM ${BASE_IMAGE} as base
+ARG ARTIFACTORY_URL
+ARG VERSION
+ARG REVISION
+
+# for RHEL certification
+LABEL vendor="Habanalabs Ltd."
+LABEL release="${VERSION}-${REVISION}"
+
+COPY --from=clone_repo /src/sai/dockerfiles/base/LICENSE /licenses/
+
+RUN dnf -y update && dnf install -y \
+    python3-dnf-plugin-versionlock && \
+    dnf versionlock add redhat-release* && \
+    dnf clean all
+
+# This is to prevent a conflict between a 9.5 & 9.6 version
+RUN rpm -e --nodeps openssl-fips-provider-so
+
+RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
+    dnf clean all
+
+RUN echo "[BaseOS]" > /etc/yum.repos.d/CentOS-Linux-BaseOS.repo && \
+    echo "name=CentOS Linux 9 - BaseOS" >> /etc/yum.repos.d/CentOS-Linux-BaseOS.repo && \
+    echo "baseurl=https://mirror.stream.centos.org/9-stream/BaseOS/x86_64/os" >> /etc/yum.repos.d/CentOS-Linux-BaseOS.repo && \
+    echo "gpgkey=https://www.centos.org/keys/RPM-GPG-KEY-CentOS-Official-SHA256" >> /etc/yum.repos.d/CentOS-Linux-BaseOS.repo && \
+    echo "gpgcheck=1" >> /etc/yum.repos.d/CentOS-Linux-BaseOS.repo
+
+RUN echo "[centos9]" > /etc/yum.repos.d/CentOS-Linux-AppStream.repo && \
+    echo "name=CentOS Linux 9 - AppStream" >> /etc/yum.repos.d/CentOS-Linux-AppStream.repo && \
+    echo "baseurl=https://mirror.stream.centos.org/9-stream/AppStream/x86_64/os" >> /etc/yum.repos.d/CentOS-Linux-AppStream.repo && \
+    echo "gpgkey=https://www.centos.org/keys/RPM-GPG-KEY-CentOS-Official-SHA256" >> /etc/yum.repos.d/CentOS-Linux-AppStream.repo && \
+    echo "gpgcheck=1" >> /etc/yum.repos.d/CentOS-Linux-AppStream.repo
+
+RUN echo "[CRB]" > /etc/yum.repos.d/CentOS-Linux-CRB.repo && \
+    echo "name=CentOS Linux 9 - CRB" >> /etc/yum.repos.d/CentOS-Linux-CRB.repo && \
+    echo "baseurl=https://mirror.stream.centos.org/9-stream/CRB/x86_64/os" >> /etc/yum.repos.d/CentOS-Linux-CRB.repo && \
+    echo "gpgkey=https://www.centos.org/keys/RPM-GPG-KEY-CentOS-Official-SHA256" >> /etc/yum.repos.d/CentOS-Linux-CRB.repo && \
+    echo "gpgcheck=1" >> /etc/yum.repos.d/CentOS-Linux-CRB.repo
+
+RUN dnf install -y --setopt=install_weak_deps=False \
+    bzip2 \
+    bzip2-devel \
+    clang \
+    cmake3 \
+    cpp \
+    ffmpeg-free \
+    gcc \
+    gcc-c++ \
+    git \
+    glibc \
+    glibc-devel \
+    glibc-headers \
+    iproute \
+    jemalloc \
+    libarchive \
+    libffi-devel \
+    libjpeg-devel \
+    libksba \
+    llvm \
+    lsb_release \
+    lsof \
+    mesa-libGL \
+    openssh-clients \
+    openssh-server \
+    openssl \
+    openssl-devel \
+    perl-Net-SSLeay \
+    python3-devel \
+    python3.12 \
+    python3.12-devel \
+    python3.12-pip \
+    unzip \
+    wget \
+    zlib-devel \
+    ibacm \
+    infiniband-diags \
+    libibumad \
+    libibverbs \
+    libibverbs-utils \
+    librdmacm \
+    librdmacm-utils \
+    python3-pyverbs \
+    rdma-core \
+    rdma-core-devel && \
+    dnf clean all && \
+    rm -f /etc/ssh/ssh_host_*_key* && \
+    ln -s /usr/bin/pip3.12 /usr/bin/pip
+
+RUN alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 2 && \
+    alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1 && \
+    alternatives --set python3 /usr/bin/python3.12
+
+ENV PIP_DISABLE_PIP_VERSION_CHECK=1
+ENV PIP_NO_CACHE_DIR=on
+
+RUN python3 -m pip install setuptools==79.0.1 wheel && \
+    python3 -m pip install --upgrade Jinja2 protobuf urllib3 requests
+
+ENV OPENMPI_VERSION=4.1.6
+ENV OPENMPI_SHA256="44da277b8cdc234e71c62473305a09d63f4dcca292ca40335aab7c4bf0e6a566"
+ENV MPI_ROOT=/opt/habanalabs/openmpi
+ENV LD_LIBRARY_PATH=${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH
+ENV PATH=${MPI_ROOT}/bin:$PATH
+ENV OPAL_PREFIX=${MPI_ROOT}
+ENV MPICC=${MPI_ROOT}/bin/mpicc
+ENV RDMAV_FORK_SAFE=1
+ENV FI_EFA_USE_DEVICE_RDMA=0
+ENV OMPI_MCA_btl=^openib
 
-## Base Layer ##################################################################
-FROM ${BASE_IMAGE} as habana-base
+RUN echo "[habanalabs]" > /etc/yum.repos.d/habanalabs.repo && \
+    echo "name=Habana RH9 Linux repo" >> /etc/yum.repos.d/habanalabs.repo && \
+    echo "baseurl=https://${ARTIFACTORY_URL}/artifactory/rhel/9/9.6" >> /etc/yum.repos.d/habanalabs.repo && \
+    echo "gpgkey=https://${ARTIFACTORY_URL}/artifactory/rhel/9/9.6/repodata/repomd.xml.key" >> /etc/yum.repos.d/habanalabs.repo && \
+    echo "gpgcheck=1" >> /etc/yum.repos.d/habanalabs.repo
+
+RUN rpm --import "https://${ARTIFACTORY_URL}/artifactory/gaudi-general/keyPairs/primary/public" && dnf install -y --setopt=install_weak_deps=False \
+    habanalabs-rdma-core-"$VERSION"-"$REVISION".el9 \
+    habanalabs-thunk-"$VERSION"-"$REVISION".el9 \
+    habanalabs-firmware-tools-"$VERSION"-"$REVISION".el9 \
+    habanalabs-graph-"$VERSION"-"$REVISION".el9 && \
+    dnf clean all && \
+    chmod +t /var/log/habana_logs && \
+    rm -f /etc/yum.repos.d/habanalabs.repo
+
+ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src
+ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib
+
+RUN set -e; \
+    wget -q -O /tmp/openmpi-${OPENMPI_VERSION}.tar.gz \
+        https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${OPENMPI_VERSION}.tar.gz && \
+    SUM="$(sha256sum /tmp/openmpi-${OPENMPI_VERSION}.tar.gz | cut -d ' ' -f1)"; \
+    if [ "$SUM" != "$OPENMPI_SHA256" ]; then \
+        echo "Open MPI tarball mismatch detected (sha256=$SUM)."; \
+        exit 1; \
+    fi; \
+    tar -xzf /tmp/openmpi-${OPENMPI_VERSION}.tar.gz -C /tmp && \
+    cd /tmp/openmpi-${OPENMPI_VERSION} && \
+    ./configure --prefix=${MPI_ROOT} --with-verbs && \
+    make -j"$(nproc)" && make install && \
+    cd / && rm -rf /tmp/openmpi-${OPENMPI_VERSION}.tar.gz /tmp/openmpi-${OPENMPI_VERSION}
+
+RUN ln -s /usr/bin/python3 /usr/bin/python && \
+    python3 -m pip install habana_media_loader=="${VERSION}"."${REVISION}"
+
+# SSH configuration necessary to support mpi-operator v2
+# Convert ENTRYPOINTs into scripts so that sshd can be started
+RUN mkdir -p /var/run/sshd && \
+    sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config && \
+    sed -i 's/#\(ForwardAgent \).*/\1yes/g' /etc/ssh/ssh_config && \
+    echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \
+    sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config
+COPY --chmod=0755 *-entrypoint.sh /usr/bin/
+
+ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so
+ENV HABANA_LOGS=/var/log/habana_logs/
+ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw
+ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins
+
+######## pytorch layer ########
+FROM base as pytorch
+ARG PT_VERSION
+ARG VERSION
+ARG REVISION
+ARG ARTIFACTORY_URL
+ARG TORCH_TYPE
+ARG BASE_NAME
+
+LABEL name="PyTorch Installer"
+LABEL summary="Habanalabs PyTorch installer layer for RHEL9.6"
+LABEL description="Image with pre installed Habanalabs packages for PyTorch"
+
+RUN echo "/usr/lib/habanalabs" > $(python3 -c "import sysconfig; print(sysconfig.get_path('platlib'))")/habanalabs-graph.pth
+
+RUN dnf install --nobest --nodocs --setopt=install_weak_deps=false --allowerasing -y \
+    cairo-devel \
+    gperftools-devel \
+    iproute \
+    jq \
+    lapack-devel \
+    numactl \
+    numactl-devel \
+    openblas-devel \
+    which \
+    zlib-devel && \
+    dnf clean all
+
+COPY --from=clone_repo /src/sai/dockerfiles/pytorch/install_packages.sh ./install_packages.sh
+RUN ./install_packages.sh && rm -f install_packages.sh && /sbin/ldconfig
+
+# Set LD_PRELOAD after all required installations to
+# avoid warnings during docker creation
+ENV LD_PRELOAD=/usr/lib64/libtcmalloc.so.4
+ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768
+
+######## vllm Layer ########
+
+######## Base Layer ###########################################################
+FROM pytorch as habana-base
 
 USER root
 
 WORKDIR /workspace
 
 ENV PIP_NO_CACHE_DIR=0
 
-## Python Habana base #################################################################
+######## Python Habana base ###################################################
 FROM habana-base as python-habana-base
 
 COPY requirements/common.txt requirements/common.txt
@@ -25,7 +236,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
     pip install \
         -r requirements/hpu.txt
 
-## Builder #####################################################################
+######## Builder ##############################################################
 FROM python-habana-base AS build
 
 # install build dependencies
@@ -41,13 +252,10 @@ COPY pyproject.toml pyproject.toml
 COPY vllm vllm
 
 # max jobs used by Ninja to build extensions
-ARG max_jobs
+ARG max_jobs=6
 ENV MAX_JOBS=${max_jobs}
-# number of threads used by nvcc
-ARG nvcc_threads
-ENV NVCC_THREADS=$nvcc_threads
 
-ARG VLLM_VERSION
+ARG VLLM_VERSION="v0.8.5"
 # # make sure punica kernels are built (for LoRA)
 # HPU currently doesn't support LoRA
 # ENV VLLM_INSTALL_PUNICA_KERNELS=1
@@ -64,7 +272,7 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
     SETUPTOOLS_SCM_PRETEND_VERSION="${VLLM_VERSION}" \
     python3 setup.py bdist_wheel --dist-dir=dist
 
-## Release #####################################################################
+######### Release #############################################################
 FROM habana-base AS vllm-openai
 
 WORKDIR /workspace
@@ -93,19 +301,28 @@ RUN umask 002 && \
 COPY LICENSE /licenses/vllm.md
 COPY examples/*.jinja /app/data/template/
 
-USER 2000
+#USER 2000
+# Note: staying root because entrypoint starts sshd and then changes to vllm
 WORKDIR /home/vllm
 
-ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
+ENTRYPOINT ["/usr/bin/vllm-entrypoint.sh"]
 
-## vllm-grpc-adapter #####################################################################
+######## vllm-grpc-adapter ####################################################
 FROM vllm-openai as vllm-grpc-adapter
 
 USER root
 
-ARG VLLM_TGIS_ADAPTER_VERSION
+ARG VLLM_TGIS_ADAPTER_VERSION="0.7.1"
 RUN --mount=type=cache,target=/root/.cache/pip \
     --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
+    pip install \
+        prometheus_client==0.21.1 \
+        grpcio==1.70.0 \
+        grpcio-health-checking==1.70.0 \
+        grpcio-reflection==1.70.0 \
+        accelerate==1.7.0 \
+        hf-transfer==0.1.9 \
+        cachetools~=5.5 && \
     pip install vllm-tgis-adapter==${VLLM_TGIS_ADAPTER_VERSION} --no-deps
 
 ENV GRPC_PORT=8033 \
@@ -116,8 +333,9 @@ ENV GRPC_PORT=8033 \
     # see: https://github.com/vllm-project/vllm/pull/6485
     DISABLE_LOGPROBS_DURING_SPEC_DECODING=false
 
-USER 2000
-ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]
+#USER 2000
+# Note: staying root because entrypoint starts sshd and then changes to vllm
+ENTRYPOINT ["/usr/bin/tgis-entrypoint.sh"]
 
 LABEL name="rhoai/odh-vllm-gaudi-rhel9" \
     com.redhat.component="odh-vllm-gaudi-rhel9" \
@@ -127,3 +345,4 @@ LABEL name="rhoai/odh-vllm-gaudi-rhel9" \
     summary="GPU-accelerated vLLM build using Intel Gaudi (Habana) for high-performance inference." \
     com.redhat.license_terms="https://www.redhat.com/licenses/Red_Hat_Standard_EULA_20191108.pdf" \
     vendor="Red Hat, Inc."
+
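Note: the rewritten Dockerfile drops the hard-coded `BASE_IMAGE` default and the old `nvcc_threads` arg, and instead expects the Gaudi base image, Habana Artifactory host, and driver VERSION/REVISION to be supplied at build time. A minimal sketch of a build invocation is below; every argument value (image reference, ARTIFACTORY_URL, VERSION, REVISION, tag) is a placeholder, not a value taken from this commit, while the argument names and stage name come from the diff itself.

```shell
# Sketch only: argument values are placeholders, not from this PR.
docker build \
  -f Dockerfile.konflux.gaudi \
  --target vllm-grpc-adapter \
  --build-arg BASE_IMAGE=registry.example.com/gaudi/rhel9-base:latest \
  --build-arg ARTIFACTORY_URL=vault.habana.ai \
  --build-arg VERSION=1.21.3 \
  --build-arg REVISION=1 \
  --build-arg VLLM_VERSION=v0.8.5 \
  --build-arg VLLM_TGIS_ADAPTER_VERSION=0.7.1 \
  -t vllm-gaudi:dev .
```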