1- ## Global Args #################################################################
2- ARG BASE_IMAGE=vault.habana.ai/gaudi-docker/1.21.3/rhel9.4/habanalabs/pytorch-installer-2.6.0:latest
3- ARG VLLM_VERSION="v0.8.5"
4- ARG VLLM_TGIS_ADAPTER_VERSION="0.7.1"
5- ARG max_jobs=6
6- ARG nvcc_threads=2
1+ # Copyright (c) 2025 Habana Labs, Ltd.
2+ #
3+ # SPDX-License-Identifier: Apache-2.0
4+ #
5+ ######### cloned repo layer ########
6+ ARG BASE_IMAGE
7+ FROM ${BASE_IMAGE} as clone_repo
8+ ARG REPO=https://github.com/HabanaAI/Setup_and_Install.git
9+ ARG VERSION
10+
11+ # Minimal deps to clone over HTTPS
12+ RUN dnf -y install git ca-certificates && update-ca-trust && dnf clean all
13+ WORKDIR /src/sai
14+
15+ # This is to get the install script needed by the pytorch layer
16+ RUN git clone --branch r"${VERSION}" --single-branch --depth 1 "${REPO}" .
17+
18+ # Done
19+
20+ ######### base layer ########
21+ ARG BASE_IMAGE
22+ FROM ${BASE_IMAGE} as base
23+ ARG ARTIFACTORY_URL
24+ ARG VERSION
25+ ARG REVISION
26+
27+ # for RHEL certification
28+ LABEL vendor="Habanalabs Ltd."
29+ LABEL release="${VERSION}-${REVISION}"
30+
31+ COPY --from=clone_repo /src/sai/dockerfiles/base/LICENSE /licenses/
32+
33+ RUN dnf -y update && dnf install -y \
34+ python3-dnf-plugin-versionlock && \
35+ dnf versionlock add redhat-release* && \
36+ dnf clean all
37+
38+ # This is to prevent a conflict between a 9.5 & 9.6 version
39+ RUN rpm -e --nodeps openssl-fips-provider-so
40+
41+ RUN dnf install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-9.noarch.rpm && \
42+ dnf clean all
43+
44+ RUN echo "[BaseOS]" > /etc/yum.repos.d/CentOS-Linux-BaseOS.repo && \
45+ echo "name=CentOS Linux 9 - BaseOS" >> /etc/yum.repos.d/CentOS-Linux-BaseOS.repo && \
46+ echo "baseurl=https://mirror.stream.centos.org/9-stream/BaseOS/x86_64/os" >> /etc/yum.repos.d/CentOS-Linux-BaseOS.repo && \
47+ echo "gpgkey=https://www.centos.org/keys/RPM-GPG-KEY-CentOS-Official-SHA256" >> /etc/yum.repos.d/CentOS-Linux-BaseOS.repo && \
48+ echo "gpgcheck=1" >> /etc/yum.repos.d/CentOS-Linux-BaseOS.repo
49+
50+ RUN echo "[centos9]" > /etc/yum.repos.d/CentOS-Linux-AppStream.repo && \
51+ echo "name=CentOS Linux 9 - AppStream" >> /etc/yum.repos.d/CentOS-Linux-AppStream.repo && \
52+ echo "baseurl=https://mirror.stream.centos.org/9-stream/AppStream/x86_64/os" >> /etc/yum.repos.d/CentOS-Linux-AppStream.repo && \
53+ echo "gpgkey=https://www.centos.org/keys/RPM-GPG-KEY-CentOS-Official-SHA256" >> /etc/yum.repos.d/CentOS-Linux-AppStream.repo && \
54+ echo "gpgcheck=1" >> /etc/yum.repos.d/CentOS-Linux-AppStream.repo
55+
56+ RUN echo "[CRB]" > /etc/yum.repos.d/CentOS-Linux-CRB.repo && \
57+ echo "name=CentOS Linux 9 - CRB" >> /etc/yum.repos.d/CentOS-Linux-CRB.repo && \
58+ echo "baseurl=https://mirror.stream.centos.org/9-stream/CRB/x86_64/os" >> /etc/yum.repos.d/CentOS-Linux-CRB.repo && \
59+ echo "gpgkey=https://www.centos.org/keys/RPM-GPG-KEY-CentOS-Official-SHA256" >> /etc/yum.repos.d/CentOS-Linux-CRB.repo && \
60+ echo "gpgcheck=1" >> /etc/yum.repos.d/CentOS-Linux-CRB.repo
61+
62+ RUN dnf install -y --setopt=install_weak_deps=False \
63+ bzip2 \
64+ bzip2-devel \
65+ clang \
66+ cmake3 \
67+ cpp \
68+ ffmpeg-free \
69+ gcc \
70+ gcc-c++ \
71+ git \
72+ glibc \
73+ glibc-devel \
74+ glibc-headers \
75+ iproute \
76+ jemalloc \
77+ libarchive \
78+ libffi-devel \
79+ libjpeg-devel \
80+ libksba \
81+ llvm \
82+ lsb_release \
83+ lsof \
84+ mesa-libGL \
85+ openssh-clients \
86+ openssh-server \
87+ openssl \
88+ openssl-devel \
89+ perl-Net-SSLeay \
90+ python3-devel \
91+ python3.12 \
92+ python3.12-devel \
93+ python3.12-pip \
94+ unzip \
95+ wget \
96+ zlib-devel \
97+ ibacm \
98+ infiniband-diags \
99+ libibumad \
100+ libibverbs \
101+ libibverbs-utils \
102+ librdmacm \
103+ librdmacm-utils \
104+ python3-pyverbs \
105+ rdma-core \
106+ rdma-core-devel && \
107+ dnf clean all && \
108+ rm -f /etc/ssh/ssh_host_*_key* && \
109+ ln -s /usr/bin/pip3.12 /usr/bin/pip
110+
111+ RUN alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 2 && \
112+ alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 1 && \
113+ alternatives --set python3 /usr/bin/python3.12
114+
115+ ENV PIP_DISABLE_PIP_VERSION_CHECK=1
116+ ENV PIP_NO_CACHE_DIR=on
117+
118+ RUN python3 -m pip install setuptools==79.0.1 wheel && \
119+ python3 -m pip install --upgrade Jinja2 protobuf urllib3 requests
120+
121+ ENV OPENMPI_VERSION=4.1.6
122+ ENV OPENMPI_SHA256="44da277b8cdc234e71c62473305a09d63f4dcca292ca40335aab7c4bf0e6a566"
123+ ENV MPI_ROOT=/opt/habanalabs/openmpi
124+ ENV LD_LIBRARY_PATH=${MPI_ROOT}/lib:/usr/lib/habanalabs:$LD_LIBRARY_PATH
125+ ENV PATH=${MPI_ROOT}/bin:$PATH
126+ ENV OPAL_PREFIX=${MPI_ROOT}
127+ ENV MPICC=${MPI_ROOT}/bin/mpicc
128+ ENV RDMAV_FORK_SAFE=1
129+ ENV FI_EFA_USE_DEVICE_RDMA=0
130+ ENV OMPI_MCA_btl=^openib
7131
8- ## Base Layer ##################################################################
9- FROM ${BASE_IMAGE} as habana-base
132+ RUN echo "[habanalabs]" > /etc/yum.repos.d/habanalabs.repo && \
133+ echo "name=Habana RH9 Linux repo" >> /etc/yum.repos.d/habanalabs.repo && \
134+ echo "baseurl=https://${ARTIFACTORY_URL}/artifactory/rhel/9/9.6" >> /etc/yum.repos.d/habanalabs.repo && \
135+ echo "gpgkey=https://${ARTIFACTORY_URL}/artifactory/rhel/9/9.6/repodata/repomd.xml.key" >> /etc/yum.repos.d/habanalabs.repo && \
136+ echo "gpgcheck=1" >> /etc/yum.repos.d/habanalabs.repo
137+
138+ RUN rpm --import "https://${ARTIFACTORY_URL}/artifactory/gaudi-general/keyPairs/primary/public" && dnf install -y --setopt=install_weak_deps=False \
139+ habanalabs-rdma-core-"$VERSION"-"$REVISION".el9 \
140+ habanalabs-thunk-"$VERSION"-"$REVISION".el9 \
141+ habanalabs-firmware-tools-"$VERSION"-"$REVISION".el9 \
142+ habanalabs-graph-"$VERSION"-"$REVISION".el9 && \
143+ dnf clean all && \
144+ chmod +t /var/log/habana_logs && \
145+ rm -f /etc/yum.repos.d/habanalabs.repo
146+
147+ ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src
148+ ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib
149+
150+ RUN set -e; \
151+ wget -q -O /tmp/openmpi-${OPENMPI_VERSION}.tar.gz \
152+ https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-${OPENMPI_VERSION}.tar.gz && \
153+ SUM="$(sha256sum /tmp/openmpi-${OPENMPI_VERSION}.tar.gz | cut -d ' ' -f1)"; \
154+ if [ "$SUM" != "$OPENMPI_SHA256" ]; then \
155+ echo "Open MPI tarball mismatch detected (sha256=$SUM)."; \
156+ exit 1; \
157+ fi; \
158+ tar -xzf /tmp/openmpi-${OPENMPI_VERSION}.tar.gz -C /tmp && \
159+ cd /tmp/openmpi-${OPENMPI_VERSION} && \
160+ ./configure --prefix=${MPI_ROOT} --with-verbs && \
161+ make -j"$(nproc)" && make install && \
162+ cd / && rm -rf /tmp/openmpi-${OPENMPI_VERSION}.tar.gz /tmp/openmpi-${OPENMPI_VERSION}
163+
164+ RUN ln -s /usr/bin/python3 /usr/bin/python && \
165+ python3 -m pip install habana_media_loader=="${VERSION}"."${REVISION}"
166+
167+ # SSH configuration necessary to support mpi-operator v2
168+ # Convert ENTRYPOINTs into scripts so that sshd can be started
169+ RUN mkdir -p /var/run/sshd && \
170+ sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config && \
171+ sed -i 's/#\(ForwardAgent \).*/\1yes/g' /etc/ssh/ssh_config && \
172+ echo " UserKnownHostsFile /dev/null" >> /etc/ssh/ssh_config && \
173+ sed -i 's/#\(StrictModes \).*/\1no/g' /etc/ssh/sshd_config
174+ COPY --chmod=0755 *-entrypoint.sh /usr/bin/
175+
176+ ENV GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so
177+ ENV HABANA_LOGS=/var/log/habana_logs/
178+ ENV HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw
179+ ENV HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins
180+
181+ ######## pytorch layer ########
182+ FROM base as pytorch
183+ ARG PT_VERSION
184+ ARG VERSION
185+ ARG REVISION
186+ ARG ARTIFACTORY_URL
187+ ARG TORCH_TYPE
188+ ARG BASE_NAME
189+
190+ LABEL name="PyTorch Installer"
191+ LABEL summary="Habanalabs PyTorch installer layer for RHEL9.6"
192+ LABEL description="Image with pre installed Habanalabs packages for PyTorch"
193+
194+ RUN echo "/usr/lib/habanalabs" > $(python3 -c "import sysconfig; print(sysconfig.get_path('platlib'))")/habanalabs-graph.pth
195+
196+ RUN dnf install --nobest --nodocs --setopt=install_weak_deps=false --allowerasing -y \
197+ cairo-devel \
198+ gperftools-devel \
199+ iproute \
200+ jq \
201+ lapack-devel \
202+ numactl \
203+ numactl-devel \
204+ openblas-devel \
205+ which \
206+ zlib-devel && \
207+ dnf clean all
208+
209+ COPY --from=clone_repo /src/sai/dockerfiles/pytorch/install_packages.sh ./install_packages.sh
210+ RUN ./install_packages.sh && rm -f install_packages.sh && /sbin/ldconfig
211+
212+ # Set LD_PRELOAD after all required installations to
213+ # avoid warnings during docker creation
214+ ENV LD_PRELOAD=/usr/lib64/libtcmalloc.so.4
215+ ENV TCMALLOC_LARGE_ALLOC_REPORT_THRESHOLD=7516192768
216+
217+ ######## vllm Layer ########
218+
219+ ######## Base Layer ###########################################################
220+ FROM pytorch as habana-base
10221
11222USER root
12223
13224WORKDIR /workspace
14225
15226ENV PIP_NO_CACHE_DIR=0
16227
17- ## Python Habana base #################################################################
228+ ######## Python Habana base ###################################################
18229FROM habana-base as python-habana-base
19230
20231COPY requirements/common.txt requirements/common.txt
@@ -25,7 +236,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \
25236 pip install \
26237 -r requirements/hpu.txt
27238
28- ## Builder #####################################################################
239+ ######## Builder ##############################################################
29240FROM python-habana-base AS build
30241
31242# install build dependencies
@@ -41,13 +252,10 @@ COPY pyproject.toml pyproject.toml
41252COPY vllm vllm
42253
43254# max jobs used by Ninja to build extensions
44- ARG max_jobs
255+ ARG max_jobs=6
45256ENV MAX_JOBS=${max_jobs}
46- # number of threads used by nvcc
47- ARG nvcc_threads
48- ENV NVCC_THREADS=$nvcc_threads
49257
50- ARG VLLM_VERSION
258+ ARG VLLM_VERSION="v0.8.5"
51259# # make sure punica kernels are built (for LoRA)
52260# HPU currently doesn't support LoRA
53261# ENV VLLM_INSTALL_PUNICA_KERNELS=1
@@ -64,7 +272,7 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
64272 SETUPTOOLS_SCM_PRETEND_VERSION="${VLLM_VERSION}" \
65273 python3 setup.py bdist_wheel --dist-dir=dist
66274
67- ## Release #####################################################################
275+ ######## Release #############################################################
68276FROM habana-base AS vllm-openai
69277
70278WORKDIR /workspace
@@ -93,19 +301,28 @@ RUN umask 002 && \
93301COPY LICENSE /licenses/vllm.md
94302COPY examples/*.jinja /app/data/template/
95303
96- USER 2000
304+ #USER 2000
305+ # Note: staying root because entrypoint starts sshd and then changes to vllm
97306WORKDIR /home/vllm
98307
99- ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
308+ ENTRYPOINT ["/usr/bin/vllm-entrypoint.sh"]
100309
101- ## vllm-grpc-adapter #####################################################################
310+ ######## vllm-grpc-adapter ####################################################
102311FROM vllm-openai as vllm-grpc-adapter
103312
104313USER root
105314
106- ARG VLLM_TGIS_ADAPTER_VERSION
315+ ARG VLLM_TGIS_ADAPTER_VERSION="0.7.1"
107316RUN --mount=type=cache,target=/root/.cache/pip \
108317 --mount=type=bind,from=build,src=/workspace/dist,target=/workspace/dist \
318+ pip install \
319+ prometheus_client==0.21.1 \
320+ grpcio==1.70.0 \
321+ grpcio-health-checking==1.70.0 \
322+ grpcio-reflection==1.70.0 \
323+ accelerate==1.7.0 \
324+ hf-transfer==0.1.9 \
325+ cachetools~=5.5 && \
109326 pip install vllm-tgis-adapter==${VLLM_TGIS_ADAPTER_VERSION} --no-deps
110327
111328ENV GRPC_PORT=8033 \
@@ -116,8 +333,9 @@ ENV GRPC_PORT=8033 \
116333 # see: https://github.com/vllm-project/vllm/pull/6485
117334 DISABLE_LOGPROBS_DURING_SPEC_DECODING=false
118335
119- USER 2000
120- ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]
336+ #USER 2000
337+ # Note: staying root because entrypoint starts sshd and then changes to vllm
338+ ENTRYPOINT ["/usr/bin/tgis-entrypoint.sh"]
121339
122340LABEL name="rhoai/odh-vllm-gaudi-rhel9" \
123341 com.redhat.component="odh-vllm-gaudi-rhel9" \
@@ -127,3 +345,4 @@ LABEL name="rhoai/odh-vllm-gaudi-rhel9" \
127345 summary="GPU-accelerated vLLM build using Intel Gaudi (Habana) for high-performance inference." \
128346 com.redhat.license_terms="https://www.redhat.com/licenses/Red_Hat_Standard_EULA_20191108.pdf" \
129347 vendor="Red Hat, Inc."
348+
0 commit comments