Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions rhel8/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@ ENV PATH /usr/local/go/bin:$PATH
WORKDIR /work

RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \
cd driver/vgpu/src && \
go build -o vgpu-util && \
mv vgpu-util /work
go build -C driver/vgpu/src -o vgpu-util && \
mv driver/vgpu/src/vgpu-util /work && \
go build -C driver/gpu-driver-util -o gpu-driver-util && \
mv driver/gpu-driver-util/gpu-driver-util /work

FROM nvcr.io/nvidia/cuda:12.6.3-base-ubi8

Expand All @@ -36,6 +37,8 @@ ENV DRIVER_VERSION=$DRIVER_VERSION
# Arg to indicate if driver type is either of passthrough/baremetal or vgpu
ARG DRIVER_TYPE=passthrough
ENV DRIVER_TYPE=$DRIVER_TYPE
ARG DRIVER_BRANCH=550
ENV DRIVER_BRANCH=$DRIVER_BRANCH
ARG VGPU_LICENSE_SERVER_TYPE=NLS
ENV VGPU_LICENSE_SERVER_TYPE=$VGPU_LICENSE_SERVER_TYPE
# Enable vGPU version compatibility check by default
Expand Down Expand Up @@ -84,6 +87,7 @@ COPY ocp_dtk_entrypoint /usr/local/bin
COPY common.sh /usr/local/bin

COPY --from=build /work/vgpu-util /usr/local/bin
COPY --from=build /work/gpu-driver-util /usr/local/bin

WORKDIR /drivers

Expand Down
24 changes: 21 additions & 3 deletions rhel8/nvidia-driver
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ set -eu
RUN_DIR=/run/nvidia
PID_FILE=${RUN_DIR}/${0##*/}.pid
DRIVER_VERSION=${DRIVER_VERSION:?"Missing DRIVER_VERSION env"}
DRIVER_BRANCH=${DRIVER_BRANCH:?"Missing DRIVER_BRANCH env"}
KERNEL_UPDATE_HOOK=/run/kernel/postinst.d/update-nvidia-driver
NUM_VGPU_DEVICES=0
NVIDIA_MODULE_PARAMS=()
Expand All @@ -17,9 +18,7 @@ USE_HOST_MOFED="${USE_HOST_MOFED:-false}"
DNF_RELEASEVER=${DNF_RELEASEVER:-""}
RHEL_VERSION=${RHEL_VERSION:-""}
RHEL_MAJOR_VERSION=8

OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false}
[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}

DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64}
echo "DRIVER_ARCH is $DRIVER_ARCH"
Expand Down Expand Up @@ -577,6 +576,24 @@ _start_vgpu_topology_daemon() {
nvidia-topologyd
}

# Resolve KERNEL_TYPE from the user-facing KERNEL_MODULE_TYPE setting.
#
#   proprietary -> KERNEL_TYPE=kernel
#   open        -> KERNEL_TYPE=kernel-open
#   auto        -> KERNEL_TYPE is set to whatever
#                  `gpu-driver-util get-kernel-module-type -b DRIVER_BRANCH` prints
#
# Globals read:    KERNEL_MODULE_TYPE, DRIVER_BRANCH (auto mode only)
# Globals written: KERNEL_TYPE
# Returns: 0 on success; 1 when autodetection fails or KERNEL_MODULE_TYPE is
#          not one of the three values above.
_resolve_kernel_type() {
    case "${KERNEL_MODULE_TYPE}" in
        proprietary)
            KERNEL_TYPE=kernel
            ;;
        open)
            KERNEL_TYPE=kernel-open
            ;;
        auto)
            # Test the assignment directly instead of inspecting $? afterwards:
            # this script runs with `set -e`, so a failing bare assignment would
            # abort the script before the diagnostics below could be printed
            # whenever this function is called outside a `||` list.
            if ! KERNEL_TYPE=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}"); then
                echo "cannot autodetect the kernel module type, printing error logs from /var/log/gpu-driver-util.log..."
                # Best effort: an unreadable log must not replace the `return 1` below.
                tail -n 3 /var/log/gpu-driver-util.log || true
                return 1
            fi
            ;;
        *)
            echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
            return 1
            ;;
    esac
}

_prepare() {
if [ "${DRIVER_TYPE}" = "vgpu" ]; then
_find_vgpu_driver_version || exit 1
Expand Down Expand Up @@ -797,5 +814,6 @@ if [ $# -ne 0 ]; then
fi

_resolve_rhel_version || exit 1
_resolve_kernel_type || exit 1

$command
2 changes: 2 additions & 0 deletions rhel8/ocp_dtk_entrypoint
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ nv-ctr-run-with-dtk() {
/usr/local/bin/nvidia-driver \
/usr/local/bin/common.sh \
/usr/local/bin/extract-vmlinux \
/usr/local/bin/gpu-driver-util \
/usr/local/bin/vgpu-util \
/drivers \
/licenses \
Expand Down Expand Up @@ -136,6 +137,7 @@ dtk-build-driver() {
"$DRIVER_TOOLKIT_SHARED_DIR/nvidia-driver" \
"$DRIVER_TOOLKIT_SHARED_DIR/common.sh" \
"$DRIVER_TOOLKIT_SHARED_DIR/extract-vmlinux" \
"$DRIVER_TOOLKIT_SHARED_DIR/gpu-driver-util" \
"$DRIVER_TOOLKIT_SHARED_DIR/vgpu-util" \
"${DRIVER_TOOLKIT_SHARED_DIR}/bin"

Expand Down
10 changes: 7 additions & 3 deletions rhel9/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@ ENV PATH /usr/local/go/bin:$PATH
WORKDIR /work

RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \
cd driver/vgpu/src && \
go build -o vgpu-util && \
mv vgpu-util /work
go build -C driver/vgpu/src -o vgpu-util && \
mv driver/vgpu/src/vgpu-util /work && \
go build -C driver/gpu-driver-util -o gpu-driver-util && \
mv driver/gpu-driver-util/gpu-driver-util /work

FROM nvcr.io/nvidia/cuda:12.6.3-base-ubi9

Expand All @@ -36,6 +37,8 @@ ENV DRIVER_VERSION=$DRIVER_VERSION
# Arg to indicate if driver type is either of passthrough/baremetal or vgpu
ARG DRIVER_TYPE=passthrough
ENV DRIVER_TYPE=$DRIVER_TYPE
ARG DRIVER_BRANCH=550
ENV DRIVER_BRANCH=$DRIVER_BRANCH
ARG VGPU_LICENSE_SERVER_TYPE=NLS
ENV VGPU_LICENSE_SERVER_TYPE=$VGPU_LICENSE_SERVER_TYPE
# Enable vGPU version compatibility check by default
Expand Down Expand Up @@ -78,6 +81,7 @@ COPY ocp_dtk_entrypoint /usr/local/bin
COPY common.sh /usr/local/bin

COPY --from=build /work/vgpu-util /usr/local/bin
COPY --from=build /work/gpu-driver-util /usr/local/bin

WORKDIR /drivers

Expand Down
24 changes: 21 additions & 3 deletions rhel9/nvidia-driver
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ set -eu
RUN_DIR=/run/nvidia
PID_FILE=${RUN_DIR}/${0##*/}.pid
DRIVER_VERSION=${DRIVER_VERSION:?"Missing DRIVER_VERSION env"}
DRIVER_BRANCH=${DRIVER_BRANCH:?"Missing DRIVER_BRANCH env"}
KERNEL_UPDATE_HOOK=/run/kernel/postinst.d/update-nvidia-driver
NUM_VGPU_DEVICES=0
NVIDIA_MODULE_PARAMS=()
Expand All @@ -17,9 +18,7 @@ USE_HOST_MOFED="${USE_HOST_MOFED:-false}"
DNF_RELEASEVER=${DNF_RELEASEVER:-""}
RHEL_VERSION=${RHEL_VERSION:-""}
RHEL_MAJOR_VERSION=9

OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false}
[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}

DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64}
echo "DRIVER_ARCH is $DRIVER_ARCH"
Expand Down Expand Up @@ -571,6 +570,24 @@ _find_vgpu_driver_version() {
return 0
}

# Resolve KERNEL_TYPE from the user-facing KERNEL_MODULE_TYPE setting.
#
#   proprietary -> KERNEL_TYPE=kernel
#   open        -> KERNEL_TYPE=kernel-open
#   auto        -> KERNEL_TYPE is set to whatever
#                  `gpu-driver-util get-kernel-module-type -b DRIVER_BRANCH` prints
#
# Globals read:    KERNEL_MODULE_TYPE, DRIVER_BRANCH (auto mode only)
# Globals written: KERNEL_TYPE
# Returns: 0 on success; 1 when autodetection fails or KERNEL_MODULE_TYPE is
#          not one of the three values above.
_resolve_kernel_type() {
    case "${KERNEL_MODULE_TYPE}" in
        proprietary)
            KERNEL_TYPE=kernel
            ;;
        open)
            KERNEL_TYPE=kernel-open
            ;;
        auto)
            # Test the assignment directly instead of inspecting $? afterwards:
            # this script runs with `set -e`, so a failing bare assignment would
            # abort the script before the diagnostics below could be printed
            # whenever this function is called outside a `||` list.
            if ! KERNEL_TYPE=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}"); then
                echo "cannot autodetect the kernel module type, printing error logs from /var/log/gpu-driver-util.log..."
                # Best effort: an unreadable log must not replace the `return 1` below.
                tail -n 3 /var/log/gpu-driver-util.log || true
                return 1
            fi
            ;;
        *)
            echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
            return 1
            ;;
    esac
}

_start_vgpu_topology_daemon() {
type nvidia-topologyd > /dev/null 2>&1 || return 0
echo "Starting nvidia-topologyd.."
Expand Down Expand Up @@ -797,5 +814,6 @@ if [ $# -ne 0 ]; then
fi

_resolve_rhel_version || exit 1
_resolve_kernel_type || exit 1

$command
2 changes: 2 additions & 0 deletions rhel9/ocp_dtk_entrypoint
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ nv-ctr-run-with-dtk() {
/usr/local/bin/nvidia-driver \
/usr/local/bin/common.sh \
/usr/local/bin/extract-vmlinux \
/usr/local/bin/gpu-driver-util \
/usr/local/bin/vgpu-util \
/drivers \
/licenses \
Expand Down Expand Up @@ -136,6 +137,7 @@ dtk-build-driver() {
"$DRIVER_TOOLKIT_SHARED_DIR/nvidia-driver" \
"$DRIVER_TOOLKIT_SHARED_DIR/common.sh" \
"$DRIVER_TOOLKIT_SHARED_DIR/extract-vmlinux" \
"$DRIVER_TOOLKIT_SHARED_DIR/gpu-driver-util" \
"$DRIVER_TOOLKIT_SHARED_DIR/vgpu-util" \
"${DRIVER_TOOLKIT_SHARED_DIR}/bin"

Expand Down
8 changes: 5 additions & 3 deletions ubuntu20.04/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@ ENV PATH /usr/local/go/bin:$PATH
WORKDIR /work

RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \
cd driver/vgpu/src && \
go build -o vgpu-util && \
mv vgpu-util /work
go build -C driver/vgpu/src -o vgpu-util && \
mv driver/vgpu/src/vgpu-util /work && \
go build -C driver/gpu-driver-util -o gpu-driver-util && \
mv driver/gpu-driver-util/gpu-driver-util /work

FROM nvcr.io/nvidia/cuda:12.6.3-base-ubuntu20.04

Expand Down Expand Up @@ -72,6 +73,7 @@ RUN /tmp/install.sh reposetup && /tmp/install.sh depinstall && \
COPY nvidia-driver /usr/local/bin

COPY --from=build /work/vgpu-util /usr/local/bin
COPY --from=build /work/gpu-driver-util /usr/local/bin

ADD drivers drivers/

Expand Down
25 changes: 23 additions & 2 deletions ubuntu20.04/nvidia-driver
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ NVIDIA_MODESET_MODULE_PARAMS=()
NVIDIA_PEERMEM_MODULE_PARAMS=()
TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"}

OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false}
[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}

export DEBIAN_FRONTEND=noninteractive

Expand Down Expand Up @@ -477,6 +476,24 @@ _shutdown() {
return 1
}

# Resolve KERNEL_TYPE from the user-facing KERNEL_MODULE_TYPE setting.
#
#   proprietary -> KERNEL_TYPE=kernel
#   open        -> KERNEL_TYPE=kernel-open
#   auto        -> KERNEL_TYPE is set to whatever
#                  `gpu-driver-util get-kernel-module-type -b DRIVER_BRANCH` prints
#
# Globals read:    KERNEL_MODULE_TYPE, DRIVER_BRANCH (auto mode only)
# Globals written: KERNEL_TYPE
# Returns: 0 on success; 1 when autodetection fails or KERNEL_MODULE_TYPE is
#          not one of the three values above.
_resolve_kernel_type() {
    case "${KERNEL_MODULE_TYPE}" in
        proprietary)
            KERNEL_TYPE=kernel
            ;;
        open)
            KERNEL_TYPE=kernel-open
            ;;
        auto)
            # Test the assignment directly instead of inspecting $? afterwards:
            # if the script runs with errexit (`set -e`) enabled, a failing bare
            # assignment would abort it before the diagnostics below are printed
            # whenever this function is called outside a `||` list.
            if ! KERNEL_TYPE=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}"); then
                echo "cannot autodetect the kernel module type, printing error logs from /var/log/gpu-driver-util.log..."
                # Best effort: an unreadable log must not replace the `return 1` below.
                tail -n 3 /var/log/gpu-driver-util.log || true
                return 1
            fi
            ;;
        *)
            echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
            return 1
            ;;
    esac
}

_find_vgpu_driver_version() {
local count=""
local version=""
Expand Down Expand Up @@ -520,6 +537,8 @@ init() {
_find_vgpu_driver_version || exit 1
fi

_resolve_kernel_type || exit 1

# Install the userspace components and copy the kernel module sources.
sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \
cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
Expand Down Expand Up @@ -592,6 +611,8 @@ update() {
fi
exec 3>&-

_resolve_kernel_type || exit 1

# vgpu driver version is chosen dynamically during runtime, so pre-compile modules for
# only non-vgpu driver types
if [ "${DRIVER_TYPE}" != "vgpu" ]; then
Expand Down
8 changes: 5 additions & 3 deletions ubuntu22.04/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@ ENV PATH /usr/local/go/bin:$PATH
WORKDIR /work

RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \
cd driver/vgpu/src && \
go build -o vgpu-util && \
mv vgpu-util /work
go build -C driver/vgpu/src -o vgpu-util && \
mv driver/vgpu/src/vgpu-util /work && \
go build -C driver/gpu-driver-util -o gpu-driver-util && \
mv driver/gpu-driver-util/gpu-driver-util /work

FROM nvcr.io/nvidia/cuda:12.6.3-base-ubuntu22.04

Expand Down Expand Up @@ -72,6 +73,7 @@ RUN /tmp/install.sh reposetup && /tmp/install.sh depinstall && \
COPY nvidia-driver /usr/local/bin

COPY --from=build /work/vgpu-util /usr/local/bin
COPY --from=build /work/gpu-driver-util /usr/local/bin

ADD drivers drivers/

Expand Down
26 changes: 23 additions & 3 deletions ubuntu22.04/nvidia-driver
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,7 @@ NVIDIA_UVM_MODULE_PARAMS=()
NVIDIA_MODESET_MODULE_PARAMS=()
NVIDIA_PEERMEM_MODULE_PARAMS=()
TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"}

OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false}
[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}

export DEBIAN_FRONTEND=noninteractive

Expand Down Expand Up @@ -481,6 +479,24 @@ _shutdown() {
return 1
}

# Resolve KERNEL_TYPE from the user-facing KERNEL_MODULE_TYPE setting.
#
#   proprietary -> KERNEL_TYPE=kernel
#   open        -> KERNEL_TYPE=kernel-open
#   auto        -> KERNEL_TYPE is set to whatever
#                  `gpu-driver-util get-kernel-module-type -b DRIVER_BRANCH` prints
#
# Globals read:    KERNEL_MODULE_TYPE, DRIVER_BRANCH (auto mode only)
# Globals written: KERNEL_TYPE
# Returns: 0 on success; 1 when autodetection fails or KERNEL_MODULE_TYPE is
#          not one of the three values above.
_resolve_kernel_type() {
    case "${KERNEL_MODULE_TYPE}" in
        proprietary)
            KERNEL_TYPE=kernel
            ;;
        open)
            KERNEL_TYPE=kernel-open
            ;;
        auto)
            # Test the assignment directly instead of inspecting $? afterwards:
            # if the script runs with errexit (`set -e`) enabled, a failing bare
            # assignment would abort it before the diagnostics below are printed
            # whenever this function is called outside a `||` list.
            if ! KERNEL_TYPE=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}"); then
                echo "cannot autodetect the kernel module type, printing error logs from /var/log/gpu-driver-util.log..."
                # Best effort: an unreadable log must not replace the `return 1` below.
                tail -n 3 /var/log/gpu-driver-util.log || true
                return 1
            fi
            ;;
        *)
            echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
            return 1
            ;;
    esac
}

_find_vgpu_driver_version() {
local count=""
local version=""
Expand Down Expand Up @@ -524,6 +540,8 @@ init() {
_find_vgpu_driver_version || exit 1
fi

_resolve_kernel_type || exit 1

# Install the userspace components and copy the kernel module sources.
sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \
cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
Expand Down Expand Up @@ -596,6 +614,8 @@ update() {
fi
exec 3>&-

_resolve_kernel_type || exit 1

# vgpu driver version is chosen dynamically during runtime, so pre-compile modules for
# only non-vgpu driver types
if [ "${DRIVER_TYPE}" != "vgpu" ]; then
Expand Down
17 changes: 17 additions & 0 deletions ubuntu22.04/precompiled/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ FROM nvcr.io/nvidia/cuda:12.6.2-base-ubuntu22.04

ENV DEBIAN_FRONTEND=noninteractive

ARG TARGETARCH
ARG GOLANG_VERSION
ARG DRIVER_BRANCH=535
ENV DRIVER_BRANCH=$DRIVER_BRANCH
ARG DRIVER_VERSION=535.216.03
Expand All @@ -12,6 +14,8 @@ ENV KERNEL_VERSION=$KERNEL_VERSION

ENV NVIDIA_VISIBLE_DEVICES=void

SHELL ["/bin/bash", "-c"]

RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections

# Fetch GPG keys for CUDA repo
Expand All @@ -26,6 +30,7 @@ RUN dpkg --add-architecture i386 && \
curl \
kmod \
file \
git \
libelf-dev \
libglvnd-dev \
pkg-config && \
Expand All @@ -41,6 +46,18 @@ RUN echo "deb [arch=amd64] http://archive.ubuntu.com/ubuntu/ jammy main universe
RUN curl -fsSL -o /usr/local/bin/donkey https://github.com/3XX0/donkey/releases/download/v1.1.0/donkey && \
chmod +x /usr/local/bin/donkey

# Download the Go toolchain that matches the target architecture (multi-arch builds)
# and unpack it under /usr/local. TARGETARCH is normalised to Go's release naming
# (x86_64 -> amd64, aarch64 -> arm64).
# `pipefail` plus `curl -f` make a failed download (e.g. an unset or unknown
# GOLANG_VERSION) abort the build with curl's error instead of piping an HTTP
# error page into tar.
RUN set -o pipefail && \
    OS_ARCH=${TARGETARCH/x86_64/amd64} && OS_ARCH=${OS_ARCH/aarch64/arm64} && \
    curl -fsSL https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${OS_ARCH}.tar.gz \
    | tar -C /usr/local -xz

ENV PATH=/usr/local/go/bin:$PATH

# Build gpu-driver-util from the upstream gpu-driver-container repository and
# install the binary into /usr/local/bin.
# The clone is shallow because only the tip of the default branch is built, and the
# checkout is deleted in the same layer so it does not persist in the image.
RUN git clone --depth 1 https://github.com/NVIDIA/gpu-driver-container driver && \
    go build -C driver/gpu-driver-util -o gpu-driver-util && \
    mv driver/gpu-driver-util/gpu-driver-util /usr/local/bin && \
    rm -rf driver

# Install / upgrade packages here that are required to resolve CVEs
ARG CVE_UPDATES
RUN if [ -n "${CVE_UPDATES}" ]; then \
Expand Down
Loading
Loading