Skip to content

Commit 181176c

Browse files
committed
integrate gpu-driver-util into the driver images
Signed-off-by: Tariq Ibrahim <[email protected]>
1 parent 0f4ecad commit 181176c

File tree

10 files changed

+165
-29
lines changed

10 files changed

+165
-29
lines changed

rhel8/Dockerfile

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,10 @@ ENV PATH /usr/local/go/bin:$PATH
1717
WORKDIR /work
1818

1919
RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \
20-
cd driver/vgpu/src && \
21-
go build -o vgpu-util && \
22-
mv vgpu-util /work
20+
go build -C driver/vgpu/src -o vgpu-util && \
21+
mv driver/vgpu/src/vgpu-util /work && \
22+
go build -C driver/gpu-driver-util -o gpu-driver-util && \
23+
mv driver/gpu-driver-util/gpu-driver-util /work
2324

2425
FROM nvcr.io/nvidia/cuda:12.6.3-base-ubi8
2526

rhel8/nvidia-driver

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,7 @@ USE_HOST_MOFED="${USE_HOST_MOFED:-false}"
1717
DNF_RELEASEVER=${DNF_RELEASEVER:-""}
1818
RHEL_VERSION=${RHEL_VERSION:-""}
1919
RHEL_MAJOR_VERSION=8
20-
21-
OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false}
22-
[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel
20+
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}
2321

2422
DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64}
2523
echo "DRIVER_ARCH is $DRIVER_ARCH"
@@ -577,6 +575,27 @@ _start_vgpu_topology_daemon() {
577575
nvidia-topologyd
578576
}
579577

578+
_resolve_kernel_type() {
579+
if [ "${KERNEL_MODULE_TYPE}" == "proprietary" ]; then
580+
KERNEL_TYPE=kernel
581+
return 0
582+
elif [ "${KERNEL_MODULE_TYPE}" == "open" ]; then
583+
KERNEL_TYPE=kernel-open
584+
return 0
585+
elif [ "${KERNEL_MODULE_TYPE}" == "auto" ]; then
586+
KERNEL_TYPE=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}")
587+
if [ $? -ne 0 ]; then
588+
echo "cannot autodetect the kernel module type, printing error logs from /var/log/gpu-driver-util.log..."
589+
tail -n 3 /var/log/gpu-driver-util.log
590+
return 1
591+
fi
592+
return 0
593+
else
594+
echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
595+
return 1
596+
fi
597+
}
598+
580599
_prepare() {
581600
if [ "${DRIVER_TYPE}" = "vgpu" ]; then
582601
_find_vgpu_driver_version || exit 1
@@ -797,5 +816,6 @@ if [ $# -ne 0 ]; then
797816
fi
798817

799818
_resolve_rhel_version || exit 1
819+
_resolve_kernel_type || exit 1
800820

801821
$command

rhel9/nvidia-driver

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,7 @@ USE_HOST_MOFED="${USE_HOST_MOFED:-false}"
1717
DNF_RELEASEVER=${DNF_RELEASEVER:-""}
1818
RHEL_VERSION=${RHEL_VERSION:-""}
1919
RHEL_MAJOR_VERSION=9
20-
21-
OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false}
22-
[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel
20+
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}
2321

2422
DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64}
2523
echo "DRIVER_ARCH is $DRIVER_ARCH"
@@ -571,6 +569,27 @@ _find_vgpu_driver_version() {
571569
return 0
572570
}
573571

572+
_resolve_kernel_type() {
573+
if [ "${KERNEL_MODULE_TYPE}" == "proprietary" ]; then
574+
KERNEL_TYPE=kernel
575+
return 0
576+
elif [ "${KERNEL_MODULE_TYPE}" == "open" ]; then
577+
KERNEL_TYPE=kernel-open
578+
return 0
579+
elif [ "${KERNEL_MODULE_TYPE}" == "auto" ]; then
580+
KERNEL_TYPE=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}")
581+
if [ $? -ne 0 ]; then
582+
echo "cannot autodetect the kernel module type, printing error logs from /var/log/gpu-driver-util.log..."
583+
tail -n 3 /var/log/gpu-driver-util.log
584+
return 1
585+
fi
586+
return 0
587+
else
588+
echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
589+
return 1
590+
fi
591+
}
592+
574593
_start_vgpu_topology_daemon() {
575594
type nvidia-topologyd > /dev/null 2>&1 || return 0
576595
echo "Starting nvidia-topologyd.."
@@ -797,5 +816,6 @@ if [ $# -ne 0 ]; then
797816
fi
798817

799818
_resolve_rhel_version || exit 1
819+
_resolve_kernel_type || exit 1
800820

801821
$command

ubuntu20.04/Dockerfile

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,10 @@ ENV PATH /usr/local/go/bin:$PATH
2828
WORKDIR /work
2929

3030
RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \
31-
cd driver/vgpu/src && \
32-
go build -o vgpu-util && \
33-
mv vgpu-util /work
31+
go build -C driver/vgpu/src -o vgpu-util && \
32+
mv driver/vgpu/src/vgpu-util /work && \
33+
go build -C driver/gpu-driver-util -o gpu-driver-util && \
34+
mv driver/gpu-driver-util/gpu-driver-util /work
3435

3536
FROM nvcr.io/nvidia/cuda:12.6.3-base-ubuntu20.04
3637

@@ -72,6 +73,7 @@ RUN /tmp/install.sh reposetup && /tmp/install.sh depinstall && \
7273
COPY nvidia-driver /usr/local/bin
7374

7475
COPY --from=build /work/vgpu-util /usr/local/bin
76+
COPY --from=build /work/gpu-driver-util /usr/local/bin
7577

7678
ADD drivers drivers/
7779

ubuntu20.04/nvidia-driver

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@ NVIDIA_MODESET_MODULE_PARAMS=()
1616
NVIDIA_PEERMEM_MODULE_PARAMS=()
1717
TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"}
1818

19-
OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false}
20-
[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel
19+
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}
2120

2221
export DEBIAN_FRONTEND=noninteractive
2322

@@ -477,6 +476,27 @@ _shutdown() {
477476
return 1
478477
}
479478

479+
_resolve_kernel_type() {
480+
if [ "${KERNEL_MODULE_TYPE}" == "proprietary" ]; then
481+
KERNEL_TYPE=kernel
482+
return 0
483+
elif [ "${KERNEL_MODULE_TYPE}" == "open" ]; then
484+
KERNEL_TYPE=kernel-open
485+
return 0
486+
elif [ "${KERNEL_MODULE_TYPE}" == "auto" ]; then
487+
KERNEL_TYPE=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}")
488+
if [ $? -ne 0 ]; then
489+
echo "cannot autodetect the kernel module type, printing error logs from /var/log/gpu-driver-util.log..."
490+
tail -n 3 /var/log/gpu-driver-util.log
491+
return 1
492+
fi
493+
return 0
494+
else
495+
echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
496+
return 1
497+
fi
498+
}
499+
480500
_find_vgpu_driver_version() {
481501
local count=""
482502
local version=""
@@ -520,6 +540,8 @@ init() {
520540
_find_vgpu_driver_version || exit 1
521541
fi
522542

543+
_resolve_kernel_type || exit 1
544+
523545
# Install the userspace components and copy the kernel module sources.
524546
sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \
525547
cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
@@ -592,6 +614,8 @@ update() {
592614
fi
593615
exec 3>&-
594616

617+
_resolve_kernel_type || exit 1
618+
595619
# vgpu driver version is chosen dynamically during runtime, so pre-compile modules for
596620
# only non-vgpu driver types
597621
if [ "${DRIVER_TYPE}" != "vgpu" ]; then

ubuntu22.04/Dockerfile

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,10 @@ ENV PATH /usr/local/go/bin:$PATH
2828
WORKDIR /work
2929

3030
RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \
31-
cd driver/vgpu/src && \
32-
go build -o vgpu-util && \
33-
mv vgpu-util /work
31+
go build -C driver/vgpu/src -o vgpu-util && \
32+
mv driver/vgpu/src/vgpu-util /work && \
33+
go build -C driver/gpu-driver-util -o gpu-driver-util && \
34+
mv driver/gpu-driver-util/gpu-driver-util /work
3435

3536
FROM nvcr.io/nvidia/cuda:12.6.3-base-ubuntu22.04
3637

@@ -72,6 +73,7 @@ RUN /tmp/install.sh reposetup && /tmp/install.sh depinstall && \
7273
COPY nvidia-driver /usr/local/bin
7374

7475
COPY --from=build /work/vgpu-util /usr/local/bin
76+
COPY --from=build /work/gpu-driver-util /usr/local/bin
7577

7678
ADD drivers drivers/
7779

ubuntu22.04/nvidia-driver

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@ NVIDIA_UVM_MODULE_PARAMS=()
1515
NVIDIA_MODESET_MODULE_PARAMS=()
1616
NVIDIA_PEERMEM_MODULE_PARAMS=()
1717
TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"}
18-
19-
OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false}
20-
[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel
18+
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}
2119

2220
export DEBIAN_FRONTEND=noninteractive
2321

@@ -481,6 +479,27 @@ _shutdown() {
481479
return 1
482480
}
483481

482+
_resolve_kernel_type() {
483+
if [ "${KERNEL_MODULE_TYPE}" == "proprietary" ]; then
484+
KERNEL_TYPE=kernel
485+
return 0
486+
elif [ "${KERNEL_MODULE_TYPE}" == "open" ]; then
487+
KERNEL_TYPE=kernel-open
488+
return 0
489+
elif [ "${KERNEL_MODULE_TYPE}" == "auto" ]; then
490+
KERNEL_TYPE=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}")
491+
if [ $? -ne 0 ]; then
492+
echo "cannot autodetect the kernel module type, printing error logs from /var/log/gpu-driver-util.log..."
493+
tail -n 3 /var/log/gpu-driver-util.log
494+
return 1
495+
fi
496+
return 0
497+
else
498+
echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
499+
return 1
500+
fi
501+
}
502+
484503
_find_vgpu_driver_version() {
485504
local count=""
486505
local version=""
@@ -524,6 +543,8 @@ init() {
524543
_find_vgpu_driver_version || exit 1
525544
fi
526545

546+
_resolve_kernel_type || exit 1
547+
527548
# Install the userspace components and copy the kernel module sources.
528549
sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \
529550
cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
@@ -596,6 +617,8 @@ update() {
596617
fi
597618
exec 3>&-
598619

620+
_resolve_kernel_type || exit 1
621+
599622
# vgpu driver version is chosen dynamically during runtime, so pre-compile modules for
600623
# only non-vgpu driver types
601624
if [ "${DRIVER_TYPE}" != "vgpu" ]; then

ubuntu22.04/precompiled/nvidia-driver

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
set -eu
55

66
KERNEL_VERSION=$(uname -r)
7-
OPEN_KERNEL_MODULES_ENABLED="${OPEN_KERNEL_MODULES_ENABLED:-false}"
7+
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}
88
RUN_DIR=/run/nvidia
99
PID_FILE=${RUN_DIR}/${0##*/}.pid
1010
DRIVER_BRANCH=${DRIVER_BRANCH:?"Missing driver version"}
@@ -96,6 +96,27 @@ _get_module_params() {
9696
fi
9797
}
9898

99+
_resolve_kernel_type() {
100+
if [ "${KERNEL_MODULE_TYPE}" == "proprietary" ]; then
101+
KERNEL_TYPE=kernel
102+
return 0
103+
elif [ "${KERNEL_MODULE_TYPE}" == "open" ]; then
104+
KERNEL_TYPE=kernel-open
105+
return 0
106+
elif [ "${KERNEL_MODULE_TYPE}" == "auto" ]; then
107+
KERNEL_TYPE=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}")
108+
if [ $? -ne 0 ]; then
109+
echo "cannot autodetect the kernel module type, printing error logs from /var/log/gpu-driver-util.log..."
110+
tail -n 3 /var/log/gpu-driver-util.log
111+
return 1
112+
fi
113+
return 0
114+
else
115+
echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
116+
return 1
117+
fi
118+
}
119+
99120
# Load the kernel modules and start persistenced.
100121
_load_driver() {
101122
echo "Parsing kernel module parameters..."
@@ -245,7 +266,7 @@ _install_driver() {
245266
xserver-xorg-video-nvidia-${DRIVER_BRANCH}-server
246267

247268
# Now install the precompiled kernel module packages signed by Canonical
248-
if [ "$OPEN_KERNEL_MODULES_ENABLED" = true ]; then
269+
if [ "${KERNEL_TYPE}" == "kernel-open" ]; then
249270
echo "Installing Open NVIDIA driver kernel modules..."
250271
apt-get install --no-install-recommends -y \
251272
linux-signatures-nvidia-${KERNEL_VERSION} \
@@ -293,6 +314,7 @@ init() {
293314
_unload_driver || exit 1
294315
_unmount_rootfs
295316

317+
_resolve_kernel_type || exit 1
296318
_install_driver
297319
_load_driver || exit 1
298320
_mount_rootfs

ubuntu24.04/Dockerfile

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
1818
git && \
1919
rm -rf /var/lib/apt/lists/*
2020

21-
22-
2321
# download appropriate binary based on the target architecture for multi-arch builds
2422
RUN OS_ARCH=${TARGETARCH/x86_64/amd64} && OS_ARCH=${OS_ARCH/aarch64/arm64} && \
2523
curl https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${OS_ARCH}.tar.gz \
@@ -30,9 +28,10 @@ ENV PATH /usr/local/go/bin:$PATH
3028
WORKDIR /work
3129

3230
RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \
33-
cd driver/vgpu/src && \
34-
go build -o vgpu-util && \
35-
mv vgpu-util /work
31+
go build -C driver/vgpu/src -o vgpu-util && \
32+
mv driver/vgpu/src/vgpu-util /work && \
33+
go build -C driver/gpu-driver-util -o gpu-driver-util && \
34+
mv driver/gpu-driver-util/gpu-driver-util /work
3635

3736
FROM nvcr.io/nvidia/cuda:12.6.3-base-ubuntu24.04
3837

@@ -69,6 +68,7 @@ RUN usermod -o -u 0 -g 0 _apt && \
6968
COPY nvidia-driver /usr/local/bin
7069

7170
COPY --from=build /work/vgpu-util /usr/local/bin
71+
COPY --from=build /work/gpu-driver-util /usr/local/bin
7272

7373
ADD drivers drivers/
7474

0 commit comments

Comments
 (0)