Skip to content

Commit 6761ca7

Browse files
committed
integrate gpu-driver-util into the driver images
Signed-off-by: Tariq Ibrahim <[email protected]>
1 parent f7b59de commit 6761ca7

File tree

8 files changed

+149
-22
lines changed

8 files changed

+149
-22
lines changed

ubuntu20.04/Dockerfile

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,10 @@ ENV PATH /usr/local/go/bin:$PATH
2828
WORKDIR /work
2929

3030
RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \
31-
cd driver/vgpu/src && \
32-
go build -o vgpu-util && \
33-
mv vgpu-util /work
31+
go build -C driver/vgpu/src -o vgpu-util && \
32+
mv driver/vgpu/src/vgpu-util /work && \
33+
go build -C driver/gpu-driver-util -o gpu-driver-util && \
34+
mv driver/gpu-driver-util/gpu-driver-util /work
3435

3536
FROM nvcr.io/nvidia/cuda:12.6.3-base-ubuntu20.04
3637

@@ -69,9 +70,15 @@ RUN /tmp/install.sh reposetup && /tmp/install.sh depinstall && \
6970
curl -fsSL -o /usr/local/bin/donkey https://github.com/3XX0/donkey/releases/download/v1.1.0/donkey && \
7071
chmod +x /usr/local/bin/donkey
7172

73+
# Download the nvidia-driver-assistant to get the latest supported-gpus.json file
74+
RUN apt-get update && \
75+
apt-get install -y --no-install-recommends nvidia-driver-assistant && \
76+
rm -rf /var/lib/apt/lists/*
77+
7278
COPY nvidia-driver /usr/local/bin
7379

7480
COPY --from=build /work/vgpu-util /usr/local/bin
81+
COPY --from=build /work/gpu-driver-util /usr/local/bin
7582

7683
ADD drivers drivers/
7784

ubuntu20.04/nvidia-driver

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@ NVIDIA_MODESET_MODULE_PARAMS=()
1616
NVIDIA_PEERMEM_MODULE_PARAMS=()
1717
TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"}
1818

19-
OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false}
20-
[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel
19+
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}
2120

2221
export DEBIAN_FRONTEND=noninteractive
2322

@@ -477,6 +476,26 @@ _shutdown() {
477476
return 1
478477
}
479478

479+
# Resolve which NVIDIA kernel module flavour to use and store it in the
# global KERNEL_TYPE.
#
# Globals:
#   KERNEL_MODULE_TYPE (read)  - one of "proprietary", "open" or "auto".
#   DRIVER_BRANCH      (read)  - passed to gpu-driver-util via -b in auto mode.
#   KERNEL_TYPE        (write) - "kernel" for proprietary, "kernel-open" for
#                                open, or the string printed by gpu-driver-util
#                                in auto mode.
# Outputs:
#   Writes a diagnostic line to stdout on failure.
# Returns:
#   0 on success; 1 when KERNEL_MODULE_TYPE is not a recognised value, or when
#   autodetection exits non-zero or prints nothing.
_resolve_kernel_type() {
  local detected

  case "${KERNEL_MODULE_TYPE}" in
    proprietary)
      KERNEL_TYPE=kernel
      ;;
    open)
      KERNEL_TYPE=kernel-open
      ;;
    auto)
      # Capture into a local first so that a failed or empty detection never
      # leaves a bogus value behind in KERNEL_TYPE. Testing the command
      # directly (rather than inspecting $? afterwards) keeps the check
      # attached to the command it belongs to.
      if ! detected=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}") \
        || [ -z "${detected}" ]; then
        echo "cannot autodetect the kernel module type, please check /var/log/gpu-driver-util.log for more details..."
        return 1
      fi
      KERNEL_TYPE=${detected}
      ;;
    *)
      echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
      return 1
      ;;
  esac

  return 0
}
498+
480499
_find_vgpu_driver_version() {
481500
local count=""
482501
local version=""
@@ -520,6 +539,8 @@ init() {
520539
_find_vgpu_driver_version || exit 1
521540
fi
522541

542+
_resolve_kernel_type || exit 1
543+
523544
# Install the userspace components and copy the kernel module sources.
524545
sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \
525546
cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
@@ -592,6 +613,8 @@ update() {
592613
fi
593614
exec 3>&-
594615

616+
_resolve_kernel_type || exit 1
617+
595618
# vgpu driver version is chosen dynamically during runtime, so pre-compile modules for
596619
# only non-vgpu driver types
597620
if [ "${DRIVER_TYPE}" != "vgpu" ]; then

ubuntu22.04/Dockerfile

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,10 @@ ENV PATH /usr/local/go/bin:$PATH
2828
WORKDIR /work
2929

3030
RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \
31-
cd driver/vgpu/src && \
32-
go build -o vgpu-util && \
33-
mv vgpu-util /work
31+
go build -C driver/vgpu/src -o vgpu-util && \
32+
mv driver/vgpu/src/vgpu-util /work && \
33+
go build -C driver/gpu-driver-util -o gpu-driver-util && \
34+
mv driver/gpu-driver-util/gpu-driver-util /work
3435

3536
FROM nvcr.io/nvidia/cuda:12.6.3-base-ubuntu22.04
3637

@@ -69,9 +70,15 @@ RUN /tmp/install.sh reposetup && /tmp/install.sh depinstall && \
6970
curl -fsSL -o /usr/local/bin/donkey https://github.com/3XX0/donkey/releases/download/v1.1.0/donkey && \
7071
chmod +x /usr/local/bin/donkey
7172

73+
# Download the nvidia-driver-assistant to get the latest supported-gpus.json file
74+
RUN apt-get update && \
75+
apt-get install -y --no-install-recommends nvidia-driver-assistant && \
76+
rm -rf /var/lib/apt/lists/*
77+
7278
COPY nvidia-driver /usr/local/bin
7379

7480
COPY --from=build /work/vgpu-util /usr/local/bin
81+
COPY --from=build /work/gpu-driver-util /usr/local/bin
7582

7683
ADD drivers drivers/
7784

ubuntu22.04/nvidia-driver

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@ NVIDIA_UVM_MODULE_PARAMS=()
1515
NVIDIA_MODESET_MODULE_PARAMS=()
1616
NVIDIA_PEERMEM_MODULE_PARAMS=()
1717
TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"}
18-
19-
OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false}
20-
[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel
18+
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}
2119

2220
export DEBIAN_FRONTEND=noninteractive
2321

@@ -481,6 +479,26 @@ _shutdown() {
481479
return 1
482480
}
483481

482+
# Resolve which NVIDIA kernel module flavour to use and store it in the
# global KERNEL_TYPE.
#
# Globals:
#   KERNEL_MODULE_TYPE (read)  - one of "proprietary", "open" or "auto".
#   DRIVER_BRANCH      (read)  - passed to gpu-driver-util via -b in auto mode.
#   KERNEL_TYPE        (write) - "kernel" for proprietary, "kernel-open" for
#                                open, or the string printed by gpu-driver-util
#                                in auto mode.
# Outputs:
#   Writes a diagnostic line to stdout on failure.
# Returns:
#   0 on success; 1 when KERNEL_MODULE_TYPE is not a recognised value, or when
#   autodetection exits non-zero or prints nothing.
_resolve_kernel_type() {
  local detected

  case "${KERNEL_MODULE_TYPE}" in
    proprietary)
      KERNEL_TYPE=kernel
      ;;
    open)
      KERNEL_TYPE=kernel-open
      ;;
    auto)
      # Capture into a local first so that a failed or empty detection never
      # leaves a bogus value behind in KERNEL_TYPE. Testing the command
      # directly (rather than inspecting $? afterwards) keeps the check
      # attached to the command it belongs to.
      if ! detected=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}") \
        || [ -z "${detected}" ]; then
        echo "cannot autodetect the kernel module type, please check /var/log/gpu-driver-util.log for more details..."
        return 1
      fi
      KERNEL_TYPE=${detected}
      ;;
    *)
      echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
      return 1
      ;;
  esac

  return 0
}
501+
484502
_find_vgpu_driver_version() {
485503
local count=""
486504
local version=""
@@ -524,6 +542,8 @@ init() {
524542
_find_vgpu_driver_version || exit 1
525543
fi
526544

545+
_resolve_kernel_type || exit 1
546+
527547
# Install the userspace components and copy the kernel module sources.
528548
sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \
529549
cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
@@ -596,6 +616,8 @@ update() {
596616
fi
597617
exec 3>&-
598618

619+
_resolve_kernel_type || exit 1
620+
599621
# vgpu driver version is chosen dynamically during runtime, so pre-compile modules for
600622
# only non-vgpu driver types
601623
if [ "${DRIVER_TYPE}" != "vgpu" ]; then

ubuntu22.04/precompiled/nvidia-driver

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
set -eu
55

66
KERNEL_VERSION=$(uname -r)
7-
OPEN_KERNEL_MODULES_ENABLED="${OPEN_KERNEL_MODULES_ENABLED:-false}"
7+
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}
88
RUN_DIR=/run/nvidia
99
PID_FILE=${RUN_DIR}/${0##*/}.pid
1010
DRIVER_BRANCH=${DRIVER_BRANCH:?"Missing driver version"}
@@ -96,6 +96,26 @@ _get_module_params() {
9696
fi
9797
}
9898

99+
# Resolve which NVIDIA kernel module flavour to use and store it in the
# global KERNEL_TYPE.
#
# Globals:
#   KERNEL_MODULE_TYPE (read)  - one of "proprietary", "open" or "auto".
#   DRIVER_BRANCH      (read)  - passed to gpu-driver-util via -b in auto mode.
#   KERNEL_TYPE        (write) - "kernel" for proprietary, "kernel-open" for
#                                open, or the string printed by gpu-driver-util
#                                in auto mode.
# Outputs:
#   Writes a diagnostic line to stdout on failure.
# Returns:
#   0 on success; 1 when KERNEL_MODULE_TYPE is not a recognised value, or when
#   autodetection exits non-zero or prints nothing.
_resolve_kernel_type() {
  local detected

  case "${KERNEL_MODULE_TYPE}" in
    proprietary)
      KERNEL_TYPE=kernel
      ;;
    open)
      KERNEL_TYPE=kernel-open
      ;;
    auto)
      # Capture into a local first so that a failed or empty detection never
      # leaves a bogus value behind in KERNEL_TYPE. Testing the command
      # directly (rather than inspecting $? afterwards) keeps the check
      # attached to the command it belongs to.
      if ! detected=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}") \
        || [ -z "${detected}" ]; then
        echo "cannot autodetect the kernel module type, please check /var/log/gpu-driver-util.log for more details..."
        return 1
      fi
      KERNEL_TYPE=${detected}
      ;;
    *)
      echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
      return 1
      ;;
  esac

  return 0
}
118+
99119
# Load the kernel modules and start persistenced.
100120
_load_driver() {
101121
echo "Parsing kernel module parameters..."
@@ -245,7 +265,7 @@ _install_driver() {
245265
xserver-xorg-video-nvidia-${DRIVER_BRANCH}-server
246266

247267
# Now install the precompiled kernel module packages signed by Canonical
248-
if [ "$OPEN_KERNEL_MODULES_ENABLED" = true ]; then
268+
if [ "${KERNEL_TYPE}" == "kernel-open" ]; then
249269
echo "Installing Open NVIDIA driver kernel modules..."
250270
apt-get install --no-install-recommends -y \
251271
linux-signatures-nvidia-${KERNEL_VERSION} \
@@ -293,6 +313,7 @@ init() {
293313
_unload_driver || exit 1
294314
_unmount_rootfs
295315

316+
_resolve_kernel_type || exit 1
296317
_install_driver
297318
_load_driver || exit 1
298319
_mount_rootfs

ubuntu24.04/Dockerfile

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
1818
git && \
1919
rm -rf /var/lib/apt/lists/*
2020

21-
22-
2321
# download appropriate binary based on the target architecture for multi-arch builds
2422
RUN OS_ARCH=${TARGETARCH/x86_64/amd64} && OS_ARCH=${OS_ARCH/aarch64/arm64} && \
2523
curl https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${OS_ARCH}.tar.gz \
@@ -30,9 +28,10 @@ ENV PATH /usr/local/go/bin:$PATH
3028
WORKDIR /work
3129

3230
RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \
33-
cd driver/vgpu/src && \
34-
go build -o vgpu-util && \
35-
mv vgpu-util /work
31+
go build -C driver/vgpu/src -o vgpu-util && \
32+
mv driver/vgpu/src/vgpu-util /work && \
33+
go build -C driver/gpu-driver-util -o gpu-driver-util && \
34+
mv driver/gpu-driver-util/gpu-driver-util /work
3635

3736
FROM nvcr.io/nvidia/cuda:12.6.3-base-ubuntu24.04
3837

@@ -66,9 +65,15 @@ ADD install.sh /tmp
6665
RUN usermod -o -u 0 -g 0 _apt && \
6766
/tmp/install.sh depinstall && /tmp/install.sh setup_cuda_repo
6867

68+
# Download the nvidia-driver-assistant to get the latest supported-gpus.json file
69+
RUN apt-get update && \
70+
apt-get install -y --no-install-recommends nvidia-driver-assistant && \
71+
rm -rf /var/lib/apt/lists/*
72+
6973
COPY nvidia-driver /usr/local/bin
7074

7175
COPY --from=build /work/vgpu-util /usr/local/bin
76+
COPY --from=build /work/gpu-driver-util /usr/local/bin
7277

7378
ADD drivers drivers/
7479

ubuntu24.04/nvidia-driver

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,7 @@ NVIDIA_MODESET_MODULE_PARAMS=()
1616
NVIDIA_PEERMEM_MODULE_PARAMS=()
1717
TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"}
1818

19-
OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false}
20-
[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel
19+
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}
2120

2221
export DEBIAN_FRONTEND=noninteractive
2322

@@ -45,6 +44,27 @@ _update_ca_certificates() {
4544
fi
4645
}
4746

47+
# Resolve which NVIDIA kernel module flavour to use and store it in the
# global KERNEL_TYPE.
#
# Globals:
#   KERNEL_MODULE_TYPE (read)  - one of "proprietary", "open" or "auto".
#   DRIVER_BRANCH      (read)  - passed to gpu-driver-util via -b in auto mode.
#   KERNEL_TYPE        (write) - "kernel" for proprietary, "kernel-open" for
#                                open, or the string printed by gpu-driver-util
#                                in auto mode.
# Outputs:
#   Writes a diagnostic line to stdout on failure.
# Returns:
#   0 on success; 1 when KERNEL_MODULE_TYPE is not a recognised value, or when
#   autodetection exits non-zero or prints nothing.
_resolve_kernel_type() {
  local detected

  case "${KERNEL_MODULE_TYPE}" in
    proprietary)
      KERNEL_TYPE=kernel
      ;;
    open)
      KERNEL_TYPE=kernel-open
      ;;
    auto)
      # Capture into a local first so that a failed or empty detection never
      # leaves a bogus value behind in KERNEL_TYPE. Testing the command
      # directly (rather than inspecting $? afterwards) keeps the check
      # attached to the command it belongs to.
      if ! detected=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}") \
        || [ -z "${detected}" ]; then
        echo "cannot autodetect the kernel module type, please check /var/log/gpu-driver-util.log for more details..."
        return 1
      fi
      KERNEL_TYPE=${detected}
      ;;
    *)
      echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
      return 1
      ;;
  esac

  return 0
}
66+
67+
4868
# Resolve the kernel version to the form major.minor.patch-revision-flavor where flavor defaults to generic.
4969
_resolve_kernel_version() {
5070
local version=$(apt-cache show "linux-headers-${KERNEL_VERSION}" 2> /dev/null | \
@@ -486,6 +506,7 @@ init() {
486506

487507
_update_ca_certificates
488508
_update_package_cache
509+
_resolve_kernel_type || exit 1
489510
_resolve_kernel_version || exit 1
490511
_install_prerequisites
491512
_link_ofa_kernel

ubuntu24.04/precompiled/nvidia-driver

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
set -eu
55

66
KERNEL_VERSION=$(uname -r)
7-
OPEN_KERNEL_MODULES_ENABLED="${OPEN_KERNEL_MODULES_ENABLED:-false}"
7+
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}
88
RUN_DIR=/run/nvidia
99
PID_FILE=${RUN_DIR}/${0##*/}.pid
1010
DRIVER_BRANCH=${DRIVER_BRANCH:?"Missing driver version"}
@@ -96,6 +96,26 @@ _get_module_params() {
9696
fi
9797
}
9898

99+
# Resolve which NVIDIA kernel module flavour to use and store it in the
# global KERNEL_TYPE.
#
# Globals:
#   KERNEL_MODULE_TYPE (read)  - one of "proprietary", "open" or "auto".
#   DRIVER_BRANCH      (read)  - passed to gpu-driver-util via -b in auto mode.
#   KERNEL_TYPE        (write) - "kernel" for proprietary, "kernel-open" for
#                                open, or the string printed by gpu-driver-util
#                                in auto mode.
# Outputs:
#   Writes a diagnostic line to stdout on failure.
# Returns:
#   0 on success; 1 when KERNEL_MODULE_TYPE is not a recognised value, or when
#   autodetection exits non-zero or prints nothing.
_resolve_kernel_type() {
  local detected

  case "${KERNEL_MODULE_TYPE}" in
    proprietary)
      KERNEL_TYPE=kernel
      ;;
    open)
      KERNEL_TYPE=kernel-open
      ;;
    auto)
      # Capture into a local first so that a failed or empty detection never
      # leaves a bogus value behind in KERNEL_TYPE. Testing the command
      # directly (rather than inspecting $? afterwards) keeps the check
      # attached to the command it belongs to.
      if ! detected=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}") \
        || [ -z "${detected}" ]; then
        echo "cannot autodetect the kernel module type, please check /var/log/gpu-driver-util.log for more details..."
        return 1
      fi
      KERNEL_TYPE=${detected}
      ;;
    *)
      echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
      return 1
      ;;
  esac

  return 0
}
118+
99119
# Load the kernel modules and start persistenced.
100120
_load_driver() {
101121
echo "Parsing kernel module parameters..."
@@ -245,7 +265,7 @@ _install_driver() {
245265
xserver-xorg-video-nvidia-${DRIVER_BRANCH}-server
246266

247267
# Now install the precompiled kernel module packages signed by Canonical
248-
if [ "$OPEN_KERNEL_MODULES_ENABLED" = true ]; then
268+
if [ "${KERNEL_TYPE}" == "kernel-open" ]; then
249269
echo "Installing Open NVIDIA driver kernel modules..."
250270
apt-get install --no-install-recommends -y \
251271
linux-signatures-nvidia-${KERNEL_VERSION} \
@@ -293,6 +313,7 @@ init() {
293313
_unload_driver || exit 1
294314
_unmount_rootfs
295315

316+
_resolve_kernel_type || exit 1
296317
_install_driver
297318
_load_driver || exit 1
298319
_mount_rootfs

0 commit comments

Comments
 (0)