Skip to content

Commit a2ca287

Browse files
authored
Merge pull request #289 from NVIDIA/precompile-kernel-mod-type
2 parents ffff66b + 152c342 commit a2ca287

File tree

6 files changed

+106
-4
lines changed

6 files changed

+106
-4
lines changed

ubuntu22.04/precompiled/Dockerfile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ FROM nvcr.io/nvidia/cuda:12.8.1-base-ubuntu22.04
22

33
ENV DEBIAN_FRONTEND=noninteractive
44

5+
ARG BASE_URL=https://us.download.nvidia.com/tesla
6+
ARG TARGETARCH
7+
ENV TARGETARCH=$TARGETARCH
58
ARG DRIVER_BRANCH=535
69
ENV DRIVER_BRANCH=$DRIVER_BRANCH
710
ARG DRIVER_VERSION=535.230.02
@@ -60,6 +63,7 @@ ADD local-repo.sh /tmp
6063
RUN mkdir -p /usr/local/repos && \
6164
/tmp/local-repo.sh download_driver_package_deps && \
6265
/tmp/local-repo.sh build_local_apt_repo && \
66+
/tmp/local-repo.sh fetch_nvidia_installer && \
6367
# Remove cuda repository to avoid GPG errors
6468
rm -f /etc/apt/sources.list.d/cuda*
6569

ubuntu22.04/precompiled/local-repo.sh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
set -eu
44

55
LOCAL_REPO_DIR=/usr/local/repos
6+
DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64}
7+
DRIVER_RUN_FILE=NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION
68

79
download_apt_with_dep () {
810
local package="$1"
@@ -40,10 +42,21 @@ build_local_apt_repo () {
4042
apt-get update
4143
}
4244

45+
# fetch_nvidia_installer downloads the NVIDIA driver .run package for
# DRIVER_VERSION, unpacks it, and keeps only the bundled nvidia-installer
# binary, which is moved into /usr/bin/. The downloaded archive and the
# unpacked tree are deleted afterwards.
#
# Globals read: BASE_URL, DRIVER_VERSION, DRIVER_RUN_FILE
# Side effects: creates and removes files in the current working directory and
#               writes /usr/bin/nvidia-installer.
fetch_nvidia_installer () {
    local run_file="${DRIVER_RUN_FILE}.run"

    # -L (follow redirects) replaces the original lowercase -l (--list-only),
    # which is an FTP/POP3 listing option and does nothing useful for this
    # HTTPS download.
    curl -fsSL -O "${BASE_URL}/${DRIVER_VERSION}/${run_file}"
    chmod +x "${run_file}"
    # Unpack the archive; the next step relies on this creating ./${DRIVER_RUN_FILE}.
    sh "${run_file}" -x
    mv "${DRIVER_RUN_FILE}/nvidia-installer" /usr/bin/
    # ${VAR:?} aborts instead of expanding to an empty path if the variable is empty.
    rm -rf -- "${DRIVER_RUN_FILE:?}"
    rm -- "${run_file}"
}
53+
4354
if [ "$1" = "download_driver_package_deps" ]; then
4455
download_driver_package_deps
4556
elif [ "$1" = "build_local_apt_repo" ]; then
4657
build_local_apt_repo
58+
elif [ "$1" = "fetch_nvidia_installer" ]; then
59+
fetch_nvidia_installer
4760
else
4861
echo "Unknown function: $1"
4962
exit 1

ubuntu22.04/precompiled/nvidia-driver

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
set -eu
55

66
KERNEL_VERSION=$(uname -r)
7-
OPEN_KERNEL_MODULES_ENABLED="${OPEN_KERNEL_MODULES_ENABLED:-false}"
87
RUN_DIR=/run/nvidia
98
PID_FILE=${RUN_DIR}/${0##*/}.pid
109
DRIVER_BRANCH=${DRIVER_BRANCH:?"Missing driver version"}
@@ -14,6 +13,8 @@ NVIDIA_MODULE_PARAMS=()
1413
NVIDIA_UVM_MODULE_PARAMS=()
1514
NVIDIA_MODESET_MODULE_PARAMS=()
1615
NVIDIA_PEERMEM_MODULE_PARAMS=()
16+
TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"}
17+
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}
1718

1819
_update_package_cache() {
1920
if [ "${PACKAGE_TAG:-}" != "builtin" ]; then
@@ -232,6 +233,36 @@ _unload_driver() {
232233
return 0
233234
}
234235

236+
# Picks KERNEL_TYPE from the numeric driver branch alone: branches below 560
# select 'kernel', everything else selects 'kernel-open'.
#
# Globals: DRIVER_BRANCH (read), KERNEL_TYPE (written)
_resolve_kernel_type_from_driver_branch() {
    if [[ "${DRIVER_BRANCH}" -lt 560 ]]; then
        KERNEL_TYPE=kernel
    else
        KERNEL_TYPE=kernel-open
    fi
}
239+
240+
# _resolve_kernel_type determines which kernel module type, open or proprietary, to install,
# and records the result in KERNEL_TYPE ('kernel-open' or 'kernel' respectively).
#
# KERNEL_MODULE_TYPE is the frontend interface that users can use to configure which module
# to install. Valid values for KERNEL_MODULE_TYPE are 'auto' (default), 'open', and 'proprietary'.
# When 'auto' is configured, we use the nvidia-installer to recommend the module type to install.
# If nvidia-installer cannot be queried, the decision falls back to
# _resolve_kernel_type_from_driver_branch.
#
# This function assumes that the nvidia-installer binary is in the PATH.
#
# Globals: KERNEL_MODULE_TYPE (read), KERNEL_TYPE (written)
# Returns: 0 on success, 1 if KERNEL_MODULE_TYPE holds an unsupported value.
_resolve_kernel_type() {
    local recommended

    case "${KERNEL_MODULE_TYPE}" in
        proprietary)
            KERNEL_TYPE=kernel
            ;;
        open)
            KERNEL_TYPE=kernel-open
            ;;
        auto)
            # Test the command substitution directly: with 'set -e' active, a plain
            # assignment followed by a '$?' check would abort the script before the
            # fallback could run whenever this function is called outside a
            # conditional context.
            if ! recommended=$(nvidia-installer --print-recommended-kernel-module-type 2> /dev/null); then
                echo "failed to retrieve the recommended kernel module type from nvidia-installer, falling back to using the driver branch"
                _resolve_kernel_type_from_driver_branch
                return 0
            fi
            # Anything other than an explicit "open" recommendation selects the proprietary modules.
            if [[ "${recommended}" == "open" ]]; then
                KERNEL_TYPE=kernel-open
            else
                KERNEL_TYPE=kernel
            fi
            ;;
        *)
            echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
            return 1
            ;;
    esac
}
265+
235266
# Link and install the kernel modules from precompiled packages
236267
_install_driver() {
237268
# Install necessary driver userspace packages
@@ -244,7 +275,7 @@ _install_driver() {
244275
libnvidia-fbc1-${DRIVER_BRANCH}-server
245276

246277
# Now install the precompiled kernel module packages signed by Canonical
247-
if [ "$OPEN_KERNEL_MODULES_ENABLED" = true ]; then
278+
if [ "$KERNEL_TYPE" = "kernel-open" ]; then
248279
echo "Installing Open NVIDIA driver kernel modules..."
249280
apt-get install --no-install-recommends -y \
250281
linux-signatures-nvidia-${KERNEL_VERSION} \
@@ -276,6 +307,9 @@ _unmount_rootfs() {
276307
}
277308

278309
init() {
310+
# Determine the kernel module type
311+
_resolve_kernel_type || exit 1
312+
279313
echo -e "\n========== NVIDIA Software Installer ==========\n"
280314
echo -e "Starting installation of NVIDIA driver branch ${DRIVER_BRANCH} for Linux kernel version ${KERNEL_VERSION}\n"
281315

ubuntu24.04/precompiled/Dockerfile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ FROM nvcr.io/nvidia/cuda:12.8.1-base-ubuntu24.04
22

33
ENV DEBIAN_FRONTEND=noninteractive
44

5+
ARG BASE_URL=https://us.download.nvidia.com/tesla
6+
ARG TARGETARCH
7+
ENV TARGETARCH=$TARGETARCH
58
ARG DRIVER_BRANCH=550
69
ENV DRIVER_BRANCH=$DRIVER_BRANCH
710
ARG DRIVER_VERSION=550.90.12
@@ -55,6 +58,7 @@ ADD local-repo.sh /tmp
5558
RUN mkdir -p /usr/local/repos && \
5659
/tmp/local-repo.sh download_driver_package_deps && \
5760
/tmp/local-repo.sh build_local_apt_repo && \
61+
/tmp/local-repo.sh fetch_nvidia_installer && \
5862
# Remove all other ubuntu apt sources to ensure we only pull from the local apt repo
5963
rm /etc/apt/sources.list.d/*
6064

ubuntu24.04/precompiled/local-repo.sh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
set -eu
44

55
LOCAL_REPO_DIR=/usr/local/repos
6+
DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64}
7+
DRIVER_RUN_FILE=NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION
68

79
download_apt_with_dep () {
810
local package="$1"
@@ -40,10 +42,21 @@ build_local_apt_repo () {
4042
apt-get update
4143
}
4244

45+
# fetch_nvidia_installer downloads the NVIDIA driver .run package for
# DRIVER_VERSION, unpacks it, and keeps only the bundled nvidia-installer
# binary, which is moved into /usr/bin/. The downloaded archive and the
# unpacked tree are deleted afterwards.
#
# Globals read: BASE_URL, DRIVER_VERSION, DRIVER_RUN_FILE
# Side effects: creates and removes files in the current working directory and
#               writes /usr/bin/nvidia-installer.
fetch_nvidia_installer () {
    local run_file="${DRIVER_RUN_FILE}.run"

    # -L (follow redirects) replaces the original lowercase -l (--list-only),
    # which is an FTP/POP3 listing option and does nothing useful for this
    # HTTPS download.
    curl -fsSL -O "${BASE_URL}/${DRIVER_VERSION}/${run_file}"
    chmod +x "${run_file}"
    # Unpack the archive; the next step relies on this creating ./${DRIVER_RUN_FILE}.
    sh "${run_file}" -x
    mv "${DRIVER_RUN_FILE}/nvidia-installer" /usr/bin/
    # ${VAR:?} aborts instead of expanding to an empty path if the variable is empty.
    rm -rf -- "${DRIVER_RUN_FILE:?}"
    rm -- "${run_file}"
}
53+
4354
if [ "$1" = "download_driver_package_deps" ]; then
4455
download_driver_package_deps
4556
elif [ "$1" = "build_local_apt_repo" ]; then
4657
build_local_apt_repo
58+
elif [ "$1" = "fetch_nvidia_installer" ]; then
59+
fetch_nvidia_installer
4760
else
4861
echo "Unknown function: $1"
4962
exit 1

ubuntu24.04/precompiled/nvidia-driver

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
set -eu
55

66
KERNEL_VERSION=$(uname -r)
7-
OPEN_KERNEL_MODULES_ENABLED="${OPEN_KERNEL_MODULES_ENABLED:-false}"
87
RUN_DIR=/run/nvidia
98
PID_FILE=${RUN_DIR}/${0##*/}.pid
109
DRIVER_BRANCH=${DRIVER_BRANCH:?"Missing driver version"}
@@ -14,6 +13,8 @@ NVIDIA_MODULE_PARAMS=()
1413
NVIDIA_UVM_MODULE_PARAMS=()
1514
NVIDIA_MODESET_MODULE_PARAMS=()
1615
NVIDIA_PEERMEM_MODULE_PARAMS=()
16+
TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"}
17+
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}
1718

1819
_update_package_cache() {
1920
if [ "${PACKAGE_TAG:-}" != "builtin" ]; then
@@ -232,6 +233,36 @@ _unload_driver() {
232233
return 0
233234
}
234235

236+
# Picks KERNEL_TYPE from the numeric driver branch alone: branches below 560
# select 'kernel', everything else selects 'kernel-open'.
#
# Globals: DRIVER_BRANCH (read), KERNEL_TYPE (written)
_resolve_kernel_type_from_driver_branch() {
    if [[ "${DRIVER_BRANCH}" -lt 560 ]]; then
        KERNEL_TYPE=kernel
    else
        KERNEL_TYPE=kernel-open
    fi
}
239+
240+
# _resolve_kernel_type determines which kernel module type, open or proprietary, to install,
# and records the result in KERNEL_TYPE ('kernel-open' or 'kernel' respectively).
#
# KERNEL_MODULE_TYPE is the frontend interface that users can use to configure which module
# to install. Valid values for KERNEL_MODULE_TYPE are 'auto' (default), 'open', and 'proprietary'.
# When 'auto' is configured, we use the nvidia-installer to recommend the module type to install.
# If nvidia-installer cannot be queried, the decision falls back to
# _resolve_kernel_type_from_driver_branch.
#
# This function assumes that the nvidia-installer binary is in the PATH.
#
# Globals: KERNEL_MODULE_TYPE (read), KERNEL_TYPE (written)
# Returns: 0 on success, 1 if KERNEL_MODULE_TYPE holds an unsupported value.
_resolve_kernel_type() {
    local recommended

    case "${KERNEL_MODULE_TYPE}" in
        proprietary)
            KERNEL_TYPE=kernel
            ;;
        open)
            KERNEL_TYPE=kernel-open
            ;;
        auto)
            # Test the command substitution directly: with 'set -e' active, a plain
            # assignment followed by a '$?' check would abort the script before the
            # fallback could run whenever this function is called outside a
            # conditional context.
            if ! recommended=$(nvidia-installer --print-recommended-kernel-module-type 2> /dev/null); then
                echo "failed to retrieve the recommended kernel module type from nvidia-installer, falling back to using the driver branch"
                _resolve_kernel_type_from_driver_branch
                return 0
            fi
            # Anything other than an explicit "open" recommendation selects the proprietary modules.
            if [[ "${recommended}" == "open" ]]; then
                KERNEL_TYPE=kernel-open
            else
                KERNEL_TYPE=kernel
            fi
            ;;
        *)
            echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
            return 1
            ;;
    esac
}
265+
235266
# Link and install the kernel modules from precompiled packages
236267
_install_driver() {
237268
# Install necessary driver userspace packages
@@ -244,7 +275,7 @@ _install_driver() {
244275
libnvidia-fbc1-${DRIVER_BRANCH}-server
245276

246277
# Now install the precompiled kernel module packages signed by Canonical
247-
if [ "$OPEN_KERNEL_MODULES_ENABLED" = true ]; then
278+
if [ "$KERNEL_TYPE" = "kernel-open" ]; then
248279
echo "Installing Open NVIDIA driver kernel modules..."
249280
apt-get install --no-install-recommends -y \
250281
linux-signatures-nvidia-${KERNEL_VERSION} \
@@ -276,6 +307,9 @@ _unmount_rootfs() {
276307
}
277308

278309
init() {
310+
# Determine the kernel module type
311+
_resolve_kernel_type || exit 1
312+
279313
echo -e "\n========== NVIDIA Software Installer ==========\n"
280314
echo -e "Starting installation of NVIDIA driver branch ${DRIVER_BRANCH} for Linux kernel version ${KERNEL_VERSION}\n"
281315

0 commit comments

Comments
 (0)