Skip to content

Commit 8e414e2

Browse files
authored
Merge pull request #199 from NVIDIA/resolve-kernel-module-type
Add KERNEL_MODULE_TYPE envvar to driver container
2 parents fe65aa7 + ec13669 commit 8e414e2

File tree

6 files changed

+223
-31
lines changed

6 files changed

+223
-31
lines changed

rhel8/Dockerfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ SHELL ["/bin/bash", "-c"]
3232
ARG BASE_URL=https://us.download.nvidia.com/tesla
3333
ARG DRIVER_VERSION
3434
ENV DRIVER_VERSION=$DRIVER_VERSION
35+
ARG DRIVER_BRANCH
36+
ENV DRIVER_BRANCH=$DRIVER_BRANCH
3537

3638
# Arg to indicate if driver type is either of passthrough/baremetal or vgpu
3739
ARG DRIVER_TYPE=passthrough

rhel8/nvidia-driver

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ DNF_RELEASEVER=${DNF_RELEASEVER:-""}
1818
RHEL_VERSION=${RHEL_VERSION:-""}
1919
RHEL_MAJOR_VERSION=8
2020

21-
OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false}
22-
[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel
21+
OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-}
22+
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-}
2323

2424
DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64}
2525
echo "DRIVER_ARCH is $DRIVER_ARCH"
@@ -535,6 +535,43 @@ _shutdown() {
535535
return 1
536536
}
537537

538+
# _resolve_kernel_type determines which kernel module type, open or proprietary, to install
# and records the matching source directory name in KERNEL_TYPE ('kernel-open' or 'kernel').
# This function assumes that the nvidia-installer binary is in the PATH, so it should only
# be invoked after the userspace driver components have been installed.
#
# KERNEL_MODULE_TYPE is the frontend interface that users can use to configure which module
# to install. Valid values are 'auto', 'open', and 'proprietary'. When 'auto' is configured,
# nvidia-installer is asked to recommend the module type; if that query fails, the driver
# branch decides instead (see _resolve_kernel_type_from_driver_branch).
#
# When KERNEL_MODULE_TYPE is unset or empty, the deprecated OPEN_KERNEL_MODULES_ENABLED
# flag is honored instead, for backwards compatibility with older GPU Operator versions.
#
# Globals:
#   KERNEL_MODULE_TYPE, OPEN_KERNEL_MODULES_ENABLED (read)
#   KERNEL_TYPE (written)
# Returns:
#   0 on success, 1 if KERNEL_MODULE_TYPE holds an unsupported value
_resolve_kernel_type() {
    # Local so the installer's answer does not leak into the script's global variables.
    local recommended_type

    case "${KERNEL_MODULE_TYPE:-}" in
        "")
            # Legacy path: only the literal string "true" selects the open modules.
            if [[ "${OPEN_KERNEL_MODULES_ENABLED:-}" == "true" ]]; then
                KERNEL_TYPE=kernel-open
            else
                KERNEL_TYPE=kernel
            fi
            ;;
        proprietary)
            KERNEL_TYPE=kernel
            ;;
        open)
            KERNEL_TYPE=kernel-open
            ;;
        auto)
            # Test the command substitution directly instead of inspecting $? afterwards:
            # this keeps the fallback reachable even when errexit is active.
            if ! recommended_type=$(nvidia-installer --print-recommended-kernel-module-type); then
                echo "failed to retrieve the recommended kernel module type from nvidia-installer, falling back to using the driver branch"
                _resolve_kernel_type_from_driver_branch
                return 0
            fi
            # Any answer other than "open" is treated as a recommendation for the
            # proprietary modules.
            if [[ "${recommended_type}" == "open" ]]; then
                KERNEL_TYPE=kernel-open
            else
                KERNEL_TYPE=kernel
            fi
            ;;
        *)
            echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
            return 1
            ;;
    esac

    return 0
}
570+
571+
# _resolve_kernel_type_from_driver_branch picks KERNEL_TYPE from the driver branch alone:
# branches below 560 get the proprietary modules ('kernel'), branch 560 and newer get the
# open modules ('kernel-open').
#
# DRIVER_BRANCH is expected to be a decimal branch number such as 550. A dotted version
# string such as 560.35.03 is also accepted; its leading component is used. An unset, empty
# or otherwise malformed value selects the proprietary modules and logs a warning.
#
# Globals:
#   DRIVER_BRANCH (read)
#   KERNEL_TYPE (written)
# Returns:
#   always 0
_resolve_kernel_type_from_driver_branch() {
    # ":-" keeps this usable under "set -u" when DRIVER_BRANCH was never exported.
    local branch="${DRIVER_BRANCH:-}"
    local -r version_re='^([0-9]+)(\.[0-9]+)*$'

    # Validate before comparing: arithmetic tests on an arbitrary string would either
    # error out or evaluate it as an expression.
    if [[ ! "${branch}" =~ ${version_re} ]]; then
        echo "DRIVER_BRANCH '${branch}' is not a valid driver branch number, defaulting to the proprietary kernel modules" >&2
        KERNEL_TYPE=kernel
        return 0
    fi

    # The 10# prefix forces base 10 so a leading zero is not parsed as octal.
    if (( 10#${BASH_REMATCH[1]} < 560 )); then
        KERNEL_TYPE=kernel
    else
        KERNEL_TYPE=kernel-open
    fi
    return 0
}
574+
538575
_find_vgpu_driver_version() {
539576
local count=""
540577
local version=""
@@ -582,11 +619,16 @@ _prepare() {
582619
_find_vgpu_driver_version || exit 1
583620
fi
584621

585-
# Install the userspace components and copy the kernel module sources.
622+
# Install the userspace components
586623
sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \
587624
cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
588-
sh /tmp/install.sh nvinstall && \
589-
mkdir -p /usr/src/nvidia-$DRIVER_VERSION && \
625+
sh /tmp/install.sh nvinstall
626+
627+
# Determine the kernel module type
628+
_resolve_kernel_type || exit 1
629+
630+
# Copy the kernel module sources
631+
mkdir -p /usr/src/nvidia-$DRIVER_VERSION && \
590632
mv LICENSE mkprecompiled ${KERNEL_TYPE} /usr/src/nvidia-$DRIVER_VERSION && \
591633
sed '9,${/^\(kernel\|LICENSE\)/!d}' .manifest > /usr/src/nvidia-$DRIVER_VERSION/.manifest
592634

@@ -674,12 +716,15 @@ update() {
674716
# vgpu driver version is chosen dynamically during runtime, so pre-compile modules for
675717
# only non-vgpu driver types
676718
if [ "${DRIVER_TYPE}" != "vgpu" ]; then
677-
# Install the userspace components and copy the kernel module sources.
678719
if [ ! -e /usr/src/nvidia-${DRIVER_VERSION}/mkprecompiled ]; then
720+
# Install the userspace components
679721
sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \
680722
cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
681-
sh /tmp/install.sh nvinstall && \
682-
mkdir -p /usr/src/nvidia-$DRIVER_VERSION && \
723+
sh /tmp/install.sh nvinstall
724+
# Determine the kernel module type
725+
_resolve_kernel_type || exit 1
726+
# Copy the kernel module sources
727+
mkdir -p /usr/src/nvidia-$DRIVER_VERSION && \
683728
mv LICENSE mkprecompiled ${KERNEL_TYPE} /usr/src/nvidia-$DRIVER_VERSION && \
684729
sed '9,${/^\(kernel\|LICENSE\)/!d}' .manifest > /usr/src/nvidia-$DRIVER_VERSION/.manifest
685730
fi
@@ -694,6 +739,8 @@ update() {
694739
_resolve_kernel_version || exit 1
695740
_install_prerequisites
696741
if _kernel_requires_package; then
742+
# ensure KERNEL_TYPE is set before compiling kernel modules
743+
[[ -n "${KERNEL_TYPE}" ]] || _resolve_kernel_type || exit 1
697744
_create_driver_package
698745
fi
699746
_remove_prerequisites

rhel9/Dockerfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ SHELL ["/bin/bash", "-c"]
3232
ARG BASE_URL=https://us.download.nvidia.com/tesla
3333
ARG DRIVER_VERSION
3434
ENV DRIVER_VERSION=$DRIVER_VERSION
35+
ARG DRIVER_BRANCH
36+
ENV DRIVER_BRANCH=$DRIVER_BRANCH
3537

3638
# Arg to indicate if driver type is either of passthrough/baremetal or vgpu
3739
ARG DRIVER_TYPE=passthrough

rhel9/nvidia-driver

Lines changed: 54 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ DNF_RELEASEVER=${DNF_RELEASEVER:-""}
1818
RHEL_VERSION=${RHEL_VERSION:-""}
1919
RHEL_MAJOR_VERSION=9
2020

21-
OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false}
22-
[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel
21+
OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-}
22+
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-}
2323

2424
DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64}
2525
echo "DRIVER_ARCH is $DRIVER_ARCH"
@@ -535,6 +535,43 @@ _shutdown() {
535535
return 1
536536
}
537537

538+
# _resolve_kernel_type determines which kernel module type, open or proprietary, to install
# and records the matching source directory name in KERNEL_TYPE ('kernel-open' or 'kernel').
# This function assumes that the nvidia-installer binary is in the PATH, so it should only
# be invoked after the userspace driver components have been installed.
#
# KERNEL_MODULE_TYPE is the frontend interface that users can use to configure which module
# to install. Valid values are 'auto', 'open', and 'proprietary'. When 'auto' is configured,
# nvidia-installer is asked to recommend the module type; if that query fails, the driver
# branch decides instead (see _resolve_kernel_type_from_driver_branch).
#
# When KERNEL_MODULE_TYPE is unset or empty, the deprecated OPEN_KERNEL_MODULES_ENABLED
# flag is honored instead, for backwards compatibility with older GPU Operator versions.
#
# Globals:
#   KERNEL_MODULE_TYPE, OPEN_KERNEL_MODULES_ENABLED (read)
#   KERNEL_TYPE (written)
# Returns:
#   0 on success, 1 if KERNEL_MODULE_TYPE holds an unsupported value
_resolve_kernel_type() {
    # Local so the installer's answer does not leak into the script's global variables.
    local recommended_type

    case "${KERNEL_MODULE_TYPE:-}" in
        "")
            # Legacy path: only the literal string "true" selects the open modules.
            if [[ "${OPEN_KERNEL_MODULES_ENABLED:-}" == "true" ]]; then
                KERNEL_TYPE=kernel-open
            else
                KERNEL_TYPE=kernel
            fi
            ;;
        proprietary)
            KERNEL_TYPE=kernel
            ;;
        open)
            KERNEL_TYPE=kernel-open
            ;;
        auto)
            # Test the command substitution directly instead of inspecting $? afterwards:
            # this keeps the fallback reachable even when errexit is active.
            if ! recommended_type=$(nvidia-installer --print-recommended-kernel-module-type); then
                echo "failed to retrieve the recommended kernel module type from nvidia-installer, falling back to using the driver branch"
                _resolve_kernel_type_from_driver_branch
                return 0
            fi
            # Any answer other than "open" is treated as a recommendation for the
            # proprietary modules.
            if [[ "${recommended_type}" == "open" ]]; then
                KERNEL_TYPE=kernel-open
            else
                KERNEL_TYPE=kernel
            fi
            ;;
        *)
            echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
            return 1
            ;;
    esac

    return 0
}
570+
571+
# _resolve_kernel_type_from_driver_branch picks KERNEL_TYPE from the driver branch alone:
# branches below 560 get the proprietary modules ('kernel'), branch 560 and newer get the
# open modules ('kernel-open').
#
# DRIVER_BRANCH is expected to be a decimal branch number such as 550. A dotted version
# string such as 560.35.03 is also accepted; its leading component is used. An unset, empty
# or otherwise malformed value selects the proprietary modules and logs a warning.
#
# Globals:
#   DRIVER_BRANCH (read)
#   KERNEL_TYPE (written)
# Returns:
#   always 0
_resolve_kernel_type_from_driver_branch() {
    # ":-" keeps this usable under "set -u" when DRIVER_BRANCH was never exported.
    local branch="${DRIVER_BRANCH:-}"
    local -r version_re='^([0-9]+)(\.[0-9]+)*$'

    # Validate before comparing: arithmetic tests on an arbitrary string would either
    # error out or evaluate it as an expression.
    if [[ ! "${branch}" =~ ${version_re} ]]; then
        echo "DRIVER_BRANCH '${branch}' is not a valid driver branch number, defaulting to the proprietary kernel modules" >&2
        KERNEL_TYPE=kernel
        return 0
    fi

    # The 10# prefix forces base 10 so a leading zero is not parsed as octal.
    if (( 10#${BASH_REMATCH[1]} < 560 )); then
        KERNEL_TYPE=kernel
    else
        KERNEL_TYPE=kernel-open
    fi
    return 0
}
574+
538575
_find_vgpu_driver_version() {
539576
local count=""
540577
local version=""
@@ -585,8 +622,13 @@ _prepare() {
585622
# Install the userspace components
586623
sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \
587624
cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
588-
sh /tmp/install.sh nvinstall && \
589-
mkdir -p /usr/src/nvidia-$DRIVER_VERSION && \
625+
sh /tmp/install.sh nvinstall
626+
627+
# Determine the kernel module type
628+
_resolve_kernel_type || exit 1
629+
630+
# Copy the kernel module sources
631+
mkdir -p /usr/src/nvidia-$DRIVER_VERSION && \
590632
mv LICENSE mkprecompiled ${KERNEL_TYPE} /usr/src/nvidia-$DRIVER_VERSION && \
591633
sed '9,${/^\(kernel\|LICENSE\)/!d}' .manifest > /usr/src/nvidia-$DRIVER_VERSION/.manifest
592634

@@ -674,12 +716,15 @@ update() {
674716
# vgpu driver version is chosen dynamically during runtime, so pre-compile modules for
675717
# only non-vgpu driver types
676718
if [ "${DRIVER_TYPE}" != "vgpu" ]; then
677-
# Install the userspace components and copy the kernel module sources.
678719
if [ ! -e /usr/src/nvidia-${DRIVER_VERSION}/mkprecompiled ]; then
720+
# Install the userspace components
679721
sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \
680722
cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
681-
sh /tmp/install.sh nvinstall && \
682-
mkdir -p /usr/src/nvidia-$DRIVER_VERSION && \
723+
sh /tmp/install.sh nvinstall
724+
# Determine the kernel module type
725+
_resolve_kernel_type || exit 1
726+
# Copy the kernel module sources
727+
mkdir -p /usr/src/nvidia-$DRIVER_VERSION && \
683728
mv LICENSE mkprecompiled ${KERNEL_TYPE} /usr/src/nvidia-$DRIVER_VERSION && \
684729
sed '9,${/^\(kernel\|LICENSE\)/!d}' .manifest > /usr/src/nvidia-$DRIVER_VERSION/.manifest
685730
fi
@@ -694,6 +739,8 @@ update() {
694739
_resolve_kernel_version || exit 1
695740
_install_prerequisites
696741
if _kernel_requires_package; then
742+
# ensure KERNEL_TYPE is set before compiling kernel modules
743+
[[ -n "${KERNEL_TYPE}" ]] || _resolve_kernel_type || exit 1
697744
_create_driver_package
698745
fi
699746
_remove_prerequisites

ubuntu20.04/nvidia-driver

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ NVIDIA_MODESET_MODULE_PARAMS=()
1616
NVIDIA_PEERMEM_MODULE_PARAMS=()
1717
TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"}
1818

19-
OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false}
20-
[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel
19+
OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-}
20+
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-}
2121

2222
export DEBIAN_FRONTEND=noninteractive
2323

@@ -477,6 +477,43 @@ _shutdown() {
477477
return 1
478478
}
479479

480+
# _resolve_kernel_type determines which kernel module type, open or proprietary, to install
# and records the matching source directory name in KERNEL_TYPE ('kernel-open' or 'kernel').
# This function assumes that the nvidia-installer binary is in the PATH, so it should only
# be invoked after the userspace driver components have been installed.
#
# KERNEL_MODULE_TYPE is the frontend interface that users can use to configure which module
# to install. Valid values are 'auto', 'open', and 'proprietary'. When 'auto' is configured,
# nvidia-installer is asked to recommend the module type; if that query fails, the driver
# branch decides instead (see _resolve_kernel_type_from_driver_branch).
#
# When KERNEL_MODULE_TYPE is unset or empty, the deprecated OPEN_KERNEL_MODULES_ENABLED
# flag is honored instead, for backwards compatibility with older GPU Operator versions.
#
# Globals:
#   KERNEL_MODULE_TYPE, OPEN_KERNEL_MODULES_ENABLED (read)
#   KERNEL_TYPE (written)
# Returns:
#   0 on success, 1 if KERNEL_MODULE_TYPE holds an unsupported value
_resolve_kernel_type() {
    # Local so the installer's answer does not leak into the script's global variables.
    local recommended_type

    case "${KERNEL_MODULE_TYPE:-}" in
        "")
            # Legacy path: only the literal string "true" selects the open modules.
            if [[ "${OPEN_KERNEL_MODULES_ENABLED:-}" == "true" ]]; then
                KERNEL_TYPE=kernel-open
            else
                KERNEL_TYPE=kernel
            fi
            ;;
        proprietary)
            KERNEL_TYPE=kernel
            ;;
        open)
            KERNEL_TYPE=kernel-open
            ;;
        auto)
            # Test the command substitution directly instead of inspecting $? afterwards:
            # this keeps the fallback reachable even when errexit is active.
            if ! recommended_type=$(nvidia-installer --print-recommended-kernel-module-type); then
                echo "failed to retrieve the recommended kernel module type from nvidia-installer, falling back to using the driver branch"
                _resolve_kernel_type_from_driver_branch
                return 0
            fi
            # Any answer other than "open" is treated as a recommendation for the
            # proprietary modules.
            if [[ "${recommended_type}" == "open" ]]; then
                KERNEL_TYPE=kernel-open
            else
                KERNEL_TYPE=kernel
            fi
            ;;
        *)
            echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
            return 1
            ;;
    esac

    return 0
}
512+
513+
# _resolve_kernel_type_from_driver_branch picks KERNEL_TYPE from the driver branch alone:
# branches below 560 get the proprietary modules ('kernel'), branch 560 and newer get the
# open modules ('kernel-open').
#
# DRIVER_BRANCH is expected to be a decimal branch number such as 550. A dotted version
# string such as 560.35.03 is also accepted; its leading component is used. An unset, empty
# or otherwise malformed value selects the proprietary modules and logs a warning.
#
# Globals:
#   DRIVER_BRANCH (read)
#   KERNEL_TYPE (written)
# Returns:
#   always 0
_resolve_kernel_type_from_driver_branch() {
    # ":-" keeps this usable under "set -u" when DRIVER_BRANCH was never exported.
    local branch="${DRIVER_BRANCH:-}"
    local -r version_re='^([0-9]+)(\.[0-9]+)*$'

    # Validate before comparing: arithmetic tests on an arbitrary string would either
    # error out or evaluate it as an expression.
    if [[ ! "${branch}" =~ ${version_re} ]]; then
        echo "DRIVER_BRANCH '${branch}' is not a valid driver branch number, defaulting to the proprietary kernel modules" >&2
        KERNEL_TYPE=kernel
        return 0
    fi

    # The 10# prefix forces base 10 so a leading zero is not parsed as octal.
    if (( 10#${BASH_REMATCH[1]} < 560 )); then
        KERNEL_TYPE=kernel
    else
        KERNEL_TYPE=kernel-open
    fi
    return 0
}
516+
480517
_find_vgpu_driver_version() {
481518
local count=""
482519
local version=""
@@ -520,7 +557,7 @@ init() {
520557
_find_vgpu_driver_version || exit 1
521558
fi
522559

523-
# Install the userspace components and copy the kernel module sources.
560+
# Install the userspace components
524561
sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \
525562
cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
526563
./nvidia-installer --silent \
@@ -535,8 +572,13 @@ init() {
535572
--x-prefix=/tmp/null \
536573
--x-module-path=/tmp/null \
537574
--x-library-path=/tmp/null \
538-
--x-sysconfig-path=/tmp/null && \
539-
mkdir -p /usr/src/nvidia-${DRIVER_VERSION} && \
575+
--x-sysconfig-path=/tmp/null
576+
577+
# Determine the kernel module type
578+
_resolve_kernel_type || exit 1
579+
580+
# Copy the kernel module sources
581+
mkdir -p /usr/src/nvidia-${DRIVER_VERSION} && \
540582
mv LICENSE mkprecompiled ${KERNEL_TYPE} /usr/src/nvidia-${DRIVER_VERSION} && \
541583
sed '9,${/^\(kernel\|LICENSE\)/!d}' .manifest > /usr/src/nvidia-${DRIVER_VERSION}/.manifest
542584

@@ -595,8 +637,8 @@ update() {
595637
# vgpu driver version is chosen dynamically during runtime, so pre-compile modules for
596638
# only non-vgpu driver types
597639
if [ "${DRIVER_TYPE}" != "vgpu" ]; then
598-
# Install the userspace components and copy the kernel module sources.
599640
if [ ! -e /usr/src/nvidia-${DRIVER_VERSION}/mkprecompiled ]; then
641+
# Install the userspace components
600642
sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \
601643
cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
602644
./nvidia-installer --silent \
@@ -611,8 +653,11 @@ update() {
611653
--x-prefix=/tmp/null \
612654
--x-module-path=/tmp/null \
613655
--x-library-path=/tmp/null \
614-
--x-sysconfig-path=/tmp/null && \
615-
mkdir -p /usr/src/nvidia-${DRIVER_VERSION} && \
656+
--x-sysconfig-path=/tmp/null
657+
# Determine the kernel module type
658+
_resolve_kernel_type || exit 1
659+
# Copy the kernel module sources
660+
mkdir -p /usr/src/nvidia-${DRIVER_VERSION} && \
616661
mv LICENSE mkprecompiled ${KERNEL_TYPE} /usr/src/nvidia-${DRIVER_VERSION} && \
617662
sed '9,${/^\(kernel\|LICENSE\)/!d}' .manifest > /usr/src/nvidia-${DRIVER_VERSION}/.manifest
618663
fi
@@ -627,6 +672,8 @@ update() {
627672
_resolve_kernel_version || exit 1
628673
_install_prerequisites
629674
if _kernel_requires_package; then
675+
# ensure KERNEL_TYPE is set before compiling kernel modules
676+
[[ -n "${KERNEL_TYPE}" ]] || _resolve_kernel_type || exit 1
630677
_create_driver_package
631678
fi
632679
_remove_prerequisites

0 commit comments

Comments
 (0)