Skip to content

Commit 3708e6a

Browse files
committed
Revert "integrate gpu-driver-util into the driver images"
This reverts commit 29fc943.
1 parent c6d19b5 commit 3708e6a

File tree

14 files changed

+27
-192
lines changed

14 files changed

+27
-192
lines changed

rhel8/Dockerfile

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,9 @@ ENV PATH /usr/local/go/bin:$PATH
1717
WORKDIR /work
1818

1919
RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \
20-
go build -C driver/vgpu/src -o vgpu-util && \
21-
mv driver/vgpu/src/vgpu-util /work && \
22-
go build -C driver/gpu-driver-util -o gpu-driver-util && \
23-
mv driver/gpu-driver-util/gpu-driver-util /work
20+
cd driver/vgpu/src && \
21+
go build -o vgpu-util && \
22+
mv vgpu-util /work
2423

2524
FROM nvcr.io/nvidia/cuda:12.6.3-base-ubi8
2625

@@ -37,8 +36,6 @@ ENV DRIVER_VERSION=$DRIVER_VERSION
3736
# Arg to indicate if driver type is either of passthrough/baremetal or vgpu
3837
ARG DRIVER_TYPE=passthrough
3938
ENV DRIVER_TYPE=$DRIVER_TYPE
40-
ARG DRIVER_BRANCH=550
41-
ENV DRIVER_BRANCH=$DRIVER_BRANCH
4239
ARG VGPU_LICENSE_SERVER_TYPE=NLS
4340
ENV VGPU_LICENSE_SERVER_TYPE=$VGPU_LICENSE_SERVER_TYPE
4441
# Enable vGPU version compatibility check by default
@@ -87,7 +84,6 @@ COPY ocp_dtk_entrypoint /usr/local/bin
8784
COPY common.sh /usr/local/bin
8885

8986
COPY --from=build /work/vgpu-util /usr/local/bin
90-
COPY --from=build /work/gpu-driver-util /usr/local/bin
9187

9288
WORKDIR /drivers
9389

rhel8/nvidia-driver

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ set -eu
66
RUN_DIR=/run/nvidia
77
PID_FILE=${RUN_DIR}/${0##*/}.pid
88
DRIVER_VERSION=${DRIVER_VERSION:?"Missing DRIVER_VERSION env"}
9-
DRIVER_BRANCH=${DRIVER_BRANCH:?"Missing DRIVER_BRANCH env"}
109
KERNEL_UPDATE_HOOK=/run/kernel/postinst.d/update-nvidia-driver
1110
NUM_VGPU_DEVICES=0
1211
NVIDIA_MODULE_PARAMS=()
@@ -18,7 +17,9 @@ USE_HOST_MOFED="${USE_HOST_MOFED:-false}"
1817
DNF_RELEASEVER=${DNF_RELEASEVER:-""}
1918
RHEL_VERSION=${RHEL_VERSION:-""}
2019
RHEL_MAJOR_VERSION=8
21-
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}
20+
21+
OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false}
22+
[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel
2223

2324
DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64}
2425
echo "DRIVER_ARCH is $DRIVER_ARCH"
@@ -576,24 +577,6 @@ _start_vgpu_topology_daemon() {
576577
nvidia-topologyd
577578
}
578579

579-
_resolve_kernel_type() {
580-
if [ "${KERNEL_MODULE_TYPE}" == "proprietary" ]; then
581-
KERNEL_TYPE=kernel
582-
elif [ "${KERNEL_MODULE_TYPE}" == "open" ]; then
583-
KERNEL_TYPE=kernel-open
584-
elif [ "${KERNEL_MODULE_TYPE}" == "auto" ]; then
585-
KERNEL_TYPE=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}")
586-
if [ $? -ne 0 ]; then
587-
echo "cannot autodetect the kernel module type, printing error logs from /var/log/gpu-driver-util.log..."
588-
tail -n 3 /var/log/gpu-driver-util.log
589-
return 1
590-
fi
591-
else
592-
echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
593-
return 1
594-
fi
595-
}
596-
597580
_prepare() {
598581
if [ "${DRIVER_TYPE}" = "vgpu" ]; then
599582
_find_vgpu_driver_version || exit 1
@@ -814,6 +797,5 @@ if [ $# -ne 0 ]; then
814797
fi
815798

816799
_resolve_rhel_version || exit 1
817-
_resolve_kernel_type || exit 1
818800

819801
$command

rhel8/ocp_dtk_entrypoint

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ nv-ctr-run-with-dtk() {
2525
/usr/local/bin/nvidia-driver \
2626
/usr/local/bin/common.sh \
2727
/usr/local/bin/extract-vmlinux \
28-
/usr/local/bin/gpu-driver-util \
2928
/usr/local/bin/vgpu-util \
3029
/drivers \
3130
/licenses \
@@ -137,7 +136,6 @@ dtk-build-driver() {
137136
"$DRIVER_TOOLKIT_SHARED_DIR/nvidia-driver" \
138137
"$DRIVER_TOOLKIT_SHARED_DIR/common.sh" \
139138
"$DRIVER_TOOLKIT_SHARED_DIR/extract-vmlinux" \
140-
"$DRIVER_TOOLKIT_SHARED_DIR/gpu-driver-util" \
141139
"$DRIVER_TOOLKIT_SHARED_DIR/vgpu-util" \
142140
"${DRIVER_TOOLKIT_SHARED_DIR}/bin"
143141

rhel9/Dockerfile

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,9 @@ ENV PATH /usr/local/go/bin:$PATH
1717
WORKDIR /work
1818

1919
RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \
20-
go build -C driver/vgpu/src -o vgpu-util && \
21-
mv driver/vgpu/src/vgpu-util /work && \
22-
go build -C driver/gpu-driver-util -o gpu-driver-util && \
23-
mv driver/gpu-driver-util/gpu-driver-util /work
20+
cd driver/vgpu/src && \
21+
go build -o vgpu-util && \
22+
mv vgpu-util /work
2423

2524
FROM nvcr.io/nvidia/cuda:12.6.3-base-ubi9
2625

@@ -37,8 +36,6 @@ ENV DRIVER_VERSION=$DRIVER_VERSION
3736
# Arg to indicate if driver type is either of passthrough/baremetal or vgpu
3837
ARG DRIVER_TYPE=passthrough
3938
ENV DRIVER_TYPE=$DRIVER_TYPE
40-
ARG DRIVER_BRANCH=550
41-
ENV DRIVER_BRANCH=$DRIVER_BRANCH
4239
ARG VGPU_LICENSE_SERVER_TYPE=NLS
4340
ENV VGPU_LICENSE_SERVER_TYPE=$VGPU_LICENSE_SERVER_TYPE
4441
# Enable vGPU version compatibility check by default
@@ -81,7 +78,6 @@ COPY ocp_dtk_entrypoint /usr/local/bin
8178
COPY common.sh /usr/local/bin
8279

8380
COPY --from=build /work/vgpu-util /usr/local/bin
84-
COPY --from=build /work/gpu-driver-util /usr/local/bin
8581

8682
WORKDIR /drivers
8783

rhel9/nvidia-driver

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ set -eu
66
RUN_DIR=/run/nvidia
77
PID_FILE=${RUN_DIR}/${0##*/}.pid
88
DRIVER_VERSION=${DRIVER_VERSION:?"Missing DRIVER_VERSION env"}
9-
DRIVER_BRANCH=${DRIVER_BRANCH:?"Missing DRIVER_BRANCH env"}
109
KERNEL_UPDATE_HOOK=/run/kernel/postinst.d/update-nvidia-driver
1110
NUM_VGPU_DEVICES=0
1211
NVIDIA_MODULE_PARAMS=()
@@ -18,7 +17,9 @@ USE_HOST_MOFED="${USE_HOST_MOFED:-false}"
1817
DNF_RELEASEVER=${DNF_RELEASEVER:-""}
1918
RHEL_VERSION=${RHEL_VERSION:-""}
2019
RHEL_MAJOR_VERSION=9
21-
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}
20+
21+
OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false}
22+
[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel
2223

2324
DRIVER_ARCH=${TARGETARCH/amd64/x86_64} && DRIVER_ARCH=${DRIVER_ARCH/arm64/aarch64}
2425
echo "DRIVER_ARCH is $DRIVER_ARCH"
@@ -570,24 +571,6 @@ _find_vgpu_driver_version() {
570571
return 0
571572
}
572573

573-
_resolve_kernel_type() {
574-
if [ "${KERNEL_MODULE_TYPE}" == "proprietary" ]; then
575-
KERNEL_TYPE=kernel
576-
elif [ "${KERNEL_MODULE_TYPE}" == "open" ]; then
577-
KERNEL_TYPE=kernel-open
578-
elif [ "${KERNEL_MODULE_TYPE}" == "auto" ]; then
579-
KERNEL_TYPE=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}")
580-
if [ $? -ne 0 ]; then
581-
echo "cannot autodetect the kernel module type, printing error logs from /var/log/gpu-driver-util.log..."
582-
tail -n 3 /var/log/gpu-driver-util.log
583-
return 1
584-
fi
585-
else
586-
echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
587-
return 1
588-
fi
589-
}
590-
591574
_start_vgpu_topology_daemon() {
592575
type nvidia-topologyd > /dev/null 2>&1 || return 0
593576
echo "Starting nvidia-topologyd.."
@@ -814,6 +797,5 @@ if [ $# -ne 0 ]; then
814797
fi
815798

816799
_resolve_rhel_version || exit 1
817-
_resolve_kernel_type || exit 1
818800

819801
$command

rhel9/ocp_dtk_entrypoint

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ nv-ctr-run-with-dtk() {
2525
/usr/local/bin/nvidia-driver \
2626
/usr/local/bin/common.sh \
2727
/usr/local/bin/extract-vmlinux \
28-
/usr/local/bin/gpu-driver-util \
2928
/usr/local/bin/vgpu-util \
3029
/drivers \
3130
/licenses \
@@ -137,7 +136,6 @@ dtk-build-driver() {
137136
"$DRIVER_TOOLKIT_SHARED_DIR/nvidia-driver" \
138137
"$DRIVER_TOOLKIT_SHARED_DIR/common.sh" \
139138
"$DRIVER_TOOLKIT_SHARED_DIR/extract-vmlinux" \
140-
"$DRIVER_TOOLKIT_SHARED_DIR/gpu-driver-util" \
141139
"$DRIVER_TOOLKIT_SHARED_DIR/vgpu-util" \
142140
"${DRIVER_TOOLKIT_SHARED_DIR}/bin"
143141

ubuntu20.04/Dockerfile

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,9 @@ ENV PATH /usr/local/go/bin:$PATH
2828
WORKDIR /work
2929

3030
RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \
31-
go build -C driver/vgpu/src -o vgpu-util && \
32-
mv driver/vgpu/src/vgpu-util /work && \
33-
go build -C driver/gpu-driver-util -o gpu-driver-util && \
34-
mv driver/gpu-driver-util/gpu-driver-util /work
31+
cd driver/vgpu/src && \
32+
go build -o vgpu-util && \
33+
mv vgpu-util /work
3534

3635
FROM nvcr.io/nvidia/cuda:12.6.3-base-ubuntu20.04
3736

@@ -73,7 +72,6 @@ RUN /tmp/install.sh reposetup && /tmp/install.sh depinstall && \
7372
COPY nvidia-driver /usr/local/bin
7473

7574
COPY --from=build /work/vgpu-util /usr/local/bin
76-
COPY --from=build /work/gpu-driver-util /usr/local/bin
7775

7876
ADD drivers drivers/
7977

ubuntu20.04/nvidia-driver

Lines changed: 2 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ NVIDIA_MODESET_MODULE_PARAMS=()
1616
NVIDIA_PEERMEM_MODULE_PARAMS=()
1717
TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"}
1818

19-
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}
19+
OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false}
20+
[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel
2021

2122
export DEBIAN_FRONTEND=noninteractive
2223

@@ -476,24 +477,6 @@ _shutdown() {
476477
return 1
477478
}
478479

479-
_resolve_kernel_type() {
480-
if [ "${KERNEL_MODULE_TYPE}" == "proprietary" ]; then
481-
KERNEL_TYPE=kernel
482-
elif [ "${KERNEL_MODULE_TYPE}" == "open" ]; then
483-
KERNEL_TYPE=kernel-open
484-
elif [ "${KERNEL_MODULE_TYPE}" == "auto" ]; then
485-
KERNEL_TYPE=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}")
486-
if [ $? -ne 0 ]; then
487-
echo "cannot autodetect the kernel module type, printing error logs from /var/log/gpu-driver-util.log..."
488-
tail -n 3 /var/log/gpu-driver-util.log
489-
return 1
490-
fi
491-
else
492-
echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
493-
return 1
494-
fi
495-
}
496-
497480
_find_vgpu_driver_version() {
498481
local count=""
499482
local version=""
@@ -537,8 +520,6 @@ init() {
537520
_find_vgpu_driver_version || exit 1
538521
fi
539522

540-
_resolve_kernel_type || exit 1
541-
542523
# Install the userspace components and copy the kernel module sources.
543524
sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \
544525
cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
@@ -611,8 +592,6 @@ update() {
611592
fi
612593
exec 3>&-
613594

614-
_resolve_kernel_type || exit 1
615-
616595
# vgpu driver version is chosen dynamically during runtime, so pre-compile modules for
617596
# only non-vgpu driver types
618597
if [ "${DRIVER_TYPE}" != "vgpu" ]; then

ubuntu22.04/Dockerfile

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,9 @@ ENV PATH /usr/local/go/bin:$PATH
2828
WORKDIR /work
2929

3030
RUN git clone https://github.com/NVIDIA/gpu-driver-container driver && \
31-
go build -C driver/vgpu/src -o vgpu-util && \
32-
mv driver/vgpu/src/vgpu-util /work && \
33-
go build -C driver/gpu-driver-util -o gpu-driver-util && \
34-
mv driver/gpu-driver-util/gpu-driver-util /work
31+
cd driver/vgpu/src && \
32+
go build -o vgpu-util && \
33+
mv vgpu-util /work
3534

3635
FROM nvcr.io/nvidia/cuda:12.6.3-base-ubuntu22.04
3736

@@ -73,7 +72,6 @@ RUN /tmp/install.sh reposetup && /tmp/install.sh depinstall && \
7372
COPY nvidia-driver /usr/local/bin
7473

7574
COPY --from=build /work/vgpu-util /usr/local/bin
76-
COPY --from=build /work/gpu-driver-util /usr/local/bin
7775

7876
ADD drivers drivers/
7977

ubuntu22.04/nvidia-driver

Lines changed: 3 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@ NVIDIA_UVM_MODULE_PARAMS=()
1515
NVIDIA_MODESET_MODULE_PARAMS=()
1616
NVIDIA_PEERMEM_MODULE_PARAMS=()
1717
TARGETARCH=${TARGETARCH:?"Missing TARGETARCH env"}
18-
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE:-auto}
18+
19+
OPEN_KERNEL_MODULES_ENABLED=${OPEN_KERNEL_MODULES_ENABLED:-false}
20+
[[ "${OPEN_KERNEL_MODULES_ENABLED}" == "true" ]] && KERNEL_TYPE=kernel-open || KERNEL_TYPE=kernel
1921

2022
export DEBIAN_FRONTEND=noninteractive
2123

@@ -479,24 +481,6 @@ _shutdown() {
479481
return 1
480482
}
481483

482-
_resolve_kernel_type() {
483-
if [ "${KERNEL_MODULE_TYPE}" == "proprietary" ]; then
484-
KERNEL_TYPE=kernel
485-
elif [ "${KERNEL_MODULE_TYPE}" == "open" ]; then
486-
KERNEL_TYPE=kernel-open
487-
elif [ "${KERNEL_MODULE_TYPE}" == "auto" ]; then
488-
KERNEL_TYPE=$(gpu-driver-util get-kernel-module-type -b "${DRIVER_BRANCH}")
489-
if [ $? -ne 0 ]; then
490-
echo "cannot autodetect the kernel module type, printing error logs from /var/log/gpu-driver-util.log..."
491-
tail -n 3 /var/log/gpu-driver-util.log
492-
return 1
493-
fi
494-
else
495-
echo "invalid value for the KERNEL_MODULE_TYPE variable: ${KERNEL_MODULE_TYPE}"
496-
return 1
497-
fi
498-
}
499-
500484
_find_vgpu_driver_version() {
501485
local count=""
502486
local version=""
@@ -540,8 +524,6 @@ init() {
540524
_find_vgpu_driver_version || exit 1
541525
fi
542526

543-
_resolve_kernel_type || exit 1
544-
545527
# Install the userspace components and copy the kernel module sources.
546528
sh NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION.run -x && \
547529
cd NVIDIA-Linux-$DRIVER_ARCH-$DRIVER_VERSION && \
@@ -614,8 +596,6 @@ update() {
614596
fi
615597
exec 3>&-
616598

617-
_resolve_kernel_type || exit 1
618-
619599
# vgpu driver version is chosen dynamically during runtime, so pre-compile modules for
620600
# only non-vgpu driver types
621601
if [ "${DRIVER_TYPE}" != "vgpu" ]; then

0 commit comments

Comments
 (0)