Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 25 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,17 @@ OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)

##### Public rules #####
# Distributions for which per-distribution build/push/pull/test targets are generated below.
DISTRIBUTIONS := ubuntu18.04 ubuntu20.04 ubuntu22.04 ubuntu24.04 signed_ubuntu20.04 signed_ubuntu22.04 signed_ubuntu24.04 rhel8 rhel9 flatcar fedora36 sles15.3 precompiled_rhcos
# RHCOS releases; in this section they are appended only to the vGPU guest/host target lists.
RHCOS_VERSIONS := rhcos4.14 rhcos4.15 rhcos4.16 rhcos4.17 rhcos4.18 rhcos4.19 rhcos4.20
# NOTE(review): PUSH_TARGETS is assigned twice in this section with the same value;
# the second assignment is redundant — confirm and drop one of them.
PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS))
BASE_FROM := noble jammy focal
PUSH_TARGETS := $(patsubst %, push-%, $(DISTRIBUTIONS))
# NOTE(review): the next two assignments are immediately superseded by the two that
# follow them (which also include RHCOS_VERSIONS); the later assignment wins.
VGPU_GUEST_DRIVER_PUSH_TARGETS := $(patsubst %, push-vgpuguest-%, $(DISTRIBUTIONS))
VGPU_HOST_DRIVER_PUSH_TARGETS := $(patsubst %, push-vgpuhost-%, $(DISTRIBUTIONS))
VGPU_GUEST_DRIVER_PUSH_TARGETS := $(patsubst %, push-vgpuguest-%, $(DISTRIBUTIONS) $(RHCOS_VERSIONS))
VGPU_HOST_DRIVER_PUSH_TARGETS := $(patsubst %, push-vgpuhost-%, $(DISTRIBUTIONS) $(RHCOS_VERSIONS))
# One target per (push target, driver version) pair: push-<dist>-<driver version>.
DRIVER_PUSH_TARGETS := $(foreach push_target, $(PUSH_TARGETS), $(addprefix $(push_target)-, $(DRIVER_VERSIONS)))
BUILD_TARGETS := $(patsubst %, build-%, $(DISTRIBUTIONS))
# One target per (build target, driver version) pair: build-<dist>-<driver version>.
DRIVER_BUILD_TARGETS := $(foreach build_target, $(BUILD_TARGETS), $(addprefix $(build_target)-, $(DRIVER_VERSIONS)))
# NOTE(review): as with the push lists above, the next two assignments are superseded
# by the two that follow them.
VGPU_GUEST_DRIVER_BUILD_TARGETS := $(patsubst %, build-vgpuguest-%, $(DISTRIBUTIONS))
VGPU_HOST_DRIVER_BUILD_TARGETS := $(patsubst %, build-vgpuhost-%, $(DISTRIBUTIONS))
VGPU_GUEST_DRIVER_BUILD_TARGETS := $(patsubst %, build-vgpuguest-%, $(DISTRIBUTIONS) $(RHCOS_VERSIONS))
VGPU_HOST_DRIVER_BUILD_TARGETS := $(patsubst %, build-vgpuhost-%, $(DISTRIBUTIONS) $(RHCOS_VERSIONS))
TEST_TARGETS := $(patsubst %, test-%, $(DISTRIBUTIONS))
PULL_TARGETS := $(patsubst %, pull-%, $(DISTRIBUTIONS))
# One target per (pull target, driver version) pair: pull-<dist>-<driver version>.
DRIVER_PULL_TARGETS := $(foreach pull_target, $(PULL_TARGETS), $(addprefix $(pull_target)-, $(DRIVER_VERSIONS)))
Expand Down Expand Up @@ -243,7 +244,15 @@ build-vgpuguest-%: DOCKERFILE = $(CURDIR)/$(SUBDIR)/Dockerfile
# Remove '-grid' substring in the image tag
build-vgpuguest-%: DRIVER_TAG = $(DRIVER_VERSION:-grid=)

build-vgpuguest-rhcos%: SUBDIR = rhel8
# Source of truth for RHEL and CoreOS compatibility https://access.redhat.com/articles/6907891
# Note: SUBDIR is INTENTIONALLY overridden here, because the macro above would otherwise set SUBDIR to "rhcos4.18" for `make build-vgpuguest-rhcos4.18`
# Per-release build context directory for vGPU guest images on RHCOS.
# Targets that share a value are grouped; each listed target receives its own
# target-specific SUBDIR.
build-vgpuguest-rhcos4.14: SUBDIR = rhel8
build-vgpuguest-rhcos4.15 \
build-vgpuguest-rhcos4.16 \
build-vgpuguest-rhcos4.17 \
build-vgpuguest-rhcos4.18 \
build-vgpuguest-rhcos4.19 \
build-vgpuguest-rhcos4.20: SUBDIR = rhel9

$(VGPU_GUEST_DRIVER_BUILD_TARGETS):
DOCKER_BUILDKIT=1 \
Expand Down Expand Up @@ -280,7 +289,17 @@ build-vgpuhost-%: DIST = $(word 3,$(subst -, ,$@))
build-vgpuhost-%: SUBDIR = $(word 3,$(subst -, ,$@))
build-vgpuhost-%: DOCKERFILE = $(CURDIR)/vgpu-manager/$(SUBDIR)/Dockerfile

build-vgpuhost-rhcos%: SUBDIR = rhel8
# Source of truth for RHEL and CoreOS compatibility https://access.redhat.com/articles/6907891
# Note: SUBDIR is INTENTIONALLY overridden here, because the macro above would otherwise set SUBDIR to "rhcos4.X" for `make build-vgpuhost-rhcos4.X`
# Per-release build context directory for vGPU host (vGPU Manager) images on RHCOS.
# Targets that share a value are grouped; each listed target receives its own
# target-specific SUBDIR.
# NOTE(review): rhcos4.12 and rhcos4.13 are not listed in RHCOS_VERSIONS in the
# visible part of this file, so these two entries may have no matching build
# target — confirm.
build-vgpuhost-rhcos4.12 \
build-vgpuhost-rhcos4.13 \
build-vgpuhost-rhcos4.14: SUBDIR = rhel8
build-vgpuhost-rhcos4.15 \
build-vgpuhost-rhcos4.16 \
build-vgpuhost-rhcos4.17 \
build-vgpuhost-rhcos4.18 \
build-vgpuhost-rhcos4.19 \
build-vgpuhost-rhcos4.20: SUBDIR = rhel9

$(VGPU_HOST_DRIVER_BUILD_TARGETS):
DOCKER_BUILDKIT=1 \
Expand All @@ -297,8 +316,6 @@ $(VGPU_HOST_DRIVER_BUILD_TARGETS):
--file $(DOCKERFILE) \
$(CURDIR)/vgpu-manager/$(SUBDIR)



# $(VGPU_HOST_DRIVER_PUSH_TARGETS) is in the form of push-vgpuhost-$(DIST)
# VGPU_HOST_DRIVER_VERSION must be defined in the environment when invoking this target.
push-vgpuhost-%: $(if $(VGPU_HOST_DRIVER_VERSION),,$(error "VGPU_HOST_DRIVER_VERSION is not set"))
Expand Down
34 changes: 34 additions & 0 deletions vgpu-manager/rhel9/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
FROM nvcr.io/nvidia/cuda:13.0.1-base-ubi9
Copy link
Author

@mvalsecchi-nv mvalsecchi-nv Nov 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unfortunately, symlinks (from vgpu-manager/rhel8 to vgpu-manager/rhel9) would not cut it: we pass the subdir as the build context, which makes the files inside rhel8 unreachable from any sibling folder.

Let me see if I can come up with a cleaner way, rather than duplicating all the files in vgpu-manager/rhel*

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems most directories do indeed have copies of the same files, so I'll leave the duplicates inside the vgpu-manager/rhel* folders instead of refactoring.


# Build-time driver version and architecture, persisted into the image
# environment so that the nvidia-driver script can read them at run time.
ARG DRIVER_VERSION
ARG DRIVER_ARCH=x86_64
ENV DRIVER_VERSION=${DRIVER_VERSION} \
    DRIVER_ARCH=${DRIVER_ARCH}

# Stage the vGPU Manager installer under /driver and make it executable.
RUN mkdir -p /driver
WORKDIR /driver
COPY NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run .
RUN chmod +x NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run

# Helper scripts; nvidia-driver is the image entrypoint.
COPY nvidia-driver ocp_dtk_entrypoint /usr/local/bin/

LABEL io.k8s.display-name="NVIDIA vGPU Manager Container" \
      name="NVIDIA vGPU Manager Container" \
      vendor="NVIDIA" \
      version="${DRIVER_VERSION}" \
      release="N/A" \
      summary="Provision the NVIDIA vGPU Manager through containers" \
      description="See summary"

# Install / upgrade packages here that are required to resolve CVEs.
# Nothing is updated when CVE_UPDATES is empty.
ARG CVE_UPDATES
RUN if [ -n "${CVE_UPDATES}" ]; then \
        yum update -y ${CVE_UPDATES} && \
        rm -rf /var/cache/yum/*; \
    fi

# Add NGC DL license from the CUDA image
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE

ENTRYPOINT ["nvidia-driver", "init"]
252 changes: 252 additions & 0 deletions vgpu-manager/rhel9/nvidia-driver
Original file line number Diff line number Diff line change
@@ -0,0 +1,252 @@
#!/bin/bash
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.

# Trace every command and abort on the first unhandled failure.
set -o xtrace -o errexit

# Driver version to install; the script aborts here if it is unset or empty.
DRIVER_VERSION="${DRIVER_VERSION:?Missing driver version}"
# Highest retry index used by the VF enable/disable loops.
DRIVER_RESET_RETRIES=10
# Seconds to wait before creating VFs (overridable from the environment).
DELAY_BEFORE_VF_CREATION="${DELAY_BEFORE_VF_CREATION:-15}"
# Base directory under which the driver rootfs is bind-mounted.
RUN_DIR=/run/nvidia

# Recursively bind-mount the container's root filesystem at ${RUN_DIR}/driver.
# /sys is first marked runbindable and then private, which keeps sysfs out of
# the recursive bind. Afterwards the device nodes under the mount are relabelled
# for SELinux.
_mount_rootfs() {
    local driver_root="${RUN_DIR}/driver"

    echo "Mounting NVIDIA driver rootfs..."
    mount --make-runbindable /sys
    mount --make-private /sys
    mkdir -p "${driver_root}"
    mount --rbind / "${driver_root}"

    echo "Change device files security context for selinux compatibility"
    chcon -R -t container_file_t "${driver_root}/dev"
}

# Lazily and recursively unmount ${RUN_DIR}/driver, if it is currently listed
# as a mount target. Does nothing (and succeeds) otherwise.
_unmount_rootfs() {
    local driver_root="${RUN_DIR}/driver"

    echo "Unmounting NVIDIA driver rootfs..."
    if ! findmnt -r -o TARGET | grep "${driver_root}" > /dev/null; then
        return 0
    fi
    umount -l -R "${driver_root}"
}

# Make sure /dev/char exists inside the container.
# nvidia-vgpu-mgr creates symlinks under /dev/char for new device nodes and
# fails when the directory is missing.
_create_dev_char_directory() {
    if [ -d "/dev/char" ]; then
        return 0
    fi
    echo "Creating '/dev/char' directory"
    mkdir -p /dev/char
}

# Point the kernel's firmware_class search path at the firmware directory of
# the mounted driver rootfs, unless a non-blank path is already configured, in
# which case the existing value is kept and a warning is printed.
_set_fw_search_path() {
    local param_file="/sys/module/firmware_class/parameters/path"
    local search_path="$RUN_DIR/driver/lib/firmware"

    # grep prints every line containing a non-whitespace character; any output
    # means a path is already set.
    if [[ -n $(grep '[^[:space:]]' "$param_file") ]]; then
        echo "WARNING: A search path is already configured in $param_file"
        echo " Retaining the current configuration. Note, GSP firmware may not be found and thus won't be used by the NVIDIA driver."
        return
    fi

    echo "Configuring the following firmware search path in '$param_file': $search_path"
    echo -n "$search_path" > "$param_file"
}

# Run the NVIDIA vGPU Manager installer non-interactively from the current
# directory, using a freshly created temporary directory for extraction.
# Reads DRIVER_ARCH and DRIVER_VERSION from the environment.
_install_driver() {
    # Declare and assign separately: `local var=$(cmd)` always returns the
    # status of `local`, which would hide a mktemp failure and pass an empty
    # --tmpdir argument to the installer.
    local tmp_dir
    tmp_dir=$(mktemp -d)

    sh "NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run" --ui=none --no-questions --tmpdir "${tmp_dir}" --no-systemd
}

# Start the vGPU daemons. The kernel modules themselves are expected to have
# been loaded by _install_driver(); this function only verifies that both the
# nvidia and nvidia_vgpu_vfio modules are present in sysfs.
# Returns 0 when both modules are loaded, 1 otherwise.
_load_driver() {
    /usr/bin/nvidia-vgpud
    /usr/bin/nvidia-vgpu-mgr &

    # check nvidia drivers are loaded
    if [ -f /sys/module/nvidia_vgpu_vfio/refcnt ] && [ -f /sys/module/nvidia/refcnt ]; then
        return 0
    fi
    echo "Failed to load nvidia driver"
    return 1
}

# Enable virtual functions on every physical GPU of the node that supports
# SR-IOV. The call is retried because the driver may still be busy (e.g. while
# it is initializing). Returns 0 on the first success, 1 once all attempts fail.
_enable_vfs() {
    # Give the driver time to finish initializing before creating VFs.
    # This is a WAR for a bug in vGPU 17.2 where sriov-manage does not return
    # a non-zero exit code even though VF creation fails.
    sleep ${DELAY_BEFORE_VF_CREATION}

    local attempt=0
    while (( attempt <= DRIVER_RESET_RETRIES )); do
        /usr/lib/nvidia/sriov-manage -e ALL && return 0
        if (( attempt == DRIVER_RESET_RETRIES )); then
            echo "Failed to enable VFs"
        fi
        (( attempt += 1 ))
    done
    return 1
}

# Disable virtual functions on every physical GPU of the node that supports
# SR-IOV. The call is retried because the driver may still be busy (e.g. while
# it is initializing). Returns 0 on the first success, 1 once all attempts fail.
_disable_vfs() {
    local attempt=0
    while (( attempt <= DRIVER_RESET_RETRIES )); do
        /usr/lib/nvidia/sriov-manage -d ALL && return 0
        if (( attempt == DRIVER_RESET_RETRIES )); then
            echo "Failed to disable VFs"
        fi
        (( attempt += 1 ))
    done
    return 1
}

# Stop the NVIDIA vGPU Manager daemon (if its pid file exists) and unload the
# nvidia_vgpu_vfio and nvidia kernel modules (if loaded).
# Returns 0 on success, 1 if the daemon cannot be stopped or rmmod fails.
_unload_driver() {
    local pid_file=/var/run/nvidia-vgpu-mgr/nvidia-vgpu-mgr.pid
    local rmmod_args=()
    local pid
    local attempt

    if [ -f "${pid_file}" ]; then
        echo "Stopping NVIDIA vGPU Manager..."
        pid=$(< "${pid_file}")

        kill -TERM "${pid}"
        # Poll for up to ~5 seconds for the daemon to go away.
        for attempt in $(seq 1 50); do
            kill -0 "${pid}" 2> /dev/null || break
            sleep 0.1
        done
        # Decide on the actual process state rather than on the loop counter:
        # the counter also reaches 50 when the daemon exits on the last poll,
        # which must not be reported as a failure.
        if kill -0 "${pid}" 2> /dev/null; then
            echo "Could not stop NVIDIA vGPU Manager" >&2
            return 1
        fi
    fi

    echo "Unloading NVIDIA driver kernel modules..."
    # nvidia_vgpu_vfio is listed first so that it is removed before nvidia.
    if [ -f /sys/module/nvidia_vgpu_vfio/refcnt ]; then
        rmmod_args+=("nvidia_vgpu_vfio")
    fi
    if [ -f /sys/module/nvidia/refcnt ]; then
        rmmod_args+=("nvidia")
    fi

    # TODO: check if nvidia module is in use by checking refcnt

    if [ ${#rmmod_args[@]} -gt 0 ]; then
        rmmod "${rmmod_args[@]}" || return 1
    fi
    return 0
}

# Tear the driver down: disable VFs, unload the driver and, only when both
# steps succeed, unmount the driver rootfs.
# Returns 0 on a complete cleanup, 1 otherwise.
_shutdown() {
    if ! { _disable_vfs && _unload_driver; }; then
        echo "Failed to cleanup driver"
        return 1
    fi
    _unmount_rootfs
    return 0
}

# Placeholder for the "build" command; only reports that it is not implemented.
build() {
    echo "${FUNCNAME[0]}() not implemented"
}

# Placeholder for the "load" command; only reports that it is not implemented.
load() {
    echo "${FUNCNAME[0]}() not implemented"
}

# Placeholder for the "update" command; only reports that it is not implemented.
update() {
    echo "${FUNCNAME[0]}() not implemented"
}

# Entry point of the "init" command.
# Removes any previous driver installation, installs and loads the vGPU
# Manager, mounts the driver rootfs, enables VFs and then stays in the
# foreground, checking every 15 seconds that nvidia-vgpu-mgr is still running.
# _shutdown runs on exit and on HUP/INT/QUIT/PIPE/TERM.
init() {
    trap "echo 'Caught signal'; exit 1" HUP INT QUIT PIPE TERM
    trap "_shutdown" EXIT

    if ! _unload_driver; then
        echo "Previous NVIDIA driver installation cannot be removed. Exiting"
        exit 1
    fi
    _unmount_rootfs
    _create_dev_char_directory
    _set_fw_search_path
    _install_driver
    _load_driver || exit 1
    _mount_rootfs
    _enable_vfs

    # In certain scenarios, /sys/class/mdev_bus is not populated with the correct list of devices (PFs and possible VFs) at this point.
    # Re-run nvidia-vgpud to ensure /sys/class/mdev_bus is populated correctly. And restart nvidia-vgpu-mgr if previously killed.
    nvidia-vgpud &
    pgrep nvidia-vgpu-mgr >/dev/null || (echo "Restarting nvidia-vgpu-mgr after previously killed" && nvidia-vgpu-mgr &)

    set +x
    echo "Done, now waiting for signal"
    # From here on a signal performs the cleanup itself and clears the EXIT
    # trap so that _shutdown is not run a second time.
    trap "echo 'Caught signal'; _shutdown; trap - EXIT; exit" HUP INT QUIT PIPE TERM

    while true; do
        sleep 15
        # Exit from the main shell: an `exit` inside a ( ... ) subshell would
        # only leave the subshell and end the loop solely as a side effect of
        # errexit.
        if ! pgrep nvidia-vgpu-mgr >/dev/null; then
            echo "ERROR: nvidia-vgpu-mgr daemon is no longer running. Exiting."
            exit 1
        fi
    done
}


# Print the command-line synopsis to stderr and terminate with status 1.
usage() {
    cat <<EOF >&2
Usage: $0 COMMAND [ARG...]

Commands:
init [-a | --accept-license]
build [-a | --accept-license]
load
update [-k | --kernel VERSION] [-s | --sign KEYID] [-t | --tag TAG]
EOF
    exit 1
}

# ---- Command-line entry point ----
# Parses "COMMAND [ARG...]", normalizes the options with getopt and dispatches
# to the function named after the command. Any parsing error or leftover
# positional argument prints the usage text and exits with status 1.
if [ $# -eq 0 ]; then
    usage
fi
command=$1; shift
# `|| usage` is required: with errexit active, a failing bare assignment would
# abort the script before any later `$?` check could print the usage text.
case "${command}" in
    init) options=$(getopt -l accept-license -o a -- "$@") || usage ;;
    # Short options mirror the long ones: -a takes no argument, -t takes one.
    build) options=$(getopt -l accept-license,tag: -o at: -- "$@") || usage ;;
    load) options="" ;;
    update) options=$(getopt -l kernel:,sign:,tag: -o k:s:t: -- "$@") || usage ;;
    *) usage ;;
esac
eval set -- "${options}"

ACCEPT_LICENSE=""
KERNEL_VERSION=$(uname -r)
PRIVATE_KEY=""
PACKAGE_TAG=""

# Walk the positional parameters produced by getopt instead of word-splitting
# its quoted output, so option values containing whitespace are handled intact.
while [ $# -gt 0 ]; do
    case "$1" in
        -a | --accept-license) ACCEPT_LICENSE="yes"; shift 1 ;;
        -k | --kernel) KERNEL_VERSION=$2; shift 2 ;;
        -s | --sign) PRIVATE_KEY=$2; shift 2 ;;
        -t | --tag) PACKAGE_TAG=$2; shift 2 ;;
        --) shift; break ;;
        *) break ;;
    esac
done
if [ $# -ne 0 ]; then
    usage
fi

$command
Loading