@@ -530,6 +530,7 @@ _mount_rootfs() {
530530 mount --make-private /sys
531531 mkdir -p ${RUN_DIR} /driver
532532 mount --rbind / ${RUN_DIR} /driver
533+ echo " Driver container rootfs mounted at ${RUN_DIR} /driver"
533534}
534535
535536# Unmount the driver rootfs from the run directory.
@@ -638,20 +639,6 @@ _start_vgpu_topology_daemon() {
638639 nvidia-topologyd
639640}
640641
641- # Read the currently loaded NVIDIA driver version from sysfs.
642- _read_loaded_version () {
643- cat /sys/module/nvidia/version 2> /dev/null || return 1
644- }
645-
646- _is_rootfs_mounted () {
647- findmnt -rno TARGET " ${RUN_DIR} /driver" > /dev/null 2>&1
648- }
649-
650- # Ensure the driver rootfs is mounted exactly once.
651- _ensure_rootfs_mounted_idempotent () {
652- _is_rootfs_mounted || _mount_rootfs
653- }
654-
655642_ensure_persistence_running () {
656643 local pid_file=/var/run/nvidia-persistenced/nvidia-persistenced.pid pid
657644 if pid=$( < " ${pid_file} " 2> /dev/null) && [ -n " ${pid} " ] && kill -0 " ${pid} " 2> /dev/null; then
@@ -665,6 +652,31 @@ _ensure_persistence_running() {
665652 fi
666653}
667654
# Print a snapshot of the driver configuration to stdout.
#
# The snapshot is five KEY=VALUE lines (driver version, running kernel
# release, and three feature settings) followed by the contents of each
# module .conf file that exists in the configuration directory. Callers
# capture or redirect this output and compare it as an opaque string, so the
# line order and the exact text are significant.
#
# Globals:   DRIVER_VERSION, GPU_DIRECT_RDMA_ENABLED, USE_HOST_MOFED,
#            KERNEL_MODULE_TYPE (all read)
# Arguments: $1 - directory holding the module .conf files (default: /drivers)
# Outputs:   the configuration snapshot on stdout
_build_driver_config() {
  local conf_dir="${1:-/drivers}"
  # Keep the loop variable local so it does not leak into the caller's scope.
  local conf_file
  local config

  config="DRIVER_VERSION=${DRIVER_VERSION}
KERNEL_VERSION=$(uname -r)
GPU_DIRECT_RDMA_ENABLED=${GPU_DIRECT_RDMA_ENABLED}
USE_HOST_MOFED=${USE_HOST_MOFED}
KERNEL_MODULE_TYPE=${KERNEL_MODULE_TYPE}"

  # Append the contents of each module config file that is present, in a
  # fixed order so that two snapshots of the same state compare equal.
  for conf_file in nvidia.conf nvidia-uvm.conf nvidia-modeset.conf nvidia-peermem.conf; do
    if [ -f "${conf_dir}/${conf_file}" ]; then
      # $(< file) strips trailing newlines, exactly as $(cat file) would.
      config="${config}
$(< "${conf_dir}/${conf_file}")"
    fi
  done

  printf '%s\n' "${config}"
}
672+
# Persist the current driver configuration snapshot to a state file.
#
# The snapshot comes from _build_driver_config. init later reads the default
# state file and compares it with a fresh snapshot to decide whether the
# userspace-only fast path may be taken, so a failed write must not leave a
# truncated or outdated file behind: on failure the state file is removed.
#
# Arguments: $1 - state file path (default: /run/nvidia/driver-config.state)
# Outputs:   progress messages on stdout, a diagnostic on stderr on failure
# Returns:   0 when the state was written, 1 otherwise
_store_driver_config() {
  local config_file="${1:-/run/nvidia/driver-config.state}"
  local state_dir

  echo "Storing driver configuration state..."

  # The redirection below fails if the parent directory is missing.
  state_dir=$(dirname -- "${config_file}")
  if ! mkdir -p -- "${state_dir}"; then
    echo "Failed to create state directory ${state_dir}" >&2
    return 1
  fi

  if ! _build_driver_config > "${config_file}"; then
    # Drop the partial file so a later comparison cannot match stale state.
    rm -f -- "${config_file}"
    echo "Failed to store driver configuration at ${config_file}" >&2
    return 1
  fi

  echo "Driver configuration stored at ${config_file}"
}
679+
668680init () {
669681 if [ " ${DRIVER_TYPE} " = " vgpu" ]; then
670682 _find_vgpu_driver_version || exit 1
@@ -683,13 +695,15 @@ init() {
683695 trap " echo 'Caught signal'; exit 1" HUP INT QUIT PIPE TERM
684696 trap " _shutdown" EXIT
685697
686- # Fast path: if the NVIDIA kernel modules are already loaded and match the desired
687- # version, skip kernel module build/load but install userspace components.
698+ # Fast path: if the NVIDIA kernel modules are already loaded and driver config matches,
699+ # skip kernel module build/load but install userspace components.
688700 # This handles non-clean restarts where modules are in use and can't be unloaded.
689- if [ -f /sys/module/nvidia/refcnt ]; then
690- loaded_version=$( _read_loaded_version || true)
691- if [ -n " ${loaded_version} " ] && [ " ${loaded_version} " = " ${DRIVER_VERSION} " ]; then
692- echo " Detected matching loaded driver (${loaded_version} ); performing userspace-only install"
701+ if [ -f /sys/module/nvidia/refcnt ] && [ -f /run/nvidia/driver-config.state ]; then
702+ current_config=$( _build_driver_config)
703+ stored_config=$( cat /run/nvidia/driver-config.state)
704+
705+ if [ " ${current_config} " = " ${stored_config} " ]; then
706+ echo " Detected matching loaded driver & config (${DRIVER_VERSION} ); performing userspace-only install"
693707
694708 # Skip kernel module unload since they're already loaded with correct version
695709 # Unmount any existing rootfs
@@ -715,16 +729,19 @@ init() {
715729 --no-drm \
716730 --no-peermem
717731
718- # Mount the driver rootfs to make components available
719- _mount_rootfs
720-
721- # Ensure persistence daemon is running
722- _ensure_persistence_running
723-
724- # Write kernel update hook
725- _write_kernel_update_hook
726-
727- echo " Userspace-only install complete, now waiting for signal"
732+ # Mount the driver rootfs to make components available
733+ _mount_rootfs
734+
735+ # Ensure persistence daemon is running
736+ _ensure_persistence_running
737+
738+ # Write kernel update hook
739+ _write_kernel_update_hook
740+
741+ # Store driver configuration
742+ _store_driver_config
743+
744+ echo " Userspace-only install complete, now waiting for signal"
728745 sleep infinity &
729746 trap " echo 'Caught signal'; _shutdown && { kill $! ; exit 0; }" HUP INT QUIT PIPE TERM
730747 trap - EXIT
@@ -776,6 +793,7 @@ init() {
776793 _load_driver || exit 1
777794 _mount_rootfs
778795 _write_kernel_update_hook
796+ _store_driver_config
779797
780798 echo " Done, now waiting for signal"
781799 sleep infinity &
0 commit comments