@@ -638,11 +638,104 @@ _start_vgpu_topology_daemon() {
638638 nvidia-topologyd
639639}
640640
# Print the version string of the NVIDIA kernel module that is currently
# loaded, as exposed in /sys/module/nvidia/version.
#
# Outputs: the contents of the sysfs version file on stdout.
# Returns: 0 when the file could be read, 1 otherwise (the read error is
#          silenced, so nothing is printed in that case).
_read_loaded_version() {
    if ! cat /sys/module/nvidia/version 2>/dev/null; then
        return 1
    fi
}
645+
# Report whether the driver root filesystem is mounted at "${RUN_DIR}/driver".
#
# Globals:  RUN_DIR (read) - presumably set elsewhere in this script; the
#           mount location is derived from it.
# Returns:  0 when findmnt reports a mount at that path, non-zero otherwise.
#           All findmnt output is discarded.
_is_rootfs_mounted() {
    # --mountpoint restricts the lookup to mount targets; a bare positional
    # argument would also match a mount *source* with the same name.
    findmnt -rno TARGET --mountpoint "${RUN_DIR}/driver" >/dev/null 2>&1
}
649+
# Mount the driver rootfs only when it is not mounted yet, so that calling
# this helper repeatedly does not stack additional mounts.
#
# Returns: 0 when the rootfs was already mounted; otherwise the exit status
#          of _mount_rootfs (defined elsewhere in this script).
_ensure_rootfs_mounted_idempotent() {
    if ! _is_rootfs_mounted; then
        _mount_rootfs
    fi
}
654+
# Make sure the NVIDIA persistence daemon is running (best effort).
#
# Arguments:
#   $1 - optional path to the daemon's PID file; defaults to
#        /var/run/nvidia-persistenced/nvidia-persistenced.pid
# Behavior:
#   - If the PID file names a process that answers `kill -0`, nothing is done.
#   - Otherwise nvidia-persistenced is started with --persistence-mode when
#     the binary is on PATH; a failure to start is tolerated.
#   - If the binary is missing, a notice is printed and execution continues.
# Returns: 0 in all of the cases above; this helper never aborts the caller.
_ensure_persistence_running() {
    local pid_file="${1:-/var/run/nvidia-persistenced/nvidia-persistenced.pid}"
    local pid=""

    # Use cat instead of "$(< file 2>/dev/null)": bash only treats "$(< file)"
    # as a file read when that input redirection stands alone, so adding a
    # second redirection can leave the PID empty and defeat the check below.
    pid=$(cat "${pid_file}" 2>/dev/null) || pid=""

    # kill -0 sends no signal; it only tests that the process can be signalled.
    if [ -n "${pid}" ] && kill -0 "${pid}" 2>/dev/null; then
        return 0
    fi

    if command -v nvidia-persistenced >/dev/null 2>&1; then
        # Deliberately best effort: the driver is usable without the daemon.
        nvidia-persistenced --persistence-mode || true
    else
        echo "nvidia-persistenced not found; continuing without persistence"
    fi
}
667+
641668init () {
642669 if [ " ${DRIVER_TYPE} " = " vgpu" ]; then
643670 _find_vgpu_driver_version || exit 1
644671 fi
645672
673+ echo -e " \n========== NVIDIA Software Installer ==========\n"
674+ echo -e " Starting installation of NVIDIA driver version ${DRIVER_VERSION} for Linux kernel version ${KERNEL_VERSION} \n"
675+
676+ exec 3> ${PID_FILE}
677+ if ! flock -n 3; then
678+ echo " An instance of the NVIDIA driver is already running, aborting"
679+ exit 1
680+ fi
681+ echo $$ >&3
682+
683+ trap " echo 'Caught signal'; exit 1" HUP INT QUIT PIPE TERM
684+ trap " _shutdown" EXIT
685+
686+ # Fast path: if the NVIDIA kernel modules are already loaded and match the desired
687+ # version, skip kernel module build/load but install userspace components.
688+ # This handles non-clean restarts where modules are in use and can't be unloaded.
689+ if [ -f /sys/module/nvidia/refcnt ]; then
690+ loaded_version=$( _read_loaded_version || true)
691+ if [ -n " ${loaded_version} " ] && [ " ${loaded_version} " = " ${DRIVER_VERSION} " ]; then
692+ echo " Detected matching loaded driver (${loaded_version} ); performing userspace-only install"
693+
694+ # Skip kernel module unload since they're already loaded with correct version
695+ # Unmount any existing rootfs
696+ _unmount_rootfs
697+
698+ # Update package cache for userspace install
699+ _update_package_cache
700+ _resolve_kernel_version || exit 1
701+ _install_prerequisites
702+
703+ # Install userspace components only (libraries, binaries)
704+ # The --no-kernel-module flag tells nvidia-installer to skip kernel module build/install
705+ echo " Installing userspace components (libraries and binaries)..."
706+ cd /drivers
707+ # Extract the driver first
708+ sh NVIDIA-Linux-${DRIVER_ARCH} -${DRIVER_VERSION} .run -x
709+ cd NVIDIA-Linux-${DRIVER_ARCH} -${DRIVER_VERSION}
710+ ./nvidia-installer \
711+ --silent \
712+ --no-kernel-module \
713+ --no-nouveau-check \
714+ --no-nvidia-modprobe \
715+ --no-drm \
716+ --no-peermem
717+
718+ # Mount the driver rootfs to make components available
719+ _mount_rootfs
720+
721+ # Ensure persistence daemon is running
722+ _ensure_persistence_running
723+
724+ # Write kernel update hook
725+ _write_kernel_update_hook
726+
727+ echo " Userspace-only install complete, now waiting for signal"
728+ sleep infinity &
729+ trap " echo 'Caught signal'; _shutdown && { kill $! ; exit 0; }" HUP INT QUIT PIPE TERM
730+ trap - EXIT
731+ while true ; do wait $! || continue ; done
732+ exit 0
733+ fi
734+ fi
735+
736+ _unload_driver || exit 1
737+ _unmount_rootfs
738+
646739 # Install the userspace components
647740 sh NVIDIA-Linux-$DRIVER_ARCH -$DRIVER_VERSION .run -x && \
648741 cd NVIDIA-Linux-$DRIVER_ARCH -$DRIVER_VERSION && \
@@ -668,22 +761,6 @@ init() {
668761 mv LICENSE mkprecompiled ${KERNEL_TYPE} /usr/src/nvidia-${DRIVER_VERSION} && \
669762 sed ' 9,${/^\(kernel\|LICENSE\)/!d}' .manifest > /usr/src/nvidia-${DRIVER_VERSION} /.manifest
670763
671- echo -e " \n========== NVIDIA Software Installer ==========\n"
672- echo -e " Starting installation of NVIDIA driver version ${DRIVER_VERSION} for Linux kernel version ${KERNEL_VERSION} \n"
673-
674- exec 3> ${PID_FILE}
675- if ! flock -n 3; then
676- echo " An instance of the NVIDIA driver is already running, aborting"
677- exit 1
678- fi
679- echo $$ >&3
680-
681- trap " echo 'Caught signal'; exit 1" HUP INT QUIT PIPE TERM
682- trap " _shutdown" EXIT
683-
684- _unload_driver || exit 1
685- _unmount_rootfs
686-
687764 if _kernel_requires_package; then
688765 _update_ca_certificates
689766 _update_package_cache
0 commit comments