@@ -355,7 +355,7 @@ func main() {
355355 Value : defaultDriverInstallDirCtrPath ,
356356 Usage : "the path where the NVIDIA driver install dir is mounted in the container" ,
357357 Destination : & driverInstallDirCtrPathFlag ,
358- Sources : cli .EnvVars ("DISABLE_DEV_CHAR_SYMLINK_CREATION " ),
358+ Sources : cli .EnvVars ("DRIVER_INSTALL_DIR_CTR_PATH " ),
359359 },
360360 }
361361
@@ -843,6 +843,20 @@ func (d *Driver) createStatusFile(driverInfo driverInfo) error {
843843 return createStatusFileWithContent (outputDirFlag + "/" + driverStatusFile , statusFileContent )
844844}
845845
846+ // areNvidiaModulesLoaded checks if NVIDIA kernel modules are already loaded in kernel memory.
847+ func areNvidiaModulesLoaded () bool {
848+ // Check if the nvidia module is loaded by checking if /sys/module/nvidia/refcnt exists
849+ if _ , err := os .Stat ("/sys/module/nvidia/refcnt" ); err == nil {
850+ refcntData , err := os .ReadFile ("/sys/module/nvidia/refcnt" )
851+ if err == nil {
852+ refcnt := strings .TrimSpace (string (refcntData ))
853+ log .Infof ("NVIDIA kernel modules already loaded in kernel memory (refcnt=%s)" , refcnt )
854+ return true
855+ }
856+ }
857+ return false
858+ }
859+
846860// createDevCharSymlinks creates symlinks in /host-dev-char that point to all possible NVIDIA device nodes.
847861func createDevCharSymlinks (driverInfo driverInfo , disableDevCharSymlinkCreation bool ) error {
848862 if disableDevCharSymlinkCreation {
@@ -853,8 +867,16 @@ func createDevCharSymlinks(driverInfo driverInfo, disableDevCharSymlinkCreation
853867
854868 log .Info ("creating symlinks under /dev/char that correspond to NVIDIA character devices" )
855869
856- // Only attempt to load NVIDIA kernel modules when we can chroot into driverRoot
857- loadKernelModules := driverInfo .isHostDriver || (driverInfo .devRoot == driverInfo .driverRoot )
870+ // Check if NVIDIA modules are already loaded in kernel memory.
871+ // If they are, we don't need to run modprobe (which would fail if modules aren't in /lib/modules/).
872+ // This handles the case where the driver container performed a userspace-only install
873+ // after detecting that modules were already loaded from a previous boot.
874+ modulesAlreadyLoaded := areNvidiaModulesLoaded ()
875+
876+ // Only attempt to load NVIDIA kernel modules when:
877+ // 1. Modules are not already loaded in kernel memory, AND
878+ // 2. We can chroot into driverRoot to run modprobe
879+ loadKernelModules := ! modulesAlreadyLoaded && (driverInfo .isHostDriver || (driverInfo .devRoot == driverInfo .driverRoot ))
858880
859881 // driverRootCtrPath is the path of the driver install dir in the container. This will either be
860882 // driverInstallDirCtrPathFlag or '/host'.
0 commit comments