|
| 1 | +#!/bin/bash |
| 2 | +# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. |
| 3 | + |
# Trace every command and abort on the first unhandled failure.
set -e -x

# Directory under which the driver root filesystem is exposed.
RUN_DIR=/run/nvidia
# Number of additional attempts made when enabling/disabling virtual functions.
DRIVER_RESET_RETRIES=10
# Seconds to wait before creating virtual functions (overridable via the environment).
DELAY_BEFORE_VF_CREATION="${DELAY_BEFORE_VF_CREATION:-15}"
# Driver version is mandatory; the script aborts here when it is not provided.
DRIVER_VERSION="${DRIVER_VERSION:?Missing driver version}"
| 10 | + |
# Bind-mount the container's root filesystem at ${RUN_DIR}/driver, leaving
# sysfs out of the recursive bind, then relabel the device nodes for SELinux.
# Globals: RUN_DIR (read)
_mount_rootfs() {
    local driver_root="${RUN_DIR}/driver"

    echo "Mounting NVIDIA driver rootfs..."
    # Adjust propagation of /sys before the recursive bind of / below.
    mount --make-runbindable /sys
    mount --make-private /sys
    mkdir -p "${driver_root}"
    mount --rbind / "${driver_root}"

    echo "Change device files security context for selinux compatibility"
    chcon -R -t container_file_t "${driver_root}/dev"
}
| 22 | + |
# Lazily and recursively unmount ${RUN_DIR}/driver when findmnt lists a mount
# target containing that path; otherwise do nothing.
# Globals: RUN_DIR (read)
_unmount_rootfs() {
    local driver_root="${RUN_DIR}/driver"

    echo "Unmounting NVIDIA driver rootfs..."
    if ! findmnt -r -o TARGET | grep "${driver_root}" > /dev/null; then
        # Nothing is mounted there.
        return 0
    fi
    umount -l -R "${driver_root}"
}
| 30 | + |
# Make sure /dev/char exists inside the container. nvidia-vgpu-mgr needs the
# directory in order to create symlinks for new device nodes.
_create_dev_char_directory() {
    if [ -d "/dev/char" ]; then
        return 0
    fi
    echo "Creating '/dev/char' directory"
    mkdir -p /dev/char
}
| 40 | + |
# Point the kernel firmware loader at the driver's firmware directory, unless
# a search path is already configured, in which case it is left untouched.
# Globals: RUN_DIR (read)
_set_fw_search_path() {
    local sysfs_param="/sys/module/firmware_class/parameters/path"
    local wanted_path="$RUN_DIR/driver/lib/firmware"

    # Any non-whitespace content means a path has already been configured.
    if [[ -z $(grep '[^[:space:]]' $sysfs_param) ]]; then
        echo "Configuring the following firmware search path in '$sysfs_param': $wanted_path"
        echo -n "$wanted_path" > $sysfs_param
        return
    fi

    echo "WARNING: A search path is already configured in $sysfs_param"
    echo " Retaining the current configuration. Note, GSP firmware may not be found and thus won't be used by the NVIDIA driver."
}
| 54 | + |
# Run the NVIDIA vGPU KVM installer non-interactively.
# Globals: DRIVER_ARCH, DRIVER_VERSION (read)
# Returns: the installer's exit status, or 1 if no temporary directory could
#          be created.
_install_driver() {
    local tmp_dir
    local rc=0

    # Declared separately so a mktemp failure is not masked by 'local'.
    tmp_dir=$(mktemp -d) || return 1

    sh "NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run" \
        --ui=none --no-questions --tmpdir "${tmp_dir}" --no-systemd || rc=$?

    # The scratch directory is only needed while the installer runs.
    rm -rf -- "${tmp_dir}"
    return "${rc}"
}
| 60 | + |
# The kernel modules are loaded by _install_driver(); this only starts the
# vGPU daemons and then confirms that both kernel modules are present.
# Returns: 0 when both modules expose a refcnt file, 1 otherwise.
_load_driver() {
    local module

    /usr/bin/nvidia-vgpud
    /usr/bin/nvidia-vgpu-mgr &

    # A missing refcnt file means the corresponding module is not loaded.
    for module in nvidia_vgpu_vfio nvidia; do
        if [ ! -f "/sys/module/${module}/refcnt" ]; then
            echo "Failed to load nvidia driver"
            return 1
        fi
    done
    return 0
}
| 73 | + |
# Enable virtual functions on every physical GPU that supports SR-IOV.
# sriov-manage is re-run up to DRIVER_RESET_RETRIES additional times because
# the driver may still be busy (e.g. while it is initializing).
# Globals: DELAY_BEFORE_VF_CREATION, DRIVER_RESET_RETRIES (read)
# Returns: 0 as soon as one attempt succeeds, 1 once all attempts failed.
_enable_vfs() {
    local attempt=0

    # WAR for a bug in vGPU 17.2 where sriov-manage exits with zero even though
    # VF creation failed: give the driver time to finish initializing first.
    sleep "$DELAY_BEFORE_VF_CREATION"

    while (( attempt <= DRIVER_RESET_RETRIES )); do
        if /usr/lib/nvidia/sriov-manage -e ALL; then
            return 0
        fi
        if (( attempt == DRIVER_RESET_RETRIES )); then
            echo "Failed to enable VFs"
        fi
        attempt=$(( attempt + 1 ))
    done
    return 1
}
| 93 | + |
# Disable virtual functions on every physical GPU that supports SR-IOV.
# sriov-manage is re-run up to DRIVER_RESET_RETRIES additional times because
# the driver may still be busy (e.g. while it is initializing).
# Globals: DRIVER_RESET_RETRIES (read)
# Returns: 0 as soon as one attempt succeeds, 1 once all attempts failed.
_disable_vfs() {
    local attempt=0

    while (( attempt <= DRIVER_RESET_RETRIES )); do
        if /usr/lib/nvidia/sriov-manage -d ALL; then
            return 0
        fi
        if (( attempt == DRIVER_RESET_RETRIES )); then
            echo "Failed to disable VFs"
        fi
        attempt=$(( attempt + 1 ))
    done
    return 1
}
| 108 | + |
# Stop the vGPU manager daemon (when its pid file exists) and remove the
# NVIDIA kernel modules that are currently loaded.
# Returns: 0 on success or when there is nothing to unload; 1 when the daemon
#          is still alive after the polling window or rmmod fails.
_unload_driver() {
    local rmmod_args=()
    local pid_file=/var/run/nvidia-vgpu-mgr/nvidia-vgpu-mgr.pid
    local pid
    local attempt

    if [ -f "${pid_file}" ]; then
        echo "Stopping NVIDIA vGPU Manager..."
        pid=$(< "${pid_file}")

        # A stale pid file makes kill fail; that simply means there is nothing
        # left to stop, so the failure is tolerated.
        kill -TERM "${pid}" || true
        # Poll for up to ~5 seconds (50 x 0.1s) for the daemon to go away.
        for ((attempt = 0; attempt < 50; attempt++)); do
            kill -0 "${pid}" 2> /dev/null || break
            sleep 0.1
        done
        # Decide on the process state itself rather than on the loop counter,
        # so a daemon that exits on the very last poll is not reported as stuck.
        if kill -0 "${pid}" 2> /dev/null; then
            echo "Could not stop NVIDIA vGPU Manager" >&2
            return 1
        fi
    fi

    echo "Unloading NVIDIA driver kernel modules..."
    # nvidia_vgpu_vfio is listed before nvidia so it is removed first.
    if [ -f /sys/module/nvidia_vgpu_vfio/refcnt ]; then
        rmmod_args+=("nvidia_vgpu_vfio")
    fi
    if [ -f /sys/module/nvidia/refcnt ]; then
        rmmod_args+=("nvidia")
    fi

    # TODO: check if nvidia module is in use by checking refcnt

    if [ ${#rmmod_args[@]} -gt 0 ]; then
        rmmod "${rmmod_args[@]}" || return 1
    fi
    return 0
}
| 151 | + |
# Tear everything down: disable the virtual functions, unload the driver and,
# only when both steps succeeded, unmount the driver rootfs.
# Returns: 0 after a successful cleanup, 1 otherwise.
_shutdown() {
    if ! _disable_vfs || ! _unload_driver; then
        echo "Failed to cleanup driver"
        return 1
    fi
    _unmount_rootfs
    return 0
}
| 160 | + |
# Placeholder for the 'build' command; it only reports that it is unimplemented.
build() {
    printf '%s\n' "build() not implemented"
}
| 164 | + |
# Placeholder for the 'load' command; it only reports that it is unimplemented.
load() {
    printf '%s\n' "load() not implemented"
}
| 168 | + |
# Placeholder for the 'update' command; it only reports that it is unimplemented.
update() {
    printf '%s\n' "update() not implemented"
}
| 172 | + |
# Install and start the vGPU manager, then block until a signal arrives or the
# nvidia-vgpu-mgr daemon disappears.
# Cleanup (_shutdown) runs through the EXIT trap on every exit path.
init() {
    trap "echo 'Caught signal'; exit 1" HUP INT QUIT PIPE TERM
    trap "_shutdown" EXIT

    if ! _unload_driver; then
        echo "Previous NVIDIA driver installation cannot be removed. Exiting"
        exit 1
    fi
    _unmount_rootfs
    _create_dev_char_directory
    _set_fw_search_path
    _install_driver
    _load_driver || exit 1
    _mount_rootfs
    _enable_vfs

    # In certain scenarios, /sys/class/mdev_bus is not populated with the correct
    # list of devices (PFs and possible VFs) at this point. Re-run nvidia-vgpud to
    # ensure /sys/class/mdev_bus is populated correctly, and restart
    # nvidia-vgpu-mgr if it was previously killed.
    nvidia-vgpud &
    if ! pgrep nvidia-vgpu-mgr > /dev/null; then
        echo "Restarting nvidia-vgpu-mgr after previously killed"
        nvidia-vgpu-mgr &
    fi

    set +x
    echo "Done, now waiting for signal"
    # From here on a signal performs the cleanup itself and then clears the
    # EXIT trap so that _shutdown does not run a second time.
    trap "echo 'Caught signal'; _shutdown; trap - EXIT; exit" HUP INT QUIT PIPE TERM

    while true; do
        sleep 15
        # Exit from the current shell: an 'exit' inside a ( ... ) subshell would
        # only end the subshell and leave termination dependent on 'set -e'.
        if ! pgrep nvidia-vgpu-mgr > /dev/null; then
            echo "ERROR: nvidia-vgpu-mgr daemon is no longer running. Exiting."
            exit 1
        fi
    done
}
| 203 | + |
| 204 | + |
# Print the command-line synopsis on stderr and terminate with status 1.
usage() {
    {
        printf 'Usage: %s COMMAND [ARG...]\n' "$0"
        printf '\n'
        printf '%s\n' \
            'Commands:' \
            ' init [-a | --accept-license]' \
            ' build [-a | --accept-license]' \
            ' load' \
            ' update [-k | --kernel VERSION] [-s | --sign KEYID] [-t | --tag TAG]'
    } >&2
    exit 1
}
| 217 | + |
# Entry point: the first argument selects the command, the remaining arguments
# are normalized by getopt and parsed into the option variables below before
# the function named after the command is invoked.
if [ $# -eq 0 ]; then
    usage
fi
command=$1; shift
# Handle a getopt failure right at the call site: with 'set -e' active a failed
# assignment would abort the script before a later '$?' check could show usage.
case "${command}" in
    init)   options=$(getopt -l accept-license -o a -- "$@") || usage ;;
    # -a takes no argument and -t takes one, mirroring the long options.
    build)  options=$(getopt -l accept-license,tag: -o at: -- "$@") || usage ;;
    load)   options="" ;;
    update) options=$(getopt -l kernel:,sign:,tag: -o k:s:t: -- "$@") || usage ;;
    *)      usage ;;
esac
# getopt emits shell-quoted words; eval is required to re-split them correctly.
eval set -- "${options}"

ACCEPT_LICENSE=""
KERNEL_VERSION=$(uname -r)
PRIVATE_KEY=""
PACKAGE_TAG=""

# Walk the positional parameters themselves so that option values are never
# mistaken for options and quoting produced by getopt is preserved.
while [ $# -gt 0 ]; do
    case "$1" in
        -a | --accept-license) ACCEPT_LICENSE="yes"; shift 1 ;;
        -k | --kernel) KERNEL_VERSION=$2; shift 2 ;;
        -s | --sign) PRIVATE_KEY=$2; shift 2 ;;
        -t | --tag) PACKAGE_TAG=$2; shift 2 ;;
        --) shift; break ;;
        *) break ;;
    esac
done
# Anything left over is an unexpected positional argument.
if [ $# -ne 0 ]; then
    usage
fi

"${command}"
0 commit comments