|
#!/bin/bash

# Trace every command, abort on the first failing command, and treat
# references to unset variables as errors.
set -xeu

# Required inputs: the expansion aborts the script with the given message
# when either variable is unset or empty.
: "${DRIVER_VERSION:?Missing driver version}"
: "${DRIVER_ARCH:?Missing driver arch}"

# Highest value of the retry counter used by the VF enable/disable loops.
DRIVER_RESET_RETRIES=10
# Argument handed to sleep before VF creation; 15 when unset or empty.
: "${DELAY_BEFORE_VF_CREATION:=15}"
KERNEL_VERSION="$(uname -r)"
RUN_DIR="/run/nvidia"

export DEBIAN_FRONTEND=noninteractive
| 13 | + |
# Refresh the apt package index, unless PACKAGE_TAG is "builtin".
# Globals: PACKAGE_TAG (read; may be unset)
_update_package_cache() {
    if [[ "${PACKAGE_TAG:-}" == "builtin" ]]; then
        return 0
    fi

    echo "Updating the package cache..."
    apt-get -qq update
}
| 20 | + |
# Delete the downloaded apt package lists, unless PACKAGE_TAG is "builtin".
# Globals: PACKAGE_TAG (read; may be unset)
_cleanup_package_cache() {
    [[ "${PACKAGE_TAG:-}" != "builtin" ]] || return 0

    echo "Cleaning up the package cache..."
    rm -rf /var/lib/apt/lists/*
}
| 27 | + |
# Resolve the kernel version to the form major.minor.patch-revision-flavor
# where flavor defaults to generic.
# Globals:  KERNEL_VERSION (read; rewritten on success, untouched on failure)
# Outputs:  progress messages on stdout, the failure message on stderr
# Returns:  0 on success, 1 when no usable "Version:" field is found for
#           linux-headers-${KERNEL_VERSION}
_resolve_kernel_version() {
    local version
    local kernel_flavor

    echo "Resolving Linux kernel version..."

    # Declaration and assignment are split so `local` does not hide the
    # pipeline status. A failing lookup is tolerated here on purpose: it
    # leaves ${version} empty, which is reported just below.
    version=$(apt-cache show "linux-headers-${KERNEL_VERSION}" 2> /dev/null | \
        sed -nE 's/^Version:\s+(([0-9]+\.){2}[0-9]+)[-.]([0-9]+).*/\1-\3/p' | head -1) || true
    if [ -z "${version}" ]; then
        echo "Could not resolve Linux kernel version" >&2
        return 1
    fi

    # Everything from the first lowercase letter onwards is the flavor.
    kernel_flavor=$(printf '%s\n' "${KERNEL_VERSION}" | sed 's/[^a-z]*//')
    kernel_flavor="${kernel_flavor//virtual/generic}"
    # A version string without a flavor suffix would otherwise produce a
    # dangling "-" at the end of KERNEL_VERSION.
    kernel_flavor="${kernel_flavor:-generic}"

    KERNEL_VERSION="${version}-${kernel_flavor}"
    echo "Proceeding with Linux kernel version ${KERNEL_VERSION}"
    return 0
}
| 45 | + |
# Install the kernel modules header/builtin/order files and generate the kernel version string.
# Globals:  KERNEL_VERSION (read)
# Outputs:  progress messages on stdout, the failure message on stderr
# Returns:  1 when no version string can be extracted from the kernel image
# Side effects: recreates /lib/modules/${KERNEL_VERSION}; works inside a
#           scratch directory that is removed when the function returns.
_install_prerequisites() {
    local tmp_dir
    tmp_dir=$(mktemp -d)

    # Only the RETURN trap is used for cleanup. Installing an EXIT trap here
    # would replace whatever EXIT handler the caller has registered, because
    # EXIT traps are global to the shell. The handler also disarms itself so
    # it does not stay set after this function has returned.
    trap "popd > /dev/null; rm -rf -- '${tmp_dir}'; trap - RETURN" RETURN
    pushd "${tmp_dir}" > /dev/null

    rm -rf -- "/lib/modules/${KERNEL_VERSION:?}"
    mkdir -p "/lib/modules/${KERNEL_VERSION}/proc"

    echo "Installing Linux kernel headers..."
    apt-get -qq install --no-install-recommends "linux-headers-${KERNEL_VERSION}" > /dev/null

    echo "Installing Linux kernel module files..."
    apt-get -qq download "linux-image-${KERNEL_VERSION}" && dpkg -x linux-image*.deb .
    # The next two packages are best effort: failures are ignored and their
    # error output is discarded.
    { apt-get -qq download "linux-modules-${KERNEL_VERSION}" && dpkg -x linux-modules*.deb . || true; } 2> /dev/null
    # linux-modules-extra contains pci-pf-stub which is required when enabling SR-IOV on a physical GPU
    { apt-get -qq download "linux-modules-extra-${KERNEL_VERSION}" && dpkg -x linux-modules-extra*.deb . || true; } 2> /dev/null
    mv "lib/modules/${KERNEL_VERSION}"/modules.* "/lib/modules/${KERNEL_VERSION}"
    mv "lib/modules/${KERNEL_VERSION}/kernel" "/lib/modules/${KERNEL_VERSION}"
    depmod "${KERNEL_VERSION}"

    echo "Generating Linux kernel version string..."

    # Split the `file` description on commas and keep the word that follows
    # "version".
    file boot/vmlinuz-* | awk 'BEGIN { RS="," } $1=="version" { print $2 }' - > version
    if [ -z "$(<version)" ]; then
        echo "Could not locate Linux kernel version string" >&2
        return 1
    fi
    mv version "/lib/modules/${KERNEL_VERSION}/proc"
}
| 77 | + |
# Cleanup the prerequisites installed above: purge the kernel headers package
# for KERNEL_VERSION, unless PACKAGE_TAG is "builtin".
# Globals: PACKAGE_TAG (read; may be unset), KERNEL_VERSION (read)
_remove_prerequisites() {
    if [ "${PACKAGE_TAG:-}" != "builtin" ]; then
        # Quoted so the package name always reaches apt-get as one argument.
        apt-get -qq purge "linux-headers-${KERNEL_VERSION}" > /dev/null
        # TODO remove module files not matching an existing driver package.
    fi
}
| 85 | + |
# Recursively bind-mount / onto ${RUN_DIR}/driver. /sys is first marked
# unbindable (recursively) so that the recursive bind leaves sysfs out.
# Globals: RUN_DIR (read)
_mount_rootfs() {
    local driver_root="${RUN_DIR}/driver"

    echo "Mounting NVIDIA driver rootfs..."

    # Hack: remount /sys as rw to overcome intermittent bug with
    # /sys being mounted ro for privileged containers
    mount -o remount,rw /sys

    mount --make-runbindable /sys
    mount --make-private /sys

    mkdir -p "${driver_root}"
    mount --rbind / "${driver_root}"
}
| 97 | + |
# Lazily and recursively unmount ${RUN_DIR}/driver when the findmnt target
# list contains a line matching that path; otherwise do nothing.
# Globals: RUN_DIR (read)
_unmount_rootfs() {
    local driver_root="${RUN_DIR}/driver"

    echo "Unmounting NVIDIA driver rootfs..."
    if ! findmnt -r -o TARGET | grep -q "${driver_root}"; then
        return 0
    fi
    umount -l -R "${driver_root}"
}
| 105 | + |
# Make sure /dev/char exists inside the container.
# nvidia-vgpu-mgr creates symlinks under /dev/char for new device nodes
# and fails to do so when the directory is missing.
_create_dev_char_directory() {
    [ -d "/dev/char" ] && return 0

    echo "Creating '/dev/char' directory"
    mkdir -p /dev/char
}
| 115 | + |
# Write ${RUN_DIR}/driver/lib/firmware into the firmware_class "path" module
# parameter. If that parameter already holds any non-whitespace content, it is
# left untouched and a warning is printed instead.
# Globals: RUN_DIR (read)
_set_fw_search_path() {
    local fw_path_config_file="/sys/module/firmware_class/parameters/path"
    local nv_fw_search_path="$RUN_DIR/driver/lib/firmware"

    if grep -q '[^[:space:]]' "$fw_path_config_file"; then
        echo "WARNING: A search path is already configured in $fw_path_config_file"
        echo " Retaining the current configuration. Note, GSP firmware may not be found and thus won't be used by the NVIDIA driver."
        return
    fi

    echo "Configuring the following firmware search path in '$fw_path_config_file': $nv_fw_search_path"
    # Written without a trailing newline.
    printf '%s' "$nv_fw_search_path" > "$fw_path_config_file"
}
| 129 | + |
# Run the NVIDIA vGPU KVM installer for DRIVER_ARCH/DRIVER_VERSION from the
# current directory, non-interactively, using a private scratch directory.
# Globals:  DRIVER_ARCH, DRIVER_VERSION (read)
# Returns:  the installer's exit status
_install_driver() {
    local tmp_dir
    local status=0
    tmp_dir=$(mktemp -d)

    sh "NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run" \
        --ui=none --no-questions --tmpdir "${tmp_dir}" --no-systemd || status=$?

    # The scratch directory is only handed to the installer via --tmpdir;
    # remove it whether or not the installer succeeded so it does not pile up.
    rm -rf -- "${tmp_dir}"
    return "${status}"
}
| 135 | + |
# Start the vGPU daemons and confirm the NVIDIA kernel modules are present.
# Module loading itself is currently handled by _install_driver(); this only
# runs nvidia-vgpud, launches nvidia-vgpu-mgr in the background, and then
# checks /sys/module for both modules.
# Returns: 0 when both modules expose a refcnt file, 1 otherwise
_load_driver() {
    local module

    /usr/bin/nvidia-vgpud
    /usr/bin/nvidia-vgpu-mgr &

    # check nvidia drivers are loaded
    for module in nvidia_vgpu_vfio nvidia; do
        if [ ! -f "/sys/module/${module}/refcnt" ]; then
            echo "Failed to load nvidia driver"
            return 1
        fi
    done
    return 0
}
| 148 | + |
# Enable virtual functions for all physical GPUs on the node that support SR-IOV.
# sriov-manage is re-run (up to DRIVER_RESET_RETRIES additional times) to
# account for the driver being busy, e.g. during driver initialization.
# Globals: DELAY_BEFORE_VF_CREATION, DRIVER_RESET_RETRIES (read)
# Returns: 0 as soon as sriov-manage succeeds, 1 once the retries are used up
_enable_vfs() {
    local attempt=0

    # Wait before attempting to create VFs to ensure the driver has finished initializing.
    # This is a WAR for a bug in vGPU 17.2 where sriov-manage does not return a non-zero
    # exit code even though VF creation fails.
    sleep "${DELAY_BEFORE_VF_CREATION}"

    until /usr/lib/nvidia/sriov-manage -e ALL; do
        if ((attempt >= DRIVER_RESET_RETRIES)); then
            echo "Failed to enable VFs"
            return 1
        fi
        attempt=$((attempt + 1))
    done
    return 0
}
| 168 | + |
# Disable virtual functions for all physical GPUs on the node that support SR-IOV.
# sriov-manage is re-run (up to DRIVER_RESET_RETRIES additional times) to
# account for the driver being busy, e.g. during driver initialization.
# Globals: DRIVER_RESET_RETRIES (read)
# Returns: 0 as soon as sriov-manage succeeds, 1 once the retries are used up
_disable_vfs() {
    local failures=0

    while true; do
        if /usr/lib/nvidia/sriov-manage -d ALL; then
            return 0
        fi
        if [ "${failures}" -eq "${DRIVER_RESET_RETRIES}" ]; then
            echo "Failed to disable VFs"
            return 1
        fi
        failures=$((failures + 1))
    done
}
| 183 | + |
# Stop the NVIDIA vGPU Manager (when its PID file exists) and remove the
# nvidia_vgpu_vfio and nvidia kernel modules that are currently loaded.
# Outputs:  progress messages on stdout, the stop failure on stderr
# Returns:  0 on success (including when nothing is loaded);
#           1 when the manager is still alive after 50 polls 0.1s apart,
#           or when rmmod fails
_unload_driver() {
    local pid_file="/var/run/nvidia-vgpu-mgr/nvidia-vgpu-mgr.pid"
    local rmmod_args=()
    local pid
    local attempt
    local stopped=0

    if [ -f "${pid_file}" ]; then
        echo "Stopping NVIDIA vGPU Manager..."
        pid=$(< "${pid_file}")

        # A failing kill (e.g. the PID is already gone) is not fatal: the
        # polling loop below decides whether the process is still alive.
        kill -TERM "${pid}" || true
        # Track success explicitly instead of inspecting the loop counter,
        # so a process that exits on the very last poll is not reported as
        # a failure.
        for ((attempt = 0; attempt < 50; attempt++)); do
            if ! kill -0 "${pid}" 2> /dev/null; then
                stopped=1
                break
            fi
            sleep 0.1
        done
        if [ "${stopped}" -eq 0 ]; then
            echo "Could not stop NVIDIA vGPU Manager" >&2
            return 1
        fi
    fi

    echo "Unloading NVIDIA driver kernel modules..."
    # Module order is kept as it was: nvidia_vgpu_vfio first, then nvidia.
    if [ -f /sys/module/nvidia_vgpu_vfio/refcnt ]; then
        rmmod_args+=("nvidia_vgpu_vfio")
    fi
    if [ -f /sys/module/nvidia/refcnt ]; then
        rmmod_args+=("nvidia")
    fi

    # TODO: check if nvidia module is in use by checking refcnt

    if [ ${#rmmod_args[@]} -gt 0 ]; then
        rmmod "${rmmod_args[@]}" || return 1
    fi
    return 0
}
| 226 | + |
# Tear the driver down: disable the VFs, unload the driver, unmount the
# driver rootfs and delete the readiness marker file.
# Returns: 0 after a full cleanup; 1 when disabling the VFs or unloading the
#          driver fails (the rootfs and marker file are then left alone)
_shutdown() {
    if ! _disable_vfs || ! _unload_driver; then
        echo "Failed to cleanup driver"
        return 1
    fi

    _unmount_rootfs
    # Remove validation file
    rm -f /run/nvidia/validations/vgpu-manager-ready
    return 0
}
| 237 | + |
# Placeholder for the "build" command: only reports that it is not implemented.
build() {
    printf '%s\n' "build() not implemented"
}
| 241 | + |
# Placeholder for the "load" command: only reports that it is not implemented.
load() {
    printf '%s\n' "load() not implemented"
}
| 245 | + |
# Placeholder for the "update" command: only reports that it is not implemented.
update() {
    printf '%s\n' "update() not implemented"
}
| 249 | + |
# Entry point for the "init" command: remove any previous driver, install and
# start the vGPU manager, enable the VFs, publish the readiness marker file,
# and then stay in the foreground watching nvidia-vgpu-mgr until a signal
# arrives or the daemon disappears.
# Exits the script with status 1 on any failed step or when the daemon dies.
init() {
    trap "echo 'Caught signal'; exit 1" HUP INT QUIT PIPE TERM
    trap "_shutdown" EXIT

    if ! _unload_driver; then
        echo "Previous NVIDIA driver installation cannot be removed. Exiting"
        exit 1
    fi
    _unmount_rootfs

    _update_package_cache
    _resolve_kernel_version || exit 1
    _install_prerequisites
    _create_dev_char_directory
    _set_fw_search_path
    _install_driver
    _load_driver || exit 1
    _mount_rootfs
    _enable_vfs

    # In certain scenarios, /sys/class/mdev_bus is not populated with the correct list of devices (PFs and possible VFs) at this point.
    # Re-run nvidia-vgpud to ensure /sys/class/mdev_bus is populated correctly. And restart nvidia-vgpu-mgr if previously killed.
    nvidia-vgpud &
    if ! pgrep nvidia-vgpu-mgr > /dev/null; then
        echo "Restarting nvidia-vgpu-mgr after previously killed"
        nvidia-vgpu-mgr &
    fi

    # Create validation file to indicate vGPU manager is ready
    echo "Creating vGPU manager validation file..."
    mkdir -p /run/nvidia/validations
    touch /run/nvidia/validations/vgpu-manager-ready

    set +x
    echo "Done, now waiting for signal"
    # From here on a signal runs _shutdown once, then clears the EXIT trap so
    # the cleanup is not repeated on the way out.
    trap "echo 'Caught signal'; _shutdown; trap - EXIT; exit" HUP INT QUIT PIPE TERM

    while true; do
        sleep 15
        # Exit from the main shell. An `exit` wrapped in ( ... ) would only
        # leave the subshell and end the script merely as a side effect of
        # errexit.
        if ! pgrep nvidia-vgpu-mgr > /dev/null; then
            echo "ERROR: nvidia-vgpu-mgr daemon is no longer running. Exiting." >&2
            exit 1
        fi
    done
}
| 289 | + |
| 290 | + |
# Print the command synopsis on stderr and terminate the script with status 1.
usage() {
    printf '%s\n' \
        "Usage: $0 COMMAND [ARG...]" \
        "" \
        "Commands:" \
        " init [-a | --accept-license]" \
        " build [-a | --accept-license]" \
        " load" \
        " update [-k | --kernel VERSION] [-s | --sign KEYID] [-t | --tag TAG]" >&2
    exit 1
}
| 303 | + |
# Command-line entry point: validate the command name, parse its options with
# getopt, and dispatch to the function of the same name.
if [ $# -eq 0 ]; then
    usage
fi
command=$1; shift

# Under `set -e` a failing `options=$(getopt ...)` would abort the script
# before any later `$?` check could run, so the usage fallback is attached
# directly to each assignment.
case "${command}" in
    init) options=$(getopt -l accept-license -o a -- "$@") || usage ;;
    # -a is a flag and -t takes a value, mirroring --accept-license / --tag:
    build) options=$(getopt -l accept-license,tag: -o at: -- "$@") || usage ;;
    load) options="" ;;
    update) options=$(getopt -l kernel:,sign:,tag: -o k:s:t: -- "$@") || usage ;;
    *) usage ;;
esac
# getopt emits shell-quoted words; eval restores them as positional parameters.
eval set -- "${options}"

ACCEPT_LICENSE=""
KERNEL_VERSION=$(uname -r)
PRIVATE_KEY=""
PACKAGE_TAG=""

# Consume the positional parameters themselves rather than iterating over the
# word-split option string, so option values containing whitespace or quotes
# stay aligned with the shifts.
while [ $# -gt 0 ]; do
    case "$1" in
        -a | --accept-license) ACCEPT_LICENSE="yes"; shift 1 ;;
        -k | --kernel) KERNEL_VERSION=$2; shift 2 ;;
        -s | --sign) PRIVATE_KEY=$2; shift 2 ;;
        -t | --tag) PACKAGE_TAG=$2; shift 2 ;;
        --) shift; break ;;
        *) break ;;
    esac
done
# Anything left over is an unexpected positional argument.
if [ $# -ne 0 ]; then
    usage
fi

"${command}"
0 commit comments