Skip to content

Commit 9a950fc

Browse files
committed
Add rhel9 base for vGPU-manager containers
Close #453 Signed-off-by: Michele Valsecchi <[email protected]>
1 parent 87517d2 commit 9a950fc

File tree

4 files changed

+495
-5
lines changed

4 files changed

+495
-5
lines changed

Makefile

Lines changed: 53 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,32 @@ build-vgpuguest-%: DOCKERFILE = $(CURDIR)/$(SUBDIR)/Dockerfile
243243
# Remove '-grid' substring in the image tag
244244
build-vgpuguest-%: DRIVER_TAG = $(DRIVER_VERSION:-grid=)
245245

246-
build-vgpuguest-rhcos%: SUBDIR = rhel8
246+
# Map every supported OpenShift (RHCOS) release to the Dockerfile
# sub-directory (rhel8 or rhel9) that the vGPU guest image is built from.
# Each line is a target-specific variable assignment for one explicit target.
# Source of truth: https://access.redhat.com/articles/6907891
# NOTE(review): the linked article appears to list RHCOS 4.13 and 4.14 as
# RHEL 9 based - confirm that rhel8 is intended for those two releases.
# NOTE(review): releases that are not listed here get no SUBDIR override from
# this table - confirm the fallback value is acceptable for them.
247+
build-vgpuguest-rhcos4.12: SUBDIR = rhel8
248+
build-vgpuguest-rhcos4.13: SUBDIR = rhel8
249+
build-vgpuguest-rhcos4.14: SUBDIR = rhel8
250+
build-vgpuguest-rhcos4.15: SUBDIR = rhel9
251+
build-vgpuguest-rhcos4.16: SUBDIR = rhel9
252+
build-vgpuguest-rhcos4.17: SUBDIR = rhel9
253+
build-vgpuguest-rhcos4.18: SUBDIR = rhel9
254+
build-vgpuguest-rhcos4.19: SUBDIR = rhel9
255+
build-vgpuguest-rhcos4.20: SUBDIR = rhel9
256+
257+
# Build the vGPU guest driver image for an RHCOS target.
# Runs a single `$(DOCKER) $(BUILDX) build --pull` with BuildKit enabled,
# tags the result as $(IMAGE) and uses $(DOCKERFILE) with
# $(CURDIR)/$(SUBDIR) as the build context.
# DRIVER_TYPE is fixed to "vgpu" and VGPU_LICENSE_SERVER_TYPE to "NLS";
# DRIVER_VERSION, DRIVER_BRANCH, GOLANG_VERSION and CVE_UPDATES are forwarded
# as build args, followed by any extra $(DOCKER_BUILD_ARGS).
build-vgpuguest-rhcos%:
258+
DOCKER_BUILDKIT=1 \
259+
$(DOCKER) $(BUILDX) build --pull \
260+
$(DOCKER_BUILD_OPTIONS) \
261+
$(DOCKER_BUILD_PLATFORM_OPTIONS) \
262+
--tag $(IMAGE) \
263+
--build-arg DRIVER_TYPE=vgpu \
264+
--build-arg VGPU_LICENSE_SERVER_TYPE=NLS \
265+
--build-arg DRIVER_VERSION="$(DRIVER_VERSION)" \
266+
--build-arg DRIVER_BRANCH="$(DRIVER_BRANCH)" \
267+
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
268+
--build-arg CVE_UPDATES="$(CVE_UPDATES)" \
269+
$(DOCKER_BUILD_ARGS) \
270+
--file $(DOCKERFILE) \
271+
$(CURDIR)/$(SUBDIR)
247272

248273
$(VGPU_GUEST_DRIVER_BUILD_TARGETS):
249274
DOCKER_BUILDKIT=1 \
@@ -280,9 +305,19 @@ build-vgpuhost-%: DIST = $(word 3,$(subst -, ,$@))
280305
build-vgpuhost-%: SUBDIR = $(word 3,$(subst -, ,$@))
281306
build-vgpuhost-%: DOCKERFILE = $(CURDIR)/vgpu-manager/$(SUBDIR)/Dockerfile
282307

283-
build-vgpuhost-rhcos%: SUBDIR = rhel8
284-
285-
$(VGPU_HOST_DRIVER_BUILD_TARGETS):
308+
# Map every supported OpenShift (RHCOS) release to the vgpu-manager
# sub-directory (rhel8 or rhel9) that the vGPU host image is built from.
# Each line is a target-specific variable assignment for one explicit target.
# Source of truth: https://access.redhat.com/articles/6907891
# NOTE(review): the linked article appears to list RHCOS 4.13 and 4.14 as
# RHEL 9 based - confirm that rhel8 is intended for those two releases.
# NOTE(review): releases that are not listed here get no SUBDIR override from
# this table - confirm the fallback value is acceptable for them.
309+
build-vgpuhost-rhcos4.12: SUBDIR = rhel8
310+
build-vgpuhost-rhcos4.13: SUBDIR = rhel8
311+
build-vgpuhost-rhcos4.14: SUBDIR = rhel8
312+
build-vgpuhost-rhcos4.15: SUBDIR = rhel9
313+
build-vgpuhost-rhcos4.16: SUBDIR = rhel9
314+
build-vgpuhost-rhcos4.17: SUBDIR = rhel9
315+
build-vgpuhost-rhcos4.18: SUBDIR = rhel9
316+
build-vgpuhost-rhcos4.19: SUBDIR = rhel9
317+
build-vgpuhost-rhcos4.20: SUBDIR = rhel9
318+
319+
# TODO(mvalsecchi): find a better way than just duplicate the recipe
320+
build-vgpuhost-rhcos%:
286321
DOCKER_BUILDKIT=1 \
287322
$(DOCKER) $(BUILDX) build --pull \
288323
$(DOCKER_BUILD_OPTIONS) \
@@ -297,7 +332,20 @@ $(VGPU_HOST_DRIVER_BUILD_TARGETS):
297332
--file $(DOCKERFILE) \
298333
$(CURDIR)/vgpu-manager/$(SUBDIR)
299334

300-
335+
# Build the vGPU host (vGPU Manager) image for every target listed in
# $(VGPU_HOST_DRIVER_BUILD_TARGETS).
# Runs a single `$(DOCKER) $(BUILDX) build --pull` with BuildKit enabled,
# tags the result as $(IMAGE) and uses $(DOCKERFILE) with
# $(CURDIR)/vgpu-manager/$(SUBDIR) as the build context.
# DRIVER_BRANCH, DRIVER_VERSION, GOLANG_VERSION, CVE_UPDATES and CUDA_VERSION
# are forwarded as build args, followed by any extra $(DOCKER_BUILD_ARGS).
$(VGPU_HOST_DRIVER_BUILD_TARGETS):
336+
DOCKER_BUILDKIT=1 \
337+
$(DOCKER) $(BUILDX) build --pull \
338+
$(DOCKER_BUILD_OPTIONS) \
339+
$(DOCKER_BUILD_PLATFORM_OPTIONS) \
340+
--tag $(IMAGE) \
341+
--build-arg DRIVER_BRANCH="$(DRIVER_BRANCH)" \
342+
--build-arg DRIVER_VERSION="$(DRIVER_VERSION)" \
343+
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
344+
--build-arg CVE_UPDATES="$(CVE_UPDATES)" \
345+
--build-arg CUDA_VERSION="$(CUDA_VERSION)" \
346+
$(DOCKER_BUILD_ARGS) \
347+
--file $(DOCKERFILE) \
348+
$(CURDIR)/vgpu-manager/$(SUBDIR)
301349

302350
# $(VGPU_HOST_DRIVER_PUSH_TARGETS) is in the form of push-vgpuhost-$(DIST)
303351
# VGPU_HOST_DRIVER_VERSION must be defined in the environment when invoking this target.

vgpu-manager/rhel9/Dockerfile

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# NVIDIA vGPU Manager container image built on the UBI9 flavour of the CUDA
# base image.
FROM nvcr.io/nvidia/cuda:13.0.1-base-ubi9

# DRIVER_VERSION and DRIVER_ARCH are build args that are also exported to the
# runtime environment, where the nvidia-driver entrypoint reads them.
ARG DRIVER_VERSION
ENV DRIVER_VERSION=$DRIVER_VERSION
ARG DRIVER_ARCH=x86_64
ENV DRIVER_ARCH=$DRIVER_ARCH

# The vGPU Manager installer must be present in the build context.
RUN mkdir -p /driver
WORKDIR /driver
COPY NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run .
RUN chmod +x NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run

COPY nvidia-driver /usr/local/bin
COPY ocp_dtk_entrypoint /usr/local/bin

LABEL io.k8s.display-name="NVIDIA vGPU Manager Container"
LABEL name="NVIDIA vGPU Manager Container"
LABEL vendor="NVIDIA"
LABEL version="${DRIVER_VERSION}"
LABEL release="N/A"
LABEL summary="Provision the NVIDIA vGPU Manager through containers"
LABEL description="See summary"

# Install / upgrade packages here that are required to resolve CVEs.
# On UBI9 `yum` is provided by dnf, whose cache lives in /var/cache/dnf, so
# clean through the package manager and remove both cache locations instead
# of only the (unused) /var/cache/yum directory.
ARG CVE_UPDATES
RUN if [ -n "${CVE_UPDATES}" ]; then \
        yum update -y ${CVE_UPDATES} && \
        yum clean all && \
        rm -rf /var/cache/yum /var/cache/dnf; \
    fi

# Add NGC DL license from the CUDA image.
# `mkdir -p` keeps this step working if the base image already ships /licenses.
RUN mkdir -p /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE

ENTRYPOINT ["nvidia-driver", "init"]

vgpu-manager/rhel9/nvidia-driver

Lines changed: 252 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,252 @@
1+
#!/bin/bash
2+
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
3+
4+
# Trace every command (-x) and abort on the first unhandled failure (-e).
set -xe
5+
6+
# Required: the script aborts with "Missing driver version" when unset or empty.
DRIVER_VERSION=${DRIVER_VERSION:?"Missing driver version"}
7+
# Upper bound of the retry counter used when enabling / disabling SR-IOV VFs.
DRIVER_RESET_RETRIES=10
8+
# Value passed to `sleep` before the first VF creation attempt; overridable
# from the environment, defaults to 15.
DELAY_BEFORE_VF_CREATION=${DELAY_BEFORE_VF_CREATION:-15}
9+
# Base directory under which the driver rootfs is bind-mounted.
RUN_DIR=/run/nvidia
10+
11+
# Expose the container's root filesystem under ${RUN_DIR}/driver through a
# recursive bind mount. /sys is marked runbindable and then private first so
# that sysfs is left out of the recursive bind. The device nodes below the new
# mount are then relabelled for SELinux.
_mount_rootfs() {
    local driver_root="${RUN_DIR}/driver"

    echo "Mounting NVIDIA driver rootfs..."
    mount --make-runbindable /sys
    mount --make-private /sys
    mkdir -p "${driver_root}"
    mount --rbind / "${driver_root}"

    echo "Change device files security context for selinux compatibility"
    chcon -R -t container_file_t "${driver_root}/dev"
}
22+
23+
# Lazily and recursively unmount ${RUN_DIR}/driver, but only when findmnt
# reports a mount target matching that path; otherwise do nothing.
_unmount_rootfs() {
    local driver_root="${RUN_DIR}/driver"

    echo "Unmounting NVIDIA driver rootfs..."
    if ! findmnt -r -o TARGET | grep "${driver_root}" > /dev/null; then
        return 0
    fi
    umount -l -R "${driver_root}"
}
30+
31+
# Make sure /dev/char exists inside the container.
# nvidia-vgpu-mgr creates symlinks for new device nodes under /dev/char and
# fails when the directory is missing.
_create_dev_char_directory() {
    if [ -d "/dev/char" ]; then
        return 0
    fi
    echo "Creating '/dev/char' directory"
    mkdir -p /dev/char
}
40+
41+
# Point the kernel firmware loader at the firmware directory inside the
# mounted driver rootfs, unless a search path is already configured, in which
# case the existing value is kept and a warning is printed.
_set_fw_search_path() {
    local fw_path_config_file="/sys/module/firmware_class/parameters/path"
    local nv_fw_search_path="$RUN_DIR/driver/lib/firmware"

    # Any non-whitespace content means a path has already been configured.
    if [[ -z "$(grep '[^[:space:]]' "$fw_path_config_file")" ]]; then
        echo "Configuring the following firmware search path in '$fw_path_config_file': $nv_fw_search_path"
        echo -n "$nv_fw_search_path" > "$fw_path_config_file"
        return
    fi

    echo "WARNING: A search path is already configured in $fw_path_config_file"
    echo " Retaining the current configuration. Note, GSP firmware may not be found and thus won't be used by the NVIDIA driver."
}
54+
55+
# Run the NVIDIA vGPU-KVM installer non-interactively.
# The .run file is looked up relative to the current working directory and is
# selected by DRIVER_ARCH and DRIVER_VERSION. A fresh temporary directory is
# handed to the installer through --tmpdir.
_install_driver() {
    # Declare and assign separately: `local var=$(cmd)` returns the status of
    # `local`, which would hide a failing mktemp.
    local tmp_dir
    tmp_dir=$(mktemp -d)

    sh "NVIDIA-Linux-${DRIVER_ARCH}-${DRIVER_VERSION}-vgpu-kvm.run" --ui=none --no-questions --tmpdir "${tmp_dir}" --no-systemd
}
60+
61+
# The kernel modules are loaded by _install_driver(); this only starts the
# vGPU daemons (nvidia-vgpud in the foreground, nvidia-vgpu-mgr in the
# background) and then verifies that both kernel modules are present.
# Returns 0 when both modules are loaded, 1 otherwise.
_load_driver() {
    /usr/bin/nvidia-vgpud
    /usr/bin/nvidia-vgpu-mgr &

    # check nvidia drivers are loaded
    local module
    for module in nvidia_vgpu_vfio nvidia; do
        if [ ! -f "/sys/module/${module}/refcnt" ]; then
            echo "Failed to load nvidia driver"
            return 1
        fi
    done
    return 0
}
73+
74+
# Enable virtual functions on all physical GPUs of the node that support
# SR-IOV. sriov-manage is attempted up to DRIVER_RESET_RETRIES + 1 times to
# cope with a driver that is still busy (e.g. during initialization).
# Returns 0 on the first successful attempt, 1 once all attempts have failed.
_enable_vfs() {
    # Give the driver time to finish initializing before the first attempt.
    # This is a WAR for a bug in vGPU 17.2 where sriov-manage does not return
    # a non-zero exit code even though VF creation fails.
    sleep $DELAY_BEFORE_VF_CREATION

    local attempt=0
    until /usr/lib/nvidia/sriov-manage -e ALL; do
        if (( attempt == DRIVER_RESET_RETRIES )); then
            echo "Failed to enable VFs"
            return 1
        fi
        (( attempt += 1 ))
    done
    return 0
}
93+
94+
# Disable virtual functions on all physical GPUs of the node that support
# SR-IOV. sriov-manage is attempted up to DRIVER_RESET_RETRIES + 1 times to
# cope with a driver that is still busy (e.g. during initialization).
# Returns 0 on the first successful attempt, 1 once all attempts have failed.
_disable_vfs() {
    local attempt=0
    until /usr/lib/nvidia/sriov-manage -d ALL; do
        if (( attempt == DRIVER_RESET_RETRIES )); then
            echo "Failed to disable VFs"
            return 1
        fi
        (( attempt += 1 ))
    done
    return 0
}
108+
109+
# Stop the vGPU manager daemon (when its pid file exists) and remove the
# nvidia_vgpu_vfio and nvidia kernel modules that are currently loaded.
# Returns 0 on success (including when nothing is loaded), 1 when the daemon
# could not be stopped or rmmod failed.
_unload_driver() {
    local rmmod_args=()

    if [ -f /var/run/nvidia-vgpu-mgr/nvidia-vgpu-mgr.pid ]; then
        echo "Stopping NVIDIA vGPU Manager..."
        local pid
        pid=$(< /var/run/nvidia-vgpu-mgr/nvidia-vgpu-mgr.pid)

        kill -TERM "${pid}"
        # Poll up to 50 times, 0.1s apart, for the daemon to exit.
        local _
        for _ in $(seq 1 50); do
            kill -0 "${pid}" 2> /dev/null || break
            sleep 0.1
        done
        # Decide on the actual process state rather than on the loop counter:
        # a daemon that exits during the very last poll must not be reported
        # as a failure.
        if kill -0 "${pid}" 2> /dev/null; then
            echo "Could not stop NVIDIA vGPU Manager" >&2
            return 1
        fi
    fi

    echo "Unloading NVIDIA driver kernel modules..."
    # nvidia_vgpu_vfio is queued first so that it is removed before nvidia.
    if [ -f /sys/module/nvidia_vgpu_vfio/refcnt ]; then
        rmmod_args+=("nvidia_vgpu_vfio")
    fi
    if [ -f /sys/module/nvidia/refcnt ]; then
        rmmod_args+=("nvidia")
    fi

    # TODO: check if nvidia module is in use by checking refcnt

    if [ ${#rmmod_args[@]} -gt 0 ]; then
        if ! rmmod "${rmmod_args[@]}"; then
            return 1
        fi
    fi
    return 0
}
151+
152+
# Tear everything down: disable the VFs, unload the driver and, only when both
# steps succeed, unmount the driver rootfs.
# Returns 0 after a successful cleanup, 1 otherwise.
_shutdown() {
    if ! { _disable_vfs && _unload_driver; }; then
        echo "Failed to cleanup driver"
        return 1
    fi
    _unmount_rootfs
    return 0
}
160+
161+
# Placeholder for the "build" command; only reports that it is not implemented.
build() {
    printf '%s\n' "build() not implemented"
}
164+
165+
# Placeholder for the "load" command; only reports that it is not implemented.
load() {
    printf '%s\n' "load() not implemented"
}
168+
169+
# Placeholder for the "update" command; only reports that it is not implemented.
update() {
    printf '%s\n' "update() not implemented"
}
172+
173+
# Container entrypoint command: remove any previous driver installation,
# install and load the vGPU Manager driver, expose the driver rootfs, enable
# the SR-IOV VFs and then stay in the foreground watching nvidia-vgpu-mgr.
init() {
174+
# During setup a signal just exits; the EXIT trap below performs the cleanup.
trap "echo 'Caught signal'; exit 1" HUP INT QUIT PIPE TERM
175+
trap "_shutdown" EXIT
176+
177+
if ! _unload_driver; then
178+
echo "Previous NVIDIA driver installation cannot be removed. Exiting"
179+
exit 1
180+
fi
181+
_unmount_rootfs
182+
_create_dev_char_directory
183+
_set_fw_search_path
184+
_install_driver
185+
_load_driver || exit 1
186+
_mount_rootfs
187+
_enable_vfs
188+
189+
# In certain scenarios, /sys/class/mdev_bus is not populated with the correct list of devices (PFs and possible VFs) at this point.
190+
# Re-run nvidia-vgpud to ensure /sys/class/mdev_bus is populated correctly. And restart nvidia-vgpu-mgr if previously killed.
191+
nvidia-vgpud &
192+
pgrep nvidia-vgpu-mgr >/dev/null || (echo "Restarting nvidia-vgpu-mgr after previously killed" && nvidia-vgpu-mgr &)
193+
194+
# Stop command tracing; errexit (-e) stays enabled.
set +x
195+
echo "Done, now waiting for signal"
# From here on a signal runs _shutdown explicitly and clears the EXIT trap so
# that the cleanup is not executed a second time on exit.
196+
trap "echo 'Caught signal'; _shutdown; trap - EXIT; exit" HUP INT QUIT PIPE TERM
197+
198+
# Check every 15 seconds that the vGPU manager daemon is still alive.
# NOTE(review): `exit 1` runs inside a subshell, so on its own it only ends
# that subshell; the script itself appears to terminate because errexit is
# still active - confirm this reliance is intended.
while true; do
199+
sleep 15
200+
pgrep nvidia-vgpu-mgr >/dev/null || (echo "ERROR: nvidia-vgpu-mgr daemon is no longer running. Exiting." && exit 1)
201+
done
202+
}
203+
204+
205+
# Print the command-line synopsis on stderr and terminate with exit status 1.
usage() {
    {
        printf '%s\n' \
            "Usage: $0 COMMAND [ARG...]" \
            "" \
            "Commands:" \
            "init [-a | --accept-license]" \
            "build [-a | --accept-license]" \
            "load" \
            "update [-k | --kernel VERSION] [-s | --sign KEYID] [-t | --tag TAG]"
    } >&2
    exit 1
}
217+
218+
# Script entry point: validate the sub-command, normalise its options with
# getopt(1), fill in the option globals and dispatch to the function that has
# the same name as the sub-command.
if [ $# -eq 0 ]; then
    usage
fi
command=$1; shift

# `|| usage` is attached to every getopt call because the script runs with
# errexit enabled: a failing `options=$(getopt ...)` would otherwise abort the
# script before any later status check could print the usage text.
# For "build", -a is a flag and -t takes a value, matching the long options
# `accept-license` and `tag:`.
case "${command}" in
    init) options=$(getopt -l accept-license -o a -- "$@") || usage ;;
    build) options=$(getopt -l accept-license,tag: -o at: -- "$@") || usage ;;
    load) options="" ;;
    update) options=$(getopt -l kernel:,sign:,tag: -o k:s:t: -- "$@") || usage ;;
    *) usage ;;
esac
# Replace the positional parameters with getopt's normalised, quoted output.
eval set -- "${options}"

ACCEPT_LICENSE=""
KERNEL_VERSION=$(uname -r)
PRIVATE_KEY=""
PACKAGE_TAG=""

# Walk the positional parameters directly instead of word-splitting the getopt
# string, so option values are taken exactly as getopt quoted them.
while [ $# -gt 0 ]; do
    case "$1" in
        -a | --accept-license) ACCEPT_LICENSE="yes"; shift 1 ;;
        -k | --kernel) KERNEL_VERSION=$2; shift 2 ;;
        -s | --sign) PRIVATE_KEY=$2; shift 2 ;;
        -t | --tag) PACKAGE_TAG=$2; shift 2 ;;
        --) shift; break ;;
        *) break ;;
    esac
done
# Anything left over is an unexpected positional argument.
if [ $# -ne 0 ]; then
    usage
fi

"${command}"

0 commit comments

Comments
 (0)