Skip to content

Commit 2fd2c25

Browse files
committed
work on err msg, add hint for common problem
Signed-off-by: Dr. Jan-Philip Gehrcke <[email protected]>
1 parent 03d2a95 commit 2fd2c25

File tree

1 file changed

+19
-11
lines changed

1 file changed

+19
-11
lines changed

hack/kubelet-plugin-prestart.sh

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,28 +16,36 @@ fi
1616

1717
echo "NVIDIA_DRIVER_ROOT (path on host): ${NVIDIA_DRIVER_ROOT}"
1818

19-
# Note: the following path is in-container, after chroot to /driver-root (and
20-
# /driver-root is where NVIDIA_DRIVER_ROOT is mounted from the host filesystem).
21-
# This typically outputs /usr/bin/nvidia-smi.
22-
echo "command -v nvidia-smi: $(command -v nvidia-smi)"
23-
2419
while true
2520
do
2621
if ! command -v nvidia-smi &>/dev/null
2722
then
2823
printf '%b' \
29-
"Command not found: 'nvidia-smi'. " \
30-
"Has the NVIDIA GPU driver been set up on the host? " \
24+
"Not in PATH: 'nvidia-smi'. " \
25+
"Has the NVIDIA GPU driver been set up? " \
3126
"The GPU driver is expected to be installed under " \
32-
"NVIDIA_DRIVER_ROOT in the host filesystem. " \
33-
"NVIDIA_DRIVER_ROOT is currently set to: '${NVIDIA_DRIVER_ROOT}'. " \
34-
"If that value appears to be unexpected: " \
27+
"NVIDIA_DRIVER_ROOT ('${NVIDIA_DRIVER_ROOT}') in the host filesystem. " \
28+
"If NVIDIA_DRIVER_ROOT appears to be unexpected: " \
3529
"review and adjust the 'nvidiaDriverRoot' Helm chart variable. " \
3630
"If the value is expected: review if the GPU driver has " \
3731
"actually been installed under NVIDIA_DRIVER_ROOT. " \
38-
"If you chose the NVIDIA GPU Operator to manage the GPU driver: " \
32+
"If you chose the NVIDIA GPU Operator to manage the GPU driver " \
33+
"(NVIDIA_DRIVER_ROOT set to /run/nvidia/driver): "\
3934
"make sure that Operator is deployed and healthy.\n"
35+
36+
# Provide hint for a specific, common mistake.
37+
if [ "$NVIDIA_DRIVER_ROOT" == "/" ] && [ -f /run/nvidia/driver/usr/bin/nvidia-smi ]; then
38+
printf '%b' \
39+
"Note: /run/nvidia/driver/usr/bin/nvidia-smi exists on the host, you " \
40+
"may want to re-install the DRA driver Helm chart with " \
41+
"--set nvidiaDriverRoot=/run/nvidia/driver\n"
42+
fi
4043
else
44+
# Note: the following path is in-container, after chroot to /driver-root (and
45+
# /driver-root is where NVIDIA_DRIVER_ROOT is mounted from the host filesystem).
46+
# This typically outputs /usr/bin/nvidia-smi.
47+
echo "command -v nvidia-smi: $(command -v nvidia-smi)"
48+
4149
# This may be slow or hang, in a bad setup.
4250
echo "invoking nvidia-smi"
4351
nvidia-smi

0 commit comments

Comments
 (0)