Skip to content

Commit 20a36d6

Browse files
committed
non-slash driver root: mount parent
Signed-off-by: Dr. Jan-Philip Gehrcke <[email protected]>
1 parent 0924a0a commit 20a36d6

File tree

2 files changed

+42
-8
lines changed

2 files changed

+42
-8
lines changed

deployments/helm/nvidia-dra-driver-gpu/templates/kubeletplugin.yaml

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -61,18 +61,25 @@ spec:
6161
# Use runc: explicit "void"; otherwise we inherit "all".
6262
- name: NVIDIA_VISIBLE_DEVICES
6363
value: void
64-
# In case of the operator-provided driver, another container mounts the
65-
# driver onto the host using `mountPropagation: Bidirectional`
66-
# (out-of-band of the lifecycle of _this_ pod here). For us to see that
67-
# mount, `mountPropagation: HostToContainer` is required (docs: "if any
68-
# Pod with Bidirectional mount propagation to the same volume mounts
69-
# anything there, the container with HostToContainer mount propagation
70-
# will see it.").
7164
# The volumeMounts key is emitted inside each branch of the conditional below; declaring it here as well would render a duplicate mapping key.
72-
- name: driver-root
65+
{{- if eq "/" .Values.nvidiaDriverRoot }}
66+
volumeMounts:
67+
- name: host-root
7368
mountPath: /driver-root
7469
readOnly: true
70+
{{- else }}
71+
volumeMounts:
72+
- name: driver-root-parent
73+
mountPath: /driver-root-parent
74+
# In case of the operator-provided driver, another container mounts
75+
# the driver onto the host using `mountPropagation: Bidirectional`
76+
# (out-of-band of the lifecycle of _this_ pod here). For us to see
77+
# that mount, `mountPropagation: HostToContainer` is required (docs:
78+
# "if any Pod with Bidirectional mount propagation to the same volume
79+
# mounts anything there, the container with HostToContainer mount
80+
# propagation will see it.").
7581
mountPropagation: HostToContainer
82+
{{- end }}
7683
containers:
7784
{{- if .Values.resources.computeDomains.enabled }}
7885
- name: compute-domains
@@ -197,6 +204,19 @@ spec:
197204
- name: cdi
198205
hostPath:
199206
path: /var/run/cdi
207+
{{- if eq "/" .Values.nvidiaDriverRoot }}
208+
- name: host-root
209+
hostPath:
210+
path: /
211+
{{- else }}
212+
- name: driver-root-parent
213+
hostPath:
214+
# First, remove trailing slash (if exists) and then remove last path
215+
# element with sprig template function `dir`. Examples: /a/b/ -> /a,
216+
# /a/b/c -> /a/b.
217+
path: {{ dir (trimSuffix "/" .Values.nvidiaDriverRoot) }}
218+
type: DirectoryOrCreate
219+
{{- end }}
200220
- name: driver-root
201221
hostPath:
202222
path: {{ .Values.nvidiaDriverRoot }}

hack/kubelet-plugin-prestart.sh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,20 @@ if [ -z "$NVIDIA_DRIVER_ROOT" ]; then
1111
export NVIDIA_DRIVER_ROOT="/"
1212
fi
1313

14+
15+
if [ "${NVIDIA_DRIVER_ROOT}" != "/" ]; then
16+
# Create in-container path /driver-root as a symlink. Expectation: link may
17+
# be broken initially (e.g., if the GPU operator isn't deployed yet. The
18+
# link heals the driver becomes mounted (e.g., once GPU operator provides
19+
# the driver on the host at /run/nvidia/driver).
20+
21+
# Remove trailing slash (if existing) and get last elem: /a/b/c -> c,
22+
# /a/b/c/ -> c
23+
# Run `basename` on the trimmed path; the path itself must not be executed as a command.
_driver_root_basename=$(basename "${NVIDIA_DRIVER_ROOT%/}")
24+
echo "create symlink: /driver-root -> /driver-root-parent/${_driver_root_basename}"
25+
ln -s "/driver-root-parent/${_driver_root_basename}" /driver-root
26+
fi
27+
1428
emit_common_err () {
1529
printf '%b' \
1630
"Check failed. Has the NVIDIA GPU driver been set up? " \

0 commit comments

Comments
 (0)