Skip to content

Commit 3cbf44c

Browse files
authored
Merge pull request #259 from klueska/fix-envvars-compute-domain-kubelet-plugin
Add envvars dropped in helm chart refactor when introducing ComputeDomains
2 parents c664187 + 810d8c6 commit 3cbf44c

File tree

1 file changed

+23
-2
lines changed

1 file changed

+23
-2
lines changed

deployments/helm/nvidia-dra-driver-gpu/templates/kubeletplugin.yaml

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,13 +55,34 @@ spec:
5555
{{- toYaml .Values.kubeletPlugin.containers.computeDomains.securityContext | nindent 10 }}
5656
image: {{ include "nvidia-dra-driver-gpu.fullimage" . }}
5757
imagePullPolicy: {{ .Values.image.pullPolicy }}
58-
command:
59-
- compute-domain-kubelet-plugin
58+
command: ["bash", "-c"]
59+
args:
60+
- |-
61+
# Conditionally mask the params file to prevent this container from
62+
# recreating any missing GPU device nodes. This is necessary, for
63+
# example, when running under nvkind to limit the set of GPUs governed
64+
# by the plugin even though it has cgroup access to all of them.
65+
if [ "${MASK_NVIDIA_DRIVER_PARAMS}" = "true" ]; then
66+
cp /proc/driver/nvidia/params root/gpu-params
67+
sed -i 's/^ModifyDeviceFiles: 1$/ModifyDeviceFiles: 0/' root/gpu-params
68+
mount --bind root/gpu-params /proc/driver/nvidia/params
69+
fi
70+
compute-domain-kubelet-plugin
6071
resources:
6172
{{- toYaml .Values.kubeletPlugin.containers.computeDomains.resources | nindent 10 }}
6273
env:
74+
- name: MASK_NVIDIA_DRIVER_PARAMS
75+
value: "{{ .Values.maskNvidiaDriverParams }}"
76+
- name: NVIDIA_CTK_PATH
77+
value: "{{ .Values.nvidiaCtkPath }}"
78+
- name: NVIDIA_DRIVER_ROOT
79+
value: "{{ .Values.nvidiaDriverRoot }}"
80+
- name: NVIDIA_VISIBLE_DEVICES
81+
value: void
6382
- name: CDI_ROOT
6483
value: /var/run/cdi
84+
- name: NVIDIA_MIG_CONFIG_DEVICES
85+
value: all
6586
- name: NODE_NAME
6687
valueFrom:
6788
fieldRef:

0 commit comments

Comments
 (0)