Skip to content

Commit e64d5e2

Browse files
committed
Add envars dropped in helm chart refactor when introducing ComputeDomain
Signed-off-by: Kevin Klues <[email protected]>
1 parent 5f4121d commit e64d5e2

File tree

1 file changed

+23
-2
lines changed

1 file changed

+23
-2
lines changed

deployments/helm/nvidia-dra-driver-gpu/templates/kubeletplugin.yaml

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,13 +89,34 @@ spec:
8989
{{- toYaml .Values.kubeletPlugin.containers.gpus.securityContext | nindent 10 }}
9090
image: {{ include "nvidia-dra-driver-gpu.fullimage" . }}
9191
imagePullPolicy: {{ .Values.image.pullPolicy }}
92-
command:
93-
- gpu-kubelet-plugin
92+
command: ["bash", "-c"]
93+
args:
94+
- |-
95+
# Conditionally mask the params file to prevent this container from
96+
# recreating any missing GPU device nodes. This is necessary, for
97+
# example, when running under nvkind to limit the set of GPUs governed
98+
# by the plugin even though it has cgroup access to all of them.
99+
if [ "${MASK_NVIDIA_DRIVER_PARAMS}" = "true" ]; then
100+
cp /proc/driver/nvidia/params root/gpu-params
101+
sed -i 's/^ModifyDeviceFiles: 1$/ModifyDeviceFiles: 0/' root/gpu-params
102+
mount --bind root/gpu-params /proc/driver/nvidia/params
103+
fi
104+
gpu-kubelet-plugin
94105
resources:
95106
{{- toYaml .Values.kubeletPlugin.containers.gpus.resources | nindent 10 }}
96107
env:
108+
- name: MASK_NVIDIA_DRIVER_PARAMS
109+
value: "{{ .Values.maskNvidiaDriverParams }}"
110+
- name: NVIDIA_CTK_PATH
111+
value: "{{ .Values.nvidiaCtkPath }}"
112+
- name: NVIDIA_DRIVER_ROOT
113+
value: "{{ .Values.nvidiaDriverRoot }}"
114+
- name: NVIDIA_VISIBLE_DEVICES
115+
value: void
97116
- name: CDI_ROOT
98117
value: /var/run/cdi
118+
- name: NVIDIA_MIG_CONFIG_DEVICES
119+
value: all
99120
- name: NODE_NAME
100121
valueFrom:
101122
fieldRef:

0 commit comments

Comments
 (0)