Skip to content

Commit 56174a6

Browse files
committed
Fix helm charts - problems identified by linter
Signed-off-by: Davanum Srinivas <[email protected]>
1 parent e47d646 commit 56174a6

File tree

8 files changed

+79
-35
lines changed

8 files changed

+79
-35
lines changed

distros/kubernetes/nvsentinel/charts/csp-health-monitor/templates/deployment.yaml

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -24,14 +24,14 @@ spec:
2424
{{- include "csp-health-monitor.selectorLabels" . | nindent 6 }}
2525
template:
2626
metadata:
27-
{{- with (.Values.global.podAnnotations | default .Values.podAnnotations) }}
27+
{{- with ((.Values.global).podAnnotations | default .Values.podAnnotations) }}
2828
annotations:
2929
{{- toYaml . | nindent 8 }}
3030
{{- end }}
3131
labels:
3232
{{- include "csp-health-monitor.selectorLabels" . | nindent 8 }}
3333
spec:
34-
{{- with .Values.global.imagePullSecrets }}
34+
{{- with ((.Values.global).imagePullSecrets | default .Values.imagePullSecrets) }}
3535
imagePullSecrets:
3636
{{- toYaml . | nindent 8 }}
3737
{{- end }}
@@ -53,19 +53,19 @@ spec:
5353
type: DirectoryOrCreate
5454
containers:
5555
- name: {{ .Chart.Name }}
56-
image: "{{ .Values.global.cspHealthMonitor.image.repository }}:{{ .Values.global.image.tag | default .Chart.AppVersion }}"
57-
imagePullPolicy: {{ .Values.global.cspHealthMonitor.image.pullPolicy }}
56+
image: "{{ ((.Values.global).cspHealthMonitor).image.repository | default "ghcr.io/nvidia/nvsentinel-csp-health-monitor" }}:{{ ((.Values.global).image).tag | default .Chart.AppVersion }}"
57+
imagePullPolicy: {{ ((.Values.global).cspHealthMonitor).image.pullPolicy | default "IfNotPresent" }}
5858
command: ["/app/csp-health-monitor"]
5959
args:
6060
- "--config=/etc/config/config.toml"
61-
- "--metrics-port={{ .Values.global.metricsPort }}"
61+
- "--metrics-port={{ ((.Values.global).metricsPort) | default 2112 }}"
6262
- "--mongo-client-cert-mount-path={{ .Values.clientCertMountPath }}"
6363
- "-v={{ .Values.logLevel }}"
6464
resources:
6565
{{- toYaml .Values.resources | nindent 12 }}
6666
ports:
6767
- name: metrics
68-
containerPort: {{ .Values.global.metricsPort }}
68+
containerPort: {{ ((.Values.global).metricsPort) | default 2112 }}
6969
protocol: TCP
7070
volumeMounts:
7171
- name: config-volume
@@ -79,8 +79,8 @@ spec:
7979
optional: true
8080

8181
- name: maintenance-notifier
82-
image: "{{ .Values.global.cspHealthMonitor.image.repository }}:{{ .Values.global.image.tag | default .Chart.AppVersion }}"
83-
imagePullPolicy: {{ .Values.global.cspHealthMonitor.image.pullPolicy }}
82+
image: "{{ ((.Values.global).cspHealthMonitor).image.repository | default "ghcr.io/nvidia/nvsentinel-csp-health-monitor" }}:{{ ((.Values.global).image).tag | default .Chart.AppVersion }}"
83+
imagePullPolicy: {{ ((.Values.global).cspHealthMonitor).image.pullPolicy | default "IfNotPresent" }}
8484
securityContext:
8585
runAsUser: 0
8686
command: ["/app/maintenance-notifier"]
@@ -109,15 +109,15 @@ spec:
109109
name: mongodb-config
110110
optional: true
111111
restartPolicy: Always
112-
{{- with (.Values.global.systemNodeSelector | default .Values.nodeSelector) }}
112+
{{- with (((.Values.global).systemNodeSelector) | default .Values.nodeSelector) }}
113113
nodeSelector:
114114
{{- toYaml . | nindent 8 }}
115115
{{- end }}
116-
{{- with (.Values.global.affinity | default .Values.affinity) }}
116+
{{- with (((.Values.global).affinity) | default .Values.affinity) }}
117117
affinity:
118118
{{- toYaml . | nindent 8 }}
119119
{{- end }}
120-
{{- with (.Values.global.systemNodeTolerations | default .Values.tolerations) }}
120+
{{- with (((.Values.global).systemNodeTolerations) | default .Values.tolerations) }}
121121
tolerations:
122122
{{- toYaml . | nindent 8 }}
123123
{{- end }}

distros/kubernetes/nvsentinel/charts/csp-health-monitor/values.yaml

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,17 @@ serviceAccount:
4646
annotations: {}
4747
name: ""
4848

49+
# Scheduling configuration
50+
nodeSelector: {}
51+
affinity: {}
52+
tolerations: []
53+
54+
# Optional image pull secrets (fallback if global not available)
55+
imagePullSecrets: []
56+
57+
# Optional pod annotations (fallback if global not available)
58+
podAnnotations: {}
59+
4960
# Klog verbosity level for the main CSP health monitor container (e.g., 1=Info, 2=Debug, etc.)
5061
logLevel: 1
5162

distros/kubernetes/nvsentinel/charts/fault-quarantine/templates/deployment.yaml

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -24,35 +24,35 @@ spec:
2424
{{- include "fault-quarantine-module.selectorLabels" . | nindent 6 }}
2525
template:
2626
metadata:
27-
{{- with (.Values.global.podAnnotations | default .Values.podAnnotations) }}
27+
{{- with ((.Values.global).podAnnotations | default .Values.podAnnotations) }}
2828
annotations:
2929
{{- toYaml . | nindent 8 }}
3030
{{- end }}
3131
labels:
3232
{{- include "fault-quarantine-module.selectorLabels" . | nindent 8 }}
3333
spec:
34-
{{- with .Values.global.imagePullSecrets }}
34+
{{- with ((.Values.global).imagePullSecrets | default .Values.imagePullSecrets) }}
3535
imagePullSecrets:
3636
{{- toYaml . | nindent 8 }}
3737
{{- end }}
38-
serviceAccountName: {{ include "nvsentinel.serviceAccountName" . }}
38+
serviceAccountName: {{ include "fault-quarantine-module.serviceAccountName" . }}
3939
containers:
4040
- name: fault-quarantine
41-
image: "{{ .Values.global.faultQuarantineModule.image.repository }}:{{ .Values.global.image.tag | default .Chart.AppVersion }}"
42-
imagePullPolicy: {{ .Values.global.faultQuarantineModule.image.pullPolicy }}
41+
image: "{{ ((.Values.global).faultQuarantineModule).image.repository | default "ghcr.io/nvidia/nvsentinel-fault-quarantine-module" }}:{{ ((.Values.global).image).tag | default .Chart.AppVersion }}"
42+
imagePullPolicy: {{ ((.Values.global).faultQuarantineModule).image.pullPolicy | default "IfNotPresent" }}
4343
resources:
4444
{{- toYaml .Values.resources | nindent 12 }}
4545
args:
46-
- "--metrics-port={{ .Values.global.metricsPort }}"
46+
- "--metrics-port={{ ((.Values.global).metricsPort) | default 2112 }}"
4747
- "--mongo-client-cert-mount-path={{ .Values.clientCertMountPath }}"
48-
- "--dry-run={{ .Values.global.dryRun }}"
48+
- "--dry-run={{ ((.Values.global).dryRun) | default false }}"
4949
- "--circuit-breaker-percentage={{ .Values.circuitBreaker.percentage }}"
5050
- "--circuit-breaker-duration={{ .Values.circuitBreaker.duration }}"
5151
- "--circuit-breaker-enabled={{ .Values.circuitBreaker.enabled }}"
5252
- "-v={{ .Values.logLevel }}"
5353
ports:
5454
- name: metrics
55-
containerPort: {{ .Values.global.metricsPort }}
55+
containerPort: {{ ((.Values.global).metricsPort) | default 2112 }}
5656
volumeMounts:
5757
- name: config-volume
5858
mountPath: /etc/config/config.toml
@@ -81,15 +81,15 @@ spec:
8181
secretName: mongo-app-client-cert-secret
8282
optional: true
8383
restartPolicy: Always
84-
{{- with (.Values.global.systemNodeSelector | default .Values.nodeSelector) }}
84+
{{- with (((.Values.global).systemNodeSelector) | default .Values.nodeSelector) }}
8585
nodeSelector:
8686
{{- toYaml . | nindent 8 }}
8787
{{- end }}
88-
{{- with (.Values.global.affinity | default .Values.affinity) }}
88+
{{- with (((.Values.global).affinity) | default .Values.affinity) }}
8989
affinity:
9090
{{- toYaml . | nindent 8 }}
9191
{{- end }}
92-
{{- with (.Values.global.systemNodeTolerations | default .Values.tolerations) }}
92+
{{- with (((.Values.global).systemNodeTolerations) | default .Values.tolerations) }}
9393
tolerations:
9494
{{- toYaml . | nindent 8 }}
9595
{{- end }}

distros/kubernetes/nvsentinel/charts/fault-quarantine/values.yaml

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,17 @@ serviceAccount:
3232
annotations: {}
3333
name: ""
3434

35+
# Scheduling configuration
36+
nodeSelector: {}
37+
affinity: {}
38+
tolerations: []
39+
40+
# Optional image pull secrets (fallback if global not available)
41+
imagePullSecrets: []
42+
43+
# Optional pod annotations (fallback if global not available)
44+
podAnnotations: {}
45+
3546
# Log level for the fault quarantine module (klog verbosity)
3647
logLevel: 1
3748

distros/kubernetes/nvsentinel/charts/fault-remediation/templates/deployment.yaml

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -24,18 +24,18 @@ spec:
2424
{{- include "fault-remediation-module.selectorLabels" . | nindent 6 }}
2525
template:
2626
metadata:
27-
{{- with (.Values.global.podAnnotations | default .Values.podAnnotations) }}
27+
{{- with ((.Values.global).podAnnotations | default .Values.podAnnotations) }}
2828
annotations:
2929
{{- toYaml . | nindent 8 }}
3030
{{- end }}
3131
labels:
3232
{{- include "fault-remediation-module.selectorLabels" . | nindent 8 }}
3333
spec:
34-
{{- with .Values.global.imagePullSecrets }}
34+
{{- with ((.Values.global).imagePullSecrets | default .Values.imagePullSecrets) }}
3535
imagePullSecrets:
3636
{{- toYaml . | nindent 8 }}
3737
{{- end }}
38-
serviceAccountName: {{ include "nvsentinel.serviceAccountName" . }}
38+
serviceAccountName: {{ include "fault-remediation-module.serviceAccountName" . }}
3939
containers:
4040
- name: fault-remediation
4141
env:
@@ -46,16 +46,16 @@ spec:
4646
- name: MAINTENANCE_API_GROUP
4747
value: {{ .Values.maintenanceResource.apiGroup }}
4848

49-
image: "{{ .Values.global.faultRemediationModule.image.repository }}:{{ .Values.global.image.tag | default .Chart.AppVersion }}"
50-
imagePullPolicy: {{ .Values.global.faultRemediationModule.image.pullPolicy | default "IfNotPresent" }}
49+
image: "{{ ((.Values.global).faultRemediationModule).image.repository | default "ghcr.io/nvidia/nvsentinel-fault-remediation-module" }}:{{ ((.Values.global).image).tag | default .Chart.AppVersion }}"
50+
imagePullPolicy: {{ ((.Values.global).faultRemediationModule).image.pullPolicy | default "IfNotPresent" }}
5151
resources:
5252
{{- toYaml .Values.resources | nindent 12 }}
5353
args:
5454
- "--mongo-client-cert-mount-path={{ .Values.clientCertMountPath }}"
55-
- "--dry-run={{ .Values.global.dryRun }}"
55+
- "--dry-run={{ ((.Values.global).dryRun) | default false }}"
5656
ports:
5757
- name: metrics
58-
containerPort: {{ .Values.global.metricsPort }}
58+
containerPort: {{ ((.Values.global).metricsPort) | default 2112 }}
5959
volumeMounts:
6060
- name: mongo-app-client-cert
6161
mountPath: {{ .Values.clientCertMountPath }}
@@ -99,15 +99,15 @@ spec:
9999
path: {{ .Values.logCollector.manifestFileName }}
100100
{{- end }}
101101
restartPolicy: Always
102-
{{- with (.Values.global.systemNodeSelector | default .Values.nodeSelector) }}
102+
{{- with (((.Values.global).systemNodeSelector) | default .Values.nodeSelector) }}
103103
nodeSelector:
104104
{{- toYaml . | nindent 8 }}
105105
{{- end }}
106-
{{- with (.Values.global.affinity | default .Values.affinity) }}
106+
{{- with (((.Values.global).affinity) | default .Values.affinity) }}
107107
affinity:
108108
{{- toYaml . | nindent 8 }}
109109
{{- end }}
110-
{{- with (.Values.global.systemNodeTolerations | default .Values.tolerations) }}
110+
{{- with (((.Values.global).systemNodeTolerations) | default .Values.tolerations) }}
111111
tolerations:
112112
{{- toYaml . | nindent 8 }}
113113
{{- end }}

distros/kubernetes/nvsentinel/charts/fault-remediation/values.yaml

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,16 @@ serviceAccount:
3737
annotations: {}
3838
name: ""
3939

40+
# Scheduling configuration
41+
nodeSelector: {}
42+
affinity: {}
43+
44+
# Optional image pull secrets (fallback if global not available)
45+
imagePullSecrets: []
46+
47+
# Optional pod annotations (fallback if global not available)
48+
podAnnotations: {}
49+
4050
# Special tolerations for fault remediation - allow running on nodes with any taints for log collection
4151
tolerations:
4252
- operator: "Exists"

distros/kubernetes/nvsentinel/charts/gpu-health-monitor/templates/daemonset-dcgm-4.x.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -29,14 +29,14 @@ spec:
2929
{{- include "gpu-health-monitor.selectorLabels" . | nindent 6 }}
3030
template:
3131
metadata:
32-
{{- with (.Values.global.podAnnotations | default .Values.podAnnotations) }}
32+
{{- with ((.Values.global).podAnnotations | default .Values.podAnnotations) }}
3333
annotations:
3434
{{- toYaml . | nindent 8 }}
3535
{{- end }}
3636
labels:
3737
{{- include "gpu-health-monitor.selectorLabels" . | nindent 8 }}
3838
spec:
39-
{{- with .Values.global.imagePullSecrets }}
39+
{{- with ((.Values.global).imagePullSecrets | default .Values.imagePullSecrets) }}
4040
imagePullSecrets:
4141
{{- toYaml . | nindent 8 }}
4242
{{- end }}

distros/kubernetes/nvsentinel/charts/gpu-health-monitor/values.yaml

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,11 +23,23 @@ podSecurityContext: {}
2323
securityContext:
2424
runAsUser: 0
2525

26-
resources:
26+
resources:
2727
limits:
2828
cpu: 500m
2929
memory: 512Mi
3030
requests:
3131
cpu: 100m
3232
memory: 128Mi
33+
34+
# Scheduling configuration
35+
nodeSelector: {}
36+
affinity: {}
37+
tolerations: []
38+
39+
# Optional image pull secrets (fallback if global not available)
40+
imagePullSecrets: []
41+
42+
# Optional pod annotations (fallback if global not available)
43+
podAnnotations: {}
44+
3345
stateFilePath: "/var/run/statefile"

0 commit comments

Comments
 (0)