Skip to content

Commit 11e640b

Browse files
committed
feat: add helm chart
Signed-off-by: Ajay Mishra <[email protected]>
1 parent 5ecd33f commit 11e640b

File tree

11 files changed

+553
-0
lines changed

11 files changed

+553
-0
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Chart metadata for the kubernetes-object-monitor Helm chart.
apiVersion: v2
name: kubernetes-object-monitor
description: >-
  Monitors Kubernetes objects and publishes health events
  based on CEL policy predicates

# Deployable application chart.
type: application

# Version of the chart itself; bump it whenever the chart changes.
version: 0.1.0

# Version of the packaged application. The Deployment template falls back to
# this value for the image tag when neither image.tag nor global.image.tag is
# set.
# NOTE(review): "1.16.0" is the `helm create` scaffold default - confirm it
# matches a published image tag.
appVersion: "1.16.0"
27+
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
{{/*
Chart name, cut to at most 63 characters with a trailing "-" removed.
*/}}
{{- define "kubernetes-object-monitor.name" -}}
{{- trimSuffix "-" (trunc 63 .Chart.Name) }}
{{- end }}
7+
8+
{{/*
Fully qualified app name used for the resource names in this chart.
The value is a fixed string: it does not depend on the release name.
*/}}
{{- define "kubernetes-object-monitor.fullname" -}}
{{- trimSuffix "-" (trunc 63 "kubernetes-object-monitor") }}
{{- end }}
14+
15+
{{/*
"<chart name>-<chart version>" value for the helm.sh/chart label.
"+" is replaced by "_", then the result is cut to 63 characters and a
trailing "-" is removed.
*/}}
{{- define "kubernetes-object-monitor.chart" -}}
{{- $nameVersion := printf "%s-%s" .Chart.Name .Chart.Version -}}
{{- trimSuffix "-" (trunc 63 (replace "+" "_" $nameVersion)) }}
{{- end }}
21+
22+
{{/*
Common labels: chart label, selector labels, optional app version, and the
managing service.
*/}}
{{- define "kubernetes-object-monitor.labels" -}}
helm.sh/chart: {{ include "kubernetes-object-monitor.chart" . }}
{{ include "kubernetes-object-monitor.selectorLabels" . }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ quote . }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}
33+
34+
{{/*
Selector labels: the app name and the release instance.
*/}}
{{- define "kubernetes-object-monitor.selectorLabels" -}}
{{- $appName := include "kubernetes-object-monitor.name" . -}}
app.kubernetes.io/name: {{ $appName }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}
41+
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
{{- /*
ClusterRole for the monitor: get/list/watch/patch/update on core-group nodes.
*/}}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: {{ include "kubernetes-object-monitor.fullname" . }}
  labels:
    {{- include "kubernetes-object-monitor.labels" . | nindent 4 }}
rules:
  - apiGroups: [""]
    resources: ["nodes"]
    verbs: ["get", "list", "watch", "patch", "update"]
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
{{- /*
Binds the chart's ClusterRole to the chart's ServiceAccount in the release
namespace. All three resources share the name from the fullname helper.
*/}}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: {{ include "kubernetes-object-monitor.fullname" . }}
  labels:
    {{- include "kubernetes-object-monitor.labels" . | nindent 4 }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ include "kubernetes-object-monitor.fullname" . }}
subjects:
  - kind: ServiceAccount
    name: {{ include "kubernetes-object-monitor.fullname" . }}
    namespace: {{ .Release.Namespace }}
29+
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
{{- /*
Renders .Values.policies into config.toml, one [[policies]] entry per policy.

- Boolean fields (enabled, isFatal) fall back to false when omitted, so a
  missing key never renders as an empty, invalid TOML value.
- A predicate expression containing a newline is emitted as a TOML multi-line
  literal string; otherwise it is emitted as a quoted string.
- The nodeAssociation table and the errorCode array are emitted only when set.
*/}}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "kubernetes-object-monitor.fullname" . }}
  labels:
    {{- include "kubernetes-object-monitor.labels" . | nindent 4 }}
data:
  config.toml: |
    {{- range .Values.policies }}
    [[policies]]
    name = {{ .name | quote }}
    enabled = {{ .enabled | default false }}

    [policies.resource]
    group = {{ .resource.group | quote }}
    version = {{ .resource.version | quote }}
    kind = {{ .resource.kind | quote }}

    [policies.predicate]
    expression = {{ if contains "\n" .predicate.expression }}'''
    {{ .predicate.expression | trim | nindent 10 }}
    '''{{ else }}{{ .predicate.expression | quote }}{{ end }}

    {{- if .nodeAssociation }}
    [policies.nodeAssociation]
    expression = {{ .nodeAssociation.expression | quote }}
    {{- end }}

    [policies.healthEvent]
    componentClass = {{ .healthEvent.componentClass | quote }}
    isFatal = {{ .healthEvent.isFatal | default false }}
    message = {{ .healthEvent.message | quote }}
    recommendedAction = {{ .healthEvent.recommendedAction | quote }}
    {{- if .healthEvent.errorCode }}
    errorCode = [{{- range $index, $code := .healthEvent.errorCode }}{{- if $index }}, {{ end }}{{ $code | quote }}{{- end }}]
    {{- end }}

    {{- end }}
53+
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
{{- /*
Deployment for the kubernetes-object-monitor container.

Values shared across the manifest are resolved once below:
  $fullName    - resource name shared with the ConfigMap and ServiceAccount
  $metricsPort - global.metricsPort, or 8080 when unset
  $imageTag    - image.tag, then global.image.tag, then the chart appVersion
*/}}
{{- $fullName := include "kubernetes-object-monitor.fullname" . }}
{{- $metricsPort := (.Values.global).metricsPort | default 8080 }}
{{- $imageTag := .Values.image.tag | default ((.Values.global).image).tag | default .Chart.AppVersion }}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ $fullName }}
  labels:
    {{- include "kubernetes-object-monitor.labels" . | nindent 4 }}
spec:
  replicas: {{ .Values.replicaCount }}
  selector:
    matchLabels:
      {{- include "kubernetes-object-monitor.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      annotations:
        {{- /* Hash of the rendered ConfigMap: a config change alters the pod template. */}}
        checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
        {{- with .Values.podAnnotations }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
      labels:
        {{- include "kubernetes-object-monitor.labels" . | nindent 8 }}
        {{- with .Values.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with (.Values.global).imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ $fullName }}
      containers:
        - name: {{ .Chart.Name }}
          image: "{{ .Values.image.repository }}:{{ $imageTag }}"
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          args:
            - "--config"
            - "/config/config.toml"
            - "--platform-connector-socket"
            - "{{ .Values.platformConnector.socket }}"
            - "--metrics-port"
            - "{{ $metricsPort }}"
            - "--max-concurrent-reconciles"
            - "{{ .Values.maxConcurrentReconciles }}"
          env:
            - name: LOG_LEVEL
              value: "{{ .Values.logLevel }}"
          ports:
            - name: metrics
              containerPort: {{ $metricsPort }}
          {{- /* Both probes hit /healthz on the metrics port. */}}
          livenessProbe:
            httpGet:
              path: /healthz
              port: metrics
            initialDelaySeconds: 15
            periodSeconds: 20
            timeoutSeconds: 5
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /healthz
              port: metrics
            initialDelaySeconds: 5
            periodSeconds: 10
            timeoutSeconds: 3
            failureThreshold: 3
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
          volumeMounts:
            - name: config
              mountPath: /config
              readOnly: true
            {{- /* The matching volume is supplied through .Values.volumes. */}}
            - name: platform-connector-socket
              mountPath: /var/run/platform-connector
      volumes:
        - name: config
          configMap:
            name: {{ $fullName }}
        {{- range .Values.volumes }}
        - {{- toYaml . | nindent 10 }}
        {{- end }}
      {{- /* Scheduling: global settings take precedence over chart-level ones. */}}
      {{- with ((.Values.global).systemNodeSelector | default .Values.nodeSelector) }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with ((.Values.global).affinity | default .Values.affinity) }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with ((.Values.global).systemNodeTolerations | default .Values.tolerations) }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
108+
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
{{- /*
ServiceAccount the Deployment runs as; the ClusterRoleBinding references it
by the same fullname.
*/}}
apiVersion: v1
kind: ServiceAccount
metadata:
  name: {{ include "kubernetes-object-monitor.fullname" . }}
  labels:
    {{- include "kubernetes-object-monitor.labels" . | nindent 4 }}
21+
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# Default values for kubernetes-object-monitor.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
#
# The Deployment template also honours these optional global.* overrides:
#   global.imagePullSecrets, global.image.tag, global.metricsPort,
#   global.systemNodeSelector, global.affinity, global.systemNodeTolerations

# Number of pod replicas for the Deployment.
replicaCount: 1

# Passed to the container as the LOG_LEVEL environment variable.
logLevel: info

image:
  repository: ghcr.io/nvidia/nvsentinel/kubernetes-object-monitor
  pullPolicy: IfNotPresent
  # Empty falls back to global.image.tag, then to the chart appVersion.
  tag: ""

# Extra annotations and labels merged into the pod template.
podAnnotations: {}
podLabels: {}

# Passed to the container as --max-concurrent-reconciles.
maxConcurrentReconciles: 1

platformConnector:
  # Passed to the container as --platform-connector-socket.
  socket: "unix:///var/run/platform-connector/platform-connector.sock"

# Each entry is rendered as one [[policies]] table in config.toml.
policies:
  - name: node-not-ready
    enabled: true
    resource:
      group: ""
      version: v1
      kind: Node
    predicate:
      expression: |
        resource.status.conditions.filter(c, c.type == "Ready" && c.status == "False").size() > 0
    healthEvent:
      componentClass: Node
      isFatal: true
      message: "Node is not ready"
      recommendedAction: CONTACT_SUPPORT
      errorCode:
        - NODE_NOT_READY

resources:
  requests:
    cpu: 100m
    memory: 128Mi
  limits:
    cpu: 500m
    memory: 256Mi

# Extra pod volumes. The container always mounts a volume named
# "platform-connector-socket", so keep an entry with that name.
volumes:
  - name: platform-connector-socket
    hostPath:
      path: /var/run/platform-connector
      type: DirectoryOrCreate

# Pod scheduling. Each is used only when the matching global.* value
# (systemNodeSelector, affinity, systemNodeTolerations) is not set.
nodeSelector: {}
affinity: {}
tolerations: []
66+

0 commit comments

Comments
 (0)