# Copyright 2025 NVIDIA CORPORATION
# SPDX-License-Identifier: Apache-2.0

# ServiceMonitor for the binder component: scrapes the `http-metrics` port of
# Services labelled `app: binder` in the `kai-scheduler` namespace.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: binder
  namespace: kai-scheduler
  labels:
    # Presumably matched by the Prometheus `serviceMonitorSelector` -- confirm.
    accounting: kai-scheduler
spec:
  # NOTE(review): `jobLabel` is the *key* of a Service label whose value
  # becomes the job name; if the Service has no such label, the Service name
  # is used instead. Confirm a literal `binder` key is intended.
  jobLabel: binder
  namespaceSelector:
    matchNames:
      - kai-scheduler
  selector:
    matchLabels:
      app: binder
  endpoints:
    - port: http-metrics
      # NOTE(review): `bearerTokenFile` is marked deprecated in the Prometheus
      # Operator API in favour of `authorization`; kept as-is here.
      bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
---
# ServiceMonitor for the scheduler component: scrapes the `http-metrics` port
# of Services labelled `app: scheduler` in the `kai-scheduler` namespace.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: scheduler
  namespace: kai-scheduler
  labels:
    # Presumably matched by the Prometheus `serviceMonitorSelector` -- confirm.
    accounting: kai-scheduler
spec:
  # NOTE(review): `jobLabel` is the *key* of a Service label whose value
  # becomes the job name; if the Service has no such label, the Service name
  # is used instead. Confirm a literal `scheduler` key is intended.
  jobLabel: scheduler
  namespaceSelector:
    matchNames:
      - kai-scheduler
  selector:
    matchLabels:
      app: scheduler
  endpoints:
    - port: http-metrics
      # NOTE(review): `bearerTokenFile` is marked deprecated in the Prometheus
      # Operator API in favour of `authorization`; kept as-is here.
      bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token
---
# ServiceMonitor for the queue controller: scrapes the `metrics` port of
# Services labelled `app: queuecontroller` in the `kai-scheduler` namespace.
# No bearer token is configured for this endpoint.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: queuecontroller
  namespace: kai-scheduler
  labels:
    # Presumably matched by the Prometheus `serviceMonitorSelector` -- confirm.
    accounting: kai-scheduler
spec:
  # NOTE(review): `jobLabel` is the *key* of a Service label whose value
  # becomes the job name; if the Service has no such label, the Service name
  # is used instead. Confirm a literal `queuecontroller` key is intended.
  jobLabel: queuecontroller
  namespaceSelector:
    matchNames:
      - kai-scheduler
  selector:
    matchLabels:
      app: queuecontroller
  endpoints:
    - port: metrics
---
# ServiceMonitor for the DCGM exporter: scrapes the `gpu-metrics` port of
# Services labelled `app: nvidia-dcgm-exporter`. The monitor object lives in
# `kai-scheduler`, but the targets are selected from the `gpu-operator`
# namespace.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: dcgm-exporter
  namespace: kai-scheduler
  labels:
    # Presumably matched by the Prometheus `serviceMonitorSelector` -- confirm.
    accounting: kai-scheduler
spec:
  # NOTE(review): `jobLabel` is the *key* of a Service label whose value
  # becomes the job name; if the Service has no such label, the Service name
  # is used instead. Confirm a literal `dcgm-exporter` key is intended.
  jobLabel: dcgm-exporter
  namespaceSelector:
    matchNames:
      - gpu-operator
  selector:
    matchLabels:
      app: nvidia-dcgm-exporter
  endpoints:
    - port: gpu-metrics
---
# ServiceMonitor for kube-state-metrics: scrapes the `http` port of Services
# labelled `app.kubernetes.io/name: kube-state-metrics` every 30s. The monitor
# object lives in `kai-scheduler`, but the targets are selected from the
# `monitoring` namespace.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: kube-state-metrics
  namespace: kai-scheduler
  labels:
    # Presumably matched by the Prometheus `serviceMonitorSelector` -- confirm.
    accounting: kai-scheduler
spec:
  # NOTE(review): `jobLabel` is the *key* of a Service label whose value
  # becomes the job name; if the Service has no such label, the Service name
  # is used instead. Confirm a literal `kube-state-metrics` key is intended.
  jobLabel: kube-state-metrics
  namespaceSelector:
    matchNames:
      - monitoring
  selector:
    matchLabels:
      app.kubernetes.io/name: kube-state-metrics
  endpoints:
    - port: http
      interval: 30s
      # NOTE(review): `bearerTokenFile` is marked deprecated in the Prometheus
      # Operator API in favour of `authorization`; kept as-is here.
      bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token