From 56195f76c89ea979fd43582ca67c7b01a95d001b Mon Sep 17 00:00:00 2001 From: Michal Kapalka Date: Fri, 9 May 2025 16:56:40 +0200 Subject: [PATCH] Add cgroups CPU quota and throttling metrics Add metrics related to CPU quotas and CPU throttling (Linux CFS bandwidth control), as well as the total CPU usage from Linux cgroups CPU accounting. Those metrics can be useful in multi-tenant cloud environments, in particular on Elastic Cloud nodes that use CPU boosting (vCPU credits). Signed-off-by: Michal Kapalka --- README.md | 5 ++++ collector/nodes.go | 60 +++++++++++++++++++++++++++++++++++++ collector/nodes_response.go | 37 +++++++++++++++++++---- collector/nodes_test.go | 45 ++++++++++++++++++++++++++++ 4 files changed, 142 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f813534d..9fa64a85 100644 --- a/README.md +++ b/README.md @@ -221,6 +221,11 @@ Further Information | elasticsearch_jvm_memory_pool_max_bytes | counter | 3 | JVM memory max by pool | | elasticsearch_jvm_memory_pool_peak_used_bytes | counter | 3 | JVM memory peak used by pool | | elasticsearch_jvm_memory_pool_peak_max_bytes | counter | 3 | JVM memory peak max by pool | +| elasticsearch_os_cgroup_cpu_cfs_period_micros | gauge | 1 | CPU period length in microseconds (Linux CFS bandwidth control) | +| elasticsearch_os_cgroup_cpu_cfs_quota_micros | gauge | 1 | CPU quota per CPU period (cgroup_cfs_period_micros) in microseconds (Linux CFS bandwidth control) | +| elasticsearch_os_cgroup_cpu_stat_number_of_times_throttled | counter | 1 | Number of times the process has been throttled (Linux CFS bandwidth control) | +| elasticsearch_os_cgroup_cpu_stat_time_throttled_nanos | counter | 1 | Total time duration (in nanoseconds) for which the process has been throttled (Linux CFS bandwidth control) | +| elasticsearch_os_cgroup_cpuacct_usage_nanos | counter | 1 | Total CPU usage in nanoseconds (Linux cgroups CPU accounting) | | elasticsearch_os_cpu_percent | gauge | 1 | Percent CPU used by the 
OS | | elasticsearch_os_load1 | gauge | 1 | Shortterm load average | | elasticsearch_os_load5 | gauge | 1 | Midterm load average | diff --git a/collector/nodes.go b/collector/nodes.go index 527870f9..9f33b2de 100644 --- a/collector/nodes.go +++ b/collector/nodes.go @@ -286,6 +286,66 @@ func NewNodes(logger *slog.Logger, client *http.Client, url *url.URL, all bool, }, Labels: defaultNodeLabelValues, }, + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "os", "cgroup_cpu_cfs_period_micros"), + "CPU period length in microseconds (Linux CFS bandwidth control)", + defaultNodeLabels, nil, + ), + Value: func(node NodeStatsNodeResponse) float64 { + return float64(node.OS.Cgroup.CPU.CfsPeriodMicros) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.GaugeValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "os", "cgroup_cpu_cfs_quota_micros"), + "CPU quota per CPU period (cgroup_cfs_period_micros) in microseconds (Linux CFS bandwidth control)", + defaultNodeLabels, nil, + ), + Value: func(node NodeStatsNodeResponse) float64 { + return float64(node.OS.Cgroup.CPU.CfsQuotaMicros) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "os", "cgroup_cpu_stat_number_of_times_throttled"), + "Number of times the process has been throttled (Linux CFS bandwidth control)", + defaultNodeLabels, nil, + ), + Value: func(node NodeStatsNodeResponse) float64 { + return float64(node.OS.Cgroup.CPU.Stat.NumberOfTimesThrottled) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "os", "cgroup_cpu_stat_time_throttled_nanos"), + "Total time duration (in nanoseconds) for which the process has been throttled (Linux CFS bandwidth control)", + defaultNodeLabels, nil, + ), + Value: func(node NodeStatsNodeResponse) float64 { + return 
float64(node.OS.Cgroup.CPU.Stat.TimeThrottledNanos) + }, + Labels: defaultNodeLabelValues, + }, + { + Type: prometheus.CounterValue, + Desc: prometheus.NewDesc( + prometheus.BuildFQName(namespace, "os", "cgroup_cpuacct_usage_nanos"), + "Total CPU usage in nanoseconds (Linux cgroups CPU accounting)", + defaultNodeLabels, nil, + ), + Value: func(node NodeStatsNodeResponse) float64 { + return float64(node.OS.Cgroup.CPUAcct.UsageNanos) + }, + Labels: defaultNodeLabelValues, + }, { Type: prometheus.GaugeValue, Desc: prometheus.NewDesc( diff --git a/collector/nodes_response.go b/collector/nodes_response.go index 6ba2ad7b..f73e7af9 100644 --- a/collector/nodes_response.go +++ b/collector/nodes_response.go @@ -13,7 +13,9 @@ package collector -import "encoding/json" +import ( + "encoding/json" +) // nodeStatsResponse is a representation of an Elasticsearch Node Stats type nodeStatsResponse struct { @@ -289,10 +291,11 @@ type NodeStatsOSResponse struct { Uptime int64 `json:"uptime_in_millis"` // LoadAvg was an array of per-cpu values pre-2.0, and is a string in 2.0 // Leaving this here in case we want to implement parsing logic later - LoadAvg json.RawMessage `json:"load_average"` - CPU NodeStatsOSCPUResponse `json:"cpu"` - Mem NodeStatsOSMemResponse `json:"mem"` - Swap NodeStatsOSSwapResponse `json:"swap"` + LoadAvg json.RawMessage `json:"load_average"` + Cgroup NodeStatsOSCgroupResponse `json:"cgroup"` + CPU NodeStatsOSCPUResponse `json:"cpu"` + Mem NodeStatsOSMemResponse `json:"mem"` + Swap NodeStatsOSSwapResponse `json:"swap"` } // NodeStatsOSMemResponse defines node stats operating system memory usage structure @@ -322,6 +325,30 @@ type NodeStatsOSCPULoadResponse struct { Load15 float64 `json:"15m"` } +// NodeStatsOSCgroupResponse defines statistics related to Linux control groups (currently only CPU-related) +type NodeStatsOSCgroupResponse struct { + CPU NodeStatsOSCgroupCPUResponse `json:"cpu"` + CPUAcct NodeStatsOSCgroupCPUAcctResponse `json:"cpuacct"` +} + +// 
NodeStatsOSCgroupCPUResponse represents the current CPU quota (quota value and the corresponding period), as well as the related CPU throttling stats (Linux CFS bandwidth control) +type NodeStatsOSCgroupCPUResponse struct { + CfsPeriodMicros int64 `json:"cfs_period_micros"` + CfsQuotaMicros int64 `json:"cfs_quota_micros"` + Stat NodeStatsOSCgroupCPUStatsResponse `json:"stat"` +} + +// NodeStatsOSCgroupCPUStatsResponse represents the CPU throttling stats (Linux CFS bandwidth control) +type NodeStatsOSCgroupCPUStatsResponse struct { + NumberOfTimesThrottled int64 `json:"number_of_times_throttled"` + TimeThrottledNanos int64 `json:"time_throttled_nanos"` +} + +// NodeStatsOSCgroupCPUAcctResponse represents the Linux control groups CPU accounting stats +type NodeStatsOSCgroupCPUAcctResponse struct { + UsageNanos int64 `json:"usage_nanos"` +} + // NodeStatsProcessResponse is a representation of a process statistics, memory consumption, cpu usage, open file descriptors type NodeStatsProcessResponse struct { Timestamp int64 `json:"timestamp"` diff --git a/collector/nodes_test.go b/collector/nodes_test.go index 9e731837..d6fe1e68 100644 --- a/collector/nodes_test.go +++ b/collector/nodes_test.go @@ -352,6 +352,21 @@ func TestNodesStats(t *testing.T) { elasticsearch_nodes_roles{cluster="elasticsearch",host="127.0.0.1",name="bVrN1Hx",role="ml"} 0 elasticsearch_nodes_roles{cluster="elasticsearch",host="127.0.0.1",name="bVrN1Hx",role="remote_cluster_client"} 0 elasticsearch_nodes_roles{cluster="elasticsearch",host="127.0.0.1",name="bVrN1Hx",role="transform"} 0 + # HELP elasticsearch_os_cgroup_cpu_cfs_period_micros CPU period length in microseconds (Linux CFS bandwidth control) + # TYPE elasticsearch_os_cgroup_cpu_cfs_period_micros gauge + elasticsearch_os_cgroup_cpu_cfs_period_micros{cluster="elasticsearch",es_client_node="true",es_data_node="true",es_ingest_node="true",es_master_node="true",host="127.0.0.1",name="bVrN1Hx"} 0 + # HELP 
elasticsearch_os_cgroup_cpu_cfs_quota_micros CPU quota per CPU period (cgroup_cfs_period_micros) in microseconds (Linux CFS bandwidth control) + # TYPE elasticsearch_os_cgroup_cpu_cfs_quota_micros gauge + elasticsearch_os_cgroup_cpu_cfs_quota_micros{cluster="elasticsearch",es_client_node="true",es_data_node="true",es_ingest_node="true",es_master_node="true",host="127.0.0.1",name="bVrN1Hx"} 0 + # HELP elasticsearch_os_cgroup_cpu_stat_number_of_times_throttled Number of times the process has been throttled (Linux CFS bandwidth control) + # TYPE elasticsearch_os_cgroup_cpu_stat_number_of_times_throttled counter + elasticsearch_os_cgroup_cpu_stat_number_of_times_throttled{cluster="elasticsearch",es_client_node="true",es_data_node="true",es_ingest_node="true",es_master_node="true",host="127.0.0.1",name="bVrN1Hx"} 0 + # HELP elasticsearch_os_cgroup_cpu_stat_time_throttled_nanos Total time duration (in nanoseconds) for which the process has been throttled (Linux CFS bandwidth control) + # TYPE elasticsearch_os_cgroup_cpu_stat_time_throttled_nanos counter + elasticsearch_os_cgroup_cpu_stat_time_throttled_nanos{cluster="elasticsearch",es_client_node="true",es_data_node="true",es_ingest_node="true",es_master_node="true",host="127.0.0.1",name="bVrN1Hx"} 0 + # HELP elasticsearch_os_cgroup_cpuacct_usage_nanos Total CPU usage in nanoseconds (Linux cgroups CPU accounting) + # TYPE elasticsearch_os_cgroup_cpuacct_usage_nanos counter + elasticsearch_os_cgroup_cpuacct_usage_nanos{cluster="elasticsearch",es_client_node="true",es_data_node="true",es_ingest_node="true",es_master_node="true",host="127.0.0.1",name="bVrN1Hx"} 0 # HELP elasticsearch_os_cpu_percent Percent CPU used by OS # TYPE elasticsearch_os_cpu_percent gauge elasticsearch_os_cpu_percent{cluster="elasticsearch",es_client_node="true",es_data_node="true",es_ingest_node="true",es_master_node="true",host="127.0.0.1",name="bVrN1Hx"} 23 @@ -811,6 +826,21 @@ func TestNodesStats(t *testing.T) { 
elasticsearch_nodes_roles{cluster="elasticsearch",host="172.17.0.2",name="9_P7yui",role="ml"} 0 elasticsearch_nodes_roles{cluster="elasticsearch",host="172.17.0.2",name="9_P7yui",role="remote_cluster_client"} 0 elasticsearch_nodes_roles{cluster="elasticsearch",host="172.17.0.2",name="9_P7yui",role="transform"} 0 + # HELP elasticsearch_os_cgroup_cpu_cfs_period_micros CPU period length in microseconds (Linux CFS bandwidth control) + # TYPE elasticsearch_os_cgroup_cpu_cfs_period_micros gauge + elasticsearch_os_cgroup_cpu_cfs_period_micros{cluster="elasticsearch",es_client_node="true",es_data_node="true",es_ingest_node="true",es_master_node="true",host="172.17.0.2",name="9_P7yui"} 100000 + # HELP elasticsearch_os_cgroup_cpu_cfs_quota_micros CPU quota per CPU period (cgroup_cfs_period_micros) in microseconds (Linux CFS bandwidth control) + # TYPE elasticsearch_os_cgroup_cpu_cfs_quota_micros gauge + elasticsearch_os_cgroup_cpu_cfs_quota_micros{cluster="elasticsearch",es_client_node="true",es_data_node="true",es_ingest_node="true",es_master_node="true",host="172.17.0.2",name="9_P7yui"} -1 + # HELP elasticsearch_os_cgroup_cpu_stat_number_of_times_throttled Number of times the process has been throttled (Linux CFS bandwidth control) + # TYPE elasticsearch_os_cgroup_cpu_stat_number_of_times_throttled counter + elasticsearch_os_cgroup_cpu_stat_number_of_times_throttled{cluster="elasticsearch",es_client_node="true",es_data_node="true",es_ingest_node="true",es_master_node="true",host="172.17.0.2",name="9_P7yui"} 0 + # HELP elasticsearch_os_cgroup_cpu_stat_time_throttled_nanos Total time duration (in nanoseconds) for which the process has been throttled (Linux CFS bandwidth control) + # TYPE elasticsearch_os_cgroup_cpu_stat_time_throttled_nanos counter + elasticsearch_os_cgroup_cpu_stat_time_throttled_nanos{cluster="elasticsearch",es_client_node="true",es_data_node="true",es_ingest_node="true",es_master_node="true",host="172.17.0.2",name="9_P7yui"} 0 + # HELP 
elasticsearch_os_cgroup_cpuacct_usage_nanos Total CPU usage in nanoseconds (Linux cgroups CPU accounting) + # TYPE elasticsearch_os_cgroup_cpuacct_usage_nanos counter + elasticsearch_os_cgroup_cpuacct_usage_nanos{cluster="elasticsearch",es_client_node="true",es_data_node="true",es_ingest_node="true",es_master_node="true",host="172.17.0.2",name="9_P7yui"} 3.3206615382e+10 # HELP elasticsearch_os_cpu_percent Percent CPU used by OS # TYPE elasticsearch_os_cpu_percent gauge elasticsearch_os_cpu_percent{cluster="elasticsearch",es_client_node="true",es_data_node="true",es_ingest_node="true",es_master_node="true",host="172.17.0.2",name="9_P7yui"} 30 @@ -1334,6 +1364,21 @@ func TestNodesStats(t *testing.T) { elasticsearch_nodes_roles{cluster="elasticsearch",host="172.17.0.2",name="aaf5a8a0bceb",role="ml"} 1 elasticsearch_nodes_roles{cluster="elasticsearch",host="172.17.0.2",name="aaf5a8a0bceb",role="remote_cluster_client"} 1 elasticsearch_nodes_roles{cluster="elasticsearch",host="172.17.0.2",name="aaf5a8a0bceb",role="transform"} 1 + # HELP elasticsearch_os_cgroup_cpu_cfs_period_micros CPU period length in microseconds (Linux CFS bandwidth control) + # TYPE elasticsearch_os_cgroup_cpu_cfs_period_micros gauge + elasticsearch_os_cgroup_cpu_cfs_period_micros{cluster="elasticsearch",es_client_node="true",es_data_node="true",es_ingest_node="true",es_master_node="true",host="172.17.0.2",name="aaf5a8a0bceb"} 100000 + # HELP elasticsearch_os_cgroup_cpu_cfs_quota_micros CPU quota per CPU period (cgroup_cfs_period_micros) in microseconds (Linux CFS bandwidth control) + # TYPE elasticsearch_os_cgroup_cpu_cfs_quota_micros gauge + elasticsearch_os_cgroup_cpu_cfs_quota_micros{cluster="elasticsearch",es_client_node="true",es_data_node="true",es_ingest_node="true",es_master_node="true",host="172.17.0.2",name="aaf5a8a0bceb"} -1 + # HELP elasticsearch_os_cgroup_cpu_stat_number_of_times_throttled Number of times the process has been throttled (Linux CFS bandwidth control) + # TYPE 
elasticsearch_os_cgroup_cpu_stat_number_of_times_throttled counter + elasticsearch_os_cgroup_cpu_stat_number_of_times_throttled{cluster="elasticsearch",es_client_node="true",es_data_node="true",es_ingest_node="true",es_master_node="true",host="172.17.0.2",name="aaf5a8a0bceb"} 0 + # HELP elasticsearch_os_cgroup_cpu_stat_time_throttled_nanos Total time duration (in nanoseconds) for which the process has been throttled (Linux CFS bandwidth control) + # TYPE elasticsearch_os_cgroup_cpu_stat_time_throttled_nanos counter + elasticsearch_os_cgroup_cpu_stat_time_throttled_nanos{cluster="elasticsearch",es_client_node="true",es_data_node="true",es_ingest_node="true",es_master_node="true",host="172.17.0.2",name="aaf5a8a0bceb"} 0 + # HELP elasticsearch_os_cgroup_cpuacct_usage_nanos Total CPU usage in nanoseconds (Linux cgroups CPU accounting) + # TYPE elasticsearch_os_cgroup_cpuacct_usage_nanos counter + elasticsearch_os_cgroup_cpuacct_usage_nanos{cluster="elasticsearch",es_client_node="true",es_data_node="true",es_ingest_node="true",es_master_node="true",host="172.17.0.2",name="aaf5a8a0bceb"} 5.2445263941e+10 # HELP elasticsearch_os_cpu_percent Percent CPU used by OS # TYPE elasticsearch_os_cpu_percent gauge elasticsearch_os_cpu_percent{cluster="elasticsearch",es_client_node="true",es_data_node="true",es_ingest_node="true",es_master_node="true",host="172.17.0.2",name="aaf5a8a0bceb"} 37