Skip to content

Commit ceeb9a9

Browse files
authored
kernel utilization metrics on EC2 AL2023 w/ 6.1 kernel (#515)
* kernel utilization metrics on EC2 AL2023 w/ 6.1 kernel Signed-off-by: Harper, Jason M <[email protected]> * fix ICX metric definition Signed-off-by: Harper, Jason M <[email protected]> * icx metric event translation to perf Signed-off-by: Harper, Jason M <[email protected]> * make deterministic expression parsing to replace fixed counter event names with corresponding perf event names Signed-off-by: Harper, Jason M <[email protected]> * cleanup fix counter event name replacement Signed-off-by: Harper, Jason M <[email protected]> --------- Signed-off-by: Harper, Jason M <[email protected]>
1 parent c065003 commit ceeb9a9

File tree

4 files changed

+28
-5
lines changed

4 files changed

+28
-5
lines changed

cmd/metrics/loader_perfmon.go

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,9 @@ func customizeOCREventNames(metrics []MetricDefinition) []MetricDefinition {
309309
// example formula: "( 1000000000 * (a / b) / (c / (d * socket_count) ) ) * DURATIONTIMEINSECONDS"
310310
// desired output: "( 1000000000 * ([event1] / [event2]) / ([constant1] / ([constant2] * socket_count) ) ) * 1"
311311
func getExpression(perfmonMetric PerfmonMetric) (string, error) {
312+
if perfmonMetric.Formula == "" {
313+
return "", fmt.Errorf("metric '%s' has no formula defined", perfmonMetric.MetricName)
314+
}
312315
expression := perfmonMetric.Formula
313316
replacers := make(map[string]string)
314317
for _, event := range perfmonMetric.Events {
@@ -329,10 +332,18 @@ func getExpression(perfmonMetric PerfmonMetric) (string, error) {
329332
for commonEvent, alias := range commonEventReplacements {
330333
expression = strings.ReplaceAll(expression, commonEvent, alias)
331334
}
332-
// replace fixed counter perfmon event names with their corresponding perf event names
333-
for perfmonEventName, perfEventName := range fixedCounterEventNameTranslation {
334-
// Replace event name as whole words only (not substrings)
335-
expression = util.ReplaceWholeWord(expression, perfmonEventName, perfEventName)
335+
// replace fixed counter perfmon event names with their corresponding perf
336+
// event names found in the fixedCounterEventNameTranslation map
337+
// example: "100 * ([CPU_CLK_UNHALTED.REF_TSC:SUP] / [TSC])"
338+
// becomes "100 * ([ref-cycles:k] / [TSC])"
339+
expressionVarPattern := regexp.MustCompile(`\[[^\]]+\]`)
340+
for _, match := range expressionVarPattern.FindAllString(expression, -1) {
341+
// strip the brackets
342+
match = strings.Trim(match, "[]")
343+
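// check if the match is in the translation map. NOTE(review): the ReplaceAll below substitutes the bare (unbracketed) name anywhere in the expression, so a key that is a prefix of another bracketed name (e.g. CPU_CLK_UNHALTED.REF_TSC vs. CPU_CLK_UNHALTED.REF_TSC:SUP) also rewrites the longer name when it is matched first; consider replacing "[" + match + "]" with "[" + perfEventName + "]" instead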
344+
if perfEventName, ok := fixedCounterEventNameTranslation[match]; ok {
345+
expression = strings.ReplaceAll(expression, match, perfEventName)
346+
}
336347
}
337348
return expression, nil
338349
}

cmd/metrics/loader_perfmon_event_core.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ var fixedCounterEventNameTranslation = map[string]string{
145145
"CPU_CLK_UNHALTED.THREAD_P:SUP": "cpu-cycles:k",
146146
"CPU_CLK_UNHALTED.CORE_P:SUP": "cpu-cycles:k", // srf - thread and core are the same
147147
"CPU_CLK_UNHALTED.REF_TSC": "ref-cycles",
148+
"CPU_CLK_UNHALTED.REF_TSC:SUP": "ref-cycles:k",
148149
"CPU_CLK_UNHALTED.REF_TSC_P:SUP": "ref-cycles:k",
149150
"TOPDOWN.SLOTS:perf_metrics": "topdown.slots",
150151
"PERF_METRICS.BAD_SPECULATION": "topdown-bad-spec",

cmd/metrics/loader_perfmon_group_core.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,17 @@ func (group *CoreGroup) AddEvent(event CoreEvent, reorder bool, metadata Metadat
241241
validCounters += fmt.Sprintf("%d,", i)
242242
}
243243
}
244+
// When the fixed ref cycles counter is not supported, we cannot put CPU_CLK_UNHALTED.REF_TSC (ref-cycles) and
245+
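// CPU_CLK_UNHALTED.REF_TSC_P:SUP (ref-cycles:k) in the same group. The check below is prefix-based, so it rejects any event whose name starts with CPU_CLK_UNHALTED.REF_TSC (including CPU_CLK_UNHALTED.REF_TSC:SUP) when another such event already occupies a general purpose counter in the group.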
246+
// Note: this was discovered through testing on AWS m7i.8xlarge instances with Amazon Linux 2023 w/ kernel
247+
// 6.1. The same platform but with kernel 6.12 supports the fixed ref cycles counter and doesn't have this limitation.
248+
if !metadata.SupportsFixedRefCycles && strings.HasPrefix(event.EventName, "CPU_CLK_UNHALTED.REF_TSC") {
249+
for _, existingEvent := range group.GeneralPurposeCounters {
250+
if strings.HasPrefix(existingEvent.EventName, "CPU_CLK_UNHALTED.REF_TSC") {
251+
return fmt.Errorf("cannot add %s to group as it contains %s and fixed reference cycles are not supported", event.EventName, existingEvent.EventName)
252+
}
253+
}
254+
}
244255
// otherwise, it is a general purpose event, check if we can place it in one of the general purpose counters
245256
for i := range group.GeneralPurposeCounters {
246257
if counter := group.GeneralPurposeCounters[i]; counter.IsEmpty() {

cmd/metrics/resources/perfmon/icx/icx_perfspect_metrics.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"BriefDescription": "CPU utilization percentage in kernel mode",
1010
"Events": [
1111
{
12-
"Name": "CPU_CLK_UNHALTED.REF_TSC_P:SUP",
12+
"Name": "CPU_CLK_UNHALTED.REF_TSC:SUP",
1313
"Alias": "a"
1414
},
1515
{

0 commit comments

Comments
 (0)