Skip to content

Commit 240133d

Browse files
authored
Add support for kernel lock analysis (#114)
* Add support for kernel lock analysis * Fix code format issue by make format * Fix staticcheck issue * address the reviewer's comment, except for the html output * Add html render * Modify README for perf lock
1 parent 70d79cd commit 240133d

File tree

7 files changed

+241
-1
lines changed

7 files changed

+241
-1
lines changed

README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ Svr-info functionality is now included in PerfSpect. The svr-info configuration
3939
| ------- | ----------- |
4040
| [`perfspect config`](#config-command) | Modify system configuration |
4141
| [`perfspect flame`](#flame-command) | Generate flamegraphs |
42+
| [`perfspect lock`](#lock-command) | Collect system-wide hotspot, c2c, and lock contention information |
4243
| [`perfspect metrics`](#metrics-command) | Monitor core and uncore metrics |
4344
| [`perfspect report`](#report-command) | Generate configuration report |
4445
| [`perfspect telemetry`](#telemetry-command) | Collect system telemetry |
@@ -55,6 +56,10 @@ $ ./perfspect config --cores 24 --llc 2.0 --uncoremaxfreq 1.8
5556
```
5657
#### Flame Command
5758
Software flamegraphs are useful in diagnosing software performance bottlenecks. Run `perfspect flame -h` to capture a system-wide software flamegraph.
59+
60+
#### Lock Command
61+
As systems contain more and more cores, it is useful to analyze kernel lock overhead and potential false sharing that impact overall system scalability. Run `perfspect lock -h` to collect system-wide hotspot, c2c, and lock contention information that experienced performance engineers can use to investigate these problems.
62+
5863
#### Metrics Command
5964
The `metrics` command provides system performance characterization metrics. The metrics provided are dependent on the platform architecture.
6065

@@ -186,4 +191,4 @@ $ ./perfspect report --benchmark speed,memory --targets targets.yaml
186191
`builder/build.sh` builds the dependencies and the app in Docker containers that provide the required build environments. Assumes you have Docker installed on your development system.
187192

188193
### Subsequent Builds
189-
`make` builds the app. Assumes the dependencies have been built previously and that you have Go installed on your development system.
194+
`make` builds the app. Assumes the dependencies have been built previously and that you have Go installed on your development system.

cmd/lock/lock.go

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
// Package lock is a subcommand of the root command. It is used to collect kernel lock related perf information from target(s).
2+
package lock
3+
4+
// Copyright (C) 2021-2024 Intel Corporation
5+
// SPDX-License-Identifier: BSD-3-Clause
6+
7+
import (
8+
"fmt"
9+
"os"
10+
"perfspect/internal/common"
11+
"perfspect/internal/report"
12+
"strings"
13+
14+
"github.com/spf13/cobra"
15+
"github.com/spf13/pflag"
16+
)
17+
18+
const cmdName = "lock"
19+
20+
var examples = []string{
21+
fmt.Sprintf(" Lock inspect from local host: $ %s %s", common.AppName, cmdName),
22+
fmt.Sprintf(" Lock inspect from remote target: $ %s %s --target 192.168.1.1 --user fred --key fred_key", common.AppName, cmdName),
23+
fmt.Sprintf(" Lock inspect from multiple targets: $ %s %s --targets targets.yaml", common.AppName, cmdName),
24+
}
25+
26+
var Cmd = &cobra.Command{
27+
Use: cmdName,
28+
Short: "Collect system information for kernel lock analysis from target(s)",
29+
Long: "",
30+
Example: strings.Join(examples, "\n"),
31+
RunE: runCmd,
32+
PreRunE: validateFlags,
33+
GroupID: "primary",
34+
Args: cobra.NoArgs,
35+
SilenceErrors: true,
36+
}
37+
38+
var (
39+
flagDuration int
40+
flagFrequency int
41+
)
42+
43+
const (
44+
flagDurationName = "duration"
45+
flagFrequencyName = "frequency"
46+
)
47+
48+
func init() {
49+
Cmd.Flags().StringVar(&common.FlagInput, common.FlagInputName, "", "")
50+
Cmd.Flags().StringSliceVar(&common.FlagFormat, common.FlagFormatName, []string{report.FormatHtml}, "")
51+
Cmd.Flags().IntVar(&flagDuration, flagDurationName, 10, "")
52+
Cmd.Flags().IntVar(&flagFrequency, flagFrequencyName, 11, "")
53+
54+
common.AddTargetFlags(Cmd)
55+
56+
Cmd.SetUsageFunc(usageFunc)
57+
}
58+
59+
func usageFunc(cmd *cobra.Command) error {
60+
cmd.Printf("Usage: %s [flags]\n\n", cmd.CommandPath())
61+
cmd.Printf("Examples:\n%s\n\n", cmd.Example)
62+
cmd.Println("Flags:")
63+
for _, group := range getFlagGroups() {
64+
cmd.Printf(" %s:\n", group.GroupName)
65+
for _, flag := range group.Flags {
66+
flagDefault := ""
67+
if cmd.Flags().Lookup(flag.Name).DefValue != "" {
68+
flagDefault = fmt.Sprintf(" (default: %s)", cmd.Flags().Lookup(flag.Name).DefValue)
69+
}
70+
cmd.Printf(" --%-20s %s%s\n", flag.Name, flag.Help, flagDefault)
71+
}
72+
}
73+
cmd.Println("\nGlobal Flags:")
74+
cmd.Parent().PersistentFlags().VisitAll(func(pf *pflag.Flag) {
75+
flagDefault := ""
76+
if cmd.Parent().PersistentFlags().Lookup(pf.Name).DefValue != "" {
77+
flagDefault = fmt.Sprintf(" (default: %s)", cmd.Flags().Lookup(pf.Name).DefValue)
78+
}
79+
cmd.Printf(" --%-20s %s%s\n", pf.Name, pf.Usage, flagDefault)
80+
})
81+
return nil
82+
}
83+
84+
func getFlagGroups() []common.FlagGroup {
85+
var groups []common.FlagGroup
86+
flags := []common.Flag{
87+
{
88+
Name: flagDurationName,
89+
Help: "number of seconds to run the collection",
90+
},
91+
{
92+
Name: flagFrequencyName,
93+
Help: "number of samples taken per second",
94+
},
95+
}
96+
groups = append(groups, common.FlagGroup{
97+
GroupName: "Options",
98+
Flags: flags,
99+
})
100+
groups = append(groups, common.GetTargetFlagGroup())
101+
102+
return groups
103+
}
104+
105+
func validateFlags(cmd *cobra.Command, args []string) error {
106+
if flagDuration <= 0 {
107+
err := fmt.Errorf("duration must be greater than 0")
108+
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
109+
return err
110+
}
111+
return nil
112+
}
113+
114+
func runCmd(cmd *cobra.Command, args []string) error {
115+
reportingCommand := common.ReportingCommand{
116+
Cmd: cmd,
117+
ReportNamePost: "lock",
118+
Frequency: flagFrequency,
119+
Duration: flagDuration,
120+
TableNames: []string{report.KernelLockAnalysisTableName},
121+
}
122+
return reportingCommand.Run()
123+
}

cmd/root.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222

2323
"perfspect/cmd/config"
2424
"perfspect/cmd/flame"
25+
"perfspect/cmd/lock"
2526
"perfspect/cmd/metrics"
2627
"perfspect/cmd/report"
2728
"perfspect/cmd/telemetry"
@@ -111,6 +112,7 @@ Additional help topics:{{range .Commands}}{{if .IsAdditionalHelpTopicCommand}}
111112
rootCmd.AddCommand(metrics.Cmd)
112113
rootCmd.AddCommand(telemetry.Cmd)
113114
rootCmd.AddCommand(flame.Cmd)
115+
rootCmd.AddCommand(lock.Cmd)
114116
rootCmd.AddCommand(config.Cmd)
115117
if onIntelNetwork() {
116118
rootCmd.AddGroup([]*cobra.Group{{ID: "other", Title: "Other Commands:"}}...)

internal/report/html.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1041,3 +1041,19 @@ func codePathFrequencyTableHTMLRenderer(tableValues TableValues, targetName stri
10411041
out += renderFlameGraph("Java", tableValues, "Java Paths")
10421042
return out
10431043
}
1044+
1045+
func kernelLockAnalysisHTMLRenderer(tableValues TableValues, targetName string) string {
1046+
values := [][]string{}
1047+
var tableValueStyles [][]string
1048+
for _, field := range tableValues.Fields {
1049+
rowValues := []string{}
1050+
rowValues = append(rowValues, field.Name)
1051+
rowValues = append(rowValues, field.Values[0])
1052+
values = append(values, rowValues)
1053+
rowStyles := []string{}
1054+
rowStyles = append(rowStyles, "font-weight:bold")
1055+
rowStyles = append(rowStyles, "white-space: pre-wrap")
1056+
tableValueStyles = append(tableValueStyles, rowStyles)
1057+
}
1058+
return renderHTMLTable([]string{}, values, "pure-table pure-table-striped", tableValueStyles)
1059+
}

internal/report/table_defs.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ const (
114114
ConfigurationTableName = "Configuration"
115115
// flamegraph table names
116116
CodePathFrequencyTableName = "Code Path Frequency"
117+
// lock table names
118+
KernelLockAnalysisTableName = "Kernel Lock Analysis "
117119
)
118120

119121
const (
@@ -606,6 +608,17 @@ var tableDefinitions = map[string]TableDefinition{
606608
},
607609
FieldsFunc: codePathFrequencyTableValues,
608610
HTMLTableRendererFunc: codePathFrequencyTableHTMLRenderer},
611+
//
612+
// kernel lock analysis tables
613+
//
614+
KernelLockAnalysisTableName: {
615+
Name: KernelLockAnalysisTableName,
616+
ScriptNames: []string{
617+
script.ProfileKernelLockScriptName,
618+
},
619+
FieldsFunc: kernelLockAnalysisTableValues,
620+
HTMLTableRendererFunc: kernelLockAnalysisHTMLRenderer,
621+
},
609622
}
610623

611624
// GetScriptNamesForTable returns the script names required to generate the table with the given name
@@ -1889,3 +1902,14 @@ func codePathFrequencyTableValues(outputs map[string]script.ScriptOutput) []Fiel
18891902
}
18901903
return fields
18911904
}
1905+
1906+
func kernelLockAnalysisTableValues(outputs map[string]script.ScriptOutput) []Field {
1907+
fields := []Field{
1908+
{Name: "Hotspot without Callstack", Values: []string{sectionValueFromOutput(outputs, "perf_hotspot_no_children")}},
1909+
{Name: "Hotspot with Callstack", Values: []string{sectionValueFromOutput(outputs, "perf_hotspot_callgraph")}},
1910+
{Name: "Cache2Cache without Callstack", Values: []string{sectionValueFromOutput(outputs, "perf_c2c_no_children")}},
1911+
{Name: "Cache2Cache with CallStack", Values: []string{sectionValueFromOutput(outputs, "perf_c2c_callgraph")}},
1912+
{Name: "Lock Contention", Values: []string{sectionValueFromOutput(outputs, "perf_lock_contention")}},
1913+
}
1914+
return fields
1915+
}

internal/report/table_helpers.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1741,3 +1741,13 @@ func systemFoldedFromOutput(outputs map[string]script.ScriptOutput) string {
17411741
}
17421742
return folded
17431743
}
1744+
1745+
func sectionValueFromOutput(outputs map[string]script.ScriptOutput, sectionName string) string {
1746+
sections := getSectionsFromOutput(outputs, script.ProfileKernelLockScriptName)
1747+
1748+
value := sections[sectionName]
1749+
if value == "" {
1750+
slog.Warn("No content for section:", slog.String("warning", sectionName))
1751+
}
1752+
return value
1753+
}

internal/script/script_defs.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ const (
8787
PMUBusyScriptName = "pmu busy"
8888
ProfileJavaScriptName = "profile java"
8989
ProfileSystemScriptName = "profile system"
90+
ProfileKernelLockScriptName = "profile kernel lock"
9091
GaudiInfoScriptName = "gaudi info"
9192
GaudiFirmwareScriptName = "gaudi firmware"
9293
GaudiNumaScriptName = "gaudi numa"
@@ -958,6 +959,65 @@ fi
958959
Superuser: true,
959960
Depends: []string{"perf", "stackcollapse-perf.pl"},
960961
},
962+
{
963+
Name: ProfileKernelLockScriptName,
964+
Script: func() string {
965+
return fmt.Sprintf(`# system-wide lock profile collection
966+
# adjust perf_event_paranoid and kptr_restrict
967+
PERF_EVENT_PARANOID=$( cat /proc/sys/kernel/perf_event_paranoid )
968+
echo -1 >/proc/sys/kernel/perf_event_paranoid
969+
KPTR_RESTRICT=$( cat /proc/sys/kernel/kptr_restrict )
970+
echo 0 >/proc/sys/kernel/kptr_restrict
971+
972+
frequency=%d
973+
duration=%d
974+
975+
# collect hotspot
976+
perf record -F $frequency -a -g --call-graph dwarf -W -d --phys-data --sample-cpu -e cycles:pp,instructions:pp,cpu/mem-loads,ldlat=30/P,cpu/mem-stores/P -o perf_hotspot.data -- sleep $duration &
977+
PERF_HOTSPOT_PID=$!
978+
979+
# check the availability perf lock -b option
980+
perf lock contention -a -bv --max-stack 20 2>/dev/null -- sleep 0
981+
PERF_LOCK_CONTENTION_BPF=$?
982+
983+
# collect lock
984+
if [ ${PERF_LOCK_CONTENTION_BPF} -eq 0 ]; then
985+
perf lock contention -a -bv --max-stack 20 2>perf_lock_contention.txt -- sleep $duration &
986+
PERF_LOCK_PID=$!
987+
fi
988+
989+
wait ${PERF_HOTSPOT_PID}
990+
991+
if [ ${PERF_LOCK_CONTENTION_BPF} -eq 0 ]; then
992+
wait ${PERF_LOCK_PID}
993+
fi
994+
995+
# restore perf_event_paranoid and kptr_restrict
996+
echo "$PERF_EVENT_PARANOID" > /proc/sys/kernel/perf_event_paranoid
997+
echo "$KPTR_RESTRICT" > /proc/sys/kernel/kptr_restrict
998+
999+
# collapse perf data
1000+
if [ -f "perf_hotspot.data" ]; then
1001+
echo "########## perf_hotspot_no_children ##########"
1002+
perf report -i perf_hotspot.data --no-children --call-graph none --stdio
1003+
echo "########## perf_hotspot_callgraph ##########"
1004+
perf report -i perf_hotspot.data --stdio
1005+
fi
1006+
if [ -f "perf_hotspot.data" ]; then
1007+
echo "########## perf_c2c_no_children ##########"
1008+
perf c2c report -i perf_hotspot.data --call-graph none --stdio
1009+
echo "########## perf_c2c_callgraph ##########"
1010+
perf c2c report -i perf_hotspot.data --stdio
1011+
fi
1012+
if [ -f "perf_lock_contention.txt" ]; then
1013+
echo "########## perf_lock_contention ##########"
1014+
cat perf_lock_contention.txt
1015+
fi
1016+
`, frequency, duration)
1017+
}(),
1018+
Superuser: true,
1019+
Depends: []string{"perf"},
1020+
},
9611021
}
9621022

9631023
// validate script definitions

0 commit comments

Comments
 (0)