Skip to content

Commit c668797

Browse files
committed
enable indefinite duration for telemetry collection
1 parent 5ca30df commit c668797

File tree

4 files changed

+122
-53
lines changed

4 files changed

+122
-53
lines changed

cmd/telemetry/telemetry.go

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -163,7 +163,7 @@ func getFlagGroups() []common.FlagGroup {
163163
},
164164
{
165165
Name: flagDurationName,
166-
Help: "number of seconds to run the collection. If 0, the collection will run indefinitely. Ctrl-C to stop.",
166+
Help: "number of seconds to run the collection. If 0, the collection will run indefinitely. Ctrl+c to stop.",
167167
},
168168
{
169169
Name: flagIntervalName,
@@ -220,8 +220,17 @@ func validateFlags(cmd *cobra.Command, args []string) error {
220220
return err
221221
}
222222
}
223-
if flagDuration <= 0 {
224-
err := fmt.Errorf("duration must be greater than 0")
223+
if flagDuration < 0 {
224+
err := fmt.Errorf("duration must be 0 or greater")
225+
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
226+
return err
227+
}
228+
target, err := cmd.Flags().GetString("target")
229+
if err != nil {
230+
panic("failed to get target flag")
231+
}
232+
if flagDuration == 0 && target != "" {
233+
err := fmt.Errorf("duration must be greater than 0 when collecting from a remote target")
225234
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
226235
return err
227236
}

internal/common/common.go

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -403,8 +403,10 @@ func collectOnTarget(cmd *cobra.Command, duration int, myTarget target.Target, s
403403
}
404404
// run the scripts on the target
405405
status := "collecting data"
406-
if duration > 0 {
407-
status = fmt.Sprintf("%s, duration=%ds", status, duration)
406+
if cmd.Name() == "telemetry" && duration == 0 { // telemetry is the only command that uses this common code that can run indefinitely
407+
status += ", press Ctrl+c to stop"
408+
} else if duration != 0 {
409+
status += fmt.Sprintf(" for %d seconds", duration)
408410
}
409411
_ = statusUpdate(myTarget.GetName(), status)
410412
scriptOutputs, err := script.RunScripts(myTarget, scriptsToRun, true, localTempDir)

internal/script/script.go

Lines changed: 40 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -283,11 +283,41 @@ func formMasterScript(myTarget target.Target, parallelScripts []ScriptDefinition
283283
// in a variable named after the script
284284
var masterScript strings.Builder
285285
targetTempDirectory := myTarget.GetTempDirectory()
286+
287+
masterScript.WriteString("#!/bin/bash\n")
288+
289+
// set dir var and change working directory to dir in case any of the scripts write out temporary files
286290
masterScript.WriteString(fmt.Sprintf("script_dir=%s\n", targetTempDirectory))
287-
// change working directory to target temporary directory in case any of the scripts write out temporary files
288291
masterScript.WriteString(fmt.Sprintf("cd %s\n", targetTempDirectory))
289-
// the master script will run all parallel scripts in the background
290-
masterScript.WriteString("\n# run all scripts in the background\n")
292+
293+
// function to print the output of each script
294+
masterScript.WriteString("\nprint_output() {\n")
295+
for _, script := range parallelScripts {
296+
masterScript.WriteString("\techo \"<---------------------->\"\n")
297+
masterScript.WriteString(fmt.Sprintf("\techo SCRIPT NAME: %s\n", script.Name))
298+
masterScript.WriteString(fmt.Sprintf("\techo STDOUT:\n\tcat %s\n", path.Join("$script_dir", sanitizeScriptName(script.Name)+".stdout")))
299+
masterScript.WriteString(fmt.Sprintf("\techo STDERR:\n\tcat %s\n", path.Join("$script_dir", sanitizeScriptName(script.Name)+".stderr")))
300+
masterScript.WriteString(fmt.Sprintf("\techo EXIT CODE: $%s_exitcode\n", sanitizeScriptName(script.Name)))
301+
}
302+
masterScript.WriteString("}\n")
303+
304+
// function to handle SIGINT
305+
masterScript.WriteString("\nhandle_sigint() {\n")
306+
for _, script := range parallelScripts {
307+
masterScript.WriteString(fmt.Sprintf("\tkill -SIGINT $%s_pid\n", sanitizeScriptName(script.Name)))
308+
if script.NeedsKill {
309+
masterScript.WriteString(fmt.Sprintf("\tkill -SIGKILL $(cat %s_cmd.pid)\n", sanitizeScriptName(script.Name)))
310+
}
311+
}
312+
masterScript.WriteString("\tprint_output\n")
313+
masterScript.WriteString("\texit 0\n")
314+
masterScript.WriteString("}\n")
315+
316+
// call handle_sigint func when SIGINT is received
317+
masterScript.WriteString("\ntrap handle_sigint SIGINT\n")
318+
319+
// run all parallel scripts in the background
320+
masterScript.WriteString("\n")
291321
needsElevatedPrivileges := false
292322
for _, script := range parallelScripts {
293323
if script.Superuser {
@@ -302,21 +332,15 @@ func formMasterScript(myTarget target.Target, parallelScripts []ScriptDefinition
302332
)
303333
masterScript.WriteString(fmt.Sprintf("%s_pid=$!\n", sanitizeScriptName(script.Name)))
304334
}
305-
// the master script will wait for all parallel scripts to finish
306-
masterScript.WriteString("\n# wait for all scripts to finish\n")
335+
336+
// wait for all parallel scripts to finish then print their output
337+
masterScript.WriteString("\n")
307338
for _, script := range parallelScripts {
308339
masterScript.WriteString(fmt.Sprintf("wait \"$%s_pid\"\n", sanitizeScriptName(script.Name)))
309340
masterScript.WriteString(fmt.Sprintf("%s_exitcode=$?\n", sanitizeScriptName(script.Name)))
310341
}
311-
// the master script will print the output of each script
312-
masterScript.WriteString("\n# print output of each script\n")
313-
for _, script := range parallelScripts {
314-
masterScript.WriteString("echo \"<---------------------->\"\n")
315-
masterScript.WriteString(fmt.Sprintf("echo SCRIPT NAME: %s\n", script.Name))
316-
masterScript.WriteString(fmt.Sprintf("echo STDOUT:\ncat %s\n", path.Join("$script_dir", sanitizeScriptName(script.Name)+".stdout")))
317-
masterScript.WriteString(fmt.Sprintf("echo STDERR:\ncat %s\n", path.Join("$script_dir", sanitizeScriptName(script.Name)+".stderr")))
318-
masterScript.WriteString(fmt.Sprintf("echo EXIT CODE: $%s_exitcode\n", sanitizeScriptName(script.Name)))
319-
}
342+
masterScript.WriteString("\nprint_output\n")
343+
320344
return masterScript.String(), needsElevatedPrivileges
321345
}
322346

@@ -421,6 +445,8 @@ func prepareTargetToRunScripts(myTarget target.Target, scripts []ScriptDefinitio
421445
dependenciesToCopy[path.Join(targetArchitecture, dependency)] = 1
422446
}
423447
}
448+
// replace any placeholders in the script with the actual values
449+
script.Script = strings.ReplaceAll(script.Script, "{cmd_pid}", sanitizeScriptName(script.Name)+"_cmd.pid")
424450
// add user's path to script
425451
scriptWithPath := fmt.Sprintf("export PATH=\"%s\"\n%s", userPath, script.Script)
426452
if script.Name == "" {

internal/script/script_defs.go

Lines changed: 66 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ type ScriptDefinition struct {
2121
Depends []string // binary dependencies that must be available for the script to run
2222
Superuser bool // requires sudo or root
2323
Sequential bool // run script sequentially (not at the same time as others)
24+
NeedsKill bool // process/script needs to be killed after run without a duration specified, i.e., it doesn't stop through SIGINT
2425
}
2526

2627
const (
@@ -902,11 +903,17 @@ rm -rf $test_dir`, params.StorageDir)
902903
countInt := params.Duration / params.Interval
903904
count = strconv.Itoa(countInt)
904905
}
905-
return fmt.Sprintf(`mpstat -u -T -I SCPU -P ALL %d %s`, params.Interval, count)
906+
script := fmt.Sprintf(`mpstat -u -T -I SCPU -P ALL %d %s`, params.Interval, count)
907+
script += " &" // run it in the background
908+
script += "\necho $! > {cmd_pid}\n" // this is used to kill the command
909+
script += "wait\n" // wait for the command to finish
910+
return script
911+
906912
}(),
907913
Superuser: true,
908914
Lkms: []string{},
909915
Depends: []string{"mpstat"},
916+
NeedsKill: true,
910917
},
911918
{
912919
Name: IostatScriptName,
@@ -916,11 +923,16 @@ rm -rf $test_dir`, params.StorageDir)
916923
countInt := params.Duration / params.Interval
917924
count = strconv.Itoa(countInt)
918925
}
919-
return fmt.Sprintf(`S_TIME_FORMAT=ISO iostat -d -t %d %s | sed '/^loop/d'`, params.Interval, count)
926+
script := fmt.Sprintf(`S_TIME_FORMAT=ISO iostat -d -t %d %s | sed '/^loop/d'`, params.Interval, count)
927+
script += " &" // run it in the background
928+
script += "\necho $! > {cmd_pid}\n" // this is used to kill the command
929+
script += "wait\n" // wait for the command to finish
930+
return script
920931
}(),
921932
Superuser: true,
922933
Lkms: []string{},
923934
Depends: []string{"iostat"},
935+
NeedsKill: true,
924936
},
925937
{
926938
Name: SarMemoryScriptName,
@@ -930,11 +942,16 @@ rm -rf $test_dir`, params.StorageDir)
930942
countInt := params.Duration / params.Interval
931943
count = strconv.Itoa(countInt)
932944
}
933-
return fmt.Sprintf(`sar -r %d %s`, params.Interval, count)
945+
script := fmt.Sprintf(`sar -r %d %s`, params.Interval, count)
946+
script += " &" // run it in the background
947+
script += "\necho $! > {cmd_pid}\n" // this is used to kill the command
948+
script += "wait\n" // wait for the command to finish
949+
return script
934950
}(),
935951
Superuser: true,
936952
Lkms: []string{},
937953
Depends: []string{"sar", "sadc"},
954+
NeedsKill: true,
938955
},
939956
{
940957
Name: SarNetworkScriptName,
@@ -944,11 +961,16 @@ rm -rf $test_dir`, params.StorageDir)
944961
countInt := params.Duration / params.Interval
945962
count = strconv.Itoa(countInt)
946963
}
947-
return fmt.Sprintf(`sar -n DEV %d %s`, params.Interval, count)
964+
script := fmt.Sprintf(`sar -n DEV %d %s`, params.Interval, count)
965+
script += " &" // run it in the background
966+
script += "\necho $! > {cmd_pid}\n" // this is used to kill the command
967+
script += "wait\n" // wait for the command to finish
968+
return script
948969
}(),
949970
Superuser: true,
950971
Lkms: []string{},
951972
Depends: []string{"sar", "sadc"},
973+
NeedsKill: true,
952974
},
953975
{
954976
Name: TurbostatScriptName,
@@ -958,11 +980,50 @@ rm -rf $test_dir`, params.StorageDir)
958980
countInt := params.Duration / params.Interval
959981
count = "-n " + strconv.Itoa(countInt)
960982
}
961-
return fmt.Sprintf(`turbostat -S -s PkgWatt,RAMWatt -q -i %d %s`, params.Interval, count) + ` | awk '{ print strftime("%H:%M:%S"), $0 }'`
983+
script := fmt.Sprintf(`turbostat -S -s PkgWatt,RAMWatt -q -i %d %s`, params.Interval, count) + ` | awk '{ print strftime("%H:%M:%S"), $0 }'`
984+
script += " &" // run it in the background
985+
script += "\necho $! > {cmd_pid}\n" // this is used to kill the command
986+
script += "wait\n" // wait for the command to finish
987+
return script
962988
}(),
963989
Superuser: true,
964990
Lkms: []string{"msr"},
965991
Depends: []string{"turbostat"},
992+
NeedsKill: true,
993+
},
994+
{
995+
Name: InstructionMixScriptName,
996+
Script: func() string {
997+
script := fmt.Sprintf("echo TIME: $(date +\"%%H:%%M:%%S\")\necho INTERVAL: %d\n", params.Interval)
998+
commandParts := []string{
999+
"processwatch -c",
1000+
}
1001+
// if no PID specified, increase the sampling interval (defaults to 100,000) to reduce overhead
1002+
if params.PID == 0 {
1003+
commandParts = append(commandParts, fmt.Sprintf("-s %d", 1000000))
1004+
} else {
1005+
commandParts = append(commandParts, fmt.Sprintf("-p %d", params.PID))
1006+
}
1007+
for _, cat := range params.Filter {
1008+
commandParts = append(commandParts, fmt.Sprintf("-f %s", cat))
1009+
}
1010+
if params.Duration != 0 && params.Interval != 0 {
1011+
count := params.Duration / params.Interval
1012+
commandParts = append(commandParts, fmt.Sprintf("-n %d", count))
1013+
}
1014+
if params.Interval != 0 {
1015+
commandParts = append(commandParts, fmt.Sprintf("-i %d", params.Interval))
1016+
}
1017+
script += strings.Join(commandParts, " ")
1018+
script += " &" // run it in the background
1019+
script += "\necho $! > {cmd_pid}\n" // this is used to kill the command
1020+
script += "wait\n" // wait for the command to finish
1021+
return script
1022+
}(),
1023+
Superuser: true,
1024+
Lkms: []string{"msr"},
1025+
Depends: []string{"processwatch"},
1026+
NeedsKill: true,
9661027
},
9671028

9681029
// flamegraph scripts
@@ -1098,35 +1159,6 @@ fi
10981159
Superuser: true,
10991160
Depends: []string{"perf"},
11001161
},
1101-
{
1102-
Name: InstructionMixScriptName,
1103-
Script: func() string {
1104-
script := fmt.Sprintf("echo TIME: $(date +\"%%H:%%M:%%S\")\necho INTERVAL: %d\n", params.Interval)
1105-
scriptParts := []string{
1106-
"processwatch -c",
1107-
}
1108-
// if no PID specified, increase the sampling interval (defaults to 100,000) to reduce overhead
1109-
if params.PID == 0 {
1110-
scriptParts = append(scriptParts, fmt.Sprintf("-s %d", 1000000))
1111-
} else {
1112-
scriptParts = append(scriptParts, fmt.Sprintf("-p %d", params.PID))
1113-
}
1114-
for _, cat := range params.Filter {
1115-
scriptParts = append(scriptParts, fmt.Sprintf("-f %s", cat))
1116-
}
1117-
if params.Duration != 0 && params.Interval != 0 {
1118-
count := params.Duration / params.Interval
1119-
scriptParts = append(scriptParts, fmt.Sprintf("-n %d", count))
1120-
}
1121-
if params.Interval != 0 {
1122-
scriptParts = append(scriptParts, fmt.Sprintf("-i %d", params.Interval))
1123-
}
1124-
return script + strings.Join(scriptParts, " ")
1125-
}(),
1126-
Superuser: true,
1127-
Lkms: []string{"msr"},
1128-
Depends: []string{"processwatch"},
1129-
},
11301162
}
11311163

11321164
// validate script definitions

0 commit comments

Comments
 (0)