Skip to content

Commit 043964a

Browse files
committed
move gaudi telemetry to hidden feature
Signed-off-by: Harper, Jason M <[email protected]>
1 parent f86adbf commit 043964a

File tree

2 files changed

+19
-15
lines changed

2 files changed

+19
-15
lines changed

cmd/telemetry/telemetry.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ package telemetry
77
import (
88
"fmt"
99
"log/slog"
10+
"os"
1011
"slices"
1112
"strconv"
1213
"strings"
@@ -60,7 +61,6 @@ var (
6061
flagPower bool
6162
flagTemperature bool
6263
flagInstrMix bool
63-
flagGaudi bool
6464

6565
flagNoSystemSummary bool
6666

@@ -85,7 +85,6 @@ const (
8585
flagPowerName = "power"
8686
flagTemperatureName = "temperature"
8787
flagInstrMixName = "instrmix"
88-
flagGaudiName = "gaudi"
8988

9089
flagNoSystemSummaryName = "no-summary"
9190

@@ -107,7 +106,6 @@ var categories = []common.Category{
107106
{FlagName: flagStorageName, FlagVar: &flagStorage, DefaultValue: false, Help: "monitor storage", TableNames: []string{report.DriveTelemetryTableName}},
108107
{FlagName: flagIRQRateName, FlagVar: &flagIRQRate, DefaultValue: false, Help: "monitor IRQ rate", TableNames: []string{report.IRQRateTelemetryTableName}},
109108
{FlagName: flagInstrMixName, FlagVar: &flagInstrMix, DefaultValue: false, Help: "monitor instruction mix", TableNames: []string{report.InstructionTelemetryTableName}},
110-
{FlagName: flagGaudiName, FlagVar: &flagGaudi, DefaultValue: false, Help: "monitor gaudi", TableNames: []string{report.GaudiTelemetryTableName}},
111109
}
112110

113111
const (
@@ -289,6 +287,12 @@ func runCmd(cmd *cobra.Command, args []string) error {
289287
flagInstrMixFrequency = instrmixFrequencyDefaultPerPID
290288
}
291289
}
290+
// hidden feature - Gaudi telemetry, only enabled when the PERFSPECT_GAUDI_HLSMI_PATH environment variable is set
291+
gaudiHlsmiPath := os.Getenv("PERFSPECT_GAUDI_HLSMI_PATH") // must be the full path to the hl-smi binary
292+
if gaudiHlsmiPath != "" {
293+
slog.Info("Gaudi telemetry enabled", slog.String("hlsmi_path", gaudiHlsmiPath))
294+
tableNames = append(tableNames, report.GaudiTelemetryTableName)
295+
}
292296
// hidden feature - PDU telemetry, only enabled when four environment variables are set
293297
// PERFSPECT_PDU_HOST, PERFSPECT_PDU_USER, PERFSPECT_PDU_PASSWORD, PERFSPECT_PDU_OUTLET
294298
// pduHost := os.Getenv("PERFSPECT_PDU_HOST")
@@ -317,6 +321,7 @@ func runCmd(cmd *cobra.Command, args []string) error {
317321
"Duration": strconv.Itoa(flagDuration),
318322
"InstrMixPID": strconv.Itoa(flagInstrMixPid),
319323
"InstrMixFrequency": strconv.Itoa(flagInstrMixFrequency),
324+
"GaudiHlsmiPath": gaudiHlsmiPath,
320325
// "PDUHost": pduHost,
321326
// "PDUUser": pduUser,
322327
// "PDUPassword": pduPassword,

internal/script/script_defs.go

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1307,19 +1307,18 @@ wait
13071307
GaudiTelemetryScriptName: {
13081308
Name: GaudiTelemetryScriptName,
13091309
ScriptTemplate: `
1310-
# if the hl-smi program is in the path
1311-
if command -v hl-smi &> /dev/null; then
1312-
hl-smi --query-aip=timestamp,name,temperature.aip,module_id,utilization.aip,memory.total,memory.free,memory.used,power.draw --format=csv,nounits -l {{.Interval}} &
1313-
echo $! > {{.ScriptName}}_cmd.pid
1314-
# if duration is set, sleep for the duration then kill the process
1315-
if [ {{.Duration}} -ne 0 ]; then
1316-
sleep {{.Duration}}
1317-
kill -SIGINT $(cat {{.ScriptName}}_cmd.pid)
1318-
fi
1319-
wait
1310+
if command -v {{.GaudiHlsmiPath}} &> /dev/null; then
1311+
{{.GaudiHlsmiPath}} --query-aip=timestamp,name,temperature.aip,module_id,utilization.aip,memory.total,memory.free,memory.used,power.draw --format=csv,nounits -l {{.Interval}} &
1312+
echo $! > {{.ScriptName}}_cmd.pid
1313+
# if duration is set, sleep for the duration then kill the process
1314+
if [ {{.Duration}} -ne 0 ]; then
1315+
sleep {{.Duration}}
1316+
kill -SIGINT $(cat {{.ScriptName}}_cmd.pid)
1317+
fi
1318+
wait
13201319
else
1321-
echo "hl-smi not found in the path" >&2
1322-
exit 1
1320+
echo "hl-smi not found at {{.GaudiHlsmiPath}}" >&2
1321+
exit 1
13231322
fi
13241323
`,
13251324
Superuser: true,

0 commit comments

Comments
 (0)