Skip to content

Commit a0156a8

Browse files
committed
tools: pfpsyncchk: enhance printed output
Remove the redundant `klog.InfoS` calls and ensure that the final reported PFP status of each node is reflected in the output, for example:

```
...
***FINAL PFP SYNC STATUSES***
control-plane-1: desynced!
control-plane-2: desynced!
control-plane-3: desynced!
```

Signed-off-by: Shereen Haj <[email protected]>
1 parent 02d5072 commit a0156a8

File tree

1 file changed

+42
-19
lines changed

1 file changed

+42
-19
lines changed

tools/pfpsyncchk/pfpsyncchk.go

Lines changed: 42 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -163,28 +163,39 @@ func getDifference(refinedSchedStatuses map[string]record.RecordedStatus, refine
163163
}
164164

165165
// verifyPFPSync verifies if the PFP statuses from RTE and scheduler are in sync, and prints out the difference between them if any.
166-
func verifyPFPSync(fromRTE []record.RecordedStatus, fromSched []record.RecordedStatus) error {
166+
func verifyPFPSync(fromRTE []record.RecordedStatus, fromSched []record.RecordedStatus) (bool, error) {
167+
isFinalStatusSynced := false
168+
latest := fromSched[len(fromSched)-1]
169+
for _, st := range fromSched {
170+
if st.RecordTime.After(latest.RecordTime) {
171+
latest = st
172+
}
173+
}
174+
if latest.FingerprintComputed == latest.FingerprintExpected {
175+
isFinalStatusSynced = true
176+
}
177+
167178
if len(fromRTE) == 0 || len(fromSched) == 0 { // should never happen
168-
return fmt.Errorf("sync check is skipped, recorded statuses are missing: RTEStatusListLength=%d, schedulerStatusListLength=%d", len(fromRTE), len(fromSched))
179+
return isFinalStatusSynced, fmt.Errorf("sync check is skipped, recorded statuses are missing: RTEStatusListLength=%d, schedulerStatusListLength=%d", len(fromRTE), len(fromSched))
169180
}
170181

171-
//make map from sched where the expectedPFP is the key, makes sure no duplication and uses the freshest report
182+
// make map from sched where the expectedPFP is the key, makes sure no duplication and uses the freshest report
172183
refinedSchedStatuses := refineListToMap(fromSched)
173184
if len(refinedSchedStatuses) == 0 {
174185
klog.InfoS("all scheduler PFP status trace is synced with RTE!")
175-
return nil
186+
return true, nil
176187
}
177188

178189
refinedRTEStatuses := refineListToMap(fromRTE)
179190

180191
diffStatuses := getDifference(refinedSchedStatuses, refinedRTEStatuses)
181192
if len(diffStatuses) == 0 {
182193
// should never happen, if it happened there is likely a bug in the scheduler reporting the statuses
183-
return fmt.Errorf("no differences found between pod lists of RTE and scheduler, but node is reported to be Dirty on the scheduler")
194+
return isFinalStatusSynced, fmt.Errorf("no differences found between pod lists of RTE and scheduler, but node is reported to be Dirty on the scheduler")
184195
}
185196
printPrettyString(diffStatuses)
186197

187-
return nil
198+
return isFinalStatusSynced, nil
188199
}
189200

190201
func processFile(filePath string) ([]record.RecordedStatus, error) {
@@ -220,22 +231,17 @@ func processFile(filePath string) ([]record.RecordedStatus, error) {
220231
return statusData, nil
221232
}
222233

223-
func singlePairPFPSyncCheck(rteFilePath string, schedulerFilePath string) error {
234+
func singlePairPFPSyncCheck(rteFilePath string, schedulerFilePath string) (bool, error) {
224235
rteData, err := processFile(rteFilePath)
225236
if err != nil {
226-
return err
237+
return false, err
227238
}
228239
schedData, err := processFile(schedulerFilePath)
229240
if err != nil {
230-
return err
241+
return false, err
231242
}
232243

233-
err = verifyPFPSync(rteData, schedData)
234-
if err != nil {
235-
klog.InfoS("PFP sync finished with error", "error", err)
236-
return err
237-
}
238-
return nil
244+
return verifyPFPSync(rteData, schedData)
239245
}
240246

241247
func findFiles(mustgatherDir string, suffix string) (map[string]string, error) {
@@ -308,6 +314,7 @@ func mustGatherPFPSyncCheck(mustGatherDirPath string) error {
308314
return nil
309315
}
310316

317+
finalSyncStatuses := make(map[string]bool, len(schedNodeFiles))
311318
for node, schedFilePath := range schedNodeFiles {
312319
rteFilePath, ok := rteNodeFiles[node]
313320
if !ok {
@@ -318,16 +325,31 @@ func mustGatherPFPSyncCheck(mustGatherDirPath string) error {
318325
fmt.Println("******************************************************************************************")
319326
fmt.Println("Processing node: ", node)
320327
fmt.Println("******************************************************************************************")
321-
// call verifyPFPSync with the two files
322-
err := singlePairPFPSyncCheck(rteFilePath, schedFilePath)
328+
isFinalStatusSynced, err := singlePairPFPSyncCheck(rteFilePath, schedFilePath)
323329
if err != nil {
324-
klog.InfoS("PFP sync finished with error", "error", err)
325330
return err
326331
}
332+
finalSyncStatuses[node] = isFinalStatusSynced
327333
}
334+
335+
printFinalStatuses(finalSyncStatuses)
336+
328337
return nil
329338
}
330339

340+
// printFinalStatuses writes a summary of the final PFP sync status of every node to stdout.
//
// The summary is a header line followed by one "<node>: synced!" or "<node>: desynced!" line per
// map entry. Lines follow map iteration order, so their order is not stable across runs.
func printFinalStatuses(statuses map[string]bool) {
	var report strings.Builder
	report.WriteString("***FINAL PFP SYNC STATUSES***\n")

	for nodeName, synced := range statuses {
		verdict := "desynced!"
		if synced {
			verdict = "synced!"
		}
		fmt.Fprintf(&report, "%s: %s\n", nodeName, verdict)
	}

	// Println appends one more newline after the last status line.
	fmt.Println(report.String())
}
352+
331353
func logProgramProperUsage(programName string) {
332354
klog.InfoS("Usage:", "program", programName, "( -from-rte <file-1-path> -from-scheduler <file-2-path> ) or -must-gather <must-gather-directory-path>")
333355
klog.InfoS("must specify either 2 input files or a must-gather directory; must-gather directory takes precedence")
@@ -366,11 +388,12 @@ func main() {
366388
}
367389

368390
if *mustGatherDirPath == "" {
369-
err := singlePairPFPSyncCheck(*rteFilePath, *schedulerFilePath)
391+
finalStatus, err := singlePairPFPSyncCheck(*rteFilePath, *schedulerFilePath)
370392
if err != nil {
371393
klog.InfoS("PFP sync finished with error", "error", err)
372394
os.Exit(exitCodeErrSyncCheck)
373395
}
396+
printFinalStatuses(map[string]bool{"node": finalStatus})
374397
os.Exit(exitCodeSuccess)
375398
}
376399

0 commit comments

Comments
 (0)