Skip to content

Commit 2b83fa3

Browse files
committed
tools: pfpsyncchk: enhance printed output
Remove redundant `klog.InfoS` calls and ensure that the final reported PFP status is reflected in the output, as such:

```
...
***FINAL PFP SYNC STATUSES***
control-plane-1: desynced!
control-plane-2: desynced!
control-plane-3: desynced!
```

Signed-off-by: Shereen Haj <[email protected]>
1 parent 2e91d69 commit 2b83fa3

File tree

1 file changed

+37
-19
lines changed

1 file changed

+37
-19
lines changed

tools/pfpsyncchk/pfpsyncchk.go

Lines changed: 37 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -163,28 +163,39 @@ func getDifference(refinedSchedStatuses map[string]record.RecordedStatus, refine
163163
}
164164

165165
// verifyPFPSync verifies if the PFP statuses from RTE and scheduler are in sync, and prints out the difference between them if any.
166-
func verifyPFPSync(fromRTE []record.RecordedStatus, fromSched []record.RecordedStatus) error {
166+
func verifyPFPSync(fromRTE []record.RecordedStatus, fromSched []record.RecordedStatus) (bool, error) {
167+
isFinalStatusSynced := false
168+
latest := fromSched[len(fromSched)-1]
169+
for _, st := range fromSched {
170+
if st.RecordTime.After(latest.RecordTime) {
171+
latest = st
172+
}
173+
}
174+
if latest.FingerprintComputed == latest.FingerprintExpected {
175+
isFinalStatusSynced = true
176+
}
177+
167178
if len(fromRTE) == 0 || len(fromSched) == 0 { // should never happen
168-
return fmt.Errorf("sync check is skipped, recorded statuses are missing: RTEStatusListLength=%d, schedulerStatusListLength=%d", len(fromRTE), len(fromSched))
179+
return isFinalStatusSynced, fmt.Errorf("sync check is skipped, recorded statuses are missing: RTEStatusListLength=%d, schedulerStatusListLength=%d", len(fromRTE), len(fromSched))
169180
}
170181

171-
//make map from sched where the expectedPFP is the key, makes sure no duplication and uses the freshest report
182+
// make map from sched where the expectedPFP is the key, makes sure no duplication and uses the freshest report
172183
refinedSchedStatuses := refineListToMap(fromSched)
173184
if len(refinedSchedStatuses) == 0 {
174185
klog.InfoS("all scheduler PFP status trace is synced with RTE!")
175-
return nil
186+
return true, nil
176187
}
177188

178189
refinedRTEStatuses := refineListToMap(fromRTE)
179190

180191
diffStatuses := getDifference(refinedSchedStatuses, refinedRTEStatuses)
181192
if len(diffStatuses) == 0 {
182193
// should never happen, if it happened there is likely a bug in the scheduler reporting the statuses
183-
return fmt.Errorf("no differences found between pod lists of RTE and scheduler, but node is reported to be Dirty on the scheduler")
194+
return isFinalStatusSynced, fmt.Errorf("no differences found between pod lists of RTE and scheduler, but node is reported to be Dirty on the scheduler")
184195
}
185196
printPrettyString(diffStatuses)
186197

187-
return nil
198+
return isFinalStatusSynced, nil
188199
}
189200

190201
func processFile(filePath string) ([]record.RecordedStatus, error) {
@@ -220,22 +231,17 @@ func processFile(filePath string) ([]record.RecordedStatus, error) {
220231
return statusData, nil
221232
}
222233

223-
func singlePairPFPSyncCheck(rteFilePath string, schedulerFilePath string) error {
234+
func singlePairPFPSyncCheck(rteFilePath string, schedulerFilePath string) (bool, error) {
224235
rteData, err := processFile(rteFilePath)
225236
if err != nil {
226-
return err
237+
return false, err
227238
}
228239
schedData, err := processFile(schedulerFilePath)
229240
if err != nil {
230-
return err
241+
return false, err
231242
}
232243

233-
err = verifyPFPSync(rteData, schedData)
234-
if err != nil {
235-
klog.InfoS("PFP sync finished with error", "error", err)
236-
return err
237-
}
238-
return nil
244+
return verifyPFPSync(rteData, schedData)
239245
}
240246

241247
func findFiles(mustgatherDir string, suffix string) (map[string]string, error) {
@@ -308,6 +314,7 @@ func mustGatherPFPSyncCheck(mustGatherDirPath string) error {
308314
return nil
309315
}
310316

317+
finalSyncStatuses := make(map[string]bool, len(schedNodeFiles))
311318
for node, schedFilePath := range schedNodeFiles {
312319
rteFilePath, ok := rteNodeFiles[node]
313320
if !ok {
@@ -318,13 +325,24 @@ func mustGatherPFPSyncCheck(mustGatherDirPath string) error {
318325
fmt.Println("******************************************************************************************")
319326
fmt.Println("Processing node: ", node)
320327
fmt.Println("******************************************************************************************")
321-
// call verifyPFPSync with the two files
322-
err := singlePairPFPSyncCheck(rteFilePath, schedFilePath)
328+
isFinalStatusSynced, err := singlePairPFPSyncCheck(rteFilePath, schedFilePath)
323329
if err != nil {
324-
klog.InfoS("PFP sync finished with error", "error", err)
325330
return err
326331
}
332+
finalSyncStatuses[node] = isFinalStatusSynced
333+
}
334+
335+
var sb strings.Builder
336+
sb.WriteString("***FINAL PFP SYNC STATUSES***\n")
337+
for nname, st := range finalSyncStatuses {
338+
val := "synced!"
339+
if !st {
340+
val = "desynced!"
341+
}
342+
sb.WriteString(fmt.Sprintf("%s: %s\n", nname, val))
327343
}
344+
fmt.Println(sb.String())
345+
328346
return nil
329347
}
330348

@@ -366,7 +384,7 @@ func main() {
366384
}
367385

368386
if *mustGatherDirPath == "" {
369-
err := singlePairPFPSyncCheck(*rteFilePath, *schedulerFilePath)
387+
_, err := singlePairPFPSyncCheck(*rteFilePath, *schedulerFilePath)
370388
if err != nil {
371389
klog.InfoS("PFP sync finished with error", "error", err)
372390
os.Exit(exitCodeErrSyncCheck)

0 commit comments

Comments
 (0)