Commit 27d2c5c

Merge pull request #12997 from fabriziopandini/add-more-info-to-rollout-planner-logs
🌱 Add more info to logs for rollout changes
2 parents 2e9eba9 + dfb2579 commit 27d2c5c

55 files changed (+4588 additions, −5315 deletions)


internal/controllers/machinedeployment/machinedeployment_controller.go

Lines changed: 66 additions & 61 deletions

@@ -19,6 +19,7 @@ package machinedeployment
 import (
 	"context"
 	"fmt"
+	"sort"
 	"strings"

 	"github.com/pkg/errors"
@@ -332,24 +333,53 @@ func (r *Reconciler) reconcile(ctx context.Context, s *scope) error {
 // Note: When the newMS has been created by the rollout planner, also wait for the cache to be up to date.
 func (r *Reconciler) createOrUpdateMachineSetsAndSyncMachineDeploymentRevision(ctx context.Context, p *rolloutPlanner) error {
 	log := ctrl.LoggerFrom(ctx)
-	allMSs := append(p.oldMSs, p.newMS)

-	for _, ms := range allMSs {
-		log = log.WithValues("MachineSet", klog.KObj(ms))
-		ctx = ctrl.LoggerInto(ctx, log)
+	// Note: newMS goes first so in the logs we will have first create/scale up newMS, then scale down oldMSs
+	allMSs := append([]*clusterv1.MachineSet{p.newMS}, p.oldMSs...)

+	// Get all the diff introduced by the rollout planner.
+	// Note: collect all the diff first, so for each change we can add an overview of all the MachineSets
+	// in the MachineDeployment, because those info are required to understand why a change happened.
+	machineSetsDiff := map[string]machineSetDiff{}
+	machineSetsSummary := map[string]string{}
+	for _, ms := range allMSs {
+		// Update spec.Replicas in the MachineSet.
 		if scaleIntent, ok := p.scaleIntents[ms.Name]; ok {
-			ms.Spec.Replicas = &scaleIntent
+			ms.Spec.Replicas = ptr.To(scaleIntent)
 		}

-		if ms.GetUID() == "" {
+		diff := p.getMachineSetDiff(ms)
+		machineSetsDiff[ms.Name] = diff
+		machineSetsSummary[ms.Name] = diff.Summary
+	}
+
+	// Apply changes to MachineSets.
+	for _, ms := range allMSs {
+		log := log.WithValues("MachineSet", klog.KObj(ms))
+		ctx := ctrl.LoggerInto(ctx, log)
+
+		// Retrieve the diff for the MachineSet.
+		// Note: no need to check for diff doesn't exist, all the diff have been computed right above.
+		diff := machineSetsDiff[ms.Name]
+
+		// Add to the log kv pairs providing the overview of all the MachineSets computed above.
+		// Note: This value should not be added to the context to prevent propagation to other func.
+		log = log.WithValues("machineSets", machineSetsSummary)
+
+		if diff.OriginalMS == nil {
 			// Create the MachineSet.
 			if err := ssa.Patch(ctx, r.Client, machineDeploymentManagerName, ms); err != nil {
 				r.recorder.Eventf(p.md, corev1.EventTypeWarning, "FailedCreate", "Failed to create MachineSet %s: %v", klog.KObj(ms), err)
 				return errors.Wrapf(err, "failed to create MachineSet %s", klog.KObj(ms))
 			}
-			log.Info(fmt.Sprintf("MachineSet created (%s)", p.createReason))
-			r.recorder.Eventf(p.md, corev1.EventTypeNormal, "SuccessfulCreate", "Created MachineSet %s with %d replicas", klog.KObj(ms), ptr.Deref(ms.Spec.Replicas, 0))
+			if len(p.oldMSs) > 0 {
+				log.Info(fmt.Sprintf("MachineSets need rollout: %s", strings.Join(machineSetNames(p.oldMSs), ", ")), "reason", p.createReason)
+			}
+			log.Info(fmt.Sprintf("MachineSet %s created, it is now the current MachineSet", ms.Name))
+			if diff.DesiredReplicas > 0 {
+				log.Info(fmt.Sprintf("Scaled up current MachineSet %s from 0 to %d replicas (+%[2]d)", ms.Name, diff.DesiredReplicas))
+			}
+			r.recorder.Eventf(p.md, corev1.EventTypeNormal, "SuccessfulCreate", "Created MachineSet %s with %d replicas", klog.KObj(ms), diff.DesiredReplicas)

 			// Keep trying to get the MachineSet. This will force the cache to update and prevent any future reconciliation of
 			// the MachineDeployment to reconcile with an outdated list of MachineSets which could lead to unwanted creation of
@@ -361,14 +391,21 @@ func (r *Reconciler) createOrUpdateMachineSetsAndSyncMachineDeploymentRevision(c
 			continue
 		}

-		// Update the MachineSet to propagate in-place mutable fields from the MachineDeployment and/or changes applied by the rollout planner.
-		originalMS, ok := p.originalMS[ms.Name]
-		if !ok {
-			return errors.Errorf("failed to update MachineSet %s, original MS is missing", klog.KObj(ms))
+		// Add to the log kv pairs providing context and details about changes in this MachineSet (reason, diff)
+		// Note: Those values should not be added to the context to prevent propagation to other func.
+		statusToLogKeyAndValues := []any{
+			"reason", diff.Reason,
+			"diff", diff.OtherChanges,
 		}
-		originalReplicas := ptr.Deref(originalMS.Spec.Replicas, 0)
+		if len(p.acknowledgedMachineNames) > 0 {
+			statusToLogKeyAndValues = append(statusToLogKeyAndValues, "acknowledgedMachines", sortAndJoin(p.acknowledgedMachineNames))
+		}
+		if len(p.updatingMachineNames) > 0 {
+			statusToLogKeyAndValues = append(statusToLogKeyAndValues, "updatingMachines", sortAndJoin(p.updatingMachineNames))
+		}
+		log = log.WithValues(statusToLogKeyAndValues...)

-		err := ssa.Patch(ctx, r.Client, machineDeploymentManagerName, ms, ssa.WithCachingProxy{Cache: r.ssaCache, Original: originalMS})
+		err := ssa.Patch(ctx, r.Client, machineDeploymentManagerName, ms, ssa.WithCachingProxy{Cache: r.ssaCache, Original: diff.OriginalMS})
 		if err != nil {
 			// Note: If we are Applying a MachineSet with UID set and the MachineSet does not exist anymore, the
 			// kube-apiserver returns a conflict error.
@@ -379,25 +416,20 @@ func (r *Reconciler) createOrUpdateMachineSetsAndSyncMachineDeploymentRevision(c
 			return errors.Wrapf(err, "failed to update MachineSet %s", klog.KObj(ms))
 		}

-		changes := getAnnotationChanges(originalMS, ms)
-
-		newReplicas := ptr.Deref(ms.Spec.Replicas, 0)
-		if newReplicas < originalReplicas {
-			changes = append(changes, fmt.Sprintf("replicas %d", newReplicas))
-			log.Info(fmt.Sprintf("Scaled down MachineSet %s to %d replicas (-%d)", ms.Name, newReplicas, originalReplicas-newReplicas), "diff", strings.Join(changes, ","))
-			r.recorder.Eventf(p.md, corev1.EventTypeNormal, "SuccessfulScale", "Scaled down MachineSet %v: %d -> %d", ms.Name, originalReplicas, newReplicas)
+		if diff.DesiredReplicas < diff.OriginalReplicas {
+			log.Info(fmt.Sprintf("Scaled down %s MachineSet %s from %d to %d replicas (-%d)", diff.Type, ms.Name, diff.OriginalReplicas, diff.DesiredReplicas, diff.OriginalReplicas-diff.DesiredReplicas))
+			r.recorder.Eventf(p.md, corev1.EventTypeNormal, "SuccessfulScale", "Scaled down MachineSet %v: %d -> %d", ms.Name, diff.OriginalReplicas, diff.DesiredReplicas)
 		}
-		if newReplicas > originalReplicas {
-			changes = append(changes, fmt.Sprintf("replicas %d", newReplicas))
-			log.Info(fmt.Sprintf("Scaled up MachineSet %s to %d replicas (+%d)", ms.Name, newReplicas, newReplicas-originalReplicas), "diff", strings.Join(changes, ","))
-			r.recorder.Eventf(p.md, corev1.EventTypeNormal, "SuccessfulScale", "Scaled up MachineSet %v: %d -> %d", ms.Name, originalReplicas, newReplicas)
+		if diff.DesiredReplicas > diff.OriginalReplicas {
+			log.Info(fmt.Sprintf("Scaled up %s MachineSet %s from %d to %d replicas (+%d)", diff.Type, ms.Name, diff.OriginalReplicas, diff.DesiredReplicas, diff.DesiredReplicas-diff.OriginalReplicas))
+			r.recorder.Eventf(p.md, corev1.EventTypeNormal, "SuccessfulScale", "Scaled up MachineSet %v: %d -> %d", ms.Name, diff.OriginalReplicas, diff.DesiredReplicas)
 		}
-		if newReplicas == originalReplicas && len(changes) > 0 {
-			log.Info(fmt.Sprintf("Updated MachineSet %s", ms.Name), "diff", strings.Join(changes, ","))
+		if diff.DesiredReplicas == diff.OriginalReplicas && diff.OtherChanges != "" {
+			log.Info(fmt.Sprintf("Updated %s MachineSet %s", diff.Type, ms.Name))
 		}

 		// Only wait for cache if the object was changed.
-		if originalMS.ResourceVersion != ms.ResourceVersion {
+		if diff.OriginalMS.ResourceVersion != ms.ResourceVersion {
 			if err := clientutil.WaitForCacheToBeUpToDate(ctx, r.Client, "MachineSet update", ms); err != nil {
 				return err
 			}
@@ -415,40 +447,13 @@ func (r *Reconciler) createOrUpdateMachineSetsAndSyncMachineDeploymentRevision(c
 	return nil
 }

-func getAnnotationChanges(originalMS *clusterv1.MachineSet, ms *clusterv1.MachineSet) []string {
-	changes := []string{}
-	if originalMS.Annotations[clusterv1.MachineSetMoveMachinesToMachineSetAnnotation] != ms.Annotations[clusterv1.MachineSetMoveMachinesToMachineSetAnnotation] {
-		if value, ok := ms.Annotations[clusterv1.MachineSetMoveMachinesToMachineSetAnnotation]; ok {
-			changes = append(changes, fmt.Sprintf("%s: %s", clusterv1.MachineSetMoveMachinesToMachineSetAnnotation, value))
-		} else {
-			changes = append(changes, fmt.Sprintf("%s removed", clusterv1.MachineSetMoveMachinesToMachineSetAnnotation))
-		}
-	}
-
-	if originalMS.Annotations[clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation] != ms.Annotations[clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation] {
-		if value, ok := ms.Annotations[clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation]; ok {
-			changes = append(changes, fmt.Sprintf("%s: %s", clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation, value))
-		} else {
-			changes = append(changes, fmt.Sprintf("%s removed", clusterv1.MachineSetReceiveMachinesFromMachineSetsAnnotation))
-		}
-	}
-
-	if originalMS.Annotations[clusterv1.AcknowledgedMoveAnnotation] != ms.Annotations[clusterv1.AcknowledgedMoveAnnotation] {
-		if value, ok := ms.Annotations[clusterv1.AcknowledgedMoveAnnotation]; ok {
-			changes = append(changes, fmt.Sprintf("%s: %s", clusterv1.AcknowledgedMoveAnnotation, value))
-		} else {
-			changes = append(changes, fmt.Sprintf("%s removed", clusterv1.AcknowledgedMoveAnnotation))
-		}
-	}
-
-	if originalMS.Annotations[clusterv1.DisableMachineCreateAnnotation] != ms.Annotations[clusterv1.DisableMachineCreateAnnotation] {
-		if value, ok := ms.Annotations[clusterv1.DisableMachineCreateAnnotation]; ok {
-			changes = append(changes, fmt.Sprintf("%s: %s", clusterv1.DisableMachineCreateAnnotation, value))
-		} else {
-			changes = append(changes, fmt.Sprintf("%s removed", clusterv1.DisableMachineCreateAnnotation))
-		}
+func machineSetNames(machineSets []*clusterv1.MachineSet) []string {
+	names := []string{}
+	for _, ms := range machineSets {
+		names = append(names, ms.Name)
 	}
-	return changes
+	sort.Strings(names)
+	return names
 }

 func (r *Reconciler) reconcileDelete(ctx context.Context, s *scope) error {
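Note: the controller change above relies on helpers added in other files of this commit that are not shown here, most notably the machineSetDiff value returned by p.getMachineSetDiff(ms) and the sortAndJoin helper used for the acknowledged/updating Machine names. The following is only a rough, hypothetical sketch of their shape, inferred from how they are used above; the import path/alias, field meanings, and all implementation details are assumptions, not the commit's actual code.

package machinedeployment

import (
	"sort"
	"strings"

	clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" // import path/alias assumed, as used elsewhere in the package
)

// machineSetDiff (hypothetical shape) describes what the rollout planner changed
// on a single MachineSet, so the new log lines can explain the change.
type machineSetDiff struct {
	OriginalMS       *clusterv1.MachineSet // nil when the MachineSet is being created
	OriginalReplicas int32                 // spec.replicas before the planner ran
	DesiredReplicas  int32                 // spec.replicas the planner wants
	Type             string                // used in messages like "Scaled down %s MachineSet"
	Reason           string                // why the planner changed this MachineSet
	OtherChanges     string                // non-replica changes, e.g. annotation updates
	Summary          string                // one-line overview, aggregated into the "machineSets" log value
}

// sortAndJoin (hypothetical) renders a deterministic, comma-separated list of names.
func sortAndJoin(names []string) string {
	sorted := append([]string{}, names...)
	sort.Strings(sorted)
	return strings.Join(sorted, ",")
}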

internal/controllers/machinedeployment/machinedeployment_rollout_ondelete.go

Lines changed: 2 additions & 0 deletions

@@ -111,6 +111,7 @@ func (p *rolloutPlanner) reconcileOldMachineSetsOnDelete(ctx context.Context) {
 		scaleDownCount := max(ptr.Deref(oldMS.Spec.Replicas, 0)-ptr.Deref(oldMS.Status.Replicas, 0), 0)
 		if scaleDownCount > 0 {
 			newScaleIntent := max(ptr.Deref(oldMS.Spec.Replicas, 0)-scaleDownCount, 0)
+			p.addNote(oldMS, "scale down to align to existing Machines")
 			log.V(5).Info(fmt.Sprintf("Setting scale down intent for MachineSet %s to %d replicas (-%d)", oldMS.Name, newScaleIntent, scaleDownCount), "MachineSet", klog.KObj(oldMS))
 			p.scaleIntents[oldMS.Name] = newScaleIntent

@@ -138,6 +139,7 @@ func (p *rolloutPlanner) reconcileOldMachineSetsOnDelete(ctx context.Context) {
 		scaleDownCount := min(scaleIntent, totalScaleDownCount)
 		if scaleDownCount > 0 {
 			newScaleIntent := max(ptr.Deref(oldMS.Spec.Replicas, 0)-scaleDownCount, 0)
+			p.addNote(oldMS, "scale down to align MachineSet spec.replicas to MachineDeployment spec.replicas")
 			log.V(5).Info(fmt.Sprintf("Setting scale down intent for MachineSet %s to %d replicas (-%d)", oldMS.Name, newScaleIntent, scaleDownCount), "MachineSet", klog.KObj(oldMS))
 			p.scaleIntents[oldMS.Name] = newScaleIntent

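Note: the two added p.addNote calls use a notes mechanism on the rollout planner introduced elsewhere in this commit and not shown in this excerpt. From these call sites, and from the test below asserting planner.notes as a map[string][]string, a minimal hypothetical sketch could look like the following; the stand-in struct, the nil-initialization, and the comments are assumptions, not the commit's actual code.

package machinedeployment

import (
	clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" // import path/alias assumed
)

// rolloutPlanner here is only a stand-in for the real planner struct, which has
// many more fields; only the hypothetical notes field is sketched.
type rolloutPlanner struct {
	// notes collects, per MachineSet name, short human-readable explanations of
	// why the planner decided to change that MachineSet; they are surfaced in
	// the rollout logs when the changes are applied.
	notes map[string][]string
}

// addNote records an explanation for a planned change to the given MachineSet.
func (p *rolloutPlanner) addNote(ms *clusterv1.MachineSet, note string) {
	if p.notes == nil {
		p.notes = map[string][]string{}
	}
	p.notes[ms.Name] = append(p.notes[ms.Name], note)
}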

internal/controllers/machinedeployment/machinedeployment_rollout_ondelete_test.go

Lines changed: 24 additions & 11 deletions

@@ -20,7 +20,6 @@ import (
 	"context"
 	"fmt"
 	"math/rand"
-	"os"
 	"strings"
 	"testing"
 	"time"
@@ -29,7 +28,6 @@ import (
 	. "github.com/onsi/gomega"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/klog/v2"
-	"k8s.io/klog/v2/textlogger"
 	"k8s.io/utils/ptr"
 	ctrl "sigs.k8s.io/controller-runtime"

@@ -45,6 +43,7 @@ func TestReconcileOldMachineSetsOnDelete(t *testing.T) {
 		newMachineSet     *clusterv1.MachineSet
 		oldMachineSets    []*clusterv1.MachineSet
 		expectScaleIntent map[string]int32
+		expectedNotes     map[string][]string
 		error             error
 	}{
 		{
@@ -98,6 +97,7 @@ func TestReconcileOldMachineSetsOnDelete(t *testing.T) {
 			expectScaleIntent: map[string]int32{
 				// no new intent for old MS
 			},
+			expectedNotes: map[string][]string{},
 		},
 		{
 			name: "OnDelete strategy: scale down oldMSs when machines have been deleted",
@@ -152,6 +152,10 @@ func TestReconcileOldMachineSetsOnDelete(t *testing.T) {
 				"ms1": 2, // scale down by one, one machines has been deleted
 				"ms2": 0, // scale down by one, one machines has been deleted
 			},
+			expectedNotes: map[string][]string{
+				"ms1": {"scale down to align to existing Machines"},
+				"ms2": {"scale down to align to existing Machines"},
+			},
 		},
 		{
 			name: "OnDelete strategy: scale down oldMSs when md has been scaled down",
@@ -206,6 +210,10 @@ func TestReconcileOldMachineSetsOnDelete(t *testing.T) {
 				"ms1": 0, // scale down by one, 1 exceeding machine removed from ms1
 				"ms2": 1, // scale down by one, 1 exceeding machine removed from ms2
 			},
+			expectedNotes: map[string][]string{
+				"ms1": {"scale down to align MachineSet spec.replicas to MachineDeployment spec.replicas"},
+				"ms2": {"scale down to align MachineSet spec.replicas to MachineDeployment spec.replicas"},
+			},
 		},
 		{
 			name: "OnDelete strategy: scale down oldMSs when md has been scaled down, keeps into account newMS scale intent",
@@ -263,6 +271,10 @@ func TestReconcileOldMachineSetsOnDelete(t *testing.T) {
 				"ms1": 0, // scale down by one, 1 exceeding machine removed from ms1
 				"ms2": 1, // scale down by one, 1 exceeding machine removed from ms2
 			},
+			expectedNotes: map[string][]string{
+				"ms1": {"scale down to align MachineSet spec.replicas to MachineDeployment spec.replicas"},
+				"ms2": {"scale down to align MachineSet spec.replicas to MachineDeployment spec.replicas"},
+			},
 		},
 		{
 			name: "OnDelete strategy: scale down oldMSs when md has been scaled down keeps into account deleted machines",
@@ -317,6 +329,10 @@ func TestReconcileOldMachineSetsOnDelete(t *testing.T) {
 				"ms1": 0, // scale down by one, one machines has been deleted
 				"ms2": 1, // scale down by one, 1 exceeding machine removed from ms1
 			},
+			expectedNotes: map[string][]string{
+				"ms1": {"scale down to align to existing Machines"},
+				"ms2": {"scale down to align MachineSet spec.replicas to MachineDeployment spec.replicas"},
+			},
 		},
 	}
 	for _, tt := range testCases {
@@ -330,6 +346,7 @@ func TestReconcileOldMachineSetsOnDelete(t *testing.T) {

 			planner.reconcileOldMachineSetsOnDelete(ctx)
 			g.Expect(planner.scaleIntents).To(Equal(tt.expectScaleIntent), "unexpected scaleIntents")
+			g.Expect(planner.notes).To(Equal(tt.expectedNotes), "unexpected notes")
 		})
 	}
 }
@@ -390,9 +407,6 @@ type onDeleteSequenceTestCase struct {
 }

 func Test_OnDeleteSequences(t *testing.T) {
-	ctx := context.Background()
-	ctx = ctrl.LoggerInto(ctx, textlogger.NewLogger(textlogger.NewConfig(textlogger.Verbosity(5), textlogger.Output(os.Stdout))))
-
 	tests := []onDeleteSequenceTestCase{
 		{ // delete 1
 			name: "3 replicas, maxUserUnavailable 1",
@@ -553,7 +567,7 @@ func runOnDeleteTestCase(ctx context.Context, t *testing.T, tt onDeleteSequenceT
 	desired := computeDesiredRolloutScope(current, tt.desiredMachineNames)

 	// Log initial state
-	fLogger.Logf("[Test] Initial state\n%s", current)
+	fLogger.Logf("[Test] Initial state\n%s", current.summary())
 	random := ""
 	if tt.randomControllerOrder {
 		random = fmt.Sprintf(", random(%d)", tt.seed)
@@ -577,7 +591,6 @@ func runOnDeleteTestCase(ctx context.Context, t *testing.T, tt onDeleteSequenceT
 		task := taskList[taskID]
 		if task == "md" {
 			fLogger.Logf("[MD controller] Iteration %d, Reconcile md", i)
-			fLogger.Logf("[MD controller] - Input to rollout planner\n%s", current)

 			// Running a small subset of MD reconcile (the rollout logic and a bit of setReplicas)
 			p := newRolloutPlanner(nil, nil)
@@ -618,7 +631,7 @@ func runOnDeleteTestCase(ctx context.Context, t *testing.T, tt onDeleteSequenceT
 			current.machineDeployment.Status.AvailableReplicas = mdutil.GetAvailableReplicaCountForMachineSets(current.machineSets)

 			// Log state after this reconcile
-			fLogger.Logf("[MD controller] - Result of rollout planner\n%s", current)
+			fLogger.Logf("[MD controller] - Result of rollout planner\n%s", current.rolloutPlannerResultSummary(p))
 		}

 		// Simulate the user deleting machines not upToDate; in order to make this realistic deletion will be performed
@@ -666,7 +679,7 @@ func runOnDeleteTestCase(ctx context.Context, t *testing.T, tt onDeleteSequenceT
 		// Run mutators faking MS controllers
 		for _, ms := range current.machineSets {
 			if ms.Name == task {
-				fLogger.Logf("[MS controller] Iteration %d, Reconcile %s, %s", i, ms.Name, msLog(ms, current.machineSetMachines[ms.Name]))
+				fLogger.Logf("[MS controller] Iteration %d, Reconcile %s", i, current.machineSetSummary(ms))
 				err := machineSetControllerMutator(fLogger, ms, current)
 				g.Expect(err).ToNot(HaveOccurred())
 				break
@@ -686,7 +699,7 @@ func runOnDeleteTestCase(ctx context.Context, t *testing.T, tt onDeleteSequenceT

 		// Check if we are at the desired state
 		if current.Equal(desired) {
-			fLogger.Logf("[Test] Final state\n%s", current)
+			fLogger.Logf("[Test] Final state\n%s", current.summary())
 			break
 		}

@@ -696,7 +709,7 @@ func runOnDeleteTestCase(ctx context.Context, t *testing.T, tt onDeleteSequenceT
 		// NOTE: the following can be used to set a breakpoint for debugging why the system is not reaching desired state after maxIterations (to check what is not yet equal)
 		current.Equal(desired)
 		// Log desired state we never reached
-		fLogger.Logf("[Test] Desired state\n%s", desired)
+		fLogger.Logf("[Test] Desired state\n%s", desired.summary())
 		g.Fail(fmt.Sprintf("Failed to reach desired state in %d iterations", maxIterations))
 	}
 }
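Note: the reworked test logging calls current.summary(), current.machineSetSummary(ms), and current.rolloutPlannerResultSummary(p), which are defined in other files of this commit and are not part of this excerpt. Purely as an illustration of the kind of per-MachineSet overview such helpers can produce, here is a small self-contained sketch using stand-in types; every type, field, and format string below is an assumption, not the commit's actual code.

package main

import (
	"fmt"
	"sort"
	"strings"
)

// machineSetState and rolloutState are illustrative stand-ins for the test scope
// used by the real test (which tracks actual MachineSet and Machine objects).
type machineSetState struct {
	name           string
	specReplicas   int32
	statusReplicas int32
	machineNames   []string
}

type rolloutState struct {
	machineSets []machineSetState
}

// machineSetSummary renders one line per MachineSet: name, desired vs observed replicas, owned Machines.
func (r rolloutState) machineSetSummary(ms machineSetState) string {
	names := append([]string{}, ms.machineNames...)
	sort.Strings(names)
	return fmt.Sprintf("%s: spec.replicas=%d, status.replicas=%d, machines=[%s]",
		ms.name, ms.specReplicas, ms.statusReplicas, strings.Join(names, ", "))
}

// summary aggregates the per-MachineSet lines into a multi-line overview.
func (r rolloutState) summary() string {
	lines := make([]string, 0, len(r.machineSets))
	for _, ms := range r.machineSets {
		lines = append(lines, r.machineSetSummary(ms))
	}
	return strings.Join(lines, "\n")
}

func main() {
	current := rolloutState{machineSets: []machineSetState{
		{name: "ms1", specReplicas: 2, statusReplicas: 3, machineNames: []string{"m3", "m1", "m2"}},
		{name: "ms2", specReplicas: 1, statusReplicas: 1, machineNames: []string{"m4"}},
	}}
	fmt.Printf("[Test] Initial state\n%s\n", current.summary())
}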
