From c8cf42a956d5a6a43cd2db13cad356e3c25053ff Mon Sep 17 00:00:00 2001 From: Michael Weibel Date: Mon, 7 Oct 2024 16:22:08 +0200 Subject: [PATCH] fix(machinepool): clear VirtualMachineProfile if no model/custom data update Scale up/down with MachinePool always updates the VM image model to use. This change sets the VirtualMachineProfile to nil when no change is necessary and ensures therefore less churn on scale up/downs leading to model updates which may require manual fixing Fixes some linting errors related to unnecessary params, too. --- azure/scope/machinepool.go | 26 ++-- azure/scope/machinepool_test.go | 149 +++++++++++++++++++++ azure/services/scalesets/scalesets_test.go | 4 +- azure/services/scalesets/spec.go | 12 ++ azure/services/scalesets/spec_test.go | 103 ++++++++++---- 5 files changed, 254 insertions(+), 40 deletions(-) diff --git a/azure/scope/machinepool.go b/azure/scope/machinepool.go index e5cbdc7452e..8637b0c14ce 100644 --- a/azure/scope/machinepool.go +++ b/azure/scope/machinepool.go @@ -81,6 +81,7 @@ type ( capiMachinePoolPatchHelper *patch.Helper vmssState *azure.VMSS cache *MachinePoolCache + skuCache *resourceskus.Cache } // NodeStatus represents the status of a Kubernetes node. @@ -164,12 +165,15 @@ func (m *MachinePoolScope) InitMachinePoolCache(ctx context.Context) error { return err } - skuCache, err := resourceskus.GetCache(m, m.Location()) - if err != nil { - return err + if m.skuCache == nil { + skuCache, err := resourceskus.GetCache(m, m.Location()) + if err != nil { + return errors.Wrap(err, "failed to init resourceskus cache") + } + m.skuCache = skuCache } - m.cache.VMSKU, err = skuCache.Get(ctx, m.AzureMachinePool.Spec.Template.VMSize, resourceskus.VirtualMachines) + m.cache.VMSKU, err = m.skuCache.Get(ctx, m.AzureMachinePool.Spec.Template.VMSize, resourceskus.VirtualMachines) if err != nil { return errors.Wrapf(err, "failed to get VM SKU %s in compute api", m.AzureMachinePool.Spec.Template.VMSize) } @@ -222,10 +226,8 @@ func (m *MachinePoolScope) ScaleSetSpec(ctx context.Context) azure.ResourceSpecG } if m.cache != nil { - if m.HasReplicasExternallyManaged(ctx) { - spec.ShouldPatchCustomData = m.cache.HasBootstrapDataChanges - log.V(4).Info("has bootstrap data changed?", "shouldPatchCustomData", spec.ShouldPatchCustomData) - } + spec.ShouldPatchCustomData = m.cache.HasBootstrapDataChanges + log.V(4).Info("has bootstrap data changed?", "shouldPatchCustomData", spec.ShouldPatchCustomData) spec.VMSSExtensionSpecs = m.VMSSExtensionSpecs() spec.SKU = m.cache.VMSKU spec.VMImage = m.cache.VMImage @@ -705,11 +707,9 @@ func (m *MachinePoolScope) Close(ctx context.Context) error { if err := m.updateReplicasAndProviderIDs(ctx); err != nil { return errors.Wrap(err, "failed to update replicas and providerIDs") } - if m.HasReplicasExternallyManaged(ctx) { - if err := m.updateCustomDataHash(ctx); err != nil { - // ignore errors to calculating the custom data hash since it's not absolutely crucial. - log.V(4).Error(err, "unable to update custom data hash, ignoring.") - } + if err := m.updateCustomDataHash(ctx); err != nil { + // ignore errors to calculating the custom data hash since it's not absolutely crucial. + log.V(4).Error(err, "unable to update custom data hash, ignoring.") } } diff --git a/azure/scope/machinepool_test.go b/azure/scope/machinepool_test.go index 895d5c668a8..f9b4e2a9f54 100644 --- a/azure/scope/machinepool_test.go +++ b/azure/scope/machinepool_test.go @@ -18,13 +18,17 @@ package scope import ( "context" + "crypto/sha256" + "encoding/base64" "fmt" + "io" "reflect" "testing" "time" "github.com/Azure/azure-sdk-for-go/sdk/azidentity" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/authorization/armauthorization/v2" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5" azureautorest "github.com/Azure/go-autorest/autorest/azure" "github.com/Azure/go-autorest/autorest/azure/auth" . "github.com/onsi/gomega" @@ -1576,3 +1580,148 @@ func TestMachinePoolScope_applyAzureMachinePoolMachines(t *testing.T) { }) } } + +func TestBootstrapDataChanges(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + scheme := runtime.NewScheme() + _ = clusterv1.AddToScheme(scheme) + _ = infrav1.AddToScheme(scheme) + _ = infrav1exp.AddToScheme(scheme) + _ = corev1.AddToScheme(scheme) + + var ( + g = NewWithT(t) + mockCtrl = gomock.NewController(t) + cb = fake.NewClientBuilder().WithScheme(scheme) + cluster = &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster1", + Namespace: "default", + }, + Spec: clusterv1.ClusterSpec{ + InfrastructureRef: &corev1.ObjectReference{ + Name: "azCluster1", + }, + }, + Status: clusterv1.ClusterStatus{ + InfrastructureReady: true, + }, + } + azureCluster = &infrav1.AzureCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "azCluster1", + Namespace: "default", + }, + Spec: infrav1.AzureClusterSpec{ + AzureClusterClassSpec: infrav1.AzureClusterClassSpec{ + Location: "test", + }, + }, + } + mp = &expv1.MachinePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mp1", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{ + { + Name: "cluster1", + Kind: "Cluster", + APIVersion: clusterv1.GroupVersion.String(), + }, + }, + }, + Spec: expv1.MachinePoolSpec{ + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + Bootstrap: clusterv1.Bootstrap{ + DataSecretName: ptr.To("mp-secret"), + }, + Version: ptr.To("v1.31.0"), + }, + }, + }, + } + bootstrapData = "test" + bootstrapDataHash = sha256Hash(base64.StdEncoding.EncodeToString([]byte(bootstrapData))) + bootstrapSecret = corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mp-secret", + Namespace: "default", + }, + Data: map[string][]byte{"value": []byte(bootstrapData)}, + } + amp = &infrav1exp.AzureMachinePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "amp1", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{ + { + Name: "mp1", + Kind: "MachinePool", + APIVersion: expv1.GroupVersion.String(), + }, + }, + Annotations: map[string]string{ + azure.CustomDataHashAnnotation: fmt.Sprintf("%x", bootstrapDataHash), + }, + }, + Spec: infrav1exp.AzureMachinePoolSpec{ + Template: infrav1exp.AzureMachinePoolMachineTemplate{ + Image: &infrav1.Image{}, + NetworkInterfaces: []infrav1.NetworkInterface{ + { + SubnetName: "test", + }, + }, + VMSize: "VM_SIZE", + }, + }, + } + vmssState = &azure.VMSS{} + ) + defer mockCtrl.Finish() + + s := &MachinePoolScope{ + client: cb. + WithObjects(&bootstrapSecret). + Build(), + ClusterScoper: &ClusterScope{ + Cluster: cluster, + AzureCluster: azureCluster, + }, + skuCache: resourceskus.NewStaticCache([]armcompute.ResourceSKU{ + { + Name: ptr.To("VM_SIZE"), + }, + }, "test"), + MachinePool: mp, + AzureMachinePool: amp, + vmssState: vmssState, + } + + g.Expect(s.InitMachinePoolCache(ctx)).NotTo(HaveOccurred()) + + spec := s.ScaleSetSpec(ctx) + sSpec := spec.(*scalesets.ScaleSetSpec) + g.Expect(sSpec.ShouldPatchCustomData).To(Equal(false)) + + amp.Annotations[azure.CustomDataHashAnnotation] = "old" + + // reset cache to be able to build up the cache again + s.cache = nil + g.Expect(s.InitMachinePoolCache(ctx)).NotTo(HaveOccurred()) + + spec = s.ScaleSetSpec(ctx) + sSpec = spec.(*scalesets.ScaleSetSpec) + g.Expect(sSpec.ShouldPatchCustomData).To(Equal(true)) +} + +func sha256Hash(text string) []byte { + h := sha256.New() + _, err := io.WriteString(h, text) + if err != nil { + panic(err) + } + return h.Sum(nil) +} diff --git a/azure/services/scalesets/scalesets_test.go b/azure/services/scalesets/scalesets_test.go index 36cc4657de4..15cb9e5e43b 100644 --- a/azure/services/scalesets/scalesets_test.go +++ b/azure/services/scalesets/scalesets_test.go @@ -741,8 +741,8 @@ func newWindowsVMSSSpec() ScaleSetSpec { return vmss } -func newDefaultExistingVMSS(vmSize string) armcompute.VirtualMachineScaleSet { - vmss := newDefaultVMSS(vmSize) +func newDefaultExistingVMSS() armcompute.VirtualMachineScaleSet { + vmss := newDefaultVMSS("VM_SIZE") vmss.ID = ptr.To("subscriptions/1234/resourceGroups/my_resource_group/providers/Microsoft.Compute/virtualMachines/my-vm") return vmss } diff --git a/azure/services/scalesets/spec.go b/azure/services/scalesets/spec.go index e75d9ac62e2..3781d987082 100644 --- a/azure/services/scalesets/spec.go +++ b/azure/services/scalesets/spec.go @@ -93,6 +93,9 @@ func (s *ScaleSetSpec) OwnerResourceName() string { } func (s *ScaleSetSpec) existingParameters(ctx context.Context, existing interface{}) (parameters interface{}, err error) { + ctx, log, done := tele.StartSpanWithLogger(ctx, "scalesets.ScaleSetSpec.existingParameters") + defer done() + existingVMSS, ok := existing.(armcompute.VirtualMachineScaleSet) if !ok { return nil, errors.Errorf("%T is not an armcompute.VirtualMachineScaleSet", existing) @@ -132,6 +135,15 @@ func (s *ScaleSetSpec) existingParameters(ctx context.Context, existing interfac return nil, nil } + // if there are no model changes and no change in custom data, remove VirtualMachineProfile to avoid unnecessary VMSS model + // updates. + if !hasModelChanges && !s.ShouldPatchCustomData { + log.V(4).Info("removing virtual machine profile from parameters", "hasModelChanges", hasModelChanges, "shouldPatchCustomData", s.ShouldPatchCustomData) + vmss.Properties.VirtualMachineProfile = nil + } else { + log.V(4).Info("has changes, not removing virtual machine profile from parameters", "hasModelChanges", hasModelChanges, "shouldPatchCustomData", s.ShouldPatchCustomData) + } + return vmss, nil } diff --git a/azure/services/scalesets/spec_test.go b/azure/services/scalesets/spec_test.go index de28224c2f3..b10515132b5 100644 --- a/azure/services/scalesets/spec_test.go +++ b/azure/services/scalesets/spec_test.go @@ -33,26 +33,28 @@ import ( ) var ( - defaultSpec, defaultVMSS = getDefaultVMSS() - windowsSpec, windowsVMSS = getDefaultWindowsVMSS() - acceleratedNetworkingSpec, acceleratedNetworkingVMSS = getAcceleratedNetworkingVMSS() - customSubnetSpec, customSubnetVMSS = getCustomSubnetVMSS() - customNetworkingSpec, customNetworkingVMSS = getCustomNetworkingVMSS() - spotVMSpec, spotVMVMSS = getSpotVMVMSS() - ephemeralSpec, ephemeralVMSS = getEPHVMSSS() - resourceDiskSpec, resourceDiskVMSS = getResourceDiskVMSS() - evictionSpec, evictionVMSS = getEvictionPolicyVMSS() - maxPriceSpec, maxPriceVMSS = getMaxPriceVMSS() - encryptionSpec, encryptionVMSS = getEncryptionVMSS() - userIdentitySpec, userIdentityVMSS = getUserIdentityVMSS() - hostEncryptionSpec, hostEncryptionVMSS = getHostEncryptionVMSS() - hostEncryptionUnsupportedSpec = getHostEncryptionUnsupportedSpec() - ephemeralReadSpec, ephemeralReadVMSS = getEphemeralReadOnlyVMSS() - defaultExistingSpec, defaultExistingVMSS, defaultExistingVMSSClone = getExistingDefaultVMSS() - userManagedStorageAccountDiagnosticsSpec, userManagedStorageAccountDiagnosticsVMSS = getUserManagedAndStorageAcccountDiagnosticsVMSS() - managedDiagnosticsSpec, managedDiagnoisticsVMSS = getManagedDiagnosticsVMSS() - disabledDiagnosticsSpec, disabledDiagnosticsVMSS = getDisabledDiagnosticsVMSS() - nilDiagnosticsProfileSpec, nilDiagnosticsProfileVMSS = getNilDiagnosticsProfileVMSS() + defaultSpec, defaultVMSS = getDefaultVMSS() + windowsSpec, windowsVMSS = getDefaultWindowsVMSS() + acceleratedNetworkingSpec, acceleratedNetworkingVMSS = getAcceleratedNetworkingVMSS() + customSubnetSpec, customSubnetVMSS = getCustomSubnetVMSS() + customNetworkingSpec, customNetworkingVMSS = getCustomNetworkingVMSS() + spotVMSpec, spotVMVMSS = getSpotVMVMSS() + ephemeralSpec, ephemeralVMSS = getEPHVMSSS() + resourceDiskSpec, resourceDiskVMSS = getResourceDiskVMSS() + evictionSpec, evictionVMSS = getEvictionPolicyVMSS() + maxPriceSpec, maxPriceVMSS = getMaxPriceVMSS() + encryptionSpec, encryptionVMSS = getEncryptionVMSS() + userIdentitySpec, userIdentityVMSS = getUserIdentityVMSS() + hostEncryptionSpec, hostEncryptionVMSS = getHostEncryptionVMSS() + hostEncryptionUnsupportedSpec = getHostEncryptionUnsupportedSpec() + ephemeralReadSpec, ephemeralReadVMSS = getEphemeralReadOnlyVMSS() + defaultExistingSpec, defaultExistingVMSS, defaultExistingVMSSClone = getExistingDefaultVMSS() + defaultExistingSpecOnlyCapacityChange, defaultExistingVMSSOnlyCapacityChange, defaultExistingVMSSResultOnlyCapacityChange = getExistingDefaultVMSSOnlyCapacityChange() + defaultExistingSpecOnlyCapacityChangeWithCustomDataChange, defaultExistingVMSSOnlyCapacityChangeWithCustomDataChange, defaultExistingVMSSResultOnlyCapacityChangeWithCustomDataChange = getExistingDefaultVMSSOnlyCapacityChangeWithCustomDataChange() + userManagedStorageAccountDiagnosticsSpec, userManagedStorageAccountDiagnosticsVMSS = getUserManagedAndStorageAcccountDiagnosticsVMSS() + managedDiagnosticsSpec, managedDiagnoisticsVMSS = getManagedDiagnosticsVMSS() + disabledDiagnosticsSpec, disabledDiagnosticsVMSS = getDisabledDiagnosticsVMSS() + nilDiagnosticsProfileSpec, nilDiagnosticsProfileVMSS = getNilDiagnosticsProfileVMSS() ) func getDefaultVMSS() (ScaleSetSpec, armcompute.VirtualMachineScaleSet) { @@ -427,22 +429,57 @@ func getExistingDefaultVMSS() (s ScaleSetSpec, existing armcompute.VirtualMachin }, } - existingVMSS := newDefaultExistingVMSS("VM_SIZE") + existingVMSS := newDefaultExistingVMSS() existingVMSS.Properties.AdditionalCapabilities = &armcompute.AdditionalCapabilities{UltraSSDEnabled: ptr.To(true)} existingVMSS.SKU.Capacity = ptr.To[int64](2) - existingVMSS.Properties.AdditionalCapabilities = &armcompute.AdditionalCapabilities{UltraSSDEnabled: ptr.To(true)} - clone := newDefaultExistingVMSS("VM_SIZE") + clone := newDefaultExistingVMSS() clone.SKU.Capacity = ptr.To[int64](3) clone.Properties.AdditionalCapabilities = &armcompute.AdditionalCapabilities{UltraSSDEnabled: ptr.To(true)} clone.Properties.VirtualMachineProfile.NetworkProfile = nil clone.Properties.VirtualMachineProfile.StorageProfile.ImageReference.Version = ptr.To("2.0") - clone.Properties.VirtualMachineProfile.NetworkProfile = nil return spec, existingVMSS, clone } +func getExistingDefaultVMSSOnlyCapacityChange() (s ScaleSetSpec, existing armcompute.VirtualMachineScaleSet, result armcompute.VirtualMachineScaleSet) { + spec := newDefaultVMSSSpec() + spec.Capacity = 3 + + existingVMSS := newDefaultExistingVMSS() + + result = newDefaultExistingVMSS() + result.Properties.VirtualMachineProfile = nil + result.SKU.Capacity = ptr.To[int64](3) + + return spec, existingVMSS, result +} + +func getExistingDefaultVMSSOnlyCapacityChangeWithCustomDataChange() (s ScaleSetSpec, existing armcompute.VirtualMachineScaleSet, result armcompute.VirtualMachineScaleSet) { + spec := newDefaultVMSSSpec() + spec.Capacity = 3 + spec.DataDisks = append(spec.DataDisks, infrav1.DataDisk{ + NameSuffix: "my_disk_with_ultra_disks", + DiskSizeGB: 128, + Lun: ptr.To[int32](3), + ManagedDisk: &infrav1.ManagedDiskParameters{ + StorageAccountType: "UltraSSD_LRS", + }, + }) + spec.ShouldPatchCustomData = true + + existingVMSS := newDefaultExistingVMSS() + existingVMSS.Properties.AdditionalCapabilities = &armcompute.AdditionalCapabilities{UltraSSDEnabled: ptr.To(true)} + + result = newDefaultExistingVMSS() + result.SKU.Capacity = ptr.To[int64](3) + result.Properties.AdditionalCapabilities = &armcompute.AdditionalCapabilities{UltraSSDEnabled: ptr.To(true)} + result.Properties.VirtualMachineProfile.NetworkProfile = nil + + return spec, existingVMSS, result +} + func getUserManagedAndStorageAcccountDiagnosticsVMSS() (ScaleSetSpec, armcompute.VirtualMachineScaleSet) { storageURI := "https://fakeurl" spec := newDefaultVMSSSpec() @@ -712,6 +749,20 @@ func TestScaleSetParameters(t *testing.T) { expected: nilDiagnosticsProfileVMSS, expectedError: "", }, + { + name: "update for existing vmss with only capacity change", + spec: defaultExistingSpecOnlyCapacityChange, + existing: defaultExistingVMSSOnlyCapacityChange, + expected: defaultExistingVMSSResultOnlyCapacityChange, + expectedError: "", + }, + { + name: "update for existing vmss with only capacity change but should patch custom data", + spec: defaultExistingSpecOnlyCapacityChangeWithCustomDataChange, + existing: defaultExistingVMSSOnlyCapacityChangeWithCustomDataChange, + expected: defaultExistingVMSSResultOnlyCapacityChangeWithCustomDataChange, + expectedError: "", + }, } for _, tc := range testcases { t.Run(tc.name, func(t *testing.T) { @@ -731,7 +782,9 @@ func TestScaleSetParameters(t *testing.T) { if !ok { t.Fatalf("expected type VirtualMachineScaleSet, got %T", param) } - result.Properties.VirtualMachineProfile.OSProfile.AdminPassword = nil // Override this field as it's randomly generated. We can't set anything in tc.expected to match it. + if result.Properties.VirtualMachineProfile != nil { + result.Properties.VirtualMachineProfile.OSProfile.AdminPassword = nil // Override this field as it's randomly generated. We can't set anything in tc.expected to match it. + } if !reflect.DeepEqual(tc.expected, result) { t.Errorf("Diff between actual result and expected result:\n%s", cmp.Diff(result, tc.expected))