diff --git a/cluster-autoscaler/context/autoscaling_context.go b/cluster-autoscaler/context/autoscaling_context.go
index 0010d43c2414..71cdfc8b0586 100644
--- a/cluster-autoscaler/context/autoscaling_context.go
+++ b/cluster-autoscaler/context/autoscaling_context.go
@@ -17,6 +17,10 @@ limitations under the License.
 package context
 
 import (
+	"time"
+
+	appsv1 "k8s.io/api/apps/v1"
+	apiv1 "k8s.io/api/core/v1"
 	"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
 	"k8s.io/autoscaler/cluster-autoscaler/clusterstate"
 	"k8s.io/autoscaler/cluster-autoscaler/clusterstate/utils"
@@ -29,7 +33,9 @@ import (
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot"
 	draprovider "k8s.io/autoscaler/cluster-autoscaler/simulator/dynamicresources/provider"
 	"k8s.io/autoscaler/cluster-autoscaler/simulator/framework"
+	"k8s.io/autoscaler/cluster-autoscaler/utils/errors"
 	kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes"
+	"k8s.io/autoscaler/cluster-autoscaler/utils/taints"
 	"k8s.io/client-go/informers"
 	kube_client "k8s.io/client-go/kubernetes"
 	kube_record "k8s.io/client-go/tools/record"
@@ -61,10 +67,19 @@ type AutoscalingContext struct {
 	RemainingPdbTracker pdb.RemainingPdbTracker
 	// ClusterStateRegistry tracks the health of the node groups and pending scale-ups and scale-downs
 	ClusterStateRegistry *clusterstate.ClusterStateRegistry
-	//ProvisionRequstScaleUpMode indicates whether ClusterAutoscaler tries to accommodate ProvisioningRequest in current scale up iteration.
+	// ProvisioningRequestScaleUpMode indicates whether ClusterAutoscaler tries to accommodate ProvisioningRequest in the current scale up iteration.
 	ProvisioningRequestScaleUpMode bool
 	// DraProvider is the provider for dynamic resources allocation.
 	DraProvider *draprovider.Provider
+	// TemplateNodeInfoRegistry allows accessing template node infos.
+	TemplateNodeInfoRegistry TemplateNodeInfoRegistry
+}
+
+// TemplateNodeInfoRegistry is the interface for getting template node infos.
+type TemplateNodeInfoRegistry interface { + GetNodeInfo(id string) (*framework.NodeInfo, bool) + GetNodeInfos() map[string]*framework.NodeInfo + Recompute(autoscalingCtx *AutoscalingContext, nodes []*apiv1.Node, daemonsets []*appsv1.DaemonSet, taintConfig taints.TaintConfig, currentTime time.Time) errors.AutoscalerError } // AutoscalingKubeClients contains all Kubernetes API clients, @@ -112,19 +127,21 @@ func NewAutoscalingContext( remainingPdbTracker pdb.RemainingPdbTracker, clusterStateRegistry *clusterstate.ClusterStateRegistry, draProvider *draprovider.Provider, + templateNodeInfoRegistry TemplateNodeInfoRegistry, ) *AutoscalingContext { return &AutoscalingContext{ - AutoscalingOptions: options, - CloudProvider: cloudProvider, - AutoscalingKubeClients: *autoscalingKubeClients, - FrameworkHandle: fwHandle, - ClusterSnapshot: clusterSnapshot, - ExpanderStrategy: expanderStrategy, - ProcessorCallbacks: processorCallbacks, - DebuggingSnapshotter: debuggingSnapshotter, - RemainingPdbTracker: remainingPdbTracker, - ClusterStateRegistry: clusterStateRegistry, - DraProvider: draProvider, + AutoscalingOptions: options, + CloudProvider: cloudProvider, + AutoscalingKubeClients: *autoscalingKubeClients, + FrameworkHandle: fwHandle, + ClusterSnapshot: clusterSnapshot, + ExpanderStrategy: expanderStrategy, + ProcessorCallbacks: processorCallbacks, + DebuggingSnapshotter: debuggingSnapshotter, + RemainingPdbTracker: remainingPdbTracker, + ClusterStateRegistry: clusterStateRegistry, + DraProvider: draProvider, + TemplateNodeInfoRegistry: templateNodeInfoRegistry, } } diff --git a/cluster-autoscaler/core/static_autoscaler.go b/cluster-autoscaler/core/static_autoscaler.go index 60ac1fe9d5aa..adce2e39939e 100644 --- a/cluster-autoscaler/core/static_autoscaler.go +++ b/cluster-autoscaler/core/static_autoscaler.go @@ -43,6 +43,7 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/metrics" "k8s.io/autoscaler/cluster-autoscaler/observers/loopstart" ca_processors "k8s.io/autoscaler/cluster-autoscaler/processors" + "k8s.io/autoscaler/cluster-autoscaler/processors/nodeinfosprovider" "k8s.io/autoscaler/cluster-autoscaler/processors/status" "k8s.io/autoscaler/cluster-autoscaler/resourcequotas" "k8s.io/autoscaler/cluster-autoscaler/simulator" @@ -154,6 +155,9 @@ func NewStaticAutoscaler( } clusterStateRegistry := clusterstate.NewClusterStateRegistry(cloudProvider, clusterStateConfig, autoscalingKubeClients.LogRecorder, backoff, processors.NodeGroupConfigProcessor, processors.AsyncNodeGroupStateChecker) processorCallbacks := newStaticAutoscalerProcessorCallbacks() + + templateNodeInfoRegistry := nodeinfosprovider.NewTemplateNodeInfoRegistry(processors.TemplateNodeInfoProvider) + autoscalingCtx := ca_context.NewAutoscalingContext( opts, fwHandle, @@ -165,7 +169,8 @@ func NewStaticAutoscaler( debuggingSnapshotter, remainingPdbTracker, clusterStateRegistry, - draProvider) + draProvider, + templateNodeInfoRegistry) taintConfig := taints.NewTaintConfig(opts) processors.ScaleDownCandidatesNotifier.Register(clusterStateRegistry) @@ -358,15 +363,14 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr return typedErr.AddPrefix("failed to initialize RemainingPdbTracker: ") } - nodeInfosForGroups, autoscalerError := a.processors.TemplateNodeInfoProvider.Process(autoscalingCtx, allNodes, daemonsets, a.taintConfig, currentTime) - if autoscalerError != nil { - klog.Errorf("Failed to get node infos for groups: %v", autoscalerError) - return autoscalerError.AddPrefix("failed to build node infos for 
node groups: ") + if autoscalerError := a.AutoscalingContext.TemplateNodeInfoRegistry.Recompute(a.AutoscalingContext, allNodes, daemonsets, a.taintConfig, currentTime); autoscalerError != nil { + klog.Errorf("Failed to recompute template node infos: %v", autoscalerError) + return autoscalerError.AddPrefix("failed to recompute template node infos: ") } - a.DebuggingSnapshotter.SetTemplateNodes(nodeInfosForGroups) + a.DebuggingSnapshotter.SetTemplateNodes(autoscalingCtx.TemplateNodeInfoRegistry.GetNodeInfos()) - if typedErr := a.updateClusterState(allNodes, nodeInfosForGroups, currentTime); typedErr != nil { + if typedErr := a.updateClusterState(allNodes, currentTime); typedErr != nil { klog.Errorf("Failed to update cluster state: %v", typedErr) return typedErr } @@ -458,7 +462,7 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr // them and not trigger another scale-up. // The fake nodes are intentionally not added to the all nodes list, so that they are not considered as candidates for scale-down (which // doesn't make sense as they're not real). - err = a.addUpcomingNodesToClusterSnapshot(upcomingCounts, nodeInfosForGroups) + err = a.addUpcomingNodesToClusterSnapshot(upcomingCounts) if err != nil { klog.Errorf("Failed adding upcoming nodes to cluster snapshot: %v", err) return caerrors.ToAutoscalerError(caerrors.InternalError, err) @@ -563,7 +567,8 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr for i, nodeInfo := range allNodeInfos { nodes[i] = nodeInfo.Node() } - scaleUpStatus, typedErr = a.scaleUpOrchestrator.ScaleUp(unschedulablePodsToHelp, nodes, daemonsets, nodeInfosForGroups, false) + nodeInfos := a.AutoscalingContext.TemplateNodeInfoRegistry.GetNodeInfos() + scaleUpStatus, typedErr = a.scaleUpOrchestrator.ScaleUp(unschedulablePodsToHelp, nodes, daemonsets, nodeInfos, false) postScaleUp(scaleUpStart) } @@ -672,7 +677,8 @@ func (a *StaticAutoscaler) RunOnce(currentTime time.Time) caerrors.AutoscalerErr for i, nodeInfo := range allNodeInfos { nodes[i] = nodeInfo.Node() } - scaleUpStatus, typedErr = a.scaleUpOrchestrator.ScaleUpToNodeGroupMinSize(nodes, nodeInfosForGroups) + nodeInfos := a.AutoscalingContext.TemplateNodeInfoRegistry.GetNodeInfos() + scaleUpStatus, typedErr = a.scaleUpOrchestrator.ScaleUpToNodeGroupMinSize(nodes, nodeInfos) postScaleUp(scaleUpStart) } @@ -694,11 +700,12 @@ func (a *StaticAutoscaler) updateSoftDeletionTaints(allNodes []*apiv1.Node) { } } -func (a *StaticAutoscaler) addUpcomingNodesToClusterSnapshot(upcomingCounts map[string]int, nodeInfosForGroups map[string]*framework.NodeInfo) error { +func (a *StaticAutoscaler) addUpcomingNodesToClusterSnapshot(upcomingCounts map[string]int) error { nodeGroups := a.nodeGroupsById() upcomingNodeGroups := make(map[string]int) upcomingNodesFromUpcomingNodeGroups := 0 - upcomingNodeInfosPerNg, err := getUpcomingNodeInfos(upcomingCounts, nodeInfosForGroups) + nodeInfos := a.AutoscalingContext.TemplateNodeInfoRegistry.GetNodeInfos() + upcomingNodeInfosPerNg, err := getUpcomingNodeInfos(upcomingCounts, nodeInfos) if err != nil { return err } @@ -1043,8 +1050,9 @@ func filterNodesFromSelectedGroups(cp cloudprovider.CloudProvider, nodes ...*api return filtered } -func (a *StaticAutoscaler) updateClusterState(allNodes []*apiv1.Node, nodeInfosForGroups map[string]*framework.NodeInfo, currentTime time.Time) caerrors.AutoscalerError { - err := a.clusterStateRegistry.UpdateNodes(allNodes, nodeInfosForGroups, currentTime) +func (a *StaticAutoscaler) 
updateClusterState(allNodes []*apiv1.Node, currentTime time.Time) caerrors.AutoscalerError { + nodeInfos := a.AutoscalingContext.TemplateNodeInfoRegistry.GetNodeInfos() + err := a.clusterStateRegistry.UpdateNodes(allNodes, nodeInfos, currentTime) if err != nil { klog.Errorf("Failed to update node registry: %v", err) a.scaleDownPlanner.CleanUpUnneededNodes() diff --git a/cluster-autoscaler/core/static_autoscaler_test.go b/cluster-autoscaler/core/static_autoscaler_test.go index f70b3637e9b9..589b36154fed 100644 --- a/cluster-autoscaler/core/static_autoscaler_test.go +++ b/cluster-autoscaler/core/static_autoscaler_test.go @@ -2485,7 +2485,7 @@ func TestStaticAutoscalerUpcomingScaleDownCandidates(t *testing.T) { csr := clusterstate.NewClusterStateRegistry(provider, csrConfig, autoscalingCtx.LogRecorder, NewBackoff(), nodegroupconfig.NewDefaultNodeGroupConfigProcessor(config.NodeGroupAutoscalingOptions{MaxNodeProvisionTime: 15 * time.Minute, MaxNodeStartupTime: 15 * time.Minute}), processors.AsyncNodeGroupStateChecker) // Setting the Actuator is necessary for testing any scale-down logic, it shouldn't have anything to do in this test. - actuator := actuation.NewActuator(&autoscalingCtx, csr, deletiontracker.NewNodeDeletionTracker(0*time.Second), options.NodeDeleteOptions{}, nil, processorstest.NewTestProcessors(&autoscalingCtx).NodeGroupConfigProcessor) + actuator := actuation.NewActuator(&autoscalingCtx, csr, deletiontracker.NewNodeDeletionTracker(0*time.Second), options.NodeDeleteOptions{}, nil, processors.NodeGroupConfigProcessor) autoscalingCtx.ScaleDownActuator = actuator // Fake planner that keeps track of the scale-down candidates passed to UpdateClusterState. diff --git a/cluster-autoscaler/processors/customresources/dra_processor.go b/cluster-autoscaler/processors/customresources/dra_processor.go index defad5443556..ada3501a3e0a 100644 --- a/cluster-autoscaler/processors/customresources/dra_processor.go +++ b/cluster-autoscaler/processors/customresources/dra_processor.go @@ -19,6 +19,7 @@ package customresources import ( apiv1 "k8s.io/api/core/v1" resourceapi "k8s.io/api/resource/v1" + "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" @@ -57,11 +58,21 @@ func (p *DraCustomResourcesProcessor) FilterOutNodesWithUnreadyResources(autosca continue } - nodeInfo, err := ng.TemplateNodeInfo() - if err != nil { - newReadyNodes = append(newReadyNodes, node) - klog.Warningf("Failed to get template node info for node group %s with error: %v", ng.Id(), err) - continue + var nodeInfo *framework.NodeInfo + if autoscalingCtx.TemplateNodeInfoRegistry != nil { + // Prefer the cached template from the registry. This template may contain enrichments (e.g. + // custom DRA slices) that are not present in the raw CloudProvider template. 
+ if ni, found := autoscalingCtx.TemplateNodeInfoRegistry.GetNodeInfo(ng.Id()); found { + nodeInfo = ni + } + } + if nodeInfo == nil { + nodeInfo, err = ng.TemplateNodeInfo() + if err != nil { + newReadyNodes = append(newReadyNodes, node) + klog.Warningf("Failed to get template node info for node group %s with error: %v", ng.Id(), err) + continue + } } nodeResourcesSlices, _ := draSnapshot.NodeResourceSlices(node.Name) diff --git a/cluster-autoscaler/processors/customresources/dra_processor_test.go b/cluster-autoscaler/processors/customresources/dra_processor_test.go index 59639791801b..b15e6e5cf5c2 100644 --- a/cluster-autoscaler/processors/customresources/dra_processor_test.go +++ b/cluster-autoscaler/processors/customresources/dra_processor_test.go @@ -21,11 +21,14 @@ import ( "testing" "time" + appsv1 "k8s.io/api/apps/v1" resourceapi "k8s.io/api/resource/v1" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/store" "k8s.io/autoscaler/cluster-autoscaler/simulator/clustersnapshot/testsnapshot" drasnapshot "k8s.io/autoscaler/cluster-autoscaler/simulator/dynamicresources/snapshot" "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" + "k8s.io/autoscaler/cluster-autoscaler/utils/errors" + "k8s.io/autoscaler/cluster-autoscaler/utils/taints" "github.com/stretchr/testify/assert" apiv1 "k8s.io/api/core/v1" @@ -35,12 +38,36 @@ import ( utils "k8s.io/autoscaler/cluster-autoscaler/utils/test" ) +type mockTemplateNodeInfoRegistry struct { + nodeInfos map[string]*framework.NodeInfo +} + +func newMockTemplateNodeInfoRegistry(nodeInfos map[string]*framework.NodeInfo) *mockTemplateNodeInfoRegistry { + return &mockTemplateNodeInfoRegistry{ + nodeInfos: nodeInfos, + } +} + +func (m *mockTemplateNodeInfoRegistry) GetNodeInfo(id string) (*framework.NodeInfo, bool) { + nodeInfo, found := m.nodeInfos[id] + return nodeInfo, found +} + +func (m *mockTemplateNodeInfoRegistry) GetNodeInfos() map[string]*framework.NodeInfo { + return m.nodeInfos +} + +func (m *mockTemplateNodeInfoRegistry) Recompute(_ *ca_context.AutoscalingContext, _ []*apiv1.Node, _ []*appsv1.DaemonSet, _ taints.TaintConfig, _ time.Time) errors.AutoscalerError { + return nil +} + func TestFilterOutNodesWithUnreadyDRAResources(t *testing.T) { testCases := map[string]struct { nodeGroupsAllNodes map[string][]*apiv1.Node nodeGroupsTemplatesSlices map[string][]*resourceapi.ResourceSlice nodesSlices map[string][]*resourceapi.ResourceSlice expectedNodesReadiness map[string]bool + registryNodeInfos map[string]*framework.NodeInfo }{ "1 DRA node group all totally ready": { nodeGroupsAllNodes: map[string][]*apiv1.Node{ @@ -306,6 +333,29 @@ func TestFilterOutNodesWithUnreadyDRAResources(t *testing.T) { "node_7": true, }, }, + "Custom DRA driver retrieved via cached template node info": { + nodeGroupsAllNodes: map[string][]*apiv1.Node{ + "ng1": { + buildTestNode("node_1", true), + buildTestNode("node_2", true), + }, + }, + nodeGroupsTemplatesSlices: map[string][]*resourceapi.ResourceSlice{}, + registryNodeInfos: map[string]*framework.NodeInfo{ + "ng1": framework.NewNodeInfo( + buildTestNode("ng1_template", true), + createNodeResourceSlices("ng1_template", []int{1}), + ), + }, + nodesSlices: map[string][]*resourceapi.ResourceSlice{ + "node_1": createNodeResourceSlices("node_1", []int{1}), + "node_2": {}, + }, + expectedNodesReadiness: map[string]bool{ + "node_1": true, + "node_2": false, + }, + }, } for tcName, tc := range testCases { @@ -336,7 +386,11 @@ func TestFilterOutNodesWithUnreadyDRAResources(t *testing.T) { 
clusterSnapshotStore.SetClusterState([]*apiv1.Node{}, []*apiv1.Pod{}, draSnapshot) clusterSnapshot, _, _ := testsnapshot.NewCustomTestSnapshotAndHandle(clusterSnapshotStore) - autoscalingCtx := &ca_context.AutoscalingContext{CloudProvider: provider, ClusterSnapshot: clusterSnapshot} + autoscalingCtx := &ca_context.AutoscalingContext{ + CloudProvider: provider, + ClusterSnapshot: clusterSnapshot, + TemplateNodeInfoRegistry: newMockTemplateNodeInfoRegistry(tc.registryNodeInfos), + } processor := DraCustomResourcesProcessor{} newAllNodes, newReadyNodes := processor.FilterOutNodesWithUnreadyResources(autoscalingCtx, initialAllNodes, initialReadyNodes, draSnapshot) diff --git a/cluster-autoscaler/processors/nodeinfosprovider/template_node_info_registry.go b/cluster-autoscaler/processors/nodeinfosprovider/template_node_info_registry.go new file mode 100644 index 000000000000..3e85ea8344fa --- /dev/null +++ b/cluster-autoscaler/processors/nodeinfosprovider/template_node_info_registry.go @@ -0,0 +1,82 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodeinfosprovider + +import ( + "maps" + "sync" + "time" + + appsv1 "k8s.io/api/apps/v1" + apiv1 "k8s.io/api/core/v1" + + ca_context "k8s.io/autoscaler/cluster-autoscaler/context" + "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" + "k8s.io/autoscaler/cluster-autoscaler/utils/errors" + "k8s.io/autoscaler/cluster-autoscaler/utils/taints" +) + +// TemplateNodeInfoRegistry is a component that stores and exposes template NodeInfos. +// It is updated once per autoscaling loop iteration via Recompute() and provides a consistent view of node templates to all processors. +type TemplateNodeInfoRegistry struct { + processor TemplateNodeInfoProvider + nodeInfos map[string]*framework.NodeInfo + lock sync.RWMutex +} + +// NewTemplateNodeInfoRegistry creates a new TemplateNodeInfoRegistry. +func NewTemplateNodeInfoRegistry(processor TemplateNodeInfoProvider) *TemplateNodeInfoRegistry { + return &TemplateNodeInfoRegistry{ + processor: processor, + nodeInfos: make(map[string]*framework.NodeInfo), + } +} + +// Recompute calls the embedded processor to update the cached node infos. +func (r *TemplateNodeInfoRegistry) Recompute(autoscalingCtx *ca_context.AutoscalingContext, nodes []*apiv1.Node, daemonsets []*appsv1.DaemonSet, taintConfig taints.TaintConfig, currentTime time.Time) errors.AutoscalerError { + nodeInfos, err := r.processor.Process(autoscalingCtx, nodes, daemonsets, taintConfig, currentTime) + if err != nil { + return err + } + + r.lock.Lock() + defer r.lock.Unlock() + r.nodeInfos = nodeInfos + return nil +} + +// CleanUp cleans up the embedded processor. +func (r *TemplateNodeInfoRegistry) CleanUp() { + r.processor.CleanUp() +} + +// GetNodeInfo returns the template NodeInfo for the given node group id. 
+func (r *TemplateNodeInfoRegistry) GetNodeInfo(id string) (*framework.NodeInfo, bool) { + r.lock.RLock() + defer r.lock.RUnlock() + nodeInfo, found := r.nodeInfos[id] + return nodeInfo, found +} + +// GetNodeInfos returns a copy of the full map of template NodeInfos. +func (r *TemplateNodeInfoRegistry) GetNodeInfos() map[string]*framework.NodeInfo { + r.lock.RLock() + defer r.lock.RUnlock() + result := make(map[string]*framework.NodeInfo, len(r.nodeInfos)) + maps.Copy(result, r.nodeInfos) + return result +} diff --git a/cluster-autoscaler/processors/nodeinfosprovider/template_node_info_registry_test.go b/cluster-autoscaler/processors/nodeinfosprovider/template_node_info_registry_test.go new file mode 100644 index 000000000000..ccb5c7d0bde7 --- /dev/null +++ b/cluster-autoscaler/processors/nodeinfosprovider/template_node_info_registry_test.go @@ -0,0 +1,118 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package nodeinfosprovider + +import ( + "sync" + "testing" + "time" + + appsv1 "k8s.io/api/apps/v1" + apiv1 "k8s.io/api/core/v1" + + "github.com/stretchr/testify/assert" + ca_context "k8s.io/autoscaler/cluster-autoscaler/context" + "k8s.io/autoscaler/cluster-autoscaler/simulator/framework" + "k8s.io/autoscaler/cluster-autoscaler/utils/errors" + "k8s.io/autoscaler/cluster-autoscaler/utils/taints" +) + +type mockTemplateNodeInfoProvider struct { + nodeInfos map[string]*framework.NodeInfo +} + +func (p *mockTemplateNodeInfoProvider) Process(_ *ca_context.AutoscalingContext, _ []*apiv1.Node, _ []*appsv1.DaemonSet, _ taints.TaintConfig, _ time.Time) (map[string]*framework.NodeInfo, errors.AutoscalerError) { + return p.nodeInfos, nil +} + +func (p *mockTemplateNodeInfoProvider) CleanUp() {} + +func TestTemplateNodeInfoRegistry(t *testing.T) { + mockProvider := &mockTemplateNodeInfoProvider{ + nodeInfos: map[string]*framework.NodeInfo{ + "ng1": {}, + }, + } + registry := NewTemplateNodeInfoRegistry(mockProvider) + + // Test Recompute + err := registry.Recompute(nil, nil, nil, taints.TaintConfig{}, time.Now()) + assert.NoError(t, err) + + // Test GetNodeInfo + info, found := registry.GetNodeInfo("ng1") + assert.True(t, found) + assert.NotNil(t, info) + + info, found = registry.GetNodeInfo("ng2") + assert.False(t, found) + assert.Nil(t, info) + + // Test GetNodeInfos + infos := registry.GetNodeInfos() + assert.Len(t, infos, 1) + assert.Contains(t, infos, "ng1") + + // Test Update + mockProvider.nodeInfos = map[string]*framework.NodeInfo{ + "ng1": {}, + "ng2": {}, + } + err = registry.Recompute(nil, nil, nil, taints.TaintConfig{}, time.Now()) + assert.NoError(t, err) + + info, found = registry.GetNodeInfo("ng2") + assert.True(t, found) + assert.NotNil(t, info) +} + +func TestTemplateNodeInfoRegistry_Concurrent(t *testing.T) { + mockProvider := &mockTemplateNodeInfoProvider{ + nodeInfos: map[string]*framework.NodeInfo{ + "ng1": {}, + }, + } + registry := NewTemplateNodeInfoRegistry(mockProvider) + + var wg sync.WaitGroup + wg.Add(2) + + go func() { + defer wg.Done() + 
for i := 0; i < 100; i++ { + err := registry.Recompute(nil, nil, nil, taints.TaintConfig{}, time.Now()) + assert.NoError(t, err) + } + }() + + go func() { + defer wg.Done() + for i := 0; i < 100; i++ { + registry.GetNodeInfo("ng1") + registry.GetNodeInfos() + } + }() + + wg.Wait() + + // Basic check after concurrent access + info, found := registry.GetNodeInfo("ng1") + assert.True(t, found) + assert.NotNil(t, info) + infos := registry.GetNodeInfos() + assert.Len(t, infos, 1) +} diff --git a/cluster-autoscaler/processors/test/common.go b/cluster-autoscaler/processors/test/common.go index 3201c2026dcc..8d92559a4a3a 100644 --- a/cluster-autoscaler/processors/test/common.go +++ b/cluster-autoscaler/processors/test/common.go @@ -38,7 +38,13 @@ import ( ) // NewTestProcessors returns a set of simple processors for use in tests. +// Note: This function injects a default TemplateNodeInfoRegistry into the provided AutoscalingContext. +// This is a necessary workaround for synthetic tests that manually construct the context without using NewStaticAutoscaler, ensuring they have access to the registry. func NewTestProcessors(autoscalingCtx *ca_context.AutoscalingContext) *processors.AutoscalingProcessors { + templateNodeInfoProvider := nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false) + templateNodeInfoRegistry := nodeinfosprovider.NewTemplateNodeInfoRegistry(templateNodeInfoProvider) + autoscalingCtx.TemplateNodeInfoRegistry = templateNodeInfoRegistry + return &processors.AutoscalingProcessors{ PodListProcessor: podlistprocessor.NewDefaultPodListProcessor(scheduling.ScheduleAnywhere), NodeGroupListProcessor: &nodegroups.NoOpNodeGroupListProcessor{}, @@ -50,7 +56,7 @@ func NewTestProcessors(autoscalingCtx *ca_context.AutoscalingContext) *processor ScaleDownStatusProcessor: &status.NoOpScaleDownStatusProcessor{}, AutoscalingStatusProcessor: &status.NoOpAutoscalingStatusProcessor{}, NodeGroupManager: nodegroups.NewDefaultNodeGroupManager(), - TemplateNodeInfoProvider: nodeinfosprovider.NewDefaultTemplateNodeInfoProvider(nil, false), + TemplateNodeInfoProvider: templateNodeInfoProvider, NodeGroupConfigProcessor: nodegroupconfig.NewDefaultNodeGroupConfigProcessor(autoscalingCtx.NodeGroupDefaults), CustomResourcesProcessor: customresources.NewDefaultCustomResourcesProcessor(true), ActionableClusterProcessor: actionablecluster.NewDefaultActionableClusterProcessor(),
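
For reviewers who want the gist without tracing every hunk: the change introduces a recompute-once, read-many cache for template NodeInfos. RunOnce calls Recompute() once at the start of each loop iteration, and every later consumer (DebuggingSnapshotter, updateClusterState, the scale-up orchestrator, the DRA custom-resources processor) reads the same cached snapshot from the AutoscalingContext instead of re-running the TemplateNodeInfoProvider. The sketch below is a minimal, self-contained illustration of that pattern only; the templateInfo, provider, registry and staticProvider types are simplified stand-ins invented for this example, not the autoscaler's real framework.NodeInfo, TemplateNodeInfoProvider or TemplateNodeInfoRegistry.

package main

import (
	"fmt"
	"maps"
	"sync"
)

// templateInfo is a simplified stand-in for the autoscaler's framework.NodeInfo.
type templateInfo struct {
	cpu int64
	mem int64
}

// provider is a stand-in for the TemplateNodeInfoProvider processor: the
// potentially expensive source of per-node-group templates.
type provider interface {
	Process() (map[string]*templateInfo, error)
}

// registry caches the provider's last result so that all readers within one
// loop iteration see a single, consistent snapshot.
type registry struct {
	provider  provider
	mu        sync.RWMutex
	nodeInfos map[string]*templateInfo
}

func newRegistry(p provider) *registry {
	return &registry{provider: p, nodeInfos: map[string]*templateInfo{}}
}

// Recompute refreshes the cache; intended to be called once per loop iteration.
func (r *registry) Recompute() error {
	infos, err := r.provider.Process()
	if err != nil {
		return err
	}
	r.mu.Lock()
	defer r.mu.Unlock()
	r.nodeInfos = infos
	return nil
}

// Get returns the cached template for a single node group.
func (r *registry) Get(id string) (*templateInfo, bool) {
	r.mu.RLock()
	defer r.mu.RUnlock()
	info, ok := r.nodeInfos[id]
	return info, ok
}

// All returns a copy of the cached map so callers cannot mutate the cache itself;
// the pointed-to values are still shared and must be treated as read-only.
func (r *registry) All() map[string]*templateInfo {
	r.mu.RLock()
	defer r.mu.RUnlock()
	out := make(map[string]*templateInfo, len(r.nodeInfos))
	maps.Copy(out, r.nodeInfos)
	return out
}

// staticProvider is a trivial provider used only for this example.
type staticProvider map[string]*templateInfo

func (p staticProvider) Process() (map[string]*templateInfo, error) { return p, nil }

func main() {
	r := newRegistry(staticProvider{"ng1": {cpu: 4, mem: 16 << 30}})

	// Start of a loop iteration: recompute the snapshot once.
	if err := r.Recompute(); err != nil {
		panic(err)
	}

	// Later consumers all read the same cached snapshot.
	if info, ok := r.Get("ng1"); ok {
		fmt.Printf("ng1 template: %d CPUs\n", info.cpu)
	}
	fmt.Println("cached node groups:", len(r.All()))
}

As in the diff's GetNodeInfos, All hands back a fresh map so callers cannot mutate the registry's internal map, but the cached values themselves are shared pointers, so consumers are expected to treat the templates as read-only.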