11package manager
22
33import (
4+ "context"
45 "fmt"
56 "sync"
67
78 networkclient "github.com/GoogleCloudPlatform/gke-networking-api/client/network/clientset/versioned"
89 nodetopologyclient "github.com/GoogleCloudPlatform/gke-networking-api/client/nodetopology/clientset/versioned"
10+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
911 "k8s.io/apimachinery/pkg/types"
1012 "k8s.io/client-go/kubernetes"
1113 providerconfig "k8s.io/ingress-gce/pkg/apis/providerconfig/v1"
@@ -20,9 +22,17 @@ import (
2022 "k8s.io/klog/v2"
2123)
2224
25+ // startNEGController is a package-level variable to allow tests to stub the
26+ // actual NEG controller startup routine.
27+ var startNEGController = neg .StartNEGController
28+
29+ // ProviderConfigControllersManager coordinates lifecycle of controllers scoped to
30+ // a single ProviderConfig. It ensures per-ProviderConfig controller startup is
31+ // idempotent, adds/removes the NEG cleanup finalizer, and wires a stop channel
32+ // for clean shutdown.
2333type ProviderConfigControllersManager struct {
2434 mu sync.Mutex
25- controllers map [string ]* ControllerSet
35+ controllers map [string ]* controllerSet
2636
2737 logger klog.Logger
2838 providerConfigClient providerconfigclient.Interface
@@ -41,10 +51,14 @@ type ProviderConfigControllersManager struct {
4151 globalStopCh <- chan struct {}
4252}
4353
44- type ControllerSet struct {
54+ // controllerSet holds controller-specific resources for a ProviderConfig.
55+ // Unexported because it is an internal implementation detail.
56+ type controllerSet struct {
4557 stopCh chan <- struct {}
4658}
4759
60+ // NewProviderConfigControllerManager constructs a new ProviderConfigControllersManager.
61+ // It does not start any controllers until StartControllersForProviderConfig is invoked.
4862func NewProviderConfigControllerManager (
4963 kubeClient kubernetes.Interface ,
5064 informers * multiprojectinformers.InformerSet ,
@@ -62,7 +76,7 @@ func NewProviderConfigControllerManager(
6276 logger klog.Logger ,
6377) * ProviderConfigControllersManager {
6478 return & ProviderConfigControllersManager {
65- controllers : make (map [string ]* ControllerSet ),
79+ controllers : make (map [string ]* controllerSet ),
6680 logger : logger ,
6781 providerConfigClient : providerConfigClient ,
6882 informers : informers ,
@@ -84,6 +98,10 @@ func providerConfigKey(pc *providerconfig.ProviderConfig) string {
8498 return pc .Name
8599}
86100
101+ // StartControllersForProviderConfig ensures finalizers are present and starts
102+ // the controllers associated with the given ProviderConfig. The call is
103+ // idempotent per ProviderConfig: concurrent or repeated calls for the same
104+ // ProviderConfig will only start controllers once.
87105func (pccm * ProviderConfigControllersManager ) StartControllersForProviderConfig (pc * providerconfig.ProviderConfig ) error {
88106 pccm .mu .Lock ()
89107 defer pccm .mu .Unlock ()
@@ -98,17 +116,23 @@ func (pccm *ProviderConfigControllersManager) StartControllersForProviderConfig(
98116 return nil
99117 }
100118
119+ // Add the cleanup finalizer up front to avoid a window where controllers
120+ // may create external resources without a finalizer present. If deletion
121+ // happens in that window, cleanup could be skipped. We roll this back on
122+ // any subsequent startup error.
101123 err := finalizer .EnsureProviderConfigNEGCleanupFinalizer (pc , pccm .providerConfigClient , logger )
102124 if err != nil {
103- return fmt .Errorf ("failed to ensure NEG cleanup finalizer for project %s: %v " , pcKey , err )
125+ return fmt .Errorf ("failed to ensure NEG cleanup finalizer for project %s: %w " , pcKey , err )
104126 }
105127
106128 cloud , err := pccm .gceCreator .GCEForProviderConfig (pc , logger )
107129 if err != nil {
108- return fmt .Errorf ("failed to create GCE client for provider config %+v: %v" , pc , err )
130+ // If GCE client creation fails after finalizer was added, roll it back.
131+ pccm .rollbackFinalizerOnStartFailure (pc , logger , err )
132+ return fmt .Errorf ("failed to create GCE client for provider config %+v: %w" , pc , err )
109133 }
110134
111- negControllerStopCh , err := neg . StartNEGController (
135+ negControllerStopCh , err := startNEGController (
112136 pccm .informers ,
113137 pccm .kubeClient ,
114138 pccm .eventRecorderClient ,
@@ -125,14 +149,12 @@ func (pccm *ProviderConfigControllersManager) StartControllersForProviderConfig(
125149 pc ,
126150 )
127151 if err != nil {
128- cleanupErr := finalizer .DeleteProviderConfigNEGCleanupFinalizer (pc , pccm .providerConfigClient , logger )
129- if cleanupErr != nil {
130- logger .Error (cleanupErr , "failed to clean up NEG finalizer after controller start failure" , "originalError" , err )
131- }
132- return fmt .Errorf ("failed to start NEG controller for project %s: %v" , pcKey , err )
152+ // If startup fails, make a best-effort to roll back the finalizer.
153+ pccm .rollbackFinalizerOnStartFailure (pc , logger , err )
154+ return fmt .Errorf ("failed to start NEG controller for project %s: %w" , pcKey , err )
133155 }
134156
135- pccm .controllers [pcKey ] = & ControllerSet {
157+ pccm .controllers [pcKey ] = & controllerSet {
136158 stopCh : negControllerStopCh ,
137159 }
138160
@@ -156,9 +178,34 @@ func (pccm *ProviderConfigControllersManager) StopControllersForProviderConfig(p
156178 close (pccm .controllers [csKey ].stopCh )
157179
158180 delete (pccm .controllers , csKey )
159- err := finalizer .DeleteProviderConfigNEGCleanupFinalizer (pc , pccm .providerConfigClient , logger )
181+
182+ // Ensure we remove the finalizer from the latest object state.
183+ pcLatest := pccm .latestPCWithCleanupFinalizer (pc )
184+ err := finalizer .DeleteProviderConfigNEGCleanupFinalizer (pcLatest , pccm .providerConfigClient , logger )
160185 if err != nil {
161186 logger .Error (err , "failed to delete NEG cleanup finalizer for project" )
162187 }
163188 logger .Info ("Stopped controllers for provider config" )
164189}
190+
191+ // rollbackFinalizerOnStartFailure removes the NEG cleanup finalizer after a
192+ // start failure so that ProviderConfig deletion is not blocked.
193+ func (pccm * ProviderConfigControllersManager ) rollbackFinalizerOnStartFailure (pc * providerconfig.ProviderConfig , logger klog.Logger , cause error ) {
194+ pcLatest := pccm .latestPCWithCleanupFinalizer (pc )
195+ if err := finalizer .DeleteProviderConfigNEGCleanupFinalizer (pcLatest , pccm .providerConfigClient , logger ); err != nil {
196+ logger .Error (err , "failed to clean up NEG finalizer after start failure" , "originalError" , cause )
197+ }
198+ }
199+
200+ // latestPCWithCleanupFinalizer returns the latest ProviderConfig from the API server.
201+ // If the Get fails, it returns a local copy of the provided pc with the cleanup
202+ // finalizer appended to ensure a subsequent delete attempt is not a no-op.
203+ func (pccm * ProviderConfigControllersManager ) latestPCWithCleanupFinalizer (pc * providerconfig.ProviderConfig ) * providerconfig.ProviderConfig {
204+ pcLatest , err := pccm .providerConfigClient .CloudV1 ().ProviderConfigs ().Get (context .Background (), pc .Name , metav1.GetOptions {})
205+ if err != nil {
206+ pcCopy := pc .DeepCopy ()
207+ pcCopy .Finalizers = append (pcCopy .Finalizers , finalizer .ProviderConfigNEGCleanupFinalizer )
208+ return pcCopy
209+ }
210+ return pcLatest
211+ }
0 commit comments