Skip to content

Commit bf55f3e

Browse files
committed
multiproject/manager: add tests; harden startup and finalizer handling
- Add manager_test.go covering: - Start adds NEG cleanup finalizer and is idempotent - Start failure and GCE client creation failure roll back finalizer - Stop closes controller channel and removes finalizer - Concurrent starts for same ProviderConfig are single-shot - Improve manager.go: - Introduce test seam via package var startNEGController - Ensure finalizer is added before start; roll back on any startup error - Delete finalizer using latest ProviderConfig from API to avoid stale updates - Wrap errors with %w and add GoDoc comments - Rename exported ControllerSet to unexported controllerSet - No expected behavior change in the happy path; robustness and testability improved.
1 parent 19268b8 commit bf55f3e

File tree

2 files changed

+340
-13
lines changed

2 files changed

+340
-13
lines changed

pkg/multiproject/manager/manager.go

Lines changed: 60 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
package manager
22

33
import (
4+
"context"
45
"fmt"
56
"sync"
67

78
networkclient "github.com/GoogleCloudPlatform/gke-networking-api/client/network/clientset/versioned"
89
nodetopologyclient "github.com/GoogleCloudPlatform/gke-networking-api/client/nodetopology/clientset/versioned"
10+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
911
"k8s.io/apimachinery/pkg/types"
1012
"k8s.io/client-go/kubernetes"
1113
providerconfig "k8s.io/ingress-gce/pkg/apis/providerconfig/v1"
@@ -21,9 +23,17 @@ import (
2123
"k8s.io/klog/v2"
2224
)
2325

26+
// startNEGController is a package-level variable to allow tests to stub the
27+
// actual NEG controller startup routine.
28+
var startNEGController = neg.StartNEGController
29+
30+
// ProviderConfigControllersManager coordinates lifecycle of controllers scoped to
31+
// a single ProviderConfig. It ensures per-ProviderConfig controller startup is
32+
// idempotent, adds/removes the NEG cleanup finalizer, and wires a stop channel
33+
// for clean shutdown.
2434
type ProviderConfigControllersManager struct {
2535
mu sync.Mutex
26-
controllers map[string]*ControllerSet
36+
controllers map[string]*controllerSet
2737

2838
logger klog.Logger
2939
providerConfigClient providerconfigclient.Interface
@@ -43,10 +53,14 @@ type ProviderConfigControllersManager struct {
4353
syncerMetrics *syncMetrics.SyncerMetrics
4454
}
4555

46-
type ControllerSet struct {
56+
// controllerSet holds controller-specific resources for a ProviderConfig.
57+
// Unexported because it is an internal implementation detail.
58+
type controllerSet struct {
4759
stopCh chan<- struct{}
4860
}
4961

62+
// NewProviderConfigControllerManager constructs a new ProviderConfigControllersManager.
63+
// It does not start any controllers until StartControllersForProviderConfig is invoked.
5064
func NewProviderConfigControllerManager(
5165
kubeClient kubernetes.Interface,
5266
informers *multiprojectinformers.InformerSet,
@@ -65,7 +79,7 @@ func NewProviderConfigControllerManager(
6579
syncerMetrics *syncMetrics.SyncerMetrics,
6680
) *ProviderConfigControllersManager {
6781
return &ProviderConfigControllersManager{
68-
controllers: make(map[string]*ControllerSet),
82+
controllers: make(map[string]*controllerSet),
6983
logger: logger,
7084
providerConfigClient: providerConfigClient,
7185
informers: informers,
@@ -88,6 +102,10 @@ func providerConfigKey(pc *providerconfig.ProviderConfig) string {
88102
return pc.Name
89103
}
90104

105+
// StartControllersForProviderConfig ensures finalizers are present and starts
106+
// the controllers associated with the given ProviderConfig. The call is
107+
// idempotent per ProviderConfig: concurrent or repeated calls for the same
108+
// ProviderConfig will only start controllers once.
91109
func (pccm *ProviderConfigControllersManager) StartControllersForProviderConfig(pc *providerconfig.ProviderConfig) error {
92110
pccm.mu.Lock()
93111
defer pccm.mu.Unlock()
@@ -102,17 +120,23 @@ func (pccm *ProviderConfigControllersManager) StartControllersForProviderConfig(
102120
return nil
103121
}
104122

123+
// Add the cleanup finalizer up front to avoid a window where controllers
124+
// may create external resources without a finalizer present. If deletion
125+
// happens in that window, cleanup could be skipped. We roll this back on
126+
// any subsequent startup error.
105127
err := finalizer.EnsureProviderConfigNEGCleanupFinalizer(pc, pccm.providerConfigClient, logger)
106128
if err != nil {
107-
return fmt.Errorf("failed to ensure NEG cleanup finalizer for project %s: %v", pcKey, err)
129+
return fmt.Errorf("failed to ensure NEG cleanup finalizer for project %s: %w", pcKey, err)
108130
}
109131

110132
cloud, err := pccm.gceCreator.GCEForProviderConfig(pc, logger)
111133
if err != nil {
112-
return fmt.Errorf("failed to create GCE client for provider config %+v: %v", pc, err)
134+
// If GCE client creation fails after finalizer was added, roll it back.
135+
pccm.rollbackFinalizerOnStartFailure(pc, logger, err)
136+
return fmt.Errorf("failed to create GCE client for provider config %+v: %w", pc, err)
113137
}
114138

115-
negControllerStopCh, err := neg.StartNEGController(
139+
negControllerStopCh, err := startNEGController(
116140
pccm.informers,
117141
pccm.kubeClient,
118142
pccm.eventRecorderClient,
@@ -130,14 +154,12 @@ func (pccm *ProviderConfigControllersManager) StartControllersForProviderConfig(
130154
pccm.syncerMetrics,
131155
)
132156
if err != nil {
133-
cleanupErr := finalizer.DeleteProviderConfigNEGCleanupFinalizer(pc, pccm.providerConfigClient, logger)
134-
if cleanupErr != nil {
135-
logger.Error(cleanupErr, "failed to clean up NEG finalizer after controller start failure", "originalError", err)
136-
}
137-
return fmt.Errorf("failed to start NEG controller for project %s: %v", pcKey, err)
157+
// If startup fails, make a best-effort to roll back the finalizer.
158+
pccm.rollbackFinalizerOnStartFailure(pc, logger, err)
159+
return fmt.Errorf("failed to start NEG controller for project %s: %w", pcKey, err)
138160
}
139161

140-
pccm.controllers[pcKey] = &ControllerSet{
162+
pccm.controllers[pcKey] = &controllerSet{
141163
stopCh: negControllerStopCh,
142164
}
143165

@@ -161,9 +183,34 @@ func (pccm *ProviderConfigControllersManager) StopControllersForProviderConfig(p
161183
close(pccm.controllers[csKey].stopCh)
162184

163185
delete(pccm.controllers, csKey)
164-
err := finalizer.DeleteProviderConfigNEGCleanupFinalizer(pc, pccm.providerConfigClient, logger)
186+
187+
// Ensure we remove the finalizer from the latest object state.
188+
pcLatest := pccm.latestPCWithCleanupFinalizer(pc)
189+
err := finalizer.DeleteProviderConfigNEGCleanupFinalizer(pcLatest, pccm.providerConfigClient, logger)
165190
if err != nil {
166191
logger.Error(err, "failed to delete NEG cleanup finalizer for project")
167192
}
168193
logger.Info("Stopped controllers for provider config")
169194
}
195+
196+
// rollbackFinalizerOnStartFailure removes the NEG cleanup finalizer after a
197+
// start failure so that ProviderConfig deletion is not blocked.
198+
func (pccm *ProviderConfigControllersManager) rollbackFinalizerOnStartFailure(pc *providerconfig.ProviderConfig, logger klog.Logger, cause error) {
199+
pcLatest := pccm.latestPCWithCleanupFinalizer(pc)
200+
if err := finalizer.DeleteProviderConfigNEGCleanupFinalizer(pcLatest, pccm.providerConfigClient, logger); err != nil {
201+
logger.Error(err, "failed to clean up NEG finalizer after start failure", "originalError", cause)
202+
}
203+
}
204+
205+
// latestPCWithCleanupFinalizer returns the latest ProviderConfig from the API server.
206+
// If the Get fails, it returns a local copy of the provided pc with the cleanup
207+
// finalizer appended to ensure a subsequent delete attempt is not a no-op.
208+
func (pccm *ProviderConfigControllersManager) latestPCWithCleanupFinalizer(pc *providerconfig.ProviderConfig) *providerconfig.ProviderConfig {
209+
pcLatest, err := pccm.providerConfigClient.CloudV1().ProviderConfigs().Get(context.Background(), pc.Name, metav1.GetOptions{})
210+
if err != nil {
211+
pcCopy := pc.DeepCopy()
212+
pcCopy.Finalizers = append(pcCopy.Finalizers, finalizer.ProviderConfigNEGCleanupFinalizer)
213+
return pcCopy
214+
}
215+
return pcLatest
216+
}

0 commit comments

Comments
 (0)