Skip to content

Commit 4f8d638

Browse files
zhanggbj and chrischdi authored
✨ Add Namespace Scoped Zone Discovery and Watch (#3146)
* Add Namespace Scoped Zone Discovery
  - Introduce a feature flag to enable Namespace Scoped Zone.
  - Enhance zone discovery to support Namespace Scoped Zones.
  - Filter out zones marked for deletion during the discovery process.
* vspherecluster: re-write tests for getFailureDomains
* Add Watch for Zone event to update FailureDomains accordingly

Signed-off-by: Gong Zhang <[email protected]>

---------

Signed-off-by: Gong Zhang <[email protected]>
Co-authored-by: Christian Schlotter <[email protected]>
1 parent c6eff61 commit 4f8d638

File tree

23 files changed

+848
-1141
lines changed

23 files changed

+848
-1141
lines changed

.golangci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ run:
55
- "zz_generated.*\\.go$"
66
- "_conversion\\.go$"
77
- "vendored_cluster_api\\.go$"
8+
- "^internal/apis/topology/v1alpha1"
89
allow-parallel-runners: true
910

1011
linters:

config/manager/manager.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ spec:
2121
- "--diagnostics-address=${CAPI_DIAGNOSTICS_ADDRESS:=:8443}"
2222
- "--insecure-diagnostics=${CAPI_INSECURE_DIAGNOSTICS:=false}"
2323
- --v=4
24-
- "--feature-gates=NodeAntiAffinity=${EXP_NODE_ANTI_AFFINITY:=false}"
24+
- "--feature-gates=NodeAntiAffinity=${EXP_NODE_ANTI_AFFINITY:=false},NamespaceScopedZones=${EXP_NAMESPACE_SCOPED_ZONES:=false}"
2525
image: controller:latest
2626
imagePullPolicy: IfNotPresent
2727
name: manager

config/rbac/role.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,14 @@ rules:
376376
- get
377377
- list
378378
- watch
379+
- apiGroups:
380+
- topology.tanzu.vmware.com
381+
resources:
382+
- zones
383+
verbs:
384+
- get
385+
- list
386+
- watch
379387
- apiGroups:
380388
- vmoperator.vmware.com
381389
resources:

controllers/vmware/vspherecluster_reconciler.go

Lines changed: 63 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ import (
2222
"fmt"
2323

2424
"github.com/pkg/errors"
25-
topologyv1 "github.com/vmware-tanzu/vm-operator/external/tanzu-topology/api/v1alpha1"
2625
apierrors "k8s.io/apimachinery/pkg/api/errors"
2726
"k8s.io/apimachinery/pkg/types"
2827
kerrors "k8s.io/apimachinery/pkg/util/errors"
@@ -40,6 +39,8 @@ import (
4039
"sigs.k8s.io/controller-runtime/pkg/reconcile"
4140

4241
vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1"
42+
"sigs.k8s.io/cluster-api-provider-vsphere/feature"
43+
topologyv1 "sigs.k8s.io/cluster-api-provider-vsphere/internal/apis/topology/v1alpha1"
4344
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/context/vmware"
4445
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/services"
4546
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/util"
@@ -160,7 +161,7 @@ func (r *ClusterReconciler) reconcileDelete(clusterCtx *vmware.ClusterContext) {
160161

161162
func (r *ClusterReconciler) reconcileNormal(ctx context.Context, clusterCtx *vmware.ClusterContext) error {
162163
// Get any failure domains to report back to the CAPI core controller.
163-
failureDomains, err := r.getFailureDomains(ctx)
164+
failureDomains, err := r.getFailureDomains(ctx, clusterCtx.VSphereCluster.Namespace)
164165
if err != nil {
165166
return errors.Wrapf(
166167
err,
@@ -369,9 +370,68 @@ func (r *ClusterReconciler) VSphereMachineToCluster(ctx context.Context, o clien
369370
}}
370371
}
371372

373+
// ZoneToVSphereClusters adds reconcile requests for VSphereClusters when Zone has an event.
374+
func (r *ClusterReconciler) ZoneToVSphereClusters(ctx context.Context, o client.Object) []reconcile.Request {
375+
log := ctrl.LoggerFrom(ctx)
376+
377+
zone, ok := o.(*topologyv1.Zone)
378+
if !ok {
379+
log.Error(nil, fmt.Sprintf("Expected a Zone but got a %T", o))
380+
return nil
381+
}
382+
log = log.WithValues("Zone", klog.KObj(zone))
383+
ctx = ctrl.LoggerInto(ctx, log)
384+
385+
vsphereClusters := &vmwarev1.VSphereClusterList{}
386+
err := r.Client.List(ctx, vsphereClusters, &client.ListOptions{Namespace: zone.Namespace})
387+
if err != nil {
388+
log.V(4).Error(err, "Failed to get VSphereClusters from Zone")
389+
return nil
390+
}
391+
392+
log.V(6).Info("Triggering VSphereCluster reconcile for Zone")
393+
requests := []reconcile.Request{}
394+
for _, c := range vsphereClusters.Items {
395+
r := reconcile.Request{
396+
NamespacedName: types.NamespacedName{
397+
Name: c.Name,
398+
Namespace: c.Namespace,
399+
},
400+
}
401+
requests = append(requests, r)
402+
}
403+
404+
return requests
405+
}
406+
372407
// Returns the failure domain information discovered on the cluster
373408
// hosting this controller.
374-
func (r *ClusterReconciler) getFailureDomains(ctx context.Context) (clusterv1.FailureDomains, error) {
409+
func (r *ClusterReconciler) getFailureDomains(ctx context.Context, namespace string) (clusterv1.FailureDomains, error) {
410+
failureDomains := clusterv1.FailureDomains{}
411+
// Determine the source of failure domains based on the NamespaceScopedZones feature gate.
412+
// If NamespaceScopedZones is enabled, use Zone which is Namespace scoped, otherwise use
413+
// Availability Zone which is Cluster scoped.
414+
if feature.Gates.Enabled(feature.NamespaceScopedZones) {
415+
zoneList := &topologyv1.ZoneList{}
416+
listOptions := &client.ListOptions{Namespace: namespace}
417+
if err := r.Client.List(ctx, zoneList, listOptions); err != nil {
418+
return nil, errors.Wrapf(err, "failed to list Zones in namespace %s", namespace)
419+
}
420+
421+
for _, zone := range zoneList.Items {
422+
// Skip zones which are in deletion
423+
if !zone.DeletionTimestamp.IsZero() {
424+
continue
425+
}
426+
failureDomains[zone.Name] = clusterv1.FailureDomainSpec{ControlPlane: true}
427+
}
428+
429+
if len(failureDomains) == 0 {
430+
return nil, nil
431+
}
432+
433+
return failureDomains, nil
434+
}
375435
availabilityZoneList := &topologyv1.AvailabilityZoneList{}
376436
if err := r.Client.List(ctx, availabilityZoneList); err != nil {
377437
return nil, err
@@ -380,8 +440,6 @@ func (r *ClusterReconciler) getFailureDomains(ctx context.Context) (clusterv1.Fa
380440
if len(availabilityZoneList.Items) == 0 {
381441
return nil, nil
382442
}
383-
384-
failureDomains := clusterv1.FailureDomains{}
385443
for _, az := range availabilityZoneList.Items {
386444
failureDomains[az.Name] = clusterv1.FailureDomainSpec{
387445
ControlPlane: true,

controllers/vmware/vspherecluster_reconciler_test.go

Lines changed: 151 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,30 @@ limitations under the License.
1717
package vmware
1818

1919
import (
20+
"context"
2021
"os"
2122
"path/filepath"
23+
"reflect"
2224
"testing"
2325

2426
. "github.com/onsi/ginkgo/v2"
2527
"github.com/onsi/ginkgo/v2/types"
2628
. "github.com/onsi/gomega"
27-
topologyv1 "github.com/vmware-tanzu/vm-operator/external/tanzu-topology/api/v1alpha1"
2829
corev1 "k8s.io/api/core/v1"
2930
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
31+
"k8s.io/apimachinery/pkg/runtime"
3032
apirecord "k8s.io/client-go/tools/record"
33+
utilfeature "k8s.io/component-base/featuregate/testing"
34+
"k8s.io/utils/ptr"
3135
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
3236
"sigs.k8s.io/cluster-api/util/conditions"
3337
ctrl "sigs.k8s.io/controller-runtime"
38+
"sigs.k8s.io/controller-runtime/pkg/client"
39+
"sigs.k8s.io/controller-runtime/pkg/client/fake"
3440

3541
vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1"
42+
"sigs.k8s.io/cluster-api-provider-vsphere/feature"
43+
topologyv1 "sigs.k8s.io/cluster-api-provider-vsphere/internal/apis/topology/v1alpha1"
3644
capvcontext "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context"
3745
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/context/vmware"
3846
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/services/network"
@@ -128,34 +136,149 @@ var _ = Describe("Cluster Controller Tests", func() {
128136
Expect(c.Reason).NotTo(Equal(clusterv1.DeletingReason))
129137
})
130138
})
139+
})
131140

132-
Context("Test getFailureDomains", func() {
133-
It("should not find FailureDomains", func() {
134-
fds, err := reconciler.getFailureDomains(ctx)
135-
Expect(err).ToNot(HaveOccurred())
136-
Expect(fds).Should(BeEmpty())
137-
})
141+
func TestClusterReconciler_getFailureDomains(t *testing.T) {
142+
g := NewWithT(t)
143+
ctx := context.Background()
138144

139-
It("should find FailureDomains", func() {
140-
zoneNames := []string{"homer", "marge", "bart"}
141-
for _, name := range zoneNames {
142-
zone := &topologyv1.AvailabilityZone{
143-
TypeMeta: metav1.TypeMeta{
144-
APIVersion: topologyv1.GroupVersion.String(),
145-
Kind: "AvailabilityZone",
146-
},
147-
ObjectMeta: metav1.ObjectMeta{
148-
Name: name,
149-
},
150-
}
151-
152-
Expect(controllerManagerContext.Client.Create(ctx, zone)).To(Succeed())
153-
}
145+
scheme := runtime.NewScheme()
146+
g.Expect(corev1.AddToScheme(scheme)).To(Succeed())
147+
g.Expect(topologyv1.AddToScheme(scheme)).To(Succeed())
154148

155-
fds, err := reconciler.getFailureDomains(ctx)
156-
Expect(err).ToNot(HaveOccurred())
157-
Expect(fds).NotTo(BeNil())
158-
Expect(fds).Should(HaveLen(3))
149+
namespace := &corev1.Namespace{
150+
ObjectMeta: metav1.ObjectMeta{
151+
Name: "test-namespace",
152+
},
153+
}
154+
155+
tests := []struct {
156+
name string
157+
objects []client.Object
158+
want clusterv1.FailureDomains
159+
wantErr bool
160+
featureGate bool
161+
}{
162+
{
163+
name: "Cluster-Wide: should not find any FailureDomains if no exists",
164+
objects: []client.Object{},
165+
want: nil,
166+
wantErr: false,
167+
featureGate: false,
168+
},
169+
{
170+
name: "Namespaced: should not find any FailureDomains if no exists",
171+
objects: []client.Object{},
172+
want: nil,
173+
wantErr: false,
174+
featureGate: true,
175+
},
176+
{
177+
name: "Cluster-Wide: should not find any FailureDomains if only namespaced exist",
178+
objects: []client.Object{zone(namespace.Name, "ns-one", false)},
179+
want: nil,
180+
wantErr: false,
181+
featureGate: false,
182+
},
183+
{
184+
name: "Namespaced: should not find any FailureDomains if only cluster-wide exist",
185+
objects: []client.Object{availabilityZone("c-one")},
186+
want: nil,
187+
wantErr: false,
188+
featureGate: true,
189+
},
190+
{
191+
name: "Cluster-Wide: should find FailureDomains if only cluster-wide exist",
192+
objects: []client.Object{availabilityZone("c-one")},
193+
want: failureDomains("c-one"),
194+
wantErr: false,
195+
featureGate: false,
196+
},
197+
{
198+
name: "Namespaced: should find FailureDomains if only namespaced exist",
199+
objects: []client.Object{zone(namespace.Name, "ns-one", false)},
200+
want: failureDomains("ns-one"),
201+
wantErr: false,
202+
featureGate: true,
203+
},
204+
{
205+
name: "Cluster-Wide: should only find cluster-wide FailureDomains if both types exist",
206+
objects: []client.Object{availabilityZone("c-one"), zone(namespace.Name, "ns-one", false)},
207+
want: failureDomains("c-one"),
208+
wantErr: false,
209+
featureGate: false,
210+
},
211+
{
212+
name: "Namespaced: should only find namespaced FailureDomains if both types exist",
213+
objects: []client.Object{availabilityZone("c-one"), zone(namespace.Name, "ns-one", false)},
214+
want: failureDomains("ns-one"),
215+
wantErr: false,
216+
featureGate: true,
217+
},
218+
{
219+
name: "Namespaced: should only find non-deleting namespaced FailureDomains",
220+
objects: []client.Object{
221+
availabilityZone("c-one"),
222+
zone(namespace.Name, "ns-one", false),
223+
zone(namespace.Name, "ns-two", false),
224+
zone(namespace.Name, "ns-three", false),
225+
zone(namespace.Name, "ns-four", true),
226+
},
227+
want: failureDomains("ns-one", "ns-two", "ns-three"),
228+
wantErr: false,
229+
featureGate: true,
230+
},
231+
}
232+
for _, tt := range tests {
233+
t.Run(tt.name, func(t *testing.T) {
234+
r := &ClusterReconciler{
235+
Client: fake.NewClientBuilder().
236+
WithScheme(scheme).
237+
WithObjects(append([]client.Object{namespace}, tt.objects...)...).
238+
Build(),
239+
}
240+
defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.NamespaceScopedZones, tt.featureGate)()
241+
got, err := r.getFailureDomains(ctx, namespace.Name)
242+
if (err != nil) != tt.wantErr {
243+
t.Errorf("ClusterReconciler.getFailureDomains() error = %v, wantErr %v", err, tt.wantErr)
244+
return
245+
}
246+
if !reflect.DeepEqual(got, tt.want) {
247+
t.Errorf("ClusterReconciler.getFailureDomains() = %v, want %v", got, tt.want)
248+
}
159249
})
160-
})
161-
})
250+
}
251+
}
252+
253+
func availabilityZone(name string) *topologyv1.AvailabilityZone {
254+
return &topologyv1.AvailabilityZone{
255+
ObjectMeta: metav1.ObjectMeta{
256+
Name: name,
257+
},
258+
}
259+
}
260+
261+
func zone(namespace, name string, deleting bool) *topologyv1.Zone {
262+
z := &topologyv1.Zone{
263+
ObjectMeta: metav1.ObjectMeta{
264+
Namespace: namespace,
265+
Name: name,
266+
},
267+
}
268+
269+
if deleting {
270+
z.ObjectMeta.DeletionTimestamp = ptr.To(metav1.Now())
271+
z.ObjectMeta.Finalizers = []string{"deletion.test.io/protection"}
272+
}
273+
return z
274+
}
275+
276+
func failureDomains(names ...string) clusterv1.FailureDomains {
277+
fds := clusterv1.FailureDomains{}
278+
for _, name := range names {
279+
fds[name] = clusterv1.FailureDomainSpec{
280+
ControlPlane: true,
281+
}
282+
}
283+
return fds
284+
}

controllers/vspherecluster_controller.go

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ import (
3838
vmwarev1 "sigs.k8s.io/cluster-api-provider-vsphere/apis/vmware/v1beta1"
3939
"sigs.k8s.io/cluster-api-provider-vsphere/controllers/vmware"
4040
"sigs.k8s.io/cluster-api-provider-vsphere/feature"
41+
topologyv1 "sigs.k8s.io/cluster-api-provider-vsphere/internal/apis/topology/v1alpha1"
4142
capvcontext "sigs.k8s.io/cluster-api-provider-vsphere/pkg/context"
4243
inframanager "sigs.k8s.io/cluster-api-provider-vsphere/pkg/manager"
4344
"sigs.k8s.io/cluster-api-provider-vsphere/pkg/services"
@@ -52,6 +53,7 @@ import (
5253
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status,verbs=get;list;watch
5354
// +kubebuilder:rbac:groups=topology.tanzu.vmware.com,resources=availabilityzones,verbs=get;list;watch
5455
// +kubebuilder:rbac:groups=topology.tanzu.vmware.com,resources=availabilityzones/status,verbs=get;list;watch
56+
// +kubebuilder:rbac:groups=topology.tanzu.vmware.com,resources=zones,verbs=get;list;watch
5557

5658
// AddClusterControllerToManager adds the cluster controller to the provided
5759
// manager.
@@ -72,15 +74,24 @@ func AddClusterControllerToManager(ctx context.Context, controllerManagerCtx *ca
7274
},
7375
NetworkProvider: networkProvider,
7476
}
75-
return ctrl.NewControllerManagedBy(mgr).
77+
builder := ctrl.NewControllerManagedBy(mgr).
7678
For(&vmwarev1.VSphereCluster{}).
7779
WithOptions(options).
7880
Watches(
7981
&vmwarev1.VSphereMachine{},
8082
handler.EnqueueRequestsFromMapFunc(reconciler.VSphereMachineToCluster),
8183
).
82-
WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), controllerManagerCtx.WatchFilterValue)).
83-
Complete(reconciler)
84+
WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), controllerManagerCtx.WatchFilterValue))
85+
86+
// Conditionally add a Watch for topologyv1.Zone when the feature gate is enabled
87+
if feature.Gates.Enabled(feature.NamespaceScopedZones) {
88+
builder = builder.Watches(
89+
&topologyv1.Zone{},
90+
handler.EnqueueRequestsFromMapFunc(reconciler.ZoneToVSphereClusters),
91+
)
92+
}
93+
94+
return builder.Complete(reconciler)
8495
}
8596

8697
reconciler := &clusterReconciler{

0 commit comments

Comments
 (0)