Skip to content

Commit 60a913c

Browse files
authored
Merge pull request #4485 from fabriziopandini/stop-updating-ClusterStatus-for-KubernetesVersionGTE-v1.22.0
⚠️ KCP: Stop updating and using Kubeadm's ClusterStatus with Kuberntes v1.22
2 parents 677dea6 + ecf6dde commit 60a913c

File tree

10 files changed

+165
-68
lines changed

10 files changed

+165
-68
lines changed

controlplane/kubeadm/controllers/controller.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,12 @@ func (r *KubeadmControlPlaneReconciler) reconcileEtcdMembers(ctx context.Context
536536
return ctrl.Result{}, errors.Wrap(err, "cannot get remote client to workload cluster")
537537
}
538538

539-
removedMembers, err := workloadCluster.ReconcileEtcdMembers(ctx, nodeNames)
539+
kubernetesVersion := controlPlane.KCP.Spec.Version
540+
parsedVersion, err := semver.ParseTolerant(kubernetesVersion)
541+
if err != nil {
542+
return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", kubernetesVersion)
543+
}
544+
removedMembers, err := workloadCluster.ReconcileEtcdMembers(ctx, nodeNames, parsedVersion)
540545
if err != nil {
541546
return ctrl.Result{}, errors.Wrap(err, "failed attempt to reconcile etcd members")
542547
}

controlplane/kubeadm/controllers/fakes_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ func (f fakeWorkloadCluster) ForwardEtcdLeadership(_ context.Context, _ *cluster
6464
return nil
6565
}
6666

67-
func (f fakeWorkloadCluster) ReconcileEtcdMembers(ctx context.Context, nodeNames []string) ([]string, error) {
67+
func (f fakeWorkloadCluster) ReconcileEtcdMembers(ctx context.Context, nodeNames []string, version semver.Version) ([]string, error) {
6868
return nil, nil
6969
}
7070

@@ -100,7 +100,7 @@ func (f fakeWorkloadCluster) RemoveEtcdMemberForMachine(ctx context.Context, mac
100100
return nil
101101
}
102102

103-
func (f fakeWorkloadCluster) RemoveMachineFromKubeadmConfigMap(ctx context.Context, machine *clusterv1.Machine) error {
103+
func (f fakeWorkloadCluster) RemoveMachineFromKubeadmConfigMap(ctx context.Context, machine *clusterv1.Machine, version semver.Version) error {
104104
return nil
105105
}
106106

controlplane/kubeadm/controllers/remediation.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"fmt"
2222

23+
"github.com/blang/semver"
2324
"github.com/pkg/errors"
2425
clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
2526
controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha3"
@@ -135,7 +136,12 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C
135136
}
136137
}
137138

138-
if err := workloadCluster.RemoveMachineFromKubeadmConfigMap(ctx, machineToBeRemediated); err != nil {
139+
kubernetesVersion := controlPlane.KCP.Spec.Version
140+
parsedVersion, err := semver.ParseTolerant(kubernetesVersion)
141+
if err != nil {
142+
return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", kubernetesVersion)
143+
}
144+
if err := workloadCluster.RemoveMachineFromKubeadmConfigMap(ctx, machineToBeRemediated, parsedVersion); err != nil {
139145
logger.Error(err, "Failed to remove machine from kubeadm ConfigMap")
140146
return ctrl.Result{}, err
141147
}
@@ -164,7 +170,7 @@ func (r *KubeadmControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C
164170
// - etc.
165171
//
166172
// NOTE: this func assumes the list of members in sync with the list of machines/nodes, it is required to call reconcileEtcdMembers
167-
// ans well as reconcileControlPlaneConditions before this.
173+
// and well as reconcileControlPlaneConditions before this.
168174
func (r *KubeadmControlPlaneReconciler) canSafelyRemoveEtcdMember(ctx context.Context, controlPlane *internal.ControlPlane, machineToBeRemediated *clusterv1.Machine) (bool, error) {
169175
logger := r.Log.WithValues("namespace", controlPlane.KCP.Namespace, "kubeadmControlPlane", controlPlane.KCP.Name, "cluster", controlPlane.Cluster.Name)
170176

controlplane/kubeadm/controllers/remediation_test.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) {
221221
controlPlane := &internal.ControlPlane{
222222
KCP: &controlplanev1.KubeadmControlPlane{Spec: controlplanev1.KubeadmControlPlaneSpec{
223223
Replicas: utilpointer.Int32Ptr(2),
224+
Version: "v1.19.1",
224225
}},
225226
Cluster: &clusterv1.Cluster{},
226227
Machines: internal.NewFilterableMachineCollection(m1, m2),
@@ -270,6 +271,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) {
270271
controlPlane := &internal.ControlPlane{
271272
KCP: &controlplanev1.KubeadmControlPlane{Spec: controlplanev1.KubeadmControlPlaneSpec{
272273
Replicas: utilpointer.Int32Ptr(3),
274+
Version: "v1.19.1",
273275
}},
274276
Cluster: &clusterv1.Cluster{},
275277
Machines: internal.NewFilterableMachineCollection(m1, m2, m3),
@@ -320,6 +322,7 @@ func TestReconcileUnhealthyMachines(t *testing.T) {
320322
controlPlane := &internal.ControlPlane{
321323
KCP: &controlplanev1.KubeadmControlPlane{Spec: controlplanev1.KubeadmControlPlaneSpec{
322324
Replicas: utilpointer.Int32Ptr(4),
325+
Version: "v1.19.1",
323326
}},
324327
Cluster: &clusterv1.Cluster{},
325328
Machines: internal.NewFilterableMachineCollection(m1, m2, m3, m4),

controlplane/kubeadm/controllers/scale.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"context"
2121
"strings"
2222

23+
"github.com/blang/semver"
2324
"github.com/pkg/errors"
2425
corev1 "k8s.io/api/core/v1"
2526
apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -128,7 +129,12 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(
128129
}
129130
}
130131

131-
if err := workloadCluster.RemoveMachineFromKubeadmConfigMap(ctx, machineToDelete); err != nil {
132+
kubernetesVersion := controlPlane.KCP.Spec.Version
133+
parsedVersion, err := semver.ParseTolerant(kubernetesVersion)
134+
if err != nil {
135+
return ctrl.Result{}, errors.Wrapf(err, "failed to parse kubernetes version %q", kubernetesVersion)
136+
}
137+
if err := workloadCluster.RemoveMachineFromKubeadmConfigMap(ctx, machineToDelete, parsedVersion); err != nil {
132138
logger.Error(err, "Failed to remove machine from kubeadm ConfigMap")
133139
return ctrl.Result{}, err
134140
}

controlplane/kubeadm/controllers/scale_test.go

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,11 @@ func TestKubeadmControlPlaneReconciler_scaleDownControlPlane_NoError(t *testing.
192192
}
193193

194194
cluster := &clusterv1.Cluster{}
195-
kcp := &controlplanev1.KubeadmControlPlane{}
195+
kcp := &controlplanev1.KubeadmControlPlane{
196+
Spec: controlplanev1.KubeadmControlPlaneSpec{
197+
Version: "v1.19.1",
198+
},
199+
}
196200
setKCPHealthy(kcp)
197201
controlPlane := &internal.ControlPlane{
198202
KCP: kcp,
@@ -230,7 +234,11 @@ func TestKubeadmControlPlaneReconciler_scaleDownControlPlane_NoError(t *testing.
230234
}
231235

232236
cluster := &clusterv1.Cluster{}
233-
kcp := &controlplanev1.KubeadmControlPlane{}
237+
kcp := &controlplanev1.KubeadmControlPlane{
238+
Spec: controlplanev1.KubeadmControlPlaneSpec{
239+
Version: "v1.19.1",
240+
},
241+
}
234242
controlPlane := &internal.ControlPlane{
235243
KCP: kcp,
236244
Cluster: cluster,

controlplane/kubeadm/internal/workload_cluster.go

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -82,13 +82,13 @@ type WorkloadCluster interface {
8282
UpdateKubeProxyImageInfo(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane) error
8383
UpdateCoreDNS(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane) error
8484
RemoveEtcdMemberForMachine(ctx context.Context, machine *clusterv1.Machine) error
85-
RemoveMachineFromKubeadmConfigMap(ctx context.Context, machine *clusterv1.Machine) error
86-
RemoveNodeFromKubeadmConfigMap(ctx context.Context, nodeName string) error
85+
RemoveMachineFromKubeadmConfigMap(ctx context.Context, machine *clusterv1.Machine, version semver.Version) error
86+
RemoveNodeFromKubeadmConfigMap(ctx context.Context, nodeName string, version semver.Version) error
8787
ForwardEtcdLeadership(ctx context.Context, machine *clusterv1.Machine, leaderCandidate *clusterv1.Machine) error
8888
AllowBootstrapTokensToGetNodes(ctx context.Context) error
8989

9090
// State recovery tasks.
91-
ReconcileEtcdMembers(ctx context.Context, nodeNames []string) ([]string, error)
91+
ReconcileEtcdMembers(ctx context.Context, nodeNames []string, version semver.Version) ([]string, error)
9292
}
9393

9494
// Workload defines operations on workload clusters.
@@ -296,17 +296,28 @@ func (w *Workload) UpdateSchedulerInKubeadmConfigMap(ctx context.Context, schedu
296296
}
297297

298298
// RemoveMachineFromKubeadmConfigMap removes the entry for the machine from the kubeadm configmap.
299-
func (w *Workload) RemoveMachineFromKubeadmConfigMap(ctx context.Context, machine *clusterv1.Machine) error {
299+
func (w *Workload) RemoveMachineFromKubeadmConfigMap(ctx context.Context, machine *clusterv1.Machine, version semver.Version) error {
300300
if machine == nil || machine.Status.NodeRef == nil {
301301
// Nothing to do, no node for Machine
302302
return nil
303303
}
304304

305-
return w.RemoveNodeFromKubeadmConfigMap(ctx, machine.Status.NodeRef.Name)
305+
return w.RemoveNodeFromKubeadmConfigMap(ctx, machine.Status.NodeRef.Name, version)
306306
}
307307

308+
var (
309+
// Starting from v1.22.0 kubeadm dropped usage of the ClusterStatus entry from the kubeadm-config ConfigMap
310+
// so it isn't necessary anymore to remove API endpoints for control plane nodes after deletion.
311+
// NOTE: This assume kubeadm version equals to Kubernetes version.
312+
minKubernetesVersionWithoutClusterStatus = semver.MustParse("1.22.0")
313+
)
314+
308315
// RemoveNodeFromKubeadmConfigMap removes the entry for the node from the kubeadm configmap.
309-
func (w *Workload) RemoveNodeFromKubeadmConfigMap(ctx context.Context, name string) error {
316+
func (w *Workload) RemoveNodeFromKubeadmConfigMap(ctx context.Context, name string, version semver.Version) error {
317+
if version.GTE(minKubernetesVersionWithoutClusterStatus) {
318+
return nil
319+
}
320+
310321
return util.Retry(func() (bool, error) {
311322
configMapKey := ctrlclient.ObjectKey{Name: kubeadmConfigKey, Namespace: metav1.NamespaceSystem}
312323
kubeadmConfigMap, err := w.getConfigMap(ctx, configMapKey)

controlplane/kubeadm/internal/workload_cluster_etcd.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package internal
1919
import (
2020
"context"
2121

22+
"github.com/blang/semver"
2223
"github.com/pkg/errors"
2324
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2425
kerrors "k8s.io/apimachinery/pkg/util/errors"
@@ -35,7 +36,7 @@ type etcdClientFor interface {
3536

3637
// ReconcileEtcdMembers iterates over all etcd members and finds members that do not have corresponding nodes.
3738
// If there are any such members, it deletes them from etcd and removes their nodes from the kubeadm configmap so that kubeadm does not run etcd health checks on them.
38-
func (w *Workload) ReconcileEtcdMembers(ctx context.Context, nodeNames []string) ([]string, error) {
39+
func (w *Workload) ReconcileEtcdMembers(ctx context.Context, nodeNames []string, version semver.Version) ([]string, error) {
3940
removedMembers := []string{}
4041
errs := []error{}
4142
for _, nodeName := range nodeNames {
@@ -73,7 +74,7 @@ func (w *Workload) ReconcileEtcdMembers(ctx context.Context, nodeNames []string)
7374
errs = append(errs, err)
7475
}
7576

76-
if err := w.RemoveNodeFromKubeadmConfigMap(ctx, member.Name); err != nil {
77+
if err := w.RemoveNodeFromKubeadmConfigMap(ctx, member.Name, version); err != nil {
7778
errs = append(errs, err)
7879
}
7980
}

controlplane/kubeadm/internal/workload_cluster_etcd_test.go

Lines changed: 61 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"errors"
2222
"testing"
2323

24+
"github.com/blang/semver"
2425
. "github.com/onsi/gomega"
2526

2627
"go.etcd.io/etcd/clientv3"
@@ -466,21 +467,24 @@ func TestReconcileEtcdMembers(t *testing.T) {
466467
Namespace: metav1.NamespaceSystem,
467468
},
468469
Data: map[string]string{
469-
clusterStatusKey: `apiEndpoints:
470-
ip-10-0-0-1.ec2.internal:
471-
advertiseAddress: 10.0.0.1
472-
bindPort: 6443
473-
ip-10-0-0-2.ec2.internal:
474-
advertiseAddress: 10.0.0.2
475-
bindPort: 6443
476-
someFieldThatIsAddedInTheFuture: bar
477-
ip-10-0-0-3.ec2.internal:
478-
advertiseAddress: 10.0.0.3
479-
bindPort: 6443
480-
apiVersion: kubeadm.k8s.io/vNbetaM
481-
kind: ClusterStatus`,
470+
clusterStatusKey: "apiEndpoints:\n" +
471+
" ip-10-0-0-1.ec2.internal:\n" +
472+
" advertiseAddress: 10.0.0.1\n" +
473+
" bindPort: 6443\n" +
474+
" ip-10-0-0-2.ec2.internal:\n" +
475+
" advertiseAddress: 10.0.0.2\n" +
476+
" bindPort: 6443\n" +
477+
" someFieldThatIsAddedInTheFuture: bar\n" +
478+
" ip-10-0-0-3.ec2.internal:\n" +
479+
" advertiseAddress: 10.0.0.3\n" +
480+
" bindPort: 6443\n" +
481+
"apiVersion: kubeadm.k8s.io/vNbetaM\n" +
482+
"kind: ClusterStatus\n",
482483
},
483484
}
485+
kubeadmConfigWithoutClusterStatus := kubeadmConfig.DeepCopy()
486+
delete(kubeadmConfigWithoutClusterStatus.Data, clusterStatusKey)
487+
484488
node1 := &corev1.Node{
485489
ObjectMeta: metav1.ObjectMeta{
486490
Name: "ip-10-0-0-1.ec2.internal",
@@ -508,25 +512,62 @@ kind: ClusterStatus`,
508512

509513
tests := []struct {
510514
name string
515+
kubernetesVersion semver.Version
511516
objs []runtime.Object
512517
nodes []string
513518
etcdClientGenerator etcdClientFor
514519
expectErr bool
515-
assert func(*WithT)
520+
assert func(*WithT, client.Client)
516521
}{
517522
{
518523
// the node to be removed is ip-10-0-0-3.ec2.internal since the
519524
// other two have nodes
520-
name: "successfully removes the etcd member without a node and removes the node from kubeadm config",
521-
objs: []runtime.Object{node1.DeepCopy(), node2.DeepCopy(), kubeadmConfig.DeepCopy()},
522-
nodes: []string{node1.Name, node2.Name},
525+
name: "successfully removes the etcd member without a node and removes the node from kubeadm config for Kubernetes version < 1.22.0",
526+
kubernetesVersion: kubernetesVersionWithClusterStatus, // Kubernetes version < 1.22.0 has ClusterStatus
527+
objs: []runtime.Object{node1.DeepCopy(), node2.DeepCopy(), kubeadmConfig.DeepCopy()},
528+
nodes: []string{node1.Name, node2.Name},
529+
etcdClientGenerator: &fakeEtcdClientGenerator{
530+
forNodesClient: &etcd.Client{
531+
EtcdClient: fakeEtcdClient,
532+
},
533+
},
534+
expectErr: false,
535+
assert: func(g *WithT, c client.Client) {
536+
g.Expect(fakeEtcdClient.RemovedMember).To(Equal(uint64(3)))
537+
538+
var actualConfig corev1.ConfigMap
539+
g.Expect(c.Get(
540+
ctx,
541+
ctrlclient.ObjectKey{Name: kubeadmConfigKey, Namespace: metav1.NamespaceSystem},
542+
&actualConfig,
543+
)).To(Succeed())
544+
g.Expect(actualConfig.Data[clusterStatusKey]).To(Equal("apiEndpoints:\n" +
545+
" ip-10-0-0-1.ec2.internal:\n" +
546+
" advertiseAddress: 10.0.0.1\n" +
547+
" bindPort: 6443\n" +
548+
" ip-10-0-0-2.ec2.internal:\n" +
549+
" advertiseAddress: 10.0.0.2\n" +
550+
" bindPort: 6443\n" +
551+
" someFieldThatIsAddedInTheFuture: bar\n" +
552+
"apiVersion: kubeadm.k8s.io/vNbetaM\n" +
553+
"kind: ClusterStatus\n"))
554+
555+
},
556+
},
557+
{
558+
// the node to be removed is ip-10-0-0-3.ec2.internal since the
559+
// other two have nodes
560+
name: "successfully removes the etcd member without a node for Kubernetes version >= 1.22.0",
561+
kubernetesVersion: minKubernetesVersionWithoutClusterStatus, // Kubernetes version >= 1.22.0 should not manage ClusterStatus
562+
objs: []runtime.Object{node1.DeepCopy(), node2.DeepCopy(), kubeadmConfigWithoutClusterStatus.DeepCopy()},
563+
nodes: []string{node1.Name, node2.Name},
523564
etcdClientGenerator: &fakeEtcdClientGenerator{
524565
forNodesClient: &etcd.Client{
525566
EtcdClient: fakeEtcdClient,
526567
},
527568
},
528569
expectErr: false,
529-
assert: func(g *WithT) {
570+
assert: func(g *WithT, c client.Client) {
530571
g.Expect(fakeEtcdClient.RemovedMember).To(Equal(uint64(3)))
531572
},
532573
},
@@ -559,15 +600,15 @@ kind: ClusterStatus`,
559600
etcdClientGenerator: tt.etcdClientGenerator,
560601
}
561602
ctx := context.TODO()
562-
_, err := w.ReconcileEtcdMembers(ctx, tt.nodes)
603+
_, err := w.ReconcileEtcdMembers(ctx, tt.nodes, tt.kubernetesVersion)
563604
if tt.expectErr {
564605
g.Expect(err).To(HaveOccurred())
565606
return
566607
}
567608
g.Expect(err).ToNot(HaveOccurred())
568609

569610
if tt.assert != nil {
570-
tt.assert(g)
611+
tt.assert(g, testEnv.Client)
571612
}
572613
})
573614
}

0 commit comments

Comments
 (0)