From 44f803d9517e7b34493c651dd6ee41970adeca7e Mon Sep 17 00:00:00 2001 From: Xavier Lange Date: Wed, 17 Jun 2026 16:26:26 -0400 Subject: [PATCH] fix: stop swallowing the client-certificate error When an EtcdCluster requests TLS, fetchAndValidateState provisions a client certificate for the operator via createClientCertificate. The error from that call was logged and then ignored, so reconciliation continued even though the credential the data path depends on was never created. Return a requeue with backoff (the existing requeueDuration, matching the sibling error paths in this function) on failure so the provisioning is retried instead of silently swallowed. Adds a unit test driving fetchAndValidateState with a fake client whose scheme omits the cert-manager types, forcing the certificate lookup to fail, and asserts the result is a non-zero RequeueAfter and that no reconcileState is returned (reconcile does not proceed past cert provisioning). Refs #370 Co-Authored-By: Claude Opus 4.8 Signed-off-by: Xavier Lange --- internal/controller/etcdcluster_controller.go | 6 ++- .../controller/etcdcluster_controller_test.go | 44 +++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/internal/controller/etcdcluster_controller.go b/internal/controller/etcdcluster_controller.go index 867886a3..dac5d34d 100644 --- a/internal/controller/etcdcluster_controller.go +++ b/internal/controller/etcdcluster_controller.go @@ -136,7 +136,11 @@ func (r *EtcdClusterReconciler) fetchAndValidateState(ctx context.Context, req c // Ensure the operator has TLS credentials when the cluster requests TLS. if ec.Spec.TLS != nil { if err := createClientCertificate(ctx, ec, r.Client); err != nil { - logger.Error(err, "Failed to create Client Certificate.") + // The data path relies on this client certificate existing; do not + // proceed with reconciliation when it cannot be provisioned. + // Requeue with backoff so the failure is retried instead of swallowed. + logger.Error(err, "Failed to create Client Certificate. Requesting requeue") + return nil, ctrl.Result{RequeueAfter: requeueDuration}, nil } } else { // TODO: instead of logging error, set default autoConfig diff --git a/internal/controller/etcdcluster_controller_test.go b/internal/controller/etcdcluster_controller_test.go index 892e321a..7db3583a 100644 --- a/internal/controller/etcdcluster_controller_test.go +++ b/internal/controller/etcdcluster_controller_test.go @@ -410,6 +410,50 @@ func TestFetchAndValidateState(t *testing.T) { } } +// TestFetchAndValidateStateClientCertificateError verifies that when the cluster +// requests TLS but the client certificate cannot be provisioned, +// fetchAndValidateState requeues with backoff instead of swallowing the error and +// proceeding. The fake client's scheme intentionally omits the cert-manager +// types, so the certificate lookup performed by createClientCertificate fails +// with a non-NotFound error, exercising the provisioning failure path. +func TestFetchAndValidateStateClientCertificateError(t *testing.T) { + scheme := runtime.NewScheme() + _ = corev1.AddToScheme(scheme) + _ = appsv1.AddToScheme(scheme) + _ = ecv1alpha1.AddToScheme(scheme) + // Note: cert-manager's certv1 scheme is deliberately NOT registered. + + ec := &ecv1alpha1.EtcdCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "etcd", + Namespace: "default", + UID: "1", + }, + Spec: ecv1alpha1.EtcdClusterSpec{ + Size: 1, + Version: "3.5.17", + TLS: &ecv1alpha1.TLSCertificate{ + Provider: "cert-manager", + }, + }, + } + + ctx := t.Context() + + fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(ec).Build() + r := &EtcdClusterReconciler{Client: fakeClient, Scheme: scheme} + + req := ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}} + state, res, err := r.fetchAndValidateState(ctx, req) + + // Reconciliation must not proceed past cert provisioning: no state is + // returned, and the result is a requeue with the standard backoff. + assert.Nil(t, state, "reconcile should not proceed past client-certificate provisioning") + assert.NoError(t, err, "the failure should be surfaced via requeue, not a returned error") + assert.Equal(t, ctrl.Result{RequeueAfter: requeueDuration}, res, "expected a requeue with backoff") + assert.NotZero(t, res.RequeueAfter, "RequeueAfter must be non-zero") +} + // TestBootstrapStatefulSet outlines tests for ensuring StatefulSet and Service // creation and bootstrap logic. func TestBootstrapStatefulSet(t *testing.T) {