Skip to content

Commit be5ff73

Browse files
committed
Improve health check logic in getClusterHealthStatus function to handle shutdown delay or missing readyz endpoint
Signed-off-by: huangyanfeng <[email protected]>
1 parent ba5ffba commit be5ff73

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

pkg/controllers/status/cluster_status_controller.go

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -435,9 +435,11 @@ func (c *ClusterStatusController) initLeaseController(cluster *clusterv1alpha1.C
435435

436436
func getClusterHealthStatus(clusterClient *util.ClusterClient) (online, healthy bool) {
437437
healthStatus, err := healthEndpointCheck(clusterClient.KubeClient, "/readyz")
438-
if err != nil && healthStatus == http.StatusNotFound {
439-
// do health check with healthz endpoint if the readyz endpoint is not installed in member cluster
440-
healthStatus, err = healthEndpointCheck(clusterClient.KubeClient, "/healthz")
438+
if err != nil && (healthStatus == http.StatusInternalServerError || healthStatus == http.StatusNotFound) {
439+
// do health check with healthz endpoint in two cases:
440+
// 1. StatusInternalServerError(500): When the server is configured with --shutdown-delay-duration, /readyz returns failure but /healthz still serves success
441+
// 2. StatusNotFound(404): When the readyz endpoint is not installed in member cluster
442+
healthStatus, err = healthEndpointCheck(clusterClient.KubeClient, "/healthz")
441443
}
442444

443445
if err != nil {

0 commit comments

Comments
 (0)