Skip to content

Commit fb0f5cc

Browse files
committed
Improve health check logic in getClusterHealthStatus function to handle shutdown delay or missing readyz endpoint
Signed-off-by: huangyanfeng <[email protected]>
1 parent ba5ffba commit fb0f5cc

File tree

2 files changed

+36
-2
lines changed

2 files changed

+36
-2
lines changed

pkg/controllers/status/cluster_status_controller.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -435,8 +435,11 @@ func (c *ClusterStatusController) initLeaseController(cluster *clusterv1alpha1.C
435435

436436
func getClusterHealthStatus(clusterClient *util.ClusterClient) (online, healthy bool) {
437437
healthStatus, err := healthEndpointCheck(clusterClient.KubeClient, "/readyz")
438-
if err != nil && healthStatus == http.StatusNotFound {
439-
// do health check with healthz endpoint if the readyz endpoint is not installed in member cluster
438+
if err != nil && (healthStatus == http.StatusInternalServerError || healthStatus == http.StatusNotFound) {
439+
// do health check with healthz endpoint in two cases:
440+
// 1. StatusInternalServerError(500): When the server is configured with --shutdown-delay-duration,
441+
// /readyz returns failure but /healthz still serves success
442+
// 2. StatusNotFound(404): When the readyz endpoint is not installed in member cluster
440443
healthStatus, err = healthEndpointCheck(clusterClient.KubeClient, "/healthz")
441444
}
442445

pkg/controllers/status/cluster_status_controller_test.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,6 +1105,28 @@ func mockServer(statusCode int, existError bool) *httptest.Server {
11051105
return server
11061106
}
11071107

1108+
// mockNotReadyServer starts a test HTTP server that imitates an API server
// which reports "not ready" while still being alive: requests to /readyz are
// answered with 500 Internal Server Error, and every other path is answered
// with 200 OK and the body "test".
//
// The caller is responsible for closing the returned server.
func mockNotReadyServer() *httptest.Server {
	const respBody = "test"

	return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, request *http.Request) {
		if request.URL.Path == "/readyz" {
			http.Error(w, "An error occurred", http.StatusInternalServerError)
			return
		}

		w.WriteHeader(http.StatusOK)
		// Write the body from a constant on every request. A reader shared
		// across requests would be drained by the first one (leaving later
		// responses empty) and is not safe to read from handlers that the
		// server runs concurrently.
		if _, err := io.WriteString(w, respBody); err != nil {
			fmt.Printf("failed to copy, err: %v", err)
		}
	}))
}
1129+
11081130
func TestHealthEndpointCheck(t *testing.T) {
11091131
server := mockServer(http.StatusOK, false)
11101132
defer server.Close()
@@ -1141,4 +1163,13 @@ func TestGetClusterHealthStatus(t *testing.T) {
11411163
assert.Equal(t, true, online)
11421164
assert.Equal(t, false, healthy)
11431165
})
1166+
1167+
t.Run("readyz return error and StatusInternalServerError healthz return http.StatusOK", func(t *testing.T) {
1168+
server := mockNotReadyServer()
1169+
defer server.Close()
1170+
clusterClient := generateClusterClient(server.URL)
1171+
online, healthy := getClusterHealthStatus(clusterClient)
1172+
assert.Equal(t, true, online)
1173+
assert.Equal(t, true, healthy)
1174+
})
11441175
}

0 commit comments

Comments
 (0)