Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions pkg/controllers/status/cluster_status_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -435,8 +435,11 @@ func (c *ClusterStatusController) initLeaseController(cluster *clusterv1alpha1.C

func getClusterHealthStatus(clusterClient *util.ClusterClient) (online, healthy bool) {
healthStatus, err := healthEndpointCheck(clusterClient.KubeClient, "/readyz")
if err != nil && healthStatus == http.StatusNotFound {
// do health check with healthz endpoint if the readyz endpoint is not installed in member cluster
if err != nil && (healthStatus == http.StatusInternalServerError || healthStatus == http.StatusNotFound) {
// do health check with healthz endpoint in two cases:
// 1. StatusInternalServerError(500): When the server is configured with --shutdown-delay-duration,
// /readyz returns failure but /healthz still serves success
// 2. StatusNotFound(404): When the readyz endpoint is not installed in member cluster
healthStatus, err = healthEndpointCheck(clusterClient.KubeClient, "/healthz")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My concern is that the /healthz has been deprecated since v1.16, it can only be used as a backoff.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My concern is that the /healthz has been deprecated since v1.16, it can only be used as a backoff.

health-checks

The Kubernetes API server provides 3 API endpoints (healthz, livez and readyz) to indicate the current status of the API server. The healthz endpoint is deprecated (since Kubernetes v1.16), and we should use the more specific livez and readyz endpoints instead.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that's exactly my concern. We don't know when /healthz will be removed, and then will break this solution.

}

Expand Down
31 changes: 31 additions & 0 deletions pkg/controllers/status/cluster_status_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1105,6 +1105,28 @@ func mockServer(statusCode int, existError bool) *httptest.Server {
return server
}

func mockNotReadyServer() *httptest.Server {
respBody := "test"
resp := &http.Response{
StatusCode: http.StatusOK,
Body: io.NopCloser(bytes.NewBufferString(respBody)),
}
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, request *http.Request) {
if request.URL.Path == "/readyz" {
statusCode := http.StatusInternalServerError
errorMessage := "An error occurred"
http.Error(w, errorMessage, statusCode)
} else {
w.WriteHeader(resp.StatusCode)
_, err := io.Copy(w, resp.Body)
if err != nil {
fmt.Printf("failed to copy, err: %v", err)
}
}
}))
return server
}

func TestHealthEndpointCheck(t *testing.T) {
server := mockServer(http.StatusOK, false)
defer server.Close()
Expand Down Expand Up @@ -1141,4 +1163,13 @@ func TestGetClusterHealthStatus(t *testing.T) {
assert.Equal(t, true, online)
assert.Equal(t, false, healthy)
})

t.Run("readyz return error and StatusInternalServerError healthz return http.StatusOK", func(t *testing.T) {
server := mockNotReadyServer()
defer server.Close()
clusterClient := generateClusterClient(server.URL)
online, healthy := getClusterHealthStatus(clusterClient)
assert.Equal(t, true, online)
assert.Equal(t, true, healthy)
})
}