Skip to content

Commit 40249cd

Browse files
authored
Merge pull request #459 from Iceber/health_checker_with_standalone_tcp
use standalone tcp for health checker
2 parents 5cdd2e5 + d08c4b3 commit 40249cd

File tree

3 files changed

+51
-24
lines changed

3 files changed

+51
-24
lines changed

pkg/synchromanager/clustersynchro/cluster_monitor.go

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ import (
88
apierrors "k8s.io/apimachinery/pkg/api/errors"
99
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1010
"k8s.io/apimachinery/pkg/util/wait"
11-
"k8s.io/client-go/kubernetes"
11+
"k8s.io/client-go/discovery"
12+
"k8s.io/client-go/rest"
1213
"k8s.io/klog/v2"
1314

1415
clusterv1alpha2 "github.com/clusterpedia-io/api/cluster/v1alpha2"
@@ -36,7 +37,9 @@ func (synchro *ClusterSynchro) checkClusterHealthy() {
3637
defer synchro.updateStatus()
3738
lastReadyCondition := synchro.healthyCondition.Load().(metav1.Condition)
3839

39-
if ready, err := checkKubeHealthy(synchro.clusterclient); !ready {
40+
ctx, cancel := context.WithTimeout(context.TODO(), 5*time.Second)
41+
defer cancel()
42+
if ready, err := synchro.healthChecker.Ready(ctx); !ready {
4043
// if the last status was not ConditionTrue, stop resource synchros
4144
if lastReadyCondition.Status != metav1.ConditionTrue {
4245
synchro.stopRunner()
@@ -86,17 +89,25 @@ func (synchro *ClusterSynchro) checkClusterHealthy() {
8689
synchro.healthyCondition.Store(condition)
8790
}
8891

89-
// TODO(iceber): resolve for more detailed error
90-
func checkKubeHealthy(client kubernetes.Interface) (bool, error) {
91-
ctx, cancel := context.WithTimeout(context.TODO(), 5*time.Second)
92-
defer cancel()
92+
type healthChecker struct {
93+
client rest.Interface
94+
}
9395

94-
_, err := client.Discovery().RESTClient().Get().AbsPath("/readyz").DoRaw(ctx)
95-
if apierrors.IsNotFound(err) {
96-
_, err = client.Discovery().RESTClient().Get().AbsPath("/healthz").DoRaw(ctx)
97-
}
96+
func newHealthChecker(config *rest.Config) (*healthChecker, error) {
97+
client, err := discovery.NewDiscoveryClientForConfig(config)
9898
if err != nil {
99-
return false, err
99+
return nil, err
100+
}
101+
return &healthChecker{
102+
client: client.RESTClient(),
103+
}, nil
104+
}
105+
106+
// TODO(iceber): resolve for more detailed error
107+
func (checker *healthChecker) Ready(ctx context.Context) (bool, error) {
108+
_, err := checker.client.Get().AbsPath("/readyz").DoRaw(ctx)
109+
if apierrors.IsNotFound(err) {
110+
_, err = checker.client.Get().AbsPath("/healthz").DoRaw(ctx)
100111
}
101-
return true, nil
112+
return err == nil, err
102113
}

pkg/synchromanager/clustersynchro/cluster_synchro.go

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@ package clustersynchro
33
import (
44
"context"
55
"fmt"
6+
"net"
67
"sync"
78
"sync/atomic"
9+
"time"
810

911
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1012
"k8s.io/apimachinery/pkg/runtime/schema"
1113
"k8s.io/apimachinery/pkg/util/wait"
12-
"k8s.io/client-go/kubernetes"
1314
"k8s.io/client-go/rest"
1415
"k8s.io/klog/v2"
1516

@@ -18,6 +19,8 @@ import (
1819
"github.com/clusterpedia-io/clusterpedia/pkg/storage"
1920
"github.com/clusterpedia-io/clusterpedia/pkg/storageconfig"
2021
"github.com/clusterpedia-io/clusterpedia/pkg/synchromanager/clustersynchro/informer"
22+
"github.com/clusterpedia-io/clusterpedia/pkg/synchromanager/features"
23+
clusterpediafeature "github.com/clusterpedia-io/clusterpedia/pkg/utils/feature"
2124
)
2225

2326
type ClusterSynchro struct {
@@ -27,7 +30,7 @@ type ClusterSynchro struct {
2730
ClusterStatusUpdater ClusterStatusUpdater
2831

2932
storage storage.StorageFactory
30-
clusterclient kubernetes.Interface
33+
healthChecker *healthChecker
3134
dynamicDiscovery discovery.DynamicDiscoveryInterface
3235
listerWatcherFactory informer.DynamicListerWatcherFactory
3336

@@ -64,11 +67,6 @@ type ClusterStatusUpdater interface {
6467
type RetryableError error
6568

6669
func New(name string, config *rest.Config, storage storage.StorageFactory, updater ClusterStatusUpdater) (*ClusterSynchro, error) {
67-
clusterclient, err := kubernetes.NewForConfig(config)
68-
if err != nil {
69-
return nil, fmt.Errorf("failed to create a cluster client: %w", err)
70-
}
71-
7270
dynamicDiscovery, err := discovery.NewDynamicDiscoveryManager(name, config)
7371
if err != nil {
7472
return nil, RetryableError(fmt.Errorf("failed to create dynamic discovery manager: %w", err))
@@ -84,13 +82,25 @@ func New(name string, config *rest.Config, storage storage.StorageFactory, updat
8482
return nil, fmt.Errorf("failed to create lister watcher factory: %w", err)
8583
}
8684

85+
checkerConfig := *config
86+
if clusterpediafeature.FeatureGate.Enabled(features.HealthCheckerWithStandaloneTCP) {
87+
checkerConfig.Dial = (&net.Dialer{
88+
Timeout: 30 * time.Second,
89+
KeepAlive: 30 * time.Second,
90+
}).DialContext
91+
}
92+
healthChecker, err := newHealthChecker(&checkerConfig)
93+
if err != nil {
94+
return nil, fmt.Errorf("failed to create a cluster health checker: %w", err)
95+
}
96+
8797
synchro := &ClusterSynchro{
8898
name: name,
8999
RESTConfig: config,
90100
ClusterStatusUpdater: updater,
91101
storage: storage,
92102

93-
clusterclient: clusterclient,
103+
healthChecker: healthChecker,
94104
dynamicDiscovery: dynamicDiscovery,
95105
listerWatcherFactory: listWatchFactory,
96106

pkg/synchromanager/features/features.go

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,11 @@ const (
4040
// owner: @iceber
4141
// alpha: v0.3.0
4242
AllowSyncAllResources featuregate.Feature = "AllowSyncAllResources"
43+
44+
// HealthCheckerWithStandaloneTCP is a feature gate for the cluster health checker to use standalone tcp
45+
// owner: @iceber
46+
// alpha: v0.6.0
47+
HealthCheckerWithStandaloneTCP featuregate.Feature = "HealthCheckerWithStandaloneTCP"
4348
)
4449

4550
func init() {
@@ -49,8 +54,9 @@ func init() {
4954
// defaultClusterSynchroManagerFeatureGates consists of all known clustersynchro-manager-specific feature keys.
5055
// To add a new feature, define a key for it above and add it here.
5156
var defaultClusterSynchroManagerFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{
52-
PruneManagedFields: {Default: true, PreRelease: featuregate.Beta},
53-
PruneLastAppliedConfiguration: {Default: true, PreRelease: featuregate.Beta},
54-
AllowSyncAllCustomResources: {Default: false, PreRelease: featuregate.Alpha},
55-
AllowSyncAllResources: {Default: false, PreRelease: featuregate.Alpha},
57+
PruneManagedFields: {Default: true, PreRelease: featuregate.Beta},
58+
PruneLastAppliedConfiguration: {Default: true, PreRelease: featuregate.Beta},
59+
AllowSyncAllCustomResources: {Default: false, PreRelease: featuregate.Alpha},
60+
AllowSyncAllResources: {Default: false, PreRelease: featuregate.Alpha},
61+
HealthCheckerWithStandaloneTCP: {Default: false, PreRelease: featuregate.Alpha},
5662
}

0 commit comments

Comments
 (0)