Skip to content

Commit 74b60da

Browse files
committed
CD health check: adjust naming and log messages for clarity
Signed-off-by: Dr. Jan-Philip Gehrcke <[email protected]>
1 parent 2b7e899 commit 74b60da

File tree

2 files changed

+21
-14
lines changed

2 files changed

+21
-14
lines changed

cmd/compute-domain-kubelet-plugin/driver.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,9 +122,9 @@ func NewDriver(ctx context.Context, config *Config) (*driver, error) {
122122
return nil, err
123123
}
124124

125-
healthcheck, err := startHealthcheck(ctx, config)
125+
healthcheck, err := setupHealthcheckPrimitives(ctx, config)
126126
if err != nil {
127-
return nil, fmt.Errorf("start healthcheck: %w", err)
127+
return nil, fmt.Errorf("error setting up healthcheck primitives: %w", err)
128128
}
129129
driver.healthcheck = healthcheck
130130

cmd/compute-domain-kubelet-plugin/health.go

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -46,16 +46,17 @@ type healthcheck struct {
4646
draClient drapb.DRAPluginClient
4747
}
4848

49-
func startHealthcheck(ctx context.Context, config *Config) (*healthcheck, error) {
49+
func setupHealthcheckPrimitives(ctx context.Context, config *Config) (*healthcheck, error) {
5050
port := config.flags.healthcheckPort
5151
if port < 0 {
5252
return nil, nil
5353
}
5454

55+
// Bind on all available interfaces.
5556
addr := net.JoinHostPort("", strconv.Itoa(port))
5657
lis, err := net.Listen("tcp", addr)
5758
if err != nil {
58-
return nil, fmt.Errorf("failed to listen for healthcheck service at %s: %w", addr, err)
59+
return nil, fmt.Errorf("failed to listen on %s: %w", addr, err)
5960
}
6061

6162
regSockPath := (&url.URL{
@@ -64,26 +65,28 @@ func startHealthcheck(ctx context.Context, config *Config) (*healthcheck, error)
6465
// are enabled and the filename includes a uid.
6566
Path: path.Join(config.flags.kubeletRegistrarDirectoryPath, DriverName+"-reg.sock"),
6667
}).String()
67-
klog.V(6).Infof("connecting to registration socket path=%s", regSockPath)
68+
69+
klog.V(6).Infof("Connect to registration socket at %s", regSockPath)
6870
regConn, err := grpc.NewClient(
6971
regSockPath,
7072
grpc.WithTransportCredentials(insecure.NewCredentials()),
7173
)
7274
if err != nil {
73-
return nil, fmt.Errorf("connect to registration socket: %w", err)
75+
return nil, fmt.Errorf("error connecting to registration socket: %w", err)
7476
}
7577

7678
draSockPath := (&url.URL{
7779
Scheme: "unix",
7880
Path: path.Join(config.DriverPluginPath(), "dra.sock"),
7981
}).String()
80-
klog.V(6).Infof("connecting to DRA socket path=%s", draSockPath)
82+
83+
klog.V(6).Infof("Connect to plugin socket at %s", draSockPath)
8184
draConn, err := grpc.NewClient(
8285
draSockPath,
8386
grpc.WithTransportCredentials(insecure.NewCredentials()),
8487
)
8588
if err != nil {
86-
return nil, fmt.Errorf("connect to DRA socket: %w", err)
89+
return nil, fmt.Errorf("error connecting to plugin socket: %w", err)
8790
}
8891

8992
server := grpc.NewServer()
@@ -97,9 +100,10 @@ func startHealthcheck(ctx context.Context, config *Config) (*healthcheck, error)
97100
healthcheck.wg.Add(1)
98101
go func() {
99102
defer healthcheck.wg.Done()
100-
klog.Infof("starting healthcheck service at %s", lis.Addr().String())
103+
klog.Infof("Starting healthcheck server on %s", lis.Addr().String())
101104
if err := server.Serve(lis); err != nil {
102-
klog.Errorf("failed to serve healthcheck service on %s: %v", addr, err)
105+
// Note(JP): let's review if this should be fatal
106+
klog.Errorf("failed to start healthcheck server: %v", err)
103107
}
104108
}()
105109

@@ -108,13 +112,13 @@ func startHealthcheck(ctx context.Context, config *Config) (*healthcheck, error)
108112

109113
func (h *healthcheck) Stop() {
110114
if h.server != nil {
111-
klog.Info("Stopping healthcheck service")
115+
klog.Info("Stopping healthcheck server")
112116
h.server.GracefulStop()
113117
}
114118
h.wg.Wait()
115119
}
116120

117-
// Check implements [grpc_health_v1.HealthServer].
121+
// Check implements [grpc_health_v1.HealthServer.Check].
118122
func (h *healthcheck) Check(ctx context.Context, req *grpc_health_v1.HealthCheckRequest) (*grpc_health_v1.HealthCheckResponse, error) {
119123
knownServices := map[string]struct{}{"": {}, "liveness": {}}
120124
if _, known := knownServices[req.GetService()]; !known {
@@ -125,16 +129,19 @@ func (h *healthcheck) Check(ctx context.Context, req *grpc_health_v1.HealthCheck
125129
Status: grpc_health_v1.HealthCheckResponse_NOT_SERVING,
126130
}
127131

132+
// This simulates the kubelet reaching out to the plugin for discovery
133+
// (towards registering it).
128134
info, err := h.regClient.GetInfo(ctx, &registerapi.InfoRequest{})
129135
if err != nil {
130-
klog.ErrorS(err, "failed to call GetInfo")
136+
klog.ErrorS(err, "failed to call GetInfo on registration socket")
131137
return status, nil
132138
}
133139
klog.V(6).Infof("Successfully invoked GetInfo: %v", info)
134140

141+
// This simulates the kubelet reaching out to the plugin.
135142
_, err = h.draClient.NodePrepareResources(ctx, &drapb.NodePrepareResourcesRequest{})
136143
if err != nil {
137-
klog.ErrorS(err, "failed to call NodePrepareResources")
144+
klog.ErrorS(err, "failed to call NodePrepareResources on plugin socket")
138145
return status, nil
139146
}
140147
klog.V(6).Info("Successfully invoked NodePrepareResources")

0 commit comments

Comments
 (0)