Skip to content

Commit 77bc27f

Browse files
mpignatelli12joshua-kim
authored andcommitted
connectrpc health api
Signed-off-by: Joshua Kim <[email protected]>
1 parent e8c5de4 commit 77bc27f

File tree

7 files changed

+891
-24
lines changed

7 files changed

+891
-24
lines changed
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
// Copyright (C) 2019-2024, Ava Labs, Inc. All rights reserved.
2+
// See the file LICENSE for licensing terms.
3+
4+
package connecthandler
5+
6+
import (
7+
"context"
8+
"time"
9+
10+
"connectrpc.com/connect"
11+
"google.golang.org/protobuf/types/known/timestamppb"
12+
13+
"github.com/ava-labs/avalanchego/api/health"
14+
healthv1 "github.com/ava-labs/avalanchego/proto/pb/health/v1"
15+
)
16+
17+
// NewConnectHealthService returns a ConnectRPC-compatible ConnectHealthService
18+
// that delegates calls to the existing health service implementation.
19+
func NewConnectHealthService(healthService health.Health) *ConnectHealthService {
20+
return &ConnectHealthService{
21+
Service: healthService,
22+
}
23+
}
24+
25+
type ConnectHealthService struct {
26+
Service health.Health
27+
}
28+
29+
func (s *ConnectHealthService) Readiness(
30+
_ context.Context,
31+
req *connect.Request[healthv1.APIArgs],
32+
) (*connect.Response[healthv1.APIReply], error) {
33+
// The health.Health interface has different methods than the service.
34+
// We need to use the methods of the Health interface.
35+
checks, healthy := s.Service.Readiness(req.Msg.Tags...)
36+
37+
// Convert the health.Result map to protobuf format
38+
checksProto := make(map[string]*healthv1.Result, len(checks))
39+
for name, check := range checks {
40+
checksProto[name] = convertResult(check)
41+
}
42+
43+
out := &healthv1.APIReply{
44+
Checks: checksProto,
45+
Healthy: healthy,
46+
}
47+
48+
return connect.NewResponse(out), nil
49+
}
50+
51+
func (s *ConnectHealthService) Health(
52+
_ context.Context,
53+
req *connect.Request[healthv1.APIArgs],
54+
) (*connect.Response[healthv1.APIReply], error) {
55+
// The health.Health interface has different methods than the service.
56+
// We need to use the methods of the Health interface.
57+
checks, healthy := s.Service.Health(req.Msg.Tags...)
58+
59+
// Convert the health.Result map to protobuf format
60+
checksProto := make(map[string]*healthv1.Result, len(checks))
61+
for name, check := range checks {
62+
checksProto[name] = convertResult(check)
63+
}
64+
65+
out := &healthv1.APIReply{
66+
Checks: checksProto,
67+
Healthy: healthy,
68+
}
69+
70+
return connect.NewResponse(out), nil
71+
}
72+
73+
func (s *ConnectHealthService) Liveness(
74+
_ context.Context,
75+
req *connect.Request[healthv1.APIArgs],
76+
) (*connect.Response[healthv1.APIReply], error) {
77+
// The health.Health interface has different methods than the service.
78+
// We need to use the methods of the Health interface.
79+
checks, healthy := s.Service.Liveness(req.Msg.Tags...)
80+
81+
// Convert the health.Result map to protobuf format
82+
checksProto := make(map[string]*healthv1.Result, len(checks))
83+
for name, check := range checks {
84+
checksProto[name] = convertResult(check)
85+
}
86+
87+
out := &healthv1.APIReply{
88+
Checks: checksProto,
89+
Healthy: healthy,
90+
}
91+
92+
return connect.NewResponse(out), nil
93+
}
94+
95+
// convertResult transforms a health.Result into a healthv1.Result
96+
func convertResult(r health.Result) *healthv1.Result {
97+
result := &healthv1.Result{
98+
Message: "", // Will be set below if details exist
99+
ContiguousFailures: r.ContiguousFailures,
100+
DurationNs: r.Duration.Nanoseconds(),
101+
}
102+
103+
// Handle message field (from the Details field)
104+
if r.Details != nil {
105+
if msg, ok := r.Details.(string); ok {
106+
result.Message = msg
107+
}
108+
}
109+
110+
// Set error field if exists
111+
if r.Error != nil {
112+
result.Error = *r.Error
113+
}
114+
115+
// Set timestamp if not zero
116+
if !r.Timestamp.IsZero() {
117+
result.Timestamp = timestamppb.New(r.Timestamp)
118+
}
119+
120+
// Set time of first failure if exists and not nil
121+
if r.TimeOfFirstFailure != nil {
122+
result.TimeOfFirstFailure = timestamppb.New(*r.TimeOfFirstFailure)
123+
} else {
124+
// Use zero time if TimeOfFirstFailure is nil
125+
result.TimeOfFirstFailure = timestamppb.New(time.Time{})
126+
}
127+
128+
return result
129+
}

node/node.go

Lines changed: 42 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,17 @@
44
package node
55

66
import (
7+
"connectrpc.com/grpcreflect"
78
"context"
89
"crypto"
910
"crypto/tls"
1011
"encoding/json"
1112
"errors"
1213
"fmt"
14+
healthhandler "github.com/ava-labs/avalanchego/api/health/connecthandler"
15+
infohandler "github.com/ava-labs/avalanchego/api/info/connecthandler"
16+
"github.com/ava-labs/avalanchego/proto/pb/health/v1/healthv1connect"
17+
"github.com/ava-labs/avalanchego/proto/pb/info/v1/infov1connect"
1318
"io"
1419
"io/fs"
1520
"net"
@@ -21,7 +26,6 @@ import (
2126
"sync"
2227
"time"
2328

24-
"connectrpc.com/grpcreflect"
2529
"github.com/prometheus/client_golang/prometheus"
2630
"github.com/prometheus/client_golang/prometheus/collectors"
2731
"github.com/prometheus/client_golang/prometheus/promhttp"
@@ -48,7 +52,6 @@ import (
4852
"github.com/ava-labs/avalanchego/network/dialer"
4953
"github.com/ava-labs/avalanchego/network/peer"
5054
"github.com/ava-labs/avalanchego/network/throttling"
51-
"github.com/ava-labs/avalanchego/proto/pb/info/v1/infov1connect"
5255
"github.com/ava-labs/avalanchego/snow"
5356
"github.com/ava-labs/avalanchego/snow/networking/benchlist"
5457
"github.com/ava-labs/avalanchego/snow/networking/router"
@@ -80,7 +83,6 @@ import (
8083
"github.com/ava-labs/avalanchego/vms/registry"
8184
"github.com/ava-labs/avalanchego/vms/rpcchainvm/runtime"
8285

83-
connecthandler "github.com/ava-labs/avalanchego/api/info/connect_handler"
8486
databasefactory "github.com/ava-labs/avalanchego/database/factory"
8587
avmconfig "github.com/ava-labs/avalanchego/vms/avm/config"
8688
platformconfig "github.com/ava-labs/avalanchego/vms/platformvm/config"
@@ -266,6 +268,26 @@ func New(
266268
return nil, fmt.Errorf("couldn't initialize indexer: %w", err)
267269
}
268270

271+
// mount InfoService onto the shared gRPC mux
272+
infoPattern, infoHandler := infov1connect.NewInfoServiceHandler(
273+
infohandler.NewConnectInfoService(n.info),
274+
)
275+
n.grpcMux.Handle(infoPattern, infoHandler)
276+
277+
// mount HealthService onto the same mux
278+
healthPattern, healthHandler := healthv1connect.NewHealthServiceHandler(
279+
healthhandler.NewConnectHealthService(n.health),
280+
)
281+
n.grpcMux.Handle(healthPattern, healthHandler)
282+
283+
// register reflection for both services, once
284+
reflector := grpcreflect.NewStaticReflector(
285+
infov1connect.InfoServiceName,
286+
healthv1connect.HealthServiceName,
287+
)
288+
reflectPattern, reflectHandler := grpcreflect.NewHandlerV1(reflector)
289+
n.grpcMux.Handle(reflectPattern, reflectHandler)
290+
269291
n.health.Start(context.TODO(), n.Config.HealthCheckFreq)
270292
n.initProfiler()
271293

@@ -310,6 +332,9 @@ type Node struct {
310332
// Monitors node health and runs health checks
311333
health health.Health
312334

335+
// Info service instance
336+
info *info.Info
337+
313338
// Build and parse messages, for both network layer and chain manager
314339
msgCreator message.Creator
315340

@@ -352,6 +377,9 @@ type Node struct {
352377
// Handles HTTP API calls
353378
APIServer server.Server
354379

380+
// Shared HTTP/2 mux for ConnectRPC services
381+
grpcMux *http.ServeMux
382+
355383
// This node's configuration
356384
Config *node.Config
357385

@@ -1010,6 +1038,10 @@ func (n *Node) initAPIServer() error {
10101038
n.Config.HTTPConfig.HTTPConfig,
10111039
n.Config.HTTPAllowedHosts,
10121040
)
1041+
1042+
// Set up a shared HTTP/2 mux for all ConnectRPC services
1043+
n.grpcMux = http.NewServeMux()
1044+
n.APIServer.AddHTTP2Handler(n.grpcMux)
10131045
return err
10141046
}
10151047

@@ -1353,7 +1385,7 @@ func (n *Node) initInfoAPI() error {
13531385
return fmt.Errorf("problem creating proof of possession: %w", err)
13541386
}
13551387

1356-
service, info, err := info.NewService(
1388+
service, infoInst, err := info.NewService(
13571389
info.Parameters{
13581390
Version: version.CurrentApp,
13591391
NodeID: n.ID,
@@ -1376,29 +1408,12 @@ func (n *Node) initInfoAPI() error {
13761408
if err != nil {
13771409
return err
13781410
}
1379-
1380-
// Register the InfoService handler and gRPC reflection handler
1381-
infoPattern, infoHandler := infov1connect.NewInfoServiceHandler(connecthandler.NewConnectInfoService(info))
1382-
1383-
// Register the gRPC reflection handler for InfoService
1384-
refPattern, refHandler := grpcreflect.NewHandlerV1(
1385-
grpcreflect.NewStaticReflector(infov1connect.InfoServiceName),
1386-
)
1387-
1388-
// Create a new ServeMux to handle the InfoService and reflection handlers
1389-
mux := http.NewServeMux()
1390-
mux.Handle(infoPattern, infoHandler)
1391-
mux.Handle(refPattern, refHandler)
1392-
1393-
if !n.APIServer.AddHeaderRoute("info", mux) {
1394-
// TODO do not panic
1395-
panic("could not add info route")
1396-
}
1411+
n.info = infoInst
13971412

13981413
return n.APIServer.AddRoute(
13991414
service,
14001415
"info",
1401-
"info",
1416+
"",
14021417
)
14031418
}
14041419

@@ -1413,10 +1428,13 @@ func (n *Node) initHealthAPI() error {
14131428
return err
14141429
}
14151430

1416-
n.health, err = health.New(n.Log, healthReg)
1431+
// Create the health service
1432+
healthService, err := health.New(n.Log, healthReg)
14171433
if err != nil {
14181434
return err
14191435
}
1436+
// Store the health service in the node's health field
1437+
n.health = healthService
14201438

14211439
if !n.Config.HealthAPIEnabled {
14221440
n.Log.Info("skipping health API initialization because it has been disabled")

proto/health/v1/service.proto

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
syntax = "proto3";
2+
package health.v1;
3+
4+
import "google/protobuf/timestamp.proto";
5+
6+
option go_package = "github.com/ava-labs/avalanchego/proto/pb/health/v1;healthv1";
7+
8+
// HealthService is the API service for health checks on a node
9+
service HealthService {
10+
rpc Readiness(APIArgs) returns (APIReply);
11+
rpc Liveness(APIArgs) returns (APIReply);
12+
rpc Health(APIArgs) returns (APIReply);
13+
}
14+
15+
// Result is the result of a health check.
16+
message Result {
17+
string message = 1;
18+
// Error is the string representation of the error returned by the failing
19+
// HealthCheck. The value is nil if the check passed.
20+
string error = 2;
21+
// Timestamp of the last HealthCheck.
22+
google.protobuf.Timestamp timestamp = 3;
23+
// Duration is the amount of time this HealthCheck last took to evaluate.
24+
int64 duration_ns = 4;
25+
// ContiguousFailures the HealthCheck has returned.
26+
int64 contiguous_failures = 5;
27+
// TimeOfFirstFailure of the HealthCheck,
28+
google.protobuf.Timestamp time_of_first_failure = 6;
29+
}
30+
31+
// APIReply is the response for Readiness, Health, and Liveness.
32+
message APIReply {
33+
map<string, Result> checks = 1;
34+
bool healthy = 2;
35+
}
36+
37+
// APIArgs is the arguments for Readiness, Health, and Liveness.
38+
message APIArgs {
39+
repeated string tags = 1;
40+
}
41+

0 commit comments

Comments
 (0)