FEATURE: Add Health API.

This commit is contained in:
Tyler Smith 2020-05-05 22:44:45 -07:00
parent e21601b4fe
commit fcd13e7d90
No known key found for this signature in database
GPG Key ID: CA38F1A9BE0EC890
6 changed files with 191 additions and 2 deletions

74
api/health/checks.go Normal file
View File

@ -0,0 +1,74 @@
// (c) 2020, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.
package health
import (
"errors"
"time"
)
// ErrHeartbeatNotDetected is the sentinel error reported by a CheckFn built
// with HeartbeatCheckFn when the latest heartbeat is older than the allowed
// duration.
var ErrHeartbeatNotDetected = errors.New("heartbeat not detected")
// CheckFn performs one health check. It returns optional status information
// about the check together with an error indicating health (nil) or non-health
// (non-nil).
type CheckFn func() (interface{}, error)
// Check defines a single health check that we want to monitor and consider as
// part of our wider healthiness. It bundles the function to run with the
// scheduling options that control when it is executed.
type Check struct {
// Name is the identifier for this check and must be unique among all Checks
Name string
// CheckFn is the function to call to perform the health check
CheckFn CheckFn
// ExecutionPeriod is the duration to wait between executions of this Check
ExecutionPeriod time.Duration
// InitialDelay is the duration to wait before executing the first time
InitialDelay time.Duration
// InitiallyPassing is whether or not to consider the Check healthy before the
// initial execution
InitiallyPassing bool
}
// gosundheitCheck adapts a name and a CheckFn to the Check interface expected
// by the go-sundheit library (the imported package is also named "health").
// NOTE: it is constructed with an unkeyed literal elsewhere in this package,
// so the field order below must not change.
type gosundheitCheck struct {
// name is the value returned by Name
name string
// checkFn is the function invoked by Execute
checkFn CheckFn
}
// Name satisfies the go-sundheit Check interface by reporting the name stored
// in this check.
func (c gosundheitCheck) Name() string {
	return c.name
}
// Execute satisfies the go-sundheit Check interface. It runs the wrapped
// checkFn and hands back its details and error unchanged.
func (c gosundheitCheck) Execute() (interface{}, error) {
	details, err := c.checkFn()
	return details, err
}
// heartbeater provides a getter to the most recently observed heartbeat. It is
// the only behavior HeartbeatCheckFn requires from the value it monitors.
type heartbeater interface {
// GetHeartbeat returns the time of the most recent heartbeat
GetHeartbeat() time.Time
}
// HeartbeatCheckFn builds a CheckFn that reports whether hb has pulsed within
// the last max duration.
//
// Every call of the returned CheckFn yields a map that holds the latest
// heartbeat as a Unix timestamp (taken from its UTC time) under the "heartbeat"
// key. The error is ErrHeartbeatNotDetected when that heartbeat plus max lies
// before the current time, and nil otherwise.
func HeartbeatCheckFn(hb heartbeater, max time.Duration) CheckFn {
	return func() (interface{}, error) {
		// Read the heartbeat once so the reported value and the staleness
		// decision are based on the same reading.
		last := hb.GetHeartbeat()
		details := map[string]int64{"heartbeat": last.UTC().Unix()}

		// The check fails once the heartbeat plus the allowed window is in the
		// past. The details are still returned alongside the error.
		if deadline := last.Add(max); deadline.Before(time.Now()) {
			return details, ErrHeartbeatNotDetected
		}
		return details, nil
	}
}

79
api/health/service.go Normal file
View File

@ -0,0 +1,79 @@
// (c) 2020, Ava Labs, Inc. All rights reserved.
// See the file LICENSE for licensing terms.
package health
import (
"net/http"
"time"
"github.com/AppsFlyer/go-sundheit"
"github.com/ava-labs/gecko/snow/engine/common"
"github.com/ava-labs/gecko/utils/json"
"github.com/ava-labs/gecko/utils/logging"
"github.com/gorilla/rpc/v2"
)
// defaultCheckOpts is the template Check copied by RegisterCheckFunc. It only
// sets the execution period (one minute); every other field keeps its zero
// value.
var defaultCheckOpts = Check{
	ExecutionPeriod: time.Minute,
}
// Health observes a set of vital signs and makes them available through an RPC
// service.
// NOTE: NewService builds this struct with an unkeyed literal, so the field
// order below must not change.
type Health struct {
// log receives the log output of this service
log logging.Logger
// health is the go-sundheit instance that checks are registered with and that
// results are read from
health health.Health
}
// NewService returns a Health service that logs to the given logger and is
// backed by a newly created go-sundheit health instance.
func NewService(log logging.Logger) *Health {
	return &Health{
		log:    log,
		health: health.New(),
	}
}
// Handler returns an HTTPHandler providing RPC access to the Health service.
//
// The service is registered under the name "health" on a new RPC server that
// uses one JSON codec for both the "application/json" and
// "application/json;charset=UTF-8" content types. The returned handler is
// configured with common.NoLock.
func (h *Health) Handler() *common.HTTPHandler {
	newServer := rpc.NewServer()
	codec := json.NewCodec()
	newServer.RegisterCodec(codec, "application/json")
	newServer.RegisterCodec(codec, "application/json;charset=UTF-8")

	// RegisterService returns an error when the receiver cannot be exposed
	// over RPC. Handler's signature has no way to return it, so log it rather
	// than silently dropping it.
	if err := newServer.RegisterService(h, "health"); err != nil {
		h.log.Error("Health: failed to register RPC service: %s", err)
	}

	return &common.HTTPHandler{LockOptions: common.NoLock, Handler: newServer}
}
// RegisterHeartbeat registers a check called name, using the default options,
// whose CheckFn verifies that hb produced a heartbeat within the last max
// duration. It returns whatever error RegisterCheckFunc returns.
func (h *Health) RegisterHeartbeat(name string, hb heartbeater, max time.Duration) error {
	checkFn := HeartbeatCheckFn(hb, max)
	return h.RegisterCheckFunc(name, checkFn)
}
// RegisterCheckFunc registers checkFn under name, taking every other option
// from defaultCheckOpts. It returns whatever error RegisterCheck returns.
func (h *Health) RegisterCheckFunc(name string, checkFn CheckFn) error {
	// Work on a copy so the package-level defaults are never modified.
	opts := defaultCheckOpts
	opts.Name, opts.CheckFn = name, checkFn
	return h.RegisterCheck(opts)
}
// RegisterCheck translates c into a go-sundheit Config and registers it with
// the underlying health instance, returning that call's error.
func (h *Health) RegisterCheck(c Check) error {
	cfg := health.Config{
		Check:            gosundheitCheck{name: c.Name, checkFn: c.CheckFn},
		ExecutionPeriod:  c.ExecutionPeriod,
		InitialDelay:     c.InitialDelay,
		InitiallyPassing: c.InitiallyPassing,
	}
	return h.health.RegisterCheck(&cfg)
}
// GetHealthArgs are the arguments for GetHealth. The struct has no fields, so
// the call takes no parameters.
type GetHealthArgs struct{}
// GetHealthReply is the response for GetHealth. Both fields are filled from the
// two values returned by the underlying health instance's Results method.
type GetHealthReply struct {
// Checks holds the individual check results; presumably keyed by check name
// (TODO confirm against go-sundheit)
Checks map[string]health.Result `json:"checks"`
// Healthy is the overall health flag reported alongside the check results
Healthy bool `json:"healthy"`
}
// GetHealth returns a summary of the health of the node.
//
// The request and arguments are ignored. reply is filled with the check results
// and the overall healthy flag returned by the underlying health instance's
// Results method. The returned error is always nil.
func (h *Health) GetHealth(_ *http.Request, _ *GetHealthArgs, reply *GetHealthReply) error {
	h.log.Debug("Health: GetHealth called")
	reply.Checks, reply.Healthy = h.health.Results()
	return nil
}

View File

@ -128,6 +128,7 @@ func init() {
fs.BoolVar(&Config.AdminAPIEnabled, "api-admin-enabled", true, "If true, this node exposes the Admin API")
fs.BoolVar(&Config.KeystoreAPIEnabled, "api-keystore-enabled", true, "If true, this node exposes the Keystore API")
fs.BoolVar(&Config.MetricsAPIEnabled, "api-metrics-enabled", true, "If true, this node exposes the Metrics API")
fs.BoolVar(&Config.HealthAPIEnabled, "api-health-enabled", true, "If true, this node exposes the Health API")
fs.BoolVar(&Config.IPCEnabled, "api-ipcs-enabled", false, "If true, IPCs can be opened")
// Throughput Server

View File

@ -147,6 +147,8 @@ type Handshake struct {
// If any chain is blocked on connecting to peers, track these blockers here
awaitingLock sync.Mutex
awaiting []*networking.AwaitingConnections
lastHeartbeat time.Time
}
// Initialize to the c networking library. This should only be done once during
@ -201,6 +203,8 @@ func (nm *Handshake) Initialize(
net.RegHandler(Version, salticidae.MsgNetworkMsgCallback(C.version), nil)
net.RegHandler(GetPeerList, salticidae.MsgNetworkMsgCallback(C.getPeerList), nil)
net.RegHandler(PeerList, salticidae.MsgNetworkMsgCallback(C.peerList), nil)
nm.heartbeat()
}
// ConnectTo adds the peer as a connection and connects to them.
@ -593,6 +597,16 @@ func (nm *Handshake) checkCompatibility(peerVersion string) bool {
return true
}
// heartbeat records the current time, as reported by nm.clock, as the latest
// heartbeat to signal liveness.
// NOTE(review): lastHeartbeat is written here without any locking visible in
// this hunk; confirm that writers and readers cannot run concurrently.
func (nm *Handshake) heartbeat() {
	now := nm.clock.Time()
	nm.lastHeartbeat = now
}
// GetHeartbeat reports the time stored by the most recent call to heartbeat.
// NOTE(review): lastHeartbeat is read here without any locking visible in this
// hunk; confirm that readers and writers cannot run concurrently.
func (nm *Handshake) GetHeartbeat() time.Time {
	last := nm.lastHeartbeat
	return last
}
// peerHandler notifies a change to the set of connected peers
// connected is true if a new peer is connected
// connected is false if a formerly connected peer has disconnected
@ -667,6 +681,7 @@ func pong(*C.struct_msg_t, *C.struct_msgnetwork_conn_t, unsafe.Pointer) {}
//export getVersion
func getVersion(_msg *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsafe.Pointer) {
HandshakeNet.numGetVersionReceived.Inc()
HandshakeNet.heartbeat()
conn := salticidae.PeerNetworkConnFromC(salticidae.CPeerNetworkConn(_conn))
peer := conn.GetPeerID(false)
@ -679,6 +694,7 @@ func getVersion(_msg *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsaf
//export version
func version(_msg *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsafe.Pointer) {
HandshakeNet.numVersionReceived.Inc()
HandshakeNet.heartbeat()
msg := salticidae.MsgFromC(salticidae.CMsg(_msg))
conn := salticidae.PeerNetworkConnFromC(salticidae.CPeerNetworkConn(_conn))
@ -763,6 +779,7 @@ func version(_msg *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsafe.P
//export getPeerList
func getPeerList(_ *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsafe.Pointer) {
HandshakeNet.numGetPeerlistReceived.Inc()
HandshakeNet.heartbeat()
conn := salticidae.PeerNetworkConnFromC(salticidae.CPeerNetworkConn(_conn))
peer := conn.GetPeerID(false)
@ -775,6 +792,7 @@ func getPeerList(_ *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsafe.
//export peerList
func peerList(_msg *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsafe.Pointer) {
HandshakeNet.numPeerlistReceived.Inc()
HandshakeNet.heartbeat()
msg := salticidae.MsgFromC(salticidae.CMsg(_msg))
build := Builder{}

View File

@ -51,6 +51,7 @@ type Config struct {
AdminAPIEnabled bool
KeystoreAPIEnabled bool
MetricsAPIEnabled bool
HealthAPIEnabled bool
// Logging configuration
LoggingConfig logging.Config

View File

@ -17,12 +17,14 @@ import (
"os"
"path"
"sync"
"time"
"unsafe"
"github.com/ava-labs/salticidae-go"
"github.com/ava-labs/gecko/api"
"github.com/ava-labs/gecko/api/admin"
"github.com/ava-labs/gecko/api/health"
"github.com/ava-labs/gecko/api/ipcs"
"github.com/ava-labs/gecko/api/keystore"
"github.com/ava-labs/gecko/api/metrics"
@ -559,6 +561,19 @@ func (n *Node) initAdminAPI() {
}
}
// initHealthAPI initializes the Health API service and adds it to the API
// server under the "health" route. It does nothing when the Health API is
// disabled in the node's config.
// Assumes n.Log, n.HTTPLog, n.APIServer, and n.ValidatorAPI already initialized
func (n *Node) initHealthAPI() {
	if !n.Config.HealthAPIEnabled {
		return
	}

	n.Log.Info("initializing Health API")
	service := health.NewService(n.Log)

	// The validator network check fails when no heartbeat was seen within the
	// last 5 minutes. A registration failure is logged instead of being
	// silently dropped; the API is still exposed, as before.
	if err := service.RegisterHeartbeat("network.validators.heartbeat", n.ValidatorAPI, 5*time.Minute); err != nil {
		n.Log.Error("couldn't register heartbeat health check: %s", err)
	}

	// NOTE(review): if AddRoute returns an error it is discarded here; confirm
	// against its signature and handle it if so.
	n.APIServer.AddRoute(service.Handler(), &sync.RWMutex{}, "health", "", n.HTTPLog)
}
// initIPCAPI initializes the IPC API service
// Assumes n.log and n.chainManager already initialized
func (n *Node) initIPCAPI() {
@ -650,8 +665,9 @@ func (n *Node) Initialize(Config *Config, logger logging.Logger, logFactory logg
n.initClients() // Set up the client servers
}
n.initAdminAPI() // Start the Admin API
n.initIPCAPI() // Start the IPC API
n.initAdminAPI() // Start the Admin API
n.initHealthAPI() // Start the Health API
n.initIPCAPI() // Start the IPC API
if err := n.initAliases(); err != nil { // Set up aliases
return err