mirror of https://github.com/poanetwork/gecko.git
FEATURE: Add Health API.
This commit is contained in:
parent
e21601b4fe
commit
fcd13e7d90
|
@ -0,0 +1,74 @@
|
|||
// (c) 2020, Ava Labs, Inc. All rights reserved.
|
||||
// See the file LICENSE for licensing terms.
|
||||
|
||||
package health
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"time"
|
||||
)
|
||||
|
||||
// ErrHeartbeatNotDetected is the sentinel error produced by a CheckFn built
// with HeartbeatCheckFn when the latest heartbeat is older than allowed.
var ErrHeartbeatNotDetected = errors.New("heartbeat not detected")
|
||||
|
||||
// CheckFn is the signature of a single health probe. The first result carries
// optional status details; the error reports whether the probe considers the
// component healthy (nil) or not (non-nil).
type CheckFn func() (interface{}, error)
|
||||
|
||||
// Check defines a single health check that we want to monitor and consider as
|
||||
// part of our wider healthiness
|
||||
type Check struct {
|
||||
// Name is the identifier for this check and must be unique among all Checks
|
||||
Name string
|
||||
|
||||
// CheckFn is the function to call to perform the the health check
|
||||
CheckFn CheckFn
|
||||
|
||||
// ExecutionPeriod is the duration to wait between executions of this Check
|
||||
ExecutionPeriod time.Duration
|
||||
|
||||
// InitialDelay is the duration to wait before executing the first time
|
||||
InitialDelay time.Duration
|
||||
|
||||
// InitiallyPassing is whether or not to consider the Check healthy before the
|
||||
// initial execution
|
||||
InitiallyPassing bool
|
||||
}
|
||||
|
||||
// gosundheitCheck implements the health.Check interface backed by a CheckFn
|
||||
type gosundheitCheck struct {
|
||||
name string
|
||||
checkFn CheckFn
|
||||
}
|
||||
|
||||
// Name implements the health.Check interface by returning a unique name
|
||||
func (c gosundheitCheck) Name() string { return c.name }
|
||||
|
||||
// Execute implements the health.Check interface by executing the checkFn and
|
||||
// returning the results
|
||||
func (c gosundheitCheck) Execute() (interface{}, error) { return c.checkFn() }
|
||||
|
||||
// heartbeater is anything that can report the time of its most recently
// observed heartbeat.
type heartbeater interface {
	// GetHeartbeat returns the time of the latest heartbeat
	GetHeartbeat() time.Time
}
|
||||
|
||||
// HeartbeatCheckFn returns a CheckFn that checks the given heartbeater has
|
||||
// pulsed within the given duration
|
||||
func HeartbeatCheckFn(hb heartbeater, max time.Duration) CheckFn {
|
||||
return func() (data interface{}, err error) {
|
||||
// Get the heartbeat and create a data set to return to the caller
|
||||
hb := hb.GetHeartbeat()
|
||||
data = map[string]int64{"heartbeat": hb.UTC().Unix()}
|
||||
|
||||
// If the current time is after the last known heartbeat + the limit then
|
||||
// mark our check as failed
|
||||
if hb.Add(max).Before(time.Now()) {
|
||||
err = ErrHeartbeatNotDetected
|
||||
}
|
||||
return data, err
|
||||
}
|
||||
}
|
|
@ -0,0 +1,79 @@
|
|||
// (c) 2020, Ava Labs, Inc. All rights reserved.
|
||||
// See the file LICENSE for licensing terms.
|
||||
|
||||
package health
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/AppsFlyer/go-sundheit"
|
||||
"github.com/ava-labs/gecko/snow/engine/common"
|
||||
"github.com/ava-labs/gecko/utils/json"
|
||||
"github.com/ava-labs/gecko/utils/logging"
|
||||
"github.com/gorilla/rpc/v2"
|
||||
)
|
||||
|
||||
// defaultCheckOpts is a Check whose properties represent a default Check
|
||||
var defaultCheckOpts = Check{ExecutionPeriod: time.Minute}
|
||||
|
||||
// Health observes a set of vital signs and makes them
|
||||
type Health struct {
|
||||
log logging.Logger
|
||||
health health.Health
|
||||
}
|
||||
|
||||
// NewService creates a new Health service
|
||||
func NewService(log logging.Logger) *Health {
|
||||
return &Health{log, health.New()}
|
||||
}
|
||||
|
||||
// Handler returns an HTTPHandler providing RPC access to the Health service
|
||||
func (h *Health) Handler() *common.HTTPHandler {
|
||||
newServer := rpc.NewServer()
|
||||
codec := json.NewCodec()
|
||||
newServer.RegisterCodec(codec, "application/json")
|
||||
newServer.RegisterCodec(codec, "application/json;charset=UTF-8")
|
||||
newServer.RegisterService(h, "health")
|
||||
return &common.HTTPHandler{LockOptions: common.NoLock, Handler: newServer}
|
||||
}
|
||||
|
||||
// RegisterHeartbeat adds a check with default options and a CheckFn that checks
|
||||
// the given heartbeater for a recent heartbeat
|
||||
func (h *Health) RegisterHeartbeat(name string, hb heartbeater, max time.Duration) error {
|
||||
return h.RegisterCheckFunc(name, HeartbeatCheckFn(hb, max))
|
||||
}
|
||||
|
||||
// RegisterCheckFunc adds a Check with default options and the given CheckFn
|
||||
func (h *Health) RegisterCheckFunc(name string, checkFn CheckFn) error {
|
||||
check := defaultCheckOpts
|
||||
check.Name = name
|
||||
check.CheckFn = checkFn
|
||||
return h.RegisterCheck(check)
|
||||
}
|
||||
|
||||
// RegisterCheck adds the given Check
|
||||
func (h *Health) RegisterCheck(c Check) error {
|
||||
return h.health.RegisterCheck(&health.Config{
|
||||
InitialDelay: c.InitialDelay,
|
||||
ExecutionPeriod: c.ExecutionPeriod,
|
||||
InitiallyPassing: c.InitiallyPassing,
|
||||
Check: gosundheitCheck{c.Name, c.CheckFn},
|
||||
})
|
||||
}
|
||||
|
||||
// GetHealthArgs is the (empty) argument set accepted by GetHealth
type GetHealthArgs struct{}
|
||||
|
||||
// GetHealthReply is the response for GetHealth
|
||||
type GetHealthReply struct {
|
||||
Checks map[string]health.Result `json:"checks"`
|
||||
Healthy bool `json:"healthy"`
|
||||
}
|
||||
|
||||
// GetHealth returns a summation of the health of the node
|
||||
func (service *Health) GetHealth(_ *http.Request, _ *GetHealthArgs, reply *GetHealthReply) error {
|
||||
service.log.Debug("Health: GetHealth called")
|
||||
reply.Checks, reply.Healthy = service.health.Results()
|
||||
return nil
|
||||
}
|
|
@ -128,6 +128,7 @@ func init() {
|
|||
fs.BoolVar(&Config.AdminAPIEnabled, "api-admin-enabled", true, "If true, this node exposes the Admin API")
|
||||
fs.BoolVar(&Config.KeystoreAPIEnabled, "api-keystore-enabled", true, "If true, this node exposes the Keystore API")
|
||||
fs.BoolVar(&Config.MetricsAPIEnabled, "api-metrics-enabled", true, "If true, this node exposes the Metrics API")
|
||||
fs.BoolVar(&Config.HealthAPIEnabled, "api-health-enabled", true, "If true, this node exposes the Health API")
|
||||
fs.BoolVar(&Config.IPCEnabled, "api-ipcs-enabled", false, "If true, IPCs can be opened")
|
||||
|
||||
// Throughput Server
|
||||
|
|
|
@ -147,6 +147,8 @@ type Handshake struct {
|
|||
// If any chain is blocked on connecting to peers, track these blockers here
|
||||
awaitingLock sync.Mutex
|
||||
awaiting []*networking.AwaitingConnections
|
||||
|
||||
lastHeartbeat time.Time
|
||||
}
|
||||
|
||||
// Initialize to the c networking library. This should only be done once during
|
||||
|
@ -201,6 +203,8 @@ func (nm *Handshake) Initialize(
|
|||
net.RegHandler(Version, salticidae.MsgNetworkMsgCallback(C.version), nil)
|
||||
net.RegHandler(GetPeerList, salticidae.MsgNetworkMsgCallback(C.getPeerList), nil)
|
||||
net.RegHandler(PeerList, salticidae.MsgNetworkMsgCallback(C.peerList), nil)
|
||||
|
||||
nm.heartbeat()
|
||||
}
|
||||
|
||||
// ConnectTo add the peer as a connection and connects to them.
|
||||
|
@ -593,6 +597,16 @@ func (nm *Handshake) checkCompatibility(peerVersion string) bool {
|
|||
return true
|
||||
}
|
||||
|
||||
// heartbeat registers a new heartbeat to signal liveness
|
||||
func (nm *Handshake) heartbeat() {
|
||||
nm.lastHeartbeat = nm.clock.Time()
|
||||
}
|
||||
|
||||
// GetHeartbeat returns the most recent heartbeat time
|
||||
func (nm *Handshake) GetHeartbeat() time.Time {
|
||||
return nm.lastHeartbeat
|
||||
}
|
||||
|
||||
// peerHandler notifies a change to the set of connected peers
|
||||
// connected is true if a new peer is connected
|
||||
// connected is false if a formerly connected peer has disconnected
|
||||
|
@ -667,6 +681,7 @@ func pong(*C.struct_msg_t, *C.struct_msgnetwork_conn_t, unsafe.Pointer) {}
|
|||
//export getVersion
|
||||
func getVersion(_msg *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsafe.Pointer) {
|
||||
HandshakeNet.numGetVersionReceived.Inc()
|
||||
HandshakeNet.heartbeat()
|
||||
|
||||
conn := salticidae.PeerNetworkConnFromC(salticidae.CPeerNetworkConn(_conn))
|
||||
peer := conn.GetPeerID(false)
|
||||
|
@ -679,6 +694,7 @@ func getVersion(_msg *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsaf
|
|||
//export version
|
||||
func version(_msg *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsafe.Pointer) {
|
||||
HandshakeNet.numVersionReceived.Inc()
|
||||
HandshakeNet.heartbeat()
|
||||
|
||||
msg := salticidae.MsgFromC(salticidae.CMsg(_msg))
|
||||
conn := salticidae.PeerNetworkConnFromC(salticidae.CPeerNetworkConn(_conn))
|
||||
|
@ -763,6 +779,7 @@ func version(_msg *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsafe.P
|
|||
//export getPeerList
|
||||
func getPeerList(_ *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsafe.Pointer) {
|
||||
HandshakeNet.numGetPeerlistReceived.Inc()
|
||||
HandshakeNet.heartbeat()
|
||||
|
||||
conn := salticidae.PeerNetworkConnFromC(salticidae.CPeerNetworkConn(_conn))
|
||||
peer := conn.GetPeerID(false)
|
||||
|
@ -775,6 +792,7 @@ func getPeerList(_ *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsafe.
|
|||
//export peerList
|
||||
func peerList(_msg *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsafe.Pointer) {
|
||||
HandshakeNet.numPeerlistReceived.Inc()
|
||||
HandshakeNet.heartbeat()
|
||||
|
||||
msg := salticidae.MsgFromC(salticidae.CMsg(_msg))
|
||||
build := Builder{}
|
||||
|
|
|
@ -51,6 +51,7 @@ type Config struct {
|
|||
AdminAPIEnabled bool
|
||||
KeystoreAPIEnabled bool
|
||||
MetricsAPIEnabled bool
|
||||
HealthAPIEnabled bool
|
||||
|
||||
// Logging configuration
|
||||
LoggingConfig logging.Config
|
||||
|
|
20
node/node.go
20
node/node.go
|
@ -17,12 +17,14 @@ import (
|
|||
"os"
|
||||
"path"
|
||||
"sync"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"github.com/ava-labs/salticidae-go"
|
||||
|
||||
"github.com/ava-labs/gecko/api"
|
||||
"github.com/ava-labs/gecko/api/admin"
|
||||
"github.com/ava-labs/gecko/api/health"
|
||||
"github.com/ava-labs/gecko/api/ipcs"
|
||||
"github.com/ava-labs/gecko/api/keystore"
|
||||
"github.com/ava-labs/gecko/api/metrics"
|
||||
|
@ -559,6 +561,19 @@ func (n *Node) initAdminAPI() {
|
|||
}
|
||||
}
|
||||
|
||||
// initHealthAPI initializes the Health API service
|
||||
// Assumes n.Log, n.ConsensusAPI, and n.ValidatorAPI already initialized
|
||||
func (n *Node) initHealthAPI() {
|
||||
if !n.Config.HealthAPIEnabled {
|
||||
return
|
||||
}
|
||||
|
||||
n.Log.Info("initializing Health API")
|
||||
service := health.NewService(n.Log)
|
||||
service.RegisterHeartbeat("network.validators.heartbeat", n.ValidatorAPI, 5*time.Minute)
|
||||
n.APIServer.AddRoute(service.Handler(), &sync.RWMutex{}, "health", "", n.HTTPLog)
|
||||
}
|
||||
|
||||
// initIPCAPI initializes the IPC API service
|
||||
// Assumes n.log and n.chainManager already initialized
|
||||
func (n *Node) initIPCAPI() {
|
||||
|
@ -650,8 +665,9 @@ func (n *Node) Initialize(Config *Config, logger logging.Logger, logFactory logg
|
|||
n.initClients() // Set up the client servers
|
||||
}
|
||||
|
||||
n.initAdminAPI() // Start the Admin API
|
||||
n.initIPCAPI() // Start the IPC API
|
||||
n.initAdminAPI() // Start the Admin API
|
||||
n.initHealthAPI() // Start the Health API
|
||||
n.initIPCAPI() // Start the IPC API
|
||||
|
||||
if err := n.initAliases(); err != nil { // Set up aliases
|
||||
return err
|
||||
|
|
Loading…
Reference in New Issue