From b9ceddd052804f00c2cc0700e961b73733d5e5f0 Mon Sep 17 00:00:00 2001 From: Alex Willmer Date: Wed, 13 May 2020 21:57:17 +0100 Subject: [PATCH 01/11] api: add --http-host to restrict RPC bind address MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ± ./build/ava --http-host localhost --public-ip ___ ________ __ ___ / _ \_/\ / _____/ ____ ____ | | ______ / _ \_/\ \/ \___/ / \ ____/ __ \_/ ___\| |/ / _ \ \/ \___/ \ \_\ \ ___/\ \___| < <_> ) \______ /\___ >\___ >__|_ \____/ \/ \/ \/ \/ ... INFO [05-14|21:09:54] /api/server.go#53: API server listening on "localhost:9650" INFO [05-14|21:09:54] /api/server.go#106: adding route /ext/vm/jvYyfQTxGMJLuGWa55kdP2p2zSUYsQ5Raupu4TW34ZAUBAbtq ... The node continues to participate in consensus, but RPC calls are restricted to the localhost interface $ ss -lnt | grep 965 LISTEN 0 4096 127.0.0.1:9650 0.0.0.0:* LISTEN 0 10 0.0.0.0:9651 0.0.0.0:* $ curl -X POST --data '{ > "id": '$(date +%s)', > "jsonrpc": "2.0", > "method": "admin.getNodeID", > "params":{} > }' -H 'content-type:application/json;' 127.0.0.1:9650/ext/admin {"jsonrpc":"2.0","result":{"nodeID":"2iEwniZihec5S2anxDpKGenZB7Cs112Ap"},"id":1589486853} $ curl -X POST --data '{ > "id": '$(date +%s)', > "jsonrpc": "2.0", > "method": "admin.getNodeID", > "params":{} > }' -H 'content-type:application/json;' 192.168.43.60:9650/ext/admin curl: (7) Failed to connect to 192.168.43.60 port 9650: Connection refused --- api/server.go | 29 ++++++++++++++++++++--------- api/server_test.go | 2 +- main/params.go | 2 ++ node/config.go | 1 + node/node.go | 2 +- 5 files changed, 25 insertions(+), 11 deletions(-) diff --git a/api/server.go b/api/server.go index ffbdc03..2985395 100644 --- a/api/server.go +++ b/api/server.go @@ -7,6 +7,7 @@ import ( "errors" "fmt" "io" + "net" "net/http" "net/url" "sync" @@ -28,30 +29,40 @@ var ( // Server maintains the HTTP router type Server struct { - log logging.Logger - factory logging.Factory - router *router - 
portURL string + log logging.Logger + factory logging.Factory + router *router + listenAddress string } -// Initialize creates the API server at the provided port -func (s *Server) Initialize(log logging.Logger, factory logging.Factory, port uint16) { +// Initialize creates the API server at the provided host and port +func (s *Server) Initialize(log logging.Logger, factory logging.Factory, host string, port uint16) { s.log = log s.factory = factory - s.portURL = fmt.Sprintf(":%d", port) + s.listenAddress = fmt.Sprintf("%s:%d", host, port) s.router = newRouter() } // Dispatch starts the API server func (s *Server) Dispatch() error { handler := cors.Default().Handler(s.router) - return http.ListenAndServe(s.portURL, handler) + listener, err := net.Listen("tcp", s.listenAddress) + if err != nil { + return err + } + s.log.Info("API server listening on %q", s.listenAddress) + return http.Serve(listener, handler) } // DispatchTLS starts the API server with the provided TLS certificate func (s *Server) DispatchTLS(certFile, keyFile string) error { handler := cors.Default().Handler(s.router) - return http.ListenAndServeTLS(s.portURL, certFile, keyFile, handler) + listener, err := net.Listen("tcp", s.listenAddress) + if err != nil { + return err + } + s.log.Info("API server listening on %q", s.listenAddress) + return http.ServeTLS(listener, handler, certFile, keyFile) } // RegisterChain registers the API endpoints associated with this chain That diff --git a/api/server_test.go b/api/server_test.go index 75e03ad..dc0ba9c 100644 --- a/api/server_test.go +++ b/api/server_test.go @@ -30,7 +30,7 @@ func (s *Service) Call(_ *http.Request, args *Args, reply *Reply) error { func TestCall(t *testing.T) { s := Server{} - s.Initialize(logging.NoLog{}, logging.NoFactory{}, 8080) + s.Initialize(logging.NoLog{}, logging.NoFactory{}, "localhost", 8080) serv := &Service{} newServer := rpc.NewServer() diff --git a/main/params.go b/main/params.go index c18a937..269b433 100644 --- 
a/main/params.go +++ b/main/params.go @@ -93,6 +93,7 @@ func init() { consensusIP := fs.String("public-ip", "", "Public IP of this node") // HTTP Server: + httpHost := fs.String("http-host", "", "Address of the HTTP server") httpPort := fs.Uint("http-port", 9650, "Port of the HTTP server") fs.BoolVar(&Config.EnableHTTPS, "http-tls-enabled", false, "Upgrade the HTTP server to HTTPs") fs.StringVar(&Config.HTTPSKeyFile, "http-tls-key-file", "", "TLS private key file for the HTTPs server") @@ -269,6 +270,7 @@ func init() { } // HTTP: + Config.HTTPHost = *httpHost Config.HTTPPort = uint16(*httpPort) // Logging: diff --git a/node/config.go b/node/config.go index b35d997..848f74a 100644 --- a/node/config.go +++ b/node/config.go @@ -42,6 +42,7 @@ type Config struct { BootstrapPeers []*Peer // HTTP configuration + HTTPHost string HTTPPort uint16 EnableHTTPS bool HTTPSKeyFile string diff --git a/node/node.go b/node/node.go index 289de18..672c8ff 100644 --- a/node/node.go +++ b/node/node.go @@ -477,7 +477,7 @@ func (n *Node) initChains() error { func (n *Node) initAPIServer() { n.Log.Info("Initializing API server") - n.APIServer.Initialize(n.Log, n.LogFactory, n.Config.HTTPPort) + n.APIServer.Initialize(n.Log, n.LogFactory, n.Config.HTTPHost, n.Config.HTTPPort) go n.Log.RecoverAndPanic(func() { if n.Config.EnableHTTPS { From 91de93a26a3745c63e81adbe86890a8b24a44f2f Mon Sep 17 00:00:00 2001 From: StephenButtolph Date: Mon, 18 May 2020 11:22:15 -0400 Subject: [PATCH 02/11] If staking is disabled, always attempt to reconnect to a peer --- networking/handshake_handlers.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/networking/handshake_handlers.go b/networking/handshake_handlers.go index d82df74..554f4fb 100644 --- a/networking/handshake_handlers.go +++ b/networking/handshake_handlers.go @@ -515,7 +515,7 @@ func (nm *Handshake) disconnectedFromPeer(peer salticidae.PeerID) { nm.connections.Remove(peer, cert) nm.numPeers.Set(float64(nm.connections.Len())) - if 
nm.vdrs.Contains(cert) { + if !nm.enableStaking || nm.vdrs.Contains(cert) { nm.reconnectTimeout.Put(peerID, func() { nm.pending.Remove(peer, cert) nm.connections.Remove(peer, cert) From 503c90e8399f6fc2e73f79f1768748d238984e7c Mon Sep 17 00:00:00 2001 From: StephenButtolph Date: Mon, 18 May 2020 11:33:58 -0400 Subject: [PATCH 03/11] Clean up the acceptance of atomic blocks --- vms/platformvm/atomic_block.go | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/vms/platformvm/atomic_block.go b/vms/platformvm/atomic_block.go index 20c2892..3293094 100644 --- a/vms/platformvm/atomic_block.go +++ b/vms/platformvm/atomic_block.go @@ -36,7 +36,7 @@ type AtomicTx interface { // AtomicBlock being accepted results in the transaction contained in the // block to be accepted and committed to the chain. type AtomicBlock struct { - SingleDecisionBlock `serialize:"true"` + CommonDecisionBlock `serialize:"true"` Tx AtomicTx `serialize:"true"` @@ -45,7 +45,7 @@ type AtomicBlock struct { // initialize this block func (ab *AtomicBlock) initialize(vm *VM, bytes []byte) error { - if err := ab.SingleDecisionBlock.initialize(vm, bytes); err != nil { + if err := ab.CommonDecisionBlock.initialize(vm, bytes); err != nil { return err } return ab.Tx.initialize(vm) @@ -123,9 +123,6 @@ func (ab *AtomicBlock) Accept() { ab.onAcceptFunc() } - parent := ab.parentBlock() - // remove this block and its parent from memory - parent.free() ab.free() } @@ -133,11 +130,9 @@ func (ab *AtomicBlock) Accept() { // decision block, has ID [parentID]. 
func (vm *VM) newAtomicBlock(parentID ids.ID, tx AtomicTx) (*AtomicBlock, error) { ab := &AtomicBlock{ - SingleDecisionBlock: SingleDecisionBlock{CommonDecisionBlock: CommonDecisionBlock{ - CommonBlock: CommonBlock{ - Block: core.NewBlock(parentID), - vm: vm, - }, + CommonDecisionBlock: CommonDecisionBlock{CommonBlock: CommonBlock{ + Block: core.NewBlock(parentID), + vm: vm, }}, Tx: tx, } From e9218c733b402561a29eee49aa82a85e697a9501 Mon Sep 17 00:00:00 2001 From: Collin Montag Date: Mon, 18 May 2020 12:18:48 -0400 Subject: [PATCH 04/11] ks importuser password check --- api/keystore/service.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/api/keystore/service.go b/api/keystore/service.go index fe1b349..ec497b5 100644 --- a/api/keystore/service.go +++ b/api/keystore/service.go @@ -266,8 +266,12 @@ func (ks *Keystore) ImportUser(r *http.Request, args *ImportUserArgs, reply *Imp ks.log.Verbo("ImportUser called for %s", args.Username) - if usr, err := ks.getUser(args.Username); err == nil || usr != nil { + usr, err := ks.getUser(args.Username) + switch { + case err == nil || usr != nil: return fmt.Errorf("user already exists: %s", args.Username) + case !usr.CheckPassword(args.Password): + return fmt.Errorf("incorrect password for user %q", args.Username) } userData := UserDB{} From b2a85ccecdcb1a65f1d8a0d8cc1f62603b01b550 Mon Sep 17 00:00:00 2001 From: Collin Montag Date: Mon, 18 May 2020 14:19:15 -0400 Subject: [PATCH 05/11] ks importuser fix --- api/keystore/service.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/api/keystore/service.go b/api/keystore/service.go index ec497b5..bba5576 100644 --- a/api/keystore/service.go +++ b/api/keystore/service.go @@ -266,18 +266,17 @@ func (ks *Keystore) ImportUser(r *http.Request, args *ImportUserArgs, reply *Imp ks.log.Verbo("ImportUser called for %s", args.Username) - usr, err := ks.getUser(args.Username) - switch { - case err == nil || usr != nil: + if usr, err := 
ks.getUser(args.Username); err == nil || usr != nil { return fmt.Errorf("user already exists: %s", args.Username) - case !usr.CheckPassword(args.Password): - return fmt.Errorf("incorrect password for user %q", args.Username) } userData := UserDB{} if err := ks.codec.Unmarshal(args.User.Bytes, &userData); err != nil { return err } + if !userData.User.CheckPassword(args.Password) { + return fmt.Errorf("incorrect password for %s", args.Username) + } usrBytes, err := ks.codec.Marshal(&userData.User) if err != nil { From f655592a1c1ac35c3885bc01faa68b5fb3736ef7 Mon Sep 17 00:00:00 2001 From: Collin Montag Date: Mon, 18 May 2020 14:36:37 -0400 Subject: [PATCH 06/11] standard password error --- api/keystore/service.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api/keystore/service.go b/api/keystore/service.go index bba5576..5575083 100644 --- a/api/keystore/service.go +++ b/api/keystore/service.go @@ -218,7 +218,7 @@ func (ks *Keystore) ExportUser(_ *http.Request, args *ExportUserArgs, reply *Exp return err } if !usr.CheckPassword(args.Password) { - return fmt.Errorf("incorrect password for %s", args.Username) + return fmt.Errorf("incorrect password for user %q", args.Username) } userDB := prefixdb.New([]byte(args.Username), ks.bcDB) @@ -275,7 +275,7 @@ func (ks *Keystore) ImportUser(r *http.Request, args *ImportUserArgs, reply *Imp return err } if !userData.User.CheckPassword(args.Password) { - return fmt.Errorf("incorrect password for %s", args.Username) + return fmt.Errorf("incorrect password for user %q", args.Username) } usrBytes, err := ks.codec.Marshal(&userData.User) @@ -386,7 +386,7 @@ func (ks *Keystore) GetDatabase(bID ids.ID, username, password string) (database return nil, err } if !usr.CheckPassword(password) { - return nil, fmt.Errorf("incorrect password for user '%s'", username) + return nil, fmt.Errorf("incorrect password for user %q", username) } userDB := prefixdb.New([]byte(username), ks.bcDB) From 
e2ba60d3aceb0dbc308dfd7ddbdb15a7e6a3503b Mon Sep 17 00:00:00 2001 From: Collin Montag Date: Mon, 18 May 2020 15:07:23 -0400 Subject: [PATCH 07/11] added importuser test --- api/keystore/service_test.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/api/keystore/service_test.go b/api/keystore/service_test.go index 75b7166..b4c805d 100644 --- a/api/keystore/service_test.go +++ b/api/keystore/service_test.go @@ -255,6 +255,17 @@ func TestServiceExportImport(t *testing.T) { newKS := Keystore{} newKS.Initialize(logging.NoLog{}, memdb.New()) + { + reply := ImportUserReply{} + if err := newKS.ImportUser(nil, &ImportUserArgs{ + Username: "bob", + Password: "", + User: exportReply.User, + }, &reply); err == nil { + t.Fatal("Should have errored due to incorrect password") + } + } + { reply := ImportUserReply{} if err := newKS.ImportUser(nil, &ImportUserArgs{ From fcd13e7d902d9910c57f03bc7145ad659d81cde7 Mon Sep 17 00:00:00 2001 From: Tyler Smith Date: Tue, 5 May 2020 22:44:45 -0700 Subject: [PATCH 08/11] FEATURE: Add Health API. --- api/health/checks.go | 74 ++++++++++++++++++++++++++++++ api/health/service.go | 79 ++++++++++++++++++++++++++++++++ main/params.go | 1 + networking/handshake_handlers.go | 18 ++++++++ node/config.go | 1 + node/node.go | 20 +++++++- 6 files changed, 191 insertions(+), 2 deletions(-) create mode 100644 api/health/checks.go create mode 100644 api/health/service.go diff --git a/api/health/checks.go b/api/health/checks.go new file mode 100644 index 0000000..5a6a569 --- /dev/null +++ b/api/health/checks.go @@ -0,0 +1,74 @@ +// (c) 2020, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. 
+ +package health + +import ( + "errors" + "time" +) + +var ( + // ErrHeartbeatNotDetected is returned from a HeartbeatCheckFn when the + // heartbeat has not been detected recently enough + ErrHeartbeatNotDetected = errors.New("heartbeat not detected") +) + +// CheckFn returns optional status information and an error indicating health or +// non-health +type CheckFn func() (interface{}, error) + +// Check defines a single health check that we want to monitor and consider as +// part of our wider healthiness +type Check struct { + // Name is the identifier for this check and must be unique among all Checks + Name string + + // CheckFn is the function to call to perform the health check + CheckFn CheckFn + + // ExecutionPeriod is the duration to wait between executions of this Check + ExecutionPeriod time.Duration + + // InitialDelay is the duration to wait before executing the first time + InitialDelay time.Duration + + // InitiallyPassing is whether or not to consider the Check healthy before the + // initial execution + InitiallyPassing bool +} + +// gosundheitCheck implements the health.Check interface backed by a CheckFn +type gosundheitCheck struct { + name string + checkFn CheckFn +} + +// Name implements the health.Check interface by returning a unique name +func (c gosundheitCheck) Name() string { return c.name } + +// Execute implements the health.Check interface by executing the checkFn and +// returning the results +func (c gosundheitCheck) Execute() (interface{}, error) { return c.checkFn() } + +// heartbeater provides a getter to the most recently observed heartbeat +type heartbeater interface { + GetHeartbeat() time.Time +} + +// HeartbeatCheckFn returns a CheckFn that checks the given heartbeater has +// pulsed within the given duration +func HeartbeatCheckFn(hb heartbeater, max time.Duration) CheckFn { + return func() (data interface{}, err error) { + // Get the heartbeat and create a data set to return to the caller + hb := hb.GetHeartbeat() + 
data = map[string]int64{"heartbeat": hb.UTC().Unix()} + + // If the current time is after the last known heartbeat + the limit then + // mark our check as failed + if hb.Add(max).Before(time.Now()) { + err = ErrHeartbeatNotDetected + } + return data, err + } +} diff --git a/api/health/service.go b/api/health/service.go new file mode 100644 index 0000000..c1f0e40 --- /dev/null +++ b/api/health/service.go @@ -0,0 +1,79 @@ +// (c) 2020, Ava Labs, Inc. All rights reserved. +// See the file LICENSE for licensing terms. + +package health + +import ( + "net/http" + "time" + + "github.com/AppsFlyer/go-sundheit" + "github.com/ava-labs/gecko/snow/engine/common" + "github.com/ava-labs/gecko/utils/json" + "github.com/ava-labs/gecko/utils/logging" + "github.com/gorilla/rpc/v2" +) + +// defaultCheckOpts is a Check whose properties represent a default Check +var defaultCheckOpts = Check{ExecutionPeriod: time.Minute} + +// Health observes a set of vital signs and makes them +type Health struct { + log logging.Logger + health health.Health +} + +// NewService creates a new Health service +func NewService(log logging.Logger) *Health { + return &Health{log, health.New()} +} + +// Handler returns an HTTPHandler providing RPC access to the Health service +func (h *Health) Handler() *common.HTTPHandler { + newServer := rpc.NewServer() + codec := json.NewCodec() + newServer.RegisterCodec(codec, "application/json") + newServer.RegisterCodec(codec, "application/json;charset=UTF-8") + newServer.RegisterService(h, "health") + return &common.HTTPHandler{LockOptions: common.NoLock, Handler: newServer} +} + +// RegisterHeartbeat adds a check with default options and a CheckFn that checks +// the given heartbeater for a recent heartbeat +func (h *Health) RegisterHeartbeat(name string, hb heartbeater, max time.Duration) error { + return h.RegisterCheckFunc(name, HeartbeatCheckFn(hb, max)) +} + +// RegisterCheckFunc adds a Check with default options and the given CheckFn +func (h *Health) 
RegisterCheckFunc(name string, checkFn CheckFn) error { + check := defaultCheckOpts + check.Name = name + check.CheckFn = checkFn + return h.RegisterCheck(check) +} + +// RegisterCheck adds the given Check +func (h *Health) RegisterCheck(c Check) error { + return h.health.RegisterCheck(&health.Config{ + InitialDelay: c.InitialDelay, + ExecutionPeriod: c.ExecutionPeriod, + InitiallyPassing: c.InitiallyPassing, + Check: gosundheitCheck{c.Name, c.CheckFn}, + }) +} + +// GetHealthArgs are the arguments for GetHealth +type GetHealthArgs struct{} + +// GetHealthReply is the response for GetHealth +type GetHealthReply struct { + Checks map[string]health.Result `json:"checks"` + Healthy bool `json:"healthy"` +} + +// GetHealth returns a summation of the health of the node +func (service *Health) GetHealth(_ *http.Request, _ *GetHealthArgs, reply *GetHealthReply) error { + service.log.Debug("Health: GetHealth called") + reply.Checks, reply.Healthy = service.health.Results() + return nil +} diff --git a/main/params.go b/main/params.go index c18a937..c4c2d8a 100644 --- a/main/params.go +++ b/main/params.go @@ -128,6 +128,7 @@ func init() { fs.BoolVar(&Config.AdminAPIEnabled, "api-admin-enabled", true, "If true, this node exposes the Admin API") fs.BoolVar(&Config.KeystoreAPIEnabled, "api-keystore-enabled", true, "If true, this node exposes the Keystore API") fs.BoolVar(&Config.MetricsAPIEnabled, "api-metrics-enabled", true, "If true, this node exposes the Metrics API") + fs.BoolVar(&Config.HealthAPIEnabled, "api-health-enabled", true, "If true, this node exposes the Health API") fs.BoolVar(&Config.IPCEnabled, "api-ipcs-enabled", false, "If true, IPCs can be opened") // Throughput Server diff --git a/networking/handshake_handlers.go b/networking/handshake_handlers.go index 554f4fb..2fa7645 100644 --- a/networking/handshake_handlers.go +++ b/networking/handshake_handlers.go @@ -147,6 +147,8 @@ type Handshake struct { // If any chain is blocked on connecting to peers, track 
these blockers here awaitingLock sync.Mutex awaiting []*networking.AwaitingConnections + + lastHeartbeat time.Time } // Initialize to the c networking library. This should only be done once during @@ -201,6 +203,8 @@ func (nm *Handshake) Initialize( net.RegHandler(Version, salticidae.MsgNetworkMsgCallback(C.version), nil) net.RegHandler(GetPeerList, salticidae.MsgNetworkMsgCallback(C.getPeerList), nil) net.RegHandler(PeerList, salticidae.MsgNetworkMsgCallback(C.peerList), nil) + + nm.heartbeat() } // ConnectTo add the peer as a connection and connects to them. @@ -593,6 +597,16 @@ func (nm *Handshake) checkCompatibility(peerVersion string) bool { return true } +// heartbeat registers a new heartbeat to signal liveness +func (nm *Handshake) heartbeat() { + nm.lastHeartbeat = nm.clock.Time() +} + +// GetHeartbeat returns the most recent heartbeat time +func (nm *Handshake) GetHeartbeat() time.Time { + return nm.lastHeartbeat +} + // peerHandler notifies a change to the set of connected peers // connected is true if a new peer is connected // connected is false if a formerly connected peer has disconnected @@ -667,6 +681,7 @@ func pong(*C.struct_msg_t, *C.struct_msgnetwork_conn_t, unsafe.Pointer) {} //export getVersion func getVersion(_msg *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsafe.Pointer) { HandshakeNet.numGetVersionReceived.Inc() + HandshakeNet.heartbeat() conn := salticidae.PeerNetworkConnFromC(salticidae.CPeerNetworkConn(_conn)) peer := conn.GetPeerID(false) @@ -679,6 +694,7 @@ func getVersion(_msg *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsaf //export version func version(_msg *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsafe.Pointer) { HandshakeNet.numVersionReceived.Inc() + HandshakeNet.heartbeat() msg := salticidae.MsgFromC(salticidae.CMsg(_msg)) conn := salticidae.PeerNetworkConnFromC(salticidae.CPeerNetworkConn(_conn)) @@ -763,6 +779,7 @@ func version(_msg *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ 
unsafe.P //export getPeerList func getPeerList(_ *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsafe.Pointer) { HandshakeNet.numGetPeerlistReceived.Inc() + HandshakeNet.heartbeat() conn := salticidae.PeerNetworkConnFromC(salticidae.CPeerNetworkConn(_conn)) peer := conn.GetPeerID(false) @@ -775,6 +792,7 @@ func getPeerList(_ *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsafe. //export peerList func peerList(_msg *C.struct_msg_t, _conn *C.struct_msgnetwork_conn_t, _ unsafe.Pointer) { HandshakeNet.numPeerlistReceived.Inc() + HandshakeNet.heartbeat() msg := salticidae.MsgFromC(salticidae.CMsg(_msg)) build := Builder{} diff --git a/node/config.go b/node/config.go index b35d997..9612746 100644 --- a/node/config.go +++ b/node/config.go @@ -51,6 +51,7 @@ type Config struct { AdminAPIEnabled bool KeystoreAPIEnabled bool MetricsAPIEnabled bool + HealthAPIEnabled bool // Logging configuration LoggingConfig logging.Config diff --git a/node/node.go b/node/node.go index 289de18..424f044 100644 --- a/node/node.go +++ b/node/node.go @@ -17,12 +17,14 @@ import ( "os" "path" "sync" + "time" "unsafe" "github.com/ava-labs/salticidae-go" "github.com/ava-labs/gecko/api" "github.com/ava-labs/gecko/api/admin" + "github.com/ava-labs/gecko/api/health" "github.com/ava-labs/gecko/api/ipcs" "github.com/ava-labs/gecko/api/keystore" "github.com/ava-labs/gecko/api/metrics" @@ -559,6 +561,19 @@ func (n *Node) initAdminAPI() { } } +// initHealthAPI initializes the Health API service +// Assumes n.Log, n.ConsensusAPI, and n.ValidatorAPI already initialized +func (n *Node) initHealthAPI() { + if !n.Config.HealthAPIEnabled { + return + } + + n.Log.Info("initializing Health API") + service := health.NewService(n.Log) + service.RegisterHeartbeat("network.validators.heartbeat", n.ValidatorAPI, 5*time.Minute) + n.APIServer.AddRoute(service.Handler(), &sync.RWMutex{}, "health", "", n.HTTPLog) +} + // initIPCAPI initializes the IPC API service // Assumes n.log and n.chainManager already 
initialized func (n *Node) initIPCAPI() { @@ -650,8 +665,9 @@ func (n *Node) Initialize(Config *Config, logger logging.Logger, logFactory logg n.initClients() // Set up the client servers } - n.initAdminAPI() // Start the Admin API - n.initIPCAPI() // Start the IPC API + n.initAdminAPI() // Start the Admin API + n.initHealthAPI() // Start the Health API + n.initIPCAPI() // Start the IPC API if err := n.initAliases(); err != nil { // Set up aliases return err From 14daa76e922ca8d7801141ed2cfcf57e89722e3f Mon Sep 17 00:00:00 2001 From: Tyler Smith Date: Thu, 7 May 2020 11:00:25 -0700 Subject: [PATCH 09/11] TWEAK: Rename getHealth to getLiveness. This enables a future getReady that has different checks. --- api/health/service.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/api/health/service.go b/api/health/service.go index c1f0e40..54b07ab 100644 --- a/api/health/service.go +++ b/api/health/service.go @@ -62,18 +62,18 @@ func (h *Health) RegisterCheck(c Check) error { }) } -// GetHealthArgs are the arguments for GetHealth -type GetHealthArgs struct{} +// GetLivenessArgs are the arguments for GetLiveness +type GetLivenessArgs struct{} -// GetHealthReply is the response for GetHealth -type GetHealthReply struct { +// GetLivenessReply is the response for GetLiveness +type GetLivenessReply struct { Checks map[string]health.Result `json:"checks"` Healthy bool `json:"healthy"` } -// GetHealth returns a summation of the health of the node -func (service *Health) GetHealth(_ *http.Request, _ *GetHealthArgs, reply *GetHealthReply) error { - service.log.Debug("Health: GetHealth called") +// GetLiveness returns a summation of the health of the node +func (service *Health) GetLiveness(_ *http.Request, _ *GetLivenessArgs, reply *GetLivenessReply) error { + service.log.Debug("Health: GetLiveness called") reply.Checks, reply.Healthy = service.health.Results() return nil } From 99aa6d372431dc7f2f0347f21400c37c56978765 Mon Sep 17 00:00:00 2001 
From: Tyler Smith Date: Thu, 7 May 2020 13:27:18 -0700 Subject: [PATCH 10/11] BUGFIX: Use atomic reads/writes for heartbeats. --- api/health/checks.go | 6 +++--- networking/handshake_handlers.go | 9 +++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/api/health/checks.go b/api/health/checks.go index 5a6a569..1278f21 100644 --- a/api/health/checks.go +++ b/api/health/checks.go @@ -53,7 +53,7 @@ func (c gosundheitCheck) Execute() (interface{}, error) { return c.checkFn() } // heartbeater provides a getter to the most recently observed heartbeat type heartbeater interface { - GetHeartbeat() time.Time + GetHeartbeat() int64 } // HeartbeatCheckFn returns a CheckFn that checks the given heartbeater has @@ -62,11 +62,11 @@ func HeartbeatCheckFn(hb heartbeater, max time.Duration) CheckFn { return func() (data interface{}, err error) { // Get the heartbeat and create a data set to return to the caller hb := hb.GetHeartbeat() - data = map[string]int64{"heartbeat": hb.UTC().Unix()} + data = map[string]int64{"heartbeat": hb} // If the current time is after the last known heartbeat + the limit then // mark our check as failed - if hb.Add(max).Before(time.Now()) { + if time.Unix(hb, 0).Add(max).Before(time.Now()) { err = ErrHeartbeatNotDetected } return data, err diff --git a/networking/handshake_handlers.go b/networking/handshake_handlers.go index 2fa7645..a1dbddc 100644 --- a/networking/handshake_handlers.go +++ b/networking/handshake_handlers.go @@ -22,6 +22,7 @@ import ( "strconv" "strings" "sync" + "sync/atomic" "time" "unsafe" @@ -148,7 +149,7 @@ type Handshake struct { awaitingLock sync.Mutex awaiting []*networking.AwaitingConnections - lastHeartbeat time.Time + lastHeartbeat int64 } // Initialize to the c networking library. 
This should only be done once during @@ -599,12 +600,12 @@ func (nm *Handshake) checkCompatibility(peerVersion string) bool { // heartbeat registers a new heartbeat to signal liveness func (nm *Handshake) heartbeat() { - nm.lastHeartbeat = nm.clock.Time() + atomic.StoreInt64(&nm.lastHeartbeat, nm.clock.Time().Unix()) } // GetHeartbeat returns the most recent heartbeat time -func (nm *Handshake) GetHeartbeat() time.Time { - return nm.lastHeartbeat +func (nm *Handshake) GetHeartbeat() int64 { + return atomic.LoadInt64(&nm.lastHeartbeat) } // peerHandler notifies a change to the set of connected peers From 26fc63429c76d0e357c309d761aa7dd0182f05cb Mon Sep 17 00:00:00 2001 From: Tyler Smith Date: Tue, 19 May 2020 11:18:02 -0700 Subject: [PATCH 11/11] CLEANUP: Fix godoc string. --- api/health/service.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/health/service.go b/api/health/service.go index 54b07ab..2c2789d 100644 --- a/api/health/service.go +++ b/api/health/service.go @@ -17,7 +17,8 @@ import ( // defaultCheckOpts is a Check whose properties represent a default Check var defaultCheckOpts = Check{ExecutionPeriod: time.Minute} -// Health observes a set of vital signs and makes them +// Health observes a set of vital signs and makes them available through an HTTP +// API. type Health struct { log logging.Logger health health.Health