Added support for getHealth method, introducing solana_is_healthy, solana_num_slots_behind
(after refactoring)
This commit is contained in:
parent
7be0a718f7
commit
0a0782bb25
|
@ -221,6 +221,7 @@ func main() {
|
|||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
go slotWatcher.WatchSlots(ctx, collector.slotPace)
|
||||
go collector.WatchHealth(context.Background())
|
||||
|
||||
prometheus.MustRegister(collector)
|
||||
http.Handle("/metrics", promhttp.Handler())
|
||||
|
|
|
@ -0,0 +1,85 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/asymmetric-research/solana_exporter/pkg/rpc"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"k8s.io/klog/v2"
|
||||
"strconv"
|
||||
"time"
|
||||
)
|
||||
|
||||
var (
|
||||
isHealthy = prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "solana_is_healthy",
|
||||
Help: "Is node healthy",
|
||||
})
|
||||
|
||||
numSlotsBehind = prometheus.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "solana_num_slots_behind",
|
||||
Help: "Number of slots behind",
|
||||
})
|
||||
)
|
||||
|
||||
func init() {
|
||||
prometheus.MustRegister(isHealthy)
|
||||
prometheus.MustRegister(numSlotsBehind)
|
||||
|
||||
}
|
||||
func extractNumSlotsBehind(data map[string]any) (int, error) {
|
||||
if val, ok := data["NumSlotsBehind"]; ok {
|
||||
// Type assert if it's a float64 (common for numbers in JSON)
|
||||
switch v := val.(type) {
|
||||
case float64:
|
||||
return int(v), nil
|
||||
case string:
|
||||
// If it's a string, try to convert it to an int
|
||||
num, err := strconv.Atoi(v)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("failed to convert string to int: %w", err)
|
||||
}
|
||||
return num, nil
|
||||
default:
|
||||
return 0, fmt.Errorf("unexpected type for NumSlotsBehind: %T", v)
|
||||
}
|
||||
}
|
||||
|
||||
return 0, fmt.Errorf("NumSlotsBehind key not found in data")
|
||||
}
|
||||
|
||||
func (c *SolanaCollector) WatchHealth(ctx context.Context) {
|
||||
ticker := time.NewTicker(slotPacerSchedule)
|
||||
|
||||
for {
|
||||
<-ticker.C
|
||||
|
||||
// Get current slot height and epoch info
|
||||
|
||||
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||||
_, err := c.rpcClient.GetHealth(ctx)
|
||||
if err != nil {
|
||||
klog.Infof("failed to fetch info info, retrying: %v", err)
|
||||
cancel()
|
||||
continue
|
||||
}
|
||||
cancel()
|
||||
isNodeHealthy := 1
|
||||
nodeNumSlotsBehind := 0
|
||||
if err != nil {
|
||||
var rpcError *rpc.RPCError
|
||||
if errors.As(err, &rpcError) {
|
||||
if rpcError.Code != 0 {
|
||||
isNodeHealthy = 0
|
||||
}
|
||||
nodeNumSlotsBehind, _ = extractNumSlotsBehind(rpcError.Data)
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
isHealthy.Set(float64(isNodeHealthy))
|
||||
|
||||
numSlotsBehind.Set(float64(nodeNumSlotsBehind))
|
||||
}
|
||||
}
|
|
@ -73,6 +73,7 @@ type Provider interface {
|
|||
GetLeaderSchedule(ctx context.Context, commitment Commitment, slot int64) (map[string][]int64, error)
|
||||
|
||||
GetBlock(ctx context.Context, commitment Commitment, slot int64, transactionDetails string) (*Block, error)
|
||||
GetHealth(ctx context.Context) (*string, error)
|
||||
}
|
||||
|
||||
func (c Commitment) MarshalJSON() ([]byte, error) {
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
package rpc
|
||||
|
||||
import (
|
||||
"context"
|
||||
)
|
||||
|
||||
type (
|
||||
ErrorData struct {
|
||||
NumSlotsBehind int64 `json:"numSlotsBehind"`
|
||||
}
|
||||
|
||||
GetHealthRpcError struct {
|
||||
Message string `json:"message"`
|
||||
Data ErrorData `json:"data"`
|
||||
Code int64 `json:"code"`
|
||||
}
|
||||
|
||||
getHealthResponse struct {
|
||||
jsonrpc string
|
||||
Result string `json:"result"`
|
||||
Error RPCError `json:"error"`
|
||||
Id int `json:"id"`
|
||||
}
|
||||
)
|
||||
|
||||
// https://docs.solana.com/developing/clients/jsonrpc-api#gethealth
|
||||
func (c *Client) GetHealth(ctx context.Context) (*string, error) {
|
||||
var resp response[string]
|
||||
|
||||
if err := getResponse(ctx, c, "getHealth", []any{}, &resp); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &resp.Result, nil
|
||||
}
|
Loading…
Reference in New Issue