From 0a0782bb254a793f9179d959d22dd8da081a5323 Mon Sep 17 00:00:00 2001
From: "l.subbotin"
Date: Fri, 18 Oct 2024 11:17:07 +0200
Subject: [PATCH] Added support for getHealth method, introducing
 solana_is_healthy, solana_num_slots_behind (after refactoring)

---
 cmd/solana_exporter/exporter.go |   1 +
 cmd/solana_exporter/health.go   | 117 ++++++++++++++++++++++++++++++++
 pkg/rpc/client.go               |   1 +
 pkg/rpc/gethealth.go            |  41 +++++++++++
 4 files changed, 160 insertions(+)
 create mode 100644 cmd/solana_exporter/health.go
 create mode 100644 pkg/rpc/gethealth.go

diff --git a/cmd/solana_exporter/exporter.go b/cmd/solana_exporter/exporter.go
index eb7cc2e..4070c0b 100644
--- a/cmd/solana_exporter/exporter.go
+++ b/cmd/solana_exporter/exporter.go
@@ -221,6 +221,7 @@ func main() {
 	ctx, cancel := context.WithCancel(ctx)
 	defer cancel()
 	go slotWatcher.WatchSlots(ctx, collector.slotPace)
+	go collector.WatchHealth(ctx)
 
 	prometheus.MustRegister(collector)
 	http.Handle("/metrics", promhttp.Handler())
diff --git a/cmd/solana_exporter/health.go b/cmd/solana_exporter/health.go
new file mode 100644
index 0000000..42fe8eb
--- /dev/null
+++ b/cmd/solana_exporter/health.go
@@ -0,0 +1,117 @@
+package main
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"strconv"
+	"time"
+
+	"github.com/asymmetric-research/solana_exporter/pkg/rpc"
+	"github.com/prometheus/client_golang/prometheus"
+	"k8s.io/klog/v2"
+)
+
+var (
+	// isHealthy is set to 1 when the last getHealth call succeeded and to 0
+	// when the node answered it with an RPC error carrying a non-zero code.
+	isHealthy = prometheus.NewGauge(prometheus.GaugeOpts{
+		Name: "solana_is_healthy",
+		Help: "Is node healthy",
+	})
+
+	// numSlotsBehind is the lag taken from the getHealth error payload; it is
+	// 0 when the call succeeded or the payload carried no usable value.
+	numSlotsBehind = prometheus.NewGauge(prometheus.GaugeOpts{
+		Name: "solana_num_slots_behind",
+		Help: "Number of slots behind",
+	})
+)
+
+func init() {
+	prometheus.MustRegister(isHealthy)
+	prometheus.MustRegister(numSlotsBehind)
+}
+
+// extractNumSlotsBehind reads the slot lag from the data object of a getHealth
+// RPC error. The value may be a JSON number or a numeric string; a missing
+// key, an unparsable string or any other type yields an error.
+func extractNumSlotsBehind(data map[string]any) (int, error) {
+	// The key is camelCase on the wire (see the ErrorData JSON tag in pkg/rpc);
+	// the capitalised spelling is still accepted for backward compatibility.
+	val, ok := data["numSlotsBehind"]
+	if !ok {
+		val, ok = data["NumSlotsBehind"]
+	}
+	if !ok {
+		return 0, errors.New("numSlotsBehind key not found in data")
+	}
+
+	switch v := val.(type) {
+	case float64:
+		// encoding/json decodes JSON numbers into float64 when the target is any.
+		return int(v), nil
+	case string:
+		num, err := strconv.Atoi(v)
+		if err != nil {
+			return 0, fmt.Errorf("converting numSlotsBehind %q to int: %w", v, err)
+		}
+		return num, nil
+	default:
+		return 0, fmt.Errorf("unexpected type for numSlotsBehind: %T", v)
+	}
+}
+
+// WatchHealth polls getHealth on every tick of slotPacerSchedule and publishes
+// the outcome through the solana_is_healthy and solana_num_slots_behind
+// gauges. It blocks until ctx is cancelled.
+func (c *SolanaCollector) WatchHealth(ctx context.Context) {
+	ticker := time.NewTicker(slotPacerSchedule)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case <-ticker.C:
+		}
+
+		healthy, behind, err := c.checkHealth(ctx)
+		if err != nil {
+			// Not an RPC-level answer (e.g. the request itself failed): leave
+			// the gauges untouched and try again on the next tick.
+			klog.Infof("failed to fetch node health, retrying: %v", err)
+			continue
+		}
+		isHealthy.Set(float64(healthy))
+		numSlotsBehind.Set(float64(behind))
+	}
+}
+
+// checkHealth performs a single getHealth call bounded by a 5 second timeout.
+// A successful call reports (1, 0). An *rpc.RPCError reports 0 for health when
+// its code is non-zero, together with the slot lag found in its data, if any.
+// Any other error is returned to the caller.
+func (c *SolanaCollector) checkHealth(ctx context.Context) (int, int, error) {
+	ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
+	defer cancel()
+
+	_, err := c.rpcClient.GetHealth(ctx)
+	if err == nil {
+		return 1, 0, nil
+	}
+
+	var rpcError *rpc.RPCError
+	if !errors.As(err, &rpcError) {
+		return 0, 0, err
+	}
+
+	healthy := 1
+	if rpcError.Code != 0 {
+		healthy = 0
+	}
+	// The data object is not guaranteed to carry a slot lag, in which case 0
+	// is reported; the extraction error is therefore deliberately dropped.
+	behind, _ := extractNumSlotsBehind(rpcError.Data)
+	return healthy, behind, nil
+}
diff --git a/pkg/rpc/client.go b/pkg/rpc/client.go
index a4817d1..5db9e63 100644
--- a/pkg/rpc/client.go
+++ b/pkg/rpc/client.go
@@ -73,6 +73,7 @@ type Provider interface {
 	GetLeaderSchedule(ctx context.Context, commitment Commitment, slot int64) (map[string][]int64, error)
 
 	GetBlock(ctx context.Context, commitment Commitment, slot int64, transactionDetails string) (*Block, error)
+	GetHealth(ctx context.Context) (*string, error)
 }
 
 func (c Commitment) MarshalJSON() ([]byte, error) {
diff --git a/pkg/rpc/gethealth.go b/pkg/rpc/gethealth.go
new file mode 100644
index 0000000..423f625
--- /dev/null
+++ b/pkg/rpc/gethealth.go
@@ -0,0 +1,41 @@
+package rpc
+
+import (
+	"context"
+)
+
+type (
+	// ErrorData is the data object of a getHealth RPC error.
+	ErrorData struct {
+		NumSlotsBehind int64 `json:"numSlotsBehind"`
+	}
+
+	// GetHealthRpcError is the error object of a getHealth RPC response.
+	GetHealthRpcError struct {
+		Message string    `json:"message"`
+		Data    ErrorData `json:"data"`
+		Code    int64     `json:"code"`
+	}
+
+	// getHealthResponse is the full JSON-RPC envelope of a getHealth response.
+	getHealthResponse struct {
+		JSONRPC string   `json:"jsonrpc"`
+		Result  string   `json:"result"`
+		Error   RPCError `json:"error"`
+		Id      int      `json:"id"`
+	}
+)
+
+// GetHealth calls the getHealth RPC method and returns a pointer to its result
+// string. Any error reported by getResponse is returned unchanged together
+// with a nil result.
+//
+// https://docs.solana.com/developing/clients/jsonrpc-api#gethealth
+func (c *Client) GetHealth(ctx context.Context) (*string, error) {
+	var resp response[string]
+
+	if err := getResponse(ctx, c, "getHealth", []any{}, &resp); err != nil {
+		return nil, err
+	}
+	return &resp.Result, nil
+}