Added support for the getHealth RPC method, introducing the solana_is_healthy and solana_num_slots_behind metrics

(after refactoring)
This commit is contained in:
l.subbotin 2024-10-18 11:17:07 +02:00
parent 7be0a718f7
commit 0a0782bb25
No known key found for this signature in database
GPG Key ID: EBEF03B4E1049D77
4 changed files with 121 additions and 0 deletions

View File

@ -221,6 +221,7 @@ func main() {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
go slotWatcher.WatchSlots(ctx, collector.slotPace)
go collector.WatchHealth(context.Background())
prometheus.MustRegister(collector)
http.Handle("/metrics", promhttp.Handler())

View File

@ -0,0 +1,85 @@
package main
import (
"context"
"errors"
"fmt"
"github.com/asymmetric-research/solana_exporter/pkg/rpc"
"github.com/prometheus/client_golang/prometheus"
"k8s.io/klog/v2"
"strconv"
"time"
)
// isHealthy is the gauge exported as solana_is_healthy. WatchHealth writes
// the node's health flag into it on every polling tick.
var isHealthy = prometheus.NewGauge(prometheus.GaugeOpts{
	Help: "Is node healthy",
	Name: "solana_is_healthy",
})

// numSlotsBehind is the gauge exported as solana_num_slots_behind. WatchHealth
// writes the slot lag it derived from the getHealth call into it.
var numSlotsBehind = prometheus.NewGauge(prometheus.GaugeOpts{
	Help: "Number of slots behind",
	Name: "solana_num_slots_behind",
})
// init registers the health gauges with the default Prometheus registry at
// package load time. MustRegister panics if a registration fails.
func init() {
	prometheus.MustRegister(isHealthy, numSlotsBehind)
}
// extractNumSlotsBehind returns the slot lag carried in the data payload of a
// getHealth RPC error.
//
// The value is looked up under "numSlotsBehind", which is the spelling used on
// the wire (and by the json tag on rpc.ErrorData). The capitalised
// "NumSlotsBehind" spelling is still accepted as a fallback so that maps built
// for the previous lookup keep working.
//
// Numeric values (float64 as produced by encoding/json, plus int and int64)
// are converted directly; strings are parsed as base-10 integers. An error is
// returned when the key is absent, the string cannot be parsed, or the value
// has any other type. A nil map is treated like an empty one.
func extractNumSlotsBehind(data map[string]any) (int, error) {
	val, ok := data["numSlotsBehind"]
	if !ok {
		val, ok = data["NumSlotsBehind"]
	}
	if !ok {
		return 0, fmt.Errorf("numSlotsBehind key not found in data")
	}

	switch v := val.(type) {
	case float64:
		// encoding/json decodes every JSON number into float64 when the
		// target is map[string]any.
		return int(v), nil
	case int:
		return v, nil
	case int64:
		return int(v), nil
	case string:
		num, err := strconv.Atoi(v)
		if err != nil {
			return 0, fmt.Errorf("failed to convert string to int: %w", err)
		}
		return num, nil
	default:
		return 0, fmt.Errorf("unexpected type for numSlotsBehind: %T", v)
	}
}
// WatchHealth polls the node's getHealth RPC method once per slotPacerSchedule
// tick and publishes the outcome through the isHealthy and numSlotsBehind
// gauges. It blocks until ctx is cancelled, so it is meant to be run in its
// own goroutine.
//
// Outcome mapping for each poll:
//   - call succeeds: isHealthy = 1, numSlotsBehind = 0;
//   - call fails with an *rpc.RPCError: isHealthy = 0 when the error code is
//     non-zero, and numSlotsBehind is taken from the error's data payload
//     (0 when the payload does not carry it);
//   - call fails with any other error (e.g. transport failure or timeout):
//     the failure is logged and both gauges keep their previous values.
func (c *SolanaCollector) WatchHealth(ctx context.Context) {
	ticker := time.NewTicker(slotPacerSchedule)
	defer ticker.Stop()

	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
		}

		// Bound every RPC call separately; the timeout context is derived from
		// the caller's ctx and released as soon as the call returns.
		callCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
		_, err := c.rpcClient.GetHealth(callCtx)
		cancel()

		isNodeHealthy := 1
		nodeNumSlotsBehind := 0
		if err != nil {
			var rpcError *rpc.RPCError
			if !errors.As(err, &rpcError) {
				// Not an answer from the node, so there is nothing to report;
				// try again on the next tick.
				klog.Infof("failed to fetch health info, retrying: %v", err)
				continue
			}
			if rpcError.Code != 0 {
				isNodeHealthy = 0
			}
			// The extraction error is deliberately ignored: an unhealthy
			// response is not required to say how far behind the node is, and
			// in that case the lag is reported as 0.
			nodeNumSlotsBehind, _ = extractNumSlotsBehind(rpcError.Data)
		}

		isHealthy.Set(float64(isNodeHealthy))
		numSlotsBehind.Set(float64(nodeNumSlotsBehind))
	}
}

View File

@ -73,6 +73,7 @@ type Provider interface {
GetLeaderSchedule(ctx context.Context, commitment Commitment, slot int64) (map[string][]int64, error)
GetBlock(ctx context.Context, commitment Commitment, slot int64, transactionDetails string) (*Block, error)
GetHealth(ctx context.Context) (*string, error)
}
func (c Commitment) MarshalJSON() ([]byte, error) {

34
pkg/rpc/gethealth.go Normal file
View File

@ -0,0 +1,34 @@
package rpc
import (
"context"
)
// Types describing the JSON shapes related to the getHealth RPC method.
// NOTE(review): GetHealth in this file decodes into response[string] and none
// of these types is referenced in the visible code — confirm whether they are
// used elsewhere in the package or can be removed.
type (
// ErrorData mirrors the "data" object of a getHealth error. Its single field
// is decoded from the "numSlotsBehind" JSON key.
ErrorData struct {
NumSlotsBehind int64 `json:"numSlotsBehind"`
}
// GetHealthRpcError mirrors a JSON-RPC error object with "message", "data"
// and "code" members, where "data" is decoded into ErrorData.
GetHealthRpcError struct {
Message string `json:"message"`
Data ErrorData `json:"data"`
Code int64 `json:"code"`
}
// getHealthResponse mirrors a full getHealth response envelope with
// "result", "error" and "id" members.
// NOTE(review): jsonrpc is unexported and has no tag, so encoding/json will
// not populate it — confirm whether that is intended.
// NOTE(review): Error uses RPCError (declared elsewhere in this package)
// rather than GetHealthRpcError declared just above — confirm which is meant.
getHealthResponse struct {
jsonrpc string
Result string `json:"result"`
Error RPCError `json:"error"`
Id int `json:"id"`
}
)
// GetHealth invokes the "getHealth" JSON-RPC method with an empty parameter
// list and returns a pointer to the decoded result string. When the
// underlying getResponse call fails, its error is returned unchanged and the
// result pointer is nil.
//
// See: https://docs.solana.com/developing/clients/jsonrpc-api#gethealth
func (c *Client) GetHealth(ctx context.Context) (*string, error) {
	var health response[string]

	err := getResponse(ctx, c, "getHealth", []any{}, &health)
	if err != nil {
		return nil, err
	}

	result := &health.Result
	return result, nil
}