Added support for the getHealth RPC method, introducing the solana_is_healthy and solana_num_slots_behind metrics
(after refactoring)
This commit is contained in:
parent
7be0a718f7
commit
0a0782bb25
|
@ -221,6 +221,7 @@ func main() {
|
||||||
ctx, cancel := context.WithCancel(ctx)
|
ctx, cancel := context.WithCancel(ctx)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
go slotWatcher.WatchSlots(ctx, collector.slotPace)
|
go slotWatcher.WatchSlots(ctx, collector.slotPace)
|
||||||
|
go collector.WatchHealth(context.Background())
|
||||||
|
|
||||||
prometheus.MustRegister(collector)
|
prometheus.MustRegister(collector)
|
||||||
http.Handle("/metrics", promhttp.Handler())
|
http.Handle("/metrics", promhttp.Handler())
|
||||||
|
|
|
@ -0,0 +1,85 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"github.com/asymmetric-research/solana_exporter/pkg/rpc"
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
"k8s.io/klog/v2"
|
||||||
|
"strconv"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
isHealthy = prometheus.NewGauge(prometheus.GaugeOpts{
|
||||||
|
Name: "solana_is_healthy",
|
||||||
|
Help: "Is node healthy",
|
||||||
|
})
|
||||||
|
|
||||||
|
numSlotsBehind = prometheus.NewGauge(prometheus.GaugeOpts{
|
||||||
|
Name: "solana_num_slots_behind",
|
||||||
|
Help: "Number of slots behind",
|
||||||
|
})
|
||||||
|
)
|
||||||
|
|
||||||
|
func init() {
|
||||||
|
prometheus.MustRegister(isHealthy)
|
||||||
|
prometheus.MustRegister(numSlotsBehind)
|
||||||
|
|
||||||
|
}
|
||||||
|
// extractNumSlotsBehind reads the slot lag from the data payload of a
// getHealth RPC error.
//
// The JSON field is spelled "numSlotsBehind", so that key is checked first;
// the capitalised "NumSlotsBehind" spelling is still accepted as a fallback
// for callers that built the map by hand.
//
// The value may be a float64 (how encoding/json decodes numbers into any) or
// a decimal string. It returns an error when neither key is present, when the
// string is not a valid integer, or when the value has any other type.
func extractNumSlotsBehind(data map[string]any) (int, error) {
	val, ok := data["numSlotsBehind"]
	if !ok {
		val, ok = data["NumSlotsBehind"]
	}
	if !ok {
		return 0, fmt.Errorf("NumSlotsBehind key not found in data")
	}

	switch v := val.(type) {
	case float64:
		return int(v), nil
	case string:
		num, err := strconv.Atoi(v)
		if err != nil {
			return 0, fmt.Errorf("failed to convert string to int: %w", err)
		}
		return num, nil
	default:
		return 0, fmt.Errorf("unexpected type for NumSlotsBehind: %T", v)
	}
}
|
||||||
|
|
||||||
|
func (c *SolanaCollector) WatchHealth(ctx context.Context) {
|
||||||
|
ticker := time.NewTicker(slotPacerSchedule)
|
||||||
|
|
||||||
|
for {
|
||||||
|
<-ticker.C
|
||||||
|
|
||||||
|
// Get current slot height and epoch info
|
||||||
|
|
||||||
|
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
|
||||||
|
_, err := c.rpcClient.GetHealth(ctx)
|
||||||
|
if err != nil {
|
||||||
|
klog.Infof("failed to fetch info info, retrying: %v", err)
|
||||||
|
cancel()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
cancel()
|
||||||
|
isNodeHealthy := 1
|
||||||
|
nodeNumSlotsBehind := 0
|
||||||
|
if err != nil {
|
||||||
|
var rpcError *rpc.RPCError
|
||||||
|
if errors.As(err, &rpcError) {
|
||||||
|
if rpcError.Code != 0 {
|
||||||
|
isNodeHealthy = 0
|
||||||
|
}
|
||||||
|
nodeNumSlotsBehind, _ = extractNumSlotsBehind(rpcError.Data)
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
isHealthy.Set(float64(isNodeHealthy))
|
||||||
|
|
||||||
|
numSlotsBehind.Set(float64(nodeNumSlotsBehind))
|
||||||
|
}
|
||||||
|
}
|
|
@ -73,6 +73,7 @@ type Provider interface {
|
||||||
GetLeaderSchedule(ctx context.Context, commitment Commitment, slot int64) (map[string][]int64, error)
|
GetLeaderSchedule(ctx context.Context, commitment Commitment, slot int64) (map[string][]int64, error)
|
||||||
|
|
||||||
GetBlock(ctx context.Context, commitment Commitment, slot int64, transactionDetails string) (*Block, error)
|
GetBlock(ctx context.Context, commitment Commitment, slot int64, transactionDetails string) (*Block, error)
|
||||||
|
GetHealth(ctx context.Context) (*string, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c Commitment) MarshalJSON() ([]byte, error) {
|
func (c Commitment) MarshalJSON() ([]byte, error) {
|
||||||
|
|
|
@ -0,0 +1,34 @@
|
||||||
|
package rpc
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
)
|
||||||
|
|
||||||
|
type (
|
||||||
|
ErrorData struct {
|
||||||
|
NumSlotsBehind int64 `json:"numSlotsBehind"`
|
||||||
|
}
|
||||||
|
|
||||||
|
GetHealthRpcError struct {
|
||||||
|
Message string `json:"message"`
|
||||||
|
Data ErrorData `json:"data"`
|
||||||
|
Code int64 `json:"code"`
|
||||||
|
}
|
||||||
|
|
||||||
|
getHealthResponse struct {
|
||||||
|
jsonrpc string
|
||||||
|
Result string `json:"result"`
|
||||||
|
Error RPCError `json:"error"`
|
||||||
|
Id int `json:"id"`
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
// https://docs.solana.com/developing/clients/jsonrpc-api#gethealth
|
||||||
|
func (c *Client) GetHealth(ctx context.Context) (*string, error) {
|
||||||
|
var resp response[string]
|
||||||
|
|
||||||
|
if err := getResponse(ctx, c, "getHealth", []any{}, &resp); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &resp.Result, nil
|
||||||
|
}
|
Loading…
Reference in New Issue