solana_exporter/cmd/solana-exporter/exporter.go

261 lines
8.4 KiB
Go
Raw Normal View History

package main
import (
2021-01-03 03:23:18 -08:00
"context"
2024-10-18 07:57:44 -07:00
"errors"
2024-10-24 03:09:32 -07:00
"fmt"
"github.com/asymmetric-research/solana-exporter/pkg/rpc"
"github.com/asymmetric-research/solana-exporter/pkg/slog"
"github.com/prometheus/client_golang/prometheus"
2024-10-25 01:16:48 -07:00
"go.uber.org/zap"
)
2024-10-14 05:39:41 -07:00
// Label names attached to the metrics exported by this package.
const (
	SkipStatusLabel      = "status"
	NodekeyLabel         = "nodekey"
	VotekeyLabel         = "votekey"
	VersionLabel         = "version"
	AddressLabel         = "address"
	EpochLabel           = "epoch"
	TransactionTypeLabel = "transaction_type"
)

// Status label values.
// NOTE(review): presumably paired with SkipStatusLabel — confirm against the callers.
const (
	StatusSkipped = "skipped"
	StatusValid   = "valid"
)

// Transaction-type label values.
// NOTE(review): presumably paired with TransactionTypeLabel — confirm against the callers.
const (
	TransactionTypeVote    = "vote"
	TransactionTypeNonVote = "non_vote"
)
type SolanaCollector struct {
2024-10-28 09:19:07 -07:00
rpcClient *rpc.Client
2024-10-25 01:16:48 -07:00
logger *zap.SugaredLogger
2024-10-02 08:08:46 -07:00
config *ExporterConfig
2024-10-02 08:08:46 -07:00
/// descriptors:
2024-10-24 05:20:30 -07:00
ValidatorActiveStake *GaugeDesc
ValidatorLastVote *GaugeDesc
ValidatorRootSlot *GaugeDesc
ValidatorDelinquent *GaugeDesc
AccountBalances *GaugeDesc
NodeVersion *GaugeDesc
NodeIsHealthy *GaugeDesc
NodeNumSlotsBehind *GaugeDesc
NodeMinimumLedgerSlot *GaugeDesc
NodeFirstAvailableBlock *GaugeDesc
}
2024-10-28 09:19:07 -07:00
func NewSolanaCollector(client *rpc.Client, config *ExporterConfig) *SolanaCollector {
2024-10-14 05:39:41 -07:00
collector := &SolanaCollector{
2024-10-28 09:19:07 -07:00
rpcClient: client,
logger: slog.Get(),
config: config,
2024-10-24 05:20:30 -07:00
ValidatorActiveStake: NewGaugeDesc(
2024-10-24 03:09:32 -07:00
"solana_validator_active_stake",
2024-10-30 02:31:53 -07:00
fmt.Sprintf("Active stake (in SOL) per validator (represented by %s and %s)", VotekeyLabel, NodekeyLabel),
2024-10-24 05:20:30 -07:00
VotekeyLabel, NodekeyLabel,
2024-10-01 02:52:02 -07:00
),
2024-10-24 05:20:30 -07:00
ValidatorLastVote: NewGaugeDesc(
"solana_validator_last_vote",
2024-10-24 03:09:32 -07:00
fmt.Sprintf("Last voted-on slot per validator (represented by %s and %s)", VotekeyLabel, NodekeyLabel),
2024-10-24 05:20:30 -07:00
VotekeyLabel, NodekeyLabel,
2024-10-01 02:52:02 -07:00
),
2024-10-24 05:20:30 -07:00
ValidatorRootSlot: NewGaugeDesc(
"solana_validator_root_slot",
2024-10-24 03:09:32 -07:00
fmt.Sprintf("Root slot per validator (represented by %s and %s)", VotekeyLabel, NodekeyLabel),
2024-10-24 05:20:30 -07:00
VotekeyLabel, NodekeyLabel,
2024-10-01 02:52:02 -07:00
),
2024-10-24 05:20:30 -07:00
ValidatorDelinquent: NewGaugeDesc(
"solana_validator_delinquent",
2024-10-24 03:09:32 -07:00
fmt.Sprintf("Whether a validator (represented by %s and %s) is delinquent", VotekeyLabel, NodekeyLabel),
2024-10-24 05:20:30 -07:00
VotekeyLabel, NodekeyLabel,
2024-10-01 02:52:02 -07:00
),
2024-10-24 05:20:30 -07:00
AccountBalances: NewGaugeDesc(
2024-10-24 03:09:32 -07:00
"solana_account_balance",
fmt.Sprintf("Solana account balances, grouped by %s", AddressLabel),
2024-10-24 05:20:30 -07:00
AddressLabel,
2024-10-24 03:09:32 -07:00
),
2024-10-24 05:20:30 -07:00
NodeVersion: NewGaugeDesc(
2022-08-10 10:13:08 -07:00
"solana_node_version",
"Node version of solana",
2024-10-24 05:20:30 -07:00
VersionLabel,
2024-10-01 02:52:02 -07:00
),
2024-10-24 05:20:30 -07:00
NodeIsHealthy: NewGaugeDesc(
2024-10-24 03:09:32 -07:00
"solana_node_is_healthy",
2024-10-27 04:04:45 -07:00
"Whether the node is healthy",
2024-10-18 07:57:44 -07:00
),
2024-10-24 05:20:30 -07:00
NodeNumSlotsBehind: NewGaugeDesc(
2024-10-24 03:09:32 -07:00
"solana_node_num_slots_behind",
2024-10-27 04:04:45 -07:00
"The number of slots that the node is behind the latest cluster confirmed slot.",
2024-10-18 07:57:44 -07:00
),
2024-10-24 05:20:30 -07:00
NodeMinimumLedgerSlot: NewGaugeDesc(
2024-10-24 03:09:32 -07:00
"solana_node_minimum_ledger_slot",
2024-10-27 04:04:45 -07:00
"The lowest slot that the node has information about in its ledger.",
),
2024-10-24 05:20:30 -07:00
NodeFirstAvailableBlock: NewGaugeDesc(
2024-10-24 03:09:32 -07:00
"solana_node_first_available_block",
2024-10-27 04:04:45 -07:00
"The slot of the lowest confirmed block that has not been purged from the node's ledger.",
),
}
2024-10-14 05:39:41 -07:00
return collector
}
2024-10-14 05:39:41 -07:00
func (c *SolanaCollector) Describe(ch chan<- *prometheus.Desc) {
2024-10-24 05:20:30 -07:00
ch <- c.NodeVersion.Desc
ch <- c.ValidatorActiveStake.Desc
ch <- c.ValidatorLastVote.Desc
ch <- c.ValidatorRootSlot.Desc
ch <- c.ValidatorDelinquent.Desc
ch <- c.AccountBalances.Desc
ch <- c.NodeIsHealthy.Desc
ch <- c.NodeNumSlotsBehind.Desc
ch <- c.NodeMinimumLedgerSlot.Desc
ch <- c.NodeFirstAvailableBlock.Desc
}
2024-10-14 05:39:41 -07:00
func (c *SolanaCollector) collectVoteAccounts(ctx context.Context, ch chan<- prometheus.Metric) {
if c.config.LightMode {
2024-10-25 03:54:57 -07:00
c.logger.Debug("Skipping vote-accounts collection in light mode.")
return
}
2024-10-25 02:50:16 -07:00
c.logger.Info("Collecting vote accounts...")
2024-10-28 09:19:07 -07:00
voteAccounts, err := c.rpcClient.GetVoteAccounts(ctx, rpc.CommitmentConfirmed)
2024-10-01 12:38:28 -07:00
if err != nil {
2024-10-25 01:16:48 -07:00
c.logger.Errorf("failed to get vote accounts: %v", err)
2024-10-24 05:20:30 -07:00
ch <- c.ValidatorActiveStake.NewInvalidMetric(err)
ch <- c.ValidatorLastVote.NewInvalidMetric(err)
ch <- c.ValidatorRootSlot.NewInvalidMetric(err)
ch <- c.ValidatorDelinquent.NewInvalidMetric(err)
2024-10-01 12:38:28 -07:00
return
}
for _, account := range append(voteAccounts.Current, voteAccounts.Delinquent...) {
2024-10-24 05:20:30 -07:00
accounts := []string{account.VotePubkey, account.NodePubkey}
2024-10-30 02:31:53 -07:00
ch <- c.ValidatorActiveStake.MustNewConstMetric(float64(account.ActivatedStake)/rpc.LamportsInSol, accounts...)
2024-10-24 05:20:30 -07:00
ch <- c.ValidatorLastVote.MustNewConstMetric(float64(account.LastVote), accounts...)
ch <- c.ValidatorRootSlot.MustNewConstMetric(float64(account.RootSlot), accounts...)
}
2024-10-01 12:38:28 -07:00
for _, account := range voteAccounts.Current {
2024-10-24 05:20:30 -07:00
ch <- c.ValidatorDelinquent.MustNewConstMetric(0, account.VotePubkey, account.NodePubkey)
}
2024-10-01 12:38:28 -07:00
for _, account := range voteAccounts.Delinquent {
2024-10-24 05:20:30 -07:00
ch <- c.ValidatorDelinquent.MustNewConstMetric(1, account.VotePubkey, account.NodePubkey)
}
2024-10-25 02:50:16 -07:00
c.logger.Info("Vote accounts collected.")
}
2024-10-14 05:39:41 -07:00
func (c *SolanaCollector) collectVersion(ctx context.Context, ch chan<- prometheus.Metric) {
2024-10-25 02:50:16 -07:00
c.logger.Info("Collecting version...")
2024-10-01 12:38:28 -07:00
version, err := c.rpcClient.GetVersion(ctx)
if err != nil {
2024-10-25 01:16:48 -07:00
c.logger.Errorf("failed to get version: %v", err)
2024-10-24 05:20:30 -07:00
ch <- c.NodeVersion.NewInvalidMetric(err)
2024-10-01 12:38:28 -07:00
return
}
2022-08-10 10:13:08 -07:00
2024-10-24 05:20:30 -07:00
ch <- c.NodeVersion.MustNewConstMetric(1, version)
2024-10-25 02:50:16 -07:00
c.logger.Info("Version collected.")
2024-10-01 12:38:28 -07:00
}
func (c *SolanaCollector) collectMinimumLedgerSlot(ctx context.Context, ch chan<- prometheus.Metric) {
2024-10-25 02:50:16 -07:00
c.logger.Info("Collecting minimum ledger slot...")
slot, err := c.rpcClient.GetMinimumLedgerSlot(ctx)
if err != nil {
2024-10-25 01:16:48 -07:00
c.logger.Errorf("failed to get minimum lidger slot: %v", err)
2024-10-24 05:20:30 -07:00
ch <- c.NodeMinimumLedgerSlot.NewInvalidMetric(err)
return
}
2024-10-27 09:14:08 -07:00
ch <- c.NodeMinimumLedgerSlot.MustNewConstMetric(float64(slot))
2024-10-25 02:50:16 -07:00
c.logger.Info("Minimum ledger slot collected.")
}
func (c *SolanaCollector) collectFirstAvailableBlock(ctx context.Context, ch chan<- prometheus.Metric) {
2024-10-25 02:50:16 -07:00
c.logger.Info("Collecting first available block...")
block, err := c.rpcClient.GetFirstAvailableBlock(ctx)
if err != nil {
2024-10-25 01:16:48 -07:00
c.logger.Errorf("failed to get first available block: %v", err)
2024-10-24 05:20:30 -07:00
ch <- c.NodeFirstAvailableBlock.NewInvalidMetric(err)
return
}
2024-10-27 09:14:08 -07:00
ch <- c.NodeFirstAvailableBlock.MustNewConstMetric(float64(block))
2024-10-25 02:50:16 -07:00
c.logger.Info("First available block collected.")
}
2022-08-10 10:13:08 -07:00
2024-10-14 05:39:41 -07:00
func (c *SolanaCollector) collectBalances(ctx context.Context, ch chan<- prometheus.Metric) {
if c.config.LightMode {
2024-10-25 03:54:57 -07:00
c.logger.Debug("Skipping balance collection in light mode.")
return
}
2024-10-25 02:50:16 -07:00
c.logger.Info("Collecting balances...")
balances, err := FetchBalances(
ctx, c.rpcClient, CombineUnique(c.config.BalanceAddresses, c.config.NodeKeys, c.config.VoteKeys),
)
2022-08-10 10:13:08 -07:00
if err != nil {
2024-10-25 01:16:48 -07:00
c.logger.Errorf("failed to get balances: %v", err)
2024-10-24 05:20:30 -07:00
ch <- c.AccountBalances.NewInvalidMetric(err)
2024-10-01 12:38:28 -07:00
return
}
for address, balance := range balances {
2024-10-24 05:20:30 -07:00
ch <- c.AccountBalances.MustNewConstMetric(balance, address)
2024-10-01 12:38:28 -07:00
}
2024-10-25 02:50:16 -07:00
c.logger.Info("Balances collected.")
2024-10-01 12:38:28 -07:00
}
2024-10-18 07:57:44 -07:00
func (c *SolanaCollector) collectHealth(ctx context.Context, ch chan<- prometheus.Metric) {
2024-10-25 02:50:16 -07:00
c.logger.Info("Collecting health...")
2024-10-18 07:57:44 -07:00
var (
isHealthy = 1
numSlotsBehind int64
)
_, err := c.rpcClient.GetHealth(ctx)
if err != nil {
var rpcError *rpc.RPCError
if errors.As(err, &rpcError) {
var errorData rpc.NodeUnhealthyErrorData
if rpcError.Data == nil {
// if there is no data, then this is some unexpected error and should just be logged
2024-10-25 01:16:48 -07:00
c.logger.Errorf("failed to get health: %v", err)
2024-10-24 05:20:30 -07:00
ch <- c.NodeIsHealthy.NewInvalidMetric(err)
ch <- c.NodeNumSlotsBehind.NewInvalidMetric(err)
2024-10-18 07:57:44 -07:00
return
}
if err = rpc.UnpackRpcErrorData(rpcError, errorData); err != nil {
// if we error here, it means we have the incorrect format
2024-10-25 01:16:48 -07:00
c.logger.Fatalf("failed to unpack %s rpc error: %v", rpcError.Method, err.Error())
2024-10-18 07:57:44 -07:00
}
isHealthy = 0
numSlotsBehind = errorData.NumSlotsBehind
} else {
// if it's not an RPC error, log it
2024-10-25 01:16:48 -07:00
c.logger.Errorf("failed to get health: %v", err)
2024-10-24 05:20:30 -07:00
ch <- c.NodeIsHealthy.NewInvalidMetric(err)
ch <- c.NodeNumSlotsBehind.NewInvalidMetric(err)
2024-10-18 07:57:44 -07:00
return
}
}
2024-10-27 04:04:45 -07:00
ch <- c.NodeIsHealthy.MustNewConstMetric(float64(isHealthy))
ch <- c.NodeNumSlotsBehind.MustNewConstMetric(float64(numSlotsBehind))
2024-10-25 02:50:16 -07:00
c.logger.Info("Health collected.")
2024-10-18 07:57:44 -07:00
return
}
2024-10-14 05:39:41 -07:00
func (c *SolanaCollector) Collect(ch chan<- prometheus.Metric) {
2024-10-25 02:50:16 -07:00
c.logger.Info("========== BEGIN COLLECTION ==========")
2024-10-15 03:30:53 -07:00
ctx, cancel := context.WithCancel(context.Background())
2024-10-01 12:38:28 -07:00
defer cancel()
2024-10-18 07:57:44 -07:00
c.collectHealth(ctx, ch)
c.collectMinimumLedgerSlot(ctx, ch)
c.collectFirstAvailableBlock(ctx, ch)
2024-10-25 03:54:57 -07:00
c.collectVoteAccounts(ctx, ch)
c.collectVersion(ctx, ch)
c.collectBalances(ctx, ch)
2024-10-25 02:50:16 -07:00
c.logger.Info("=========== END COLLECTION ===========")
}