Merge pull request #55 from chainstack/feature/add-identity-label

Introduced additional identity label to the metrics.
This commit is contained in:
Matt Johnstone 2024-10-24 10:25:48 +02:00 committed by GitHub
commit 5210505a77
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 151 additions and 17 deletions

View File

@ -20,6 +20,7 @@ const (
VersionLabel = "version"
AddressLabel = "address"
EpochLabel = "epoch"
IdentityLabel = "identity"
StatusSkipped = "skipped"
StatusValid = "valid"
@ -38,6 +39,7 @@ type SolanaCollector struct {
// config:
slotPace time.Duration
balanceAddresses []string
identity *string
/// descriptors:
totalValidatorsDesc *prometheus.Desc
@ -49,15 +51,16 @@ type SolanaCollector struct {
balances *prometheus.Desc
isHealthy *prometheus.Desc
numSlotsBehind *prometheus.Desc
minimumLedgerSlot *prometheus.Desc
firstAvailableBlock *prometheus.Desc
}
func NewSolanaCollector(
provider rpc.Provider, slotPace time.Duration, balanceAddresses []string, nodekeys []string, votekeys []string,
) *SolanaCollector {
func NewSolanaCollector(provider rpc.Provider, slotPace time.Duration, balanceAddresses []string, nodekeys []string, votekeys []string, identity *string) *SolanaCollector {
collector := &SolanaCollector{
rpcClient: provider,
slotPace: slotPace,
balanceAddresses: CombineUnique(balanceAddresses, nodekeys, votekeys),
identity: identity,
totalValidatorsDesc: prometheus.NewDesc(
"solana_active_validators",
"Total number of active validators by state",
@ -103,13 +106,25 @@ func NewSolanaCollector(
isHealthy: prometheus.NewDesc(
"solana_is_healthy",
"Whether the node is healthy or not.",
nil,
[]string{IdentityLabel},
nil,
),
numSlotsBehind: prometheus.NewDesc(
"solana_num_slots_behind",
"The number of slots that the node is behind the latest cluster confirmed slot.",
[]string{IdentityLabel},
nil,
),
minimumLedgerSlot: prometheus.NewDesc(
"solana_minimum_ledger_slot",
"The lowest slot that the node has information about in its ledger.",
[]string{IdentityLabel},
nil,
),
firstAvailableBlock: prometheus.NewDesc(
"solana_first_available_block",
"The slot of the lowest confirmed block that has not been purged from the ledger.",
[]string{IdentityLabel},
nil,
),
}
@ -126,6 +141,8 @@ func (c *SolanaCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- c.balances
ch <- c.isHealthy
ch <- c.numSlotsBehind
ch <- c.minimumLedgerSlot
ch <- c.firstAvailableBlock
}
func (c *SolanaCollector) collectVoteAccounts(ctx context.Context, ch chan<- prometheus.Metric) {
@ -194,6 +211,28 @@ func (c *SolanaCollector) collectVersion(ctx context.Context, ch chan<- promethe
ch <- prometheus.MustNewConstMetric(c.solanaVersion, prometheus.GaugeValue, 1, version)
}
func (c *SolanaCollector) collectMinimumLedgerSlot(ctx context.Context, ch chan<- prometheus.Metric) {
slot, err := c.rpcClient.GetMinimumLedgerSlot(ctx)
if err != nil {
klog.Errorf("failed to get minimum lidger slot: %v", err)
ch <- prometheus.NewInvalidMetric(c.minimumLedgerSlot, err)
return
}
ch <- prometheus.MustNewConstMetric(c.minimumLedgerSlot, prometheus.GaugeValue, float64(*slot), *c.identity)
}
func (c *SolanaCollector) collectFirstAvailableBlock(ctx context.Context, ch chan<- prometheus.Metric) {
block, err := c.rpcClient.GetFirstAvailableBlock(ctx)
if err != nil {
klog.Errorf("failed to get first available block: %v", err)
ch <- prometheus.NewInvalidMetric(c.firstAvailableBlock, err)
return
}
ch <- prometheus.MustNewConstMetric(c.firstAvailableBlock, prometheus.GaugeValue, float64(*block), *c.identity)
}
func (c *SolanaCollector) collectBalances(ctx context.Context, ch chan<- prometheus.Metric) {
balances, err := FetchBalances(ctx, c.rpcClient, c.balanceAddresses)
@ -241,8 +280,8 @@ func (c *SolanaCollector) collectHealth(ctx context.Context, ch chan<- prometheu
}
}
ch <- prometheus.MustNewConstMetric(c.isHealthy, prometheus.GaugeValue, float64(isHealthy))
ch <- prometheus.MustNewConstMetric(c.numSlotsBehind, prometheus.GaugeValue, float64(numSlotsBehind))
ch <- prometheus.MustNewConstMetric(c.isHealthy, prometheus.GaugeValue, float64(isHealthy), *c.identity)
ch <- prometheus.MustNewConstMetric(c.numSlotsBehind, prometheus.GaugeValue, float64(numSlotsBehind), *c.identity)
return
}
@ -255,6 +294,8 @@ func (c *SolanaCollector) Collect(ch chan<- prometheus.Metric) {
c.collectVersion(ctx, ch)
c.collectBalances(ctx, ch)
c.collectHealth(ctx, ch)
c.collectMinimumLedgerSlot(ctx, ch)
c.collectFirstAvailableBlock(ctx, ch)
}
func main() {
@ -273,10 +314,13 @@ func main() {
if err != nil {
klog.Fatalf("Failed to get associated vote accounts for %v: %v", config.NodeKeys, err)
}
collector := NewSolanaCollector(client, slotPacerSchedule, config.BalanceAddresses, config.NodeKeys, votekeys)
identity, err := client.GetIdentity(ctx)
if err != nil {
klog.Fatalf("Failed to get identity: %v", err)
}
collector := NewSolanaCollector(client, slotPacerSchedule, config.BalanceAddresses, config.NodeKeys, votekeys, identity)
slotWatcher := NewSlotWatcher(
client, config.NodeKeys, votekeys, config.ComprehensiveSlotTracking, config.MonitorBlockSizes,
client, config.NodeKeys, votekeys, *identity, config.ComprehensiveSlotTracking, config.MonitorBlockSizes,
)
ctx, cancel := context.WithCancel(ctx)
defer cancel()

View File

@ -43,6 +43,7 @@ type (
var (
identities = []string{"aaa", "bbb", "ccc"}
votekeys = []string{"AAA", "BBB", "CCC"}
identity = "aaa"
balances = map[string]float64{"aaa": 1, "bbb": 2, "ccc": 3, "AAA": 4, "BBB": 5, "CCC": 6}
identityVotes = map[string]string{"aaa": "AAA", "bbb": "BBB", "ccc": "CCC"}
nv = len(identities)
@ -184,6 +185,24 @@ func (c *staticRPCClient) GetHealth(ctx context.Context) (*string, error) {
return &health, nil
}
//goland:noinspection GoUnusedParameter
func (c *staticRPCClient) GetIdentity(ctx context.Context) (*string, error) {
nodeIdentity := "aaa"
return &nodeIdentity, nil
}
//goland:noinspection GoUnusedParameter
func (c *staticRPCClient) GetFirstAvailableBlock(ctx context.Context) (*int64, error) {
firstAvailiableBlock := int64(33)
return &firstAvailiableBlock, nil
}
//goland:noinspection GoUnusedParameter
func (c *staticRPCClient) GetMinimumLedgerSlot(ctx context.Context) (*int64, error) {
minimumLedgerSlot := int64(23)
return &minimumLedgerSlot, nil
}
/*
===== DYNAMIC CLIENT =====:
*/
@ -385,6 +404,24 @@ func (c *dynamicRPCClient) GetHealth(ctx context.Context) (*string, error) {
return &health, nil
}
//goland:noinspection GoUnusedParameter
func (c *dynamicRPCClient) GetIdentity(ctx context.Context) (*string, error) {
nodeIdentity := "aaa"
return &nodeIdentity, nil
}
//goland:noinspection GoUnusedParameter
func (c *dynamicRPCClient) GetFirstAvailableBlock(ctx context.Context) (*int64, error) {
firstAvailiableBlock := int64(33)
return &firstAvailiableBlock, nil
}
//goland:noinspection GoUnusedParameter
func (c *dynamicRPCClient) GetMinimumLedgerSlot(ctx context.Context) (*int64, error) {
minimumLedgerSlot := int64(23)
return &minimumLedgerSlot, nil
}
/*
===== OTHER TEST UTILITIES =====:
*/
@ -420,7 +457,7 @@ func runCollectionTests(t *testing.T, collector prometheus.Collector, testCases
}
func TestSolanaCollector_Collect_Static(t *testing.T) {
collector := NewSolanaCollector(&staticRPCClient{}, slotPacerSchedule, nil, identities, votekeys)
collector := NewSolanaCollector(&staticRPCClient{}, slotPacerSchedule, nil, identities, votekeys, &identity)
prometheus.NewPedanticRegistry().MustRegister(collector)
testCases := []collectionTest{
@ -492,7 +529,7 @@ solana_node_version{version="1.16.7"} 1
func TestSolanaCollector_Collect_Dynamic(t *testing.T) {
client := newDynamicRPCClient()
collector := NewSolanaCollector(client, slotPacerSchedule, nil, identities, votekeys)
collector := NewSolanaCollector(client, slotPacerSchedule, nil, identities, votekeys, &identity)
prometheus.NewPedanticRegistry().MustRegister(collector)
// start off by testing initial state:

View File

@ -23,6 +23,7 @@ type SlotWatcher struct {
// config:
nodekeys []string
votekeys []string
identity string
comprehensiveSlotTracking bool
monitorBlockSizes bool
@ -48,12 +49,14 @@ type SlotWatcher struct {
InflationRewardsMetric *prometheus.GaugeVec
FeeRewardsMetric *prometheus.CounterVec
BlockSizeMetric *prometheus.GaugeVec
BlockHeight *prometheus.GaugeVec
}
func NewSlotWatcher(
client rpc.Provider,
nodekeys []string,
votekeys []string,
identity string,
comprehensiveSlotTracking bool,
monitorBlockSizes bool,
) *SlotWatcher {
@ -61,6 +64,7 @@ func NewSlotWatcher(
client: client,
nodekeys: nodekeys,
votekeys: votekeys,
identity: identity,
comprehensiveSlotTracking: comprehensiveSlotTracking,
monitorBlockSizes: monitorBlockSizes,
// metrics:
@ -119,6 +123,13 @@ func NewSlotWatcher(
},
[]string{NodekeyLabel},
),
BlockHeight: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "solana_block_height",
Help: "The current block height of the node.",
},
[]string{IdentityLabel},
),
}
// register:
for _, collector := range []prometheus.Collector{
@ -132,6 +143,7 @@ func NewSlotWatcher(
watcher.InflationRewardsMetric,
watcher.FeeRewardsMetric,
watcher.BlockSizeMetric,
watcher.BlockHeight,
} {
if err := prometheus.Register(collector); err != nil {
var (
@ -176,6 +188,7 @@ func (c *SlotWatcher) WatchSlots(ctx context.Context, pace time.Duration) {
c.TotalTransactionsMetric.Set(float64(epochInfo.TransactionCount))
c.SlotHeightMetric.Set(float64(epochInfo.AbsoluteSlot))
c.BlockHeight.WithLabelValues(c.identity).Set(float64(epochInfo.BlockHeight))
// if we get here, then the tracking numbers are set, so this is a "normal" run.
// start by checking if we have progressed since last run:

View File

@ -90,14 +90,16 @@ func assertSlotMetricsChangeCorrectly(t *testing.T, initial slotMetricValues, fi
func TestSolanaCollector_WatchSlots_Static(t *testing.T) {
client := staticRPCClient{}
collector := NewSolanaCollector(&client, 100*time.Millisecond, nil, identities, votekeys)
watcher := NewSlotWatcher(&client, identities, votekeys, false, false)
ctx, cancel := context.WithCancel(context.Background())
nodeIdentity, _ := client.GetIdentity(ctx)
collector := NewSolanaCollector(&client, 100*time.Millisecond, nil, identities, votekeys, nodeIdentity)
watcher := NewSlotWatcher(&client, identities, votekeys, *nodeIdentity, false, false)
// reset metrics before running tests:
watcher.LeaderSlotsTotalMetric.Reset()
watcher.LeaderSlotsByEpochMetric.Reset()
prometheus.NewPedanticRegistry().MustRegister(collector)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go watcher.WatchSlots(ctx, collector.slotPace)
@ -159,15 +161,17 @@ func TestSolanaCollector_WatchSlots_Static(t *testing.T) {
func TestSolanaCollector_WatchSlots_Dynamic(t *testing.T) {
// create clients:
client := newDynamicRPCClient()
collector := NewSolanaCollector(client, 300*time.Millisecond, nil, identities, votekeys)
watcher := NewSlotWatcher(client, identities, votekeys, false, false)
runCtx, runCancel := context.WithCancel(context.Background())
nodeIdentity, _ := client.GetIdentity(runCtx)
collector := NewSolanaCollector(client, 300*time.Millisecond, nil, identities, votekeys, nodeIdentity)
watcher := NewSlotWatcher(client, identities, votekeys, *nodeIdentity, false, false)
// reset metrics before running tests:
watcher.LeaderSlotsTotalMetric.Reset()
watcher.LeaderSlotsByEpochMetric.Reset()
prometheus.NewPedanticRegistry().MustRegister(collector)
// start client/collector and wait a bit:
runCtx, runCancel := context.WithCancel(context.Background())
defer runCancel()
go client.Run(runCtx)
time.Sleep(time.Second)

View File

@ -75,6 +75,9 @@ type Provider interface {
GetBlock(ctx context.Context, commitment Commitment, slot int64, transactionDetails string) (*Block, error)
GetHealth(ctx context.Context) (*string, error)
GetIdentity(ctx context.Context) (*string, error)
GetMinimumLedgerSlot(ctx context.Context) (*int64, error)
GetFirstAvailableBlock(ctx context.Context) (*int64, error)
}
func (c Commitment) MarshalJSON() ([]byte, error) {
@ -315,3 +318,33 @@ func (c *Client) GetHealth(ctx context.Context) (*string, error) {
}
return &resp.Result, nil
}
// GetIdentity returns the identity pubkey for the current node
// See API docs: https://solana.com/docs/rpc/http/getidentity
func (c *Client) GetIdentity(ctx context.Context) (*string, error) {
var resp response[Identity]
if err := getResponse(ctx, c, "getIdentity", []any{}, &resp); err != nil {
return nil, err
}
return &resp.Result.Identity, nil
}
// MinimumLedgerSlot returns the lowest slot that the node has information about in its ledger.
// See API docs: https://solana.com/docs/rpc/http/minimumledgerslot
func (c *Client) GetMinimumLedgerSlot(ctx context.Context) (*int64, error) {
var resp response[int64]
if err := getResponse(ctx, c, "minimumLedgerSlot", []any{}, &resp); err != nil {
return nil, err
}
return &resp.Result, nil
}
// GetFirstAvailableBlock returns the slot of the lowest confirmed block that has not been purged from the ledger
// See API docs: https://solana.com/docs/rpc/http/getfirstavailableblock
func (c *Client) GetFirstAvailableBlock(ctx context.Context) (*int64, error) {
var resp response[int64]
if err := getResponse(ctx, c, "getFirstAvailableBlock", []any{}, &resp); err != nil {
return nil, err
}
return &resp.Result, nil
}

View File

@ -98,6 +98,9 @@ type (
RewardType string `json:"rewardType"`
Commission uint8 `json:"commission"`
}
Identity struct {
Identity string `json:"identity"`
}
)
func (e *RPCError) Error() string {