mirror of https://github.com/certusone/vouch.git
Add metrics to track strategy operations.
This commit is contained in:
parent
1439ee1937
commit
b825ba40b7
|
@ -1,4 +1,5 @@
|
|||
1.1.0:
|
||||
- added metrics to track strategy operation results
|
||||
- provide release metric in `vouch_release`
|
||||
- provide ready metric in `vouch_ready`
|
||||
- handle chain reorganisations, updating duties as appropriate
|
||||
|
|
|
@ -56,7 +56,7 @@ Vouch's job scheduler provides a number of metrics. The specific metrics are:
|
|||
## Client operations
|
||||
Client operations metrics provide information about the response time of beacon nodes, as well as whether the requests to them succeeded or failed. This can be used to understand how quickly and how well beacon nodes are responding to requests; for example, if Vouch is using multiple beacon nodes in different data centres, this can be used to obtain data about their response times due to network latency.
|
||||
|
||||
`vouch_client_opeation_duration_seconds` is provided as a histogram, with buckets in increments of 0.1 seconds up to 4 seconds. It has two labels:
|
||||
`vouch_client_operation_duration_seconds` is provided as a histogram, with buckets in increments of 0.1 seconds up to 4 seconds. It has two labels:
|
||||
|
||||
- `proposer` is the endpoint for the operation
|
||||
- `operation` is the operation that took place (_e.g._ "beacon block proposal")
|
||||
|
@ -67,6 +67,21 @@ There is also a companion metric `vouch_client_operation_requests_total`, which
|
|||
- `operation` is the operation that took place (_e.g._ "beacon block proposal")
|
||||
- `result` is the result of the operation, either "succeeded" or "failed"
|
||||
|
||||
## Strategy operations
|
||||
Strategy operations metrics provide information about the results and calculation times of strategies. This can be used to understand which beacon nodes are providing the most useful information to Vouch, and how quickly Vouch is deciding on which data to use in its attestations and proposals.
|
||||
|
||||
`vouch_strategy_operation_duration_seconds` is provided as a histogram, with buckets in increments of 0.1 seconds up to 4 seconds. It has three labels:
|
||||
|
||||
- `strategy` is the strategy for the operation
|
||||
- `provider` is the provider for the operation
|
||||
- `operation` is the operation that took place (_e.g._ "beacon block proposal")
|
||||
|
||||
There is also a companion metric `vouch_strategy_operation_used_total`, which is a simple count of the number of times a result has been used by a strategy. It has three labels:
|
||||
|
||||
- `strategy` is the strategy for the operation
|
||||
- `provider` is the provider for the operation
|
||||
- `operation` is the operation that took place (_e.g._ "beacon block proposal")
|
||||
|
||||
## Network
|
||||
Network metrics provide information about the network from vouch's point of view. Although these are not under vouch's control, they have an impact on the performance of the validator. The specific metrics are:
|
||||
|
||||
|
|
|
@ -77,3 +77,7 @@ func (s *Service) Accounts(state string, count uint64) {}
|
|||
// ClientOperation provides a generic monitor for client operations; this implementation has an empty body and records nothing.
|
||||
func (s *Service) ClientOperation(provider string, name string, succeeded bool, duration time.Duration) {
|
||||
}
|
||||
|
||||
// StrategyOperation provides a generic monitor for strategy operations; this implementation has an empty body and records nothing.
|
||||
func (s *Service) StrategyOperation(strategy string, provider string, operation string, duration time.Duration) {
|
||||
}
|
||||
|
|
|
@ -43,6 +43,30 @@ func (s *Service) setupClientMetrics() error {
|
|||
if err := prometheus.Register(s.clientOperationTimer); err != nil {
|
||||
return err
|
||||
}
|
||||
s.strategyOperationCounter = prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Namespace: "vouch",
|
||||
Subsystem: "strategy_operation",
|
||||
Name: "used_total",
|
||||
Help: "The results used by a strategy.",
|
||||
}, []string{"strategy", "provider", "operation"})
|
||||
if err := prometheus.Register(s.strategyOperationCounter); err != nil {
|
||||
return err
|
||||
}
|
||||
s.strategyOperationTimer = prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Namespace: "vouch",
|
||||
Subsystem: "strategy_operation",
|
||||
Name: "duration_seconds",
|
||||
Help: "The time vouch spends in strategy operations.",
|
||||
Buckets: []float64{
|
||||
0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
|
||||
1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
|
||||
2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0,
|
||||
3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0,
|
||||
},
|
||||
}, []string{"strategy", "provider", "operation"})
|
||||
if err := prometheus.Register(s.strategyOperationTimer); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
@ -56,3 +80,9 @@ func (s *Service) ClientOperation(provider string, operation string, succeeded b
|
|||
s.clientOperationCounter.WithLabelValues(provider, operation, "failed").Add(1)
|
||||
}
|
||||
}
|
||||
|
||||
// StrategyOperation provides a generic monitor for strategy operations: it adds 1 to the strategy operation counter and observes the duration (in seconds) on the strategy operation timer, both labelled with strategy, provider and operation.
|
||||
func (s *Service) StrategyOperation(strategy string, provider string, operation string, duration time.Duration) {
|
||||
s.strategyOperationCounter.WithLabelValues(strategy, provider, operation).Add(1)
|
||||
s.strategyOperationTimer.WithLabelValues(strategy, provider, operation).Observe(duration.Seconds())
|
||||
}
|
||||
|
|
|
@ -50,8 +50,10 @@ type Service struct {
|
|||
|
||||
accountManagerAccounts *prometheus.GaugeVec
|
||||
|
||||
clientOperationCounter *prometheus.CounterVec
|
||||
clientOperationTimer *prometheus.HistogramVec
|
||||
clientOperationCounter *prometheus.CounterVec
|
||||
clientOperationTimer *prometheus.HistogramVec
|
||||
strategyOperationCounter *prometheus.CounterVec
|
||||
strategyOperationTimer *prometheus.HistogramVec
|
||||
}
|
||||
|
||||
// module-wide log.
|
||||
|
|
|
@ -83,6 +83,8 @@ type AccountManagerMonitor interface {
|
|||
type ClientMonitor interface {
|
||||
// ClientOperation provides a generic monitor for client operations, taking the provider, the operation name, whether it succeeded, and how long it took.
|
||||
ClientOperation(provider string, name string, succeeded bool, duration time.Duration)
|
||||
// StrategyOperation provides a generic monitor for strategy operations, taking the strategy, the provider, the operation, and how long it took.
|
||||
StrategyOperation(strategy string, provider string, operation string, duration time.Duration)
|
||||
}
|
||||
|
||||
// ValidatorsManagerMonitor provides methods to monitor the validators manager.
|
||||
|
|
|
@ -23,6 +23,7 @@ import (
|
|||
)
|
||||
|
||||
// aggregateAttestationResponse bundles an aggregate attestation with its score and the name of the provider it is attributed to.
type aggregateAttestationResponse struct {
|
||||
provider string
|
||||
aggregate *spec.Attestation
|
||||
score float64
|
||||
}
|
||||
|
@ -57,6 +58,7 @@ func (s *Service) AggregateAttestation(ctx context.Context, slot spec.Slot, atte
|
|||
|
||||
score := s.scoreAggregateAttestation(ctx, name, aggregate)
|
||||
respCh <- &aggregateAttestationResponse{
|
||||
provider: name,
|
||||
aggregate: aggregate,
|
||||
score: score,
|
||||
}
|
||||
|
@ -68,6 +70,8 @@ func (s *Service) AggregateAttestation(ctx context.Context, slot spec.Slot, atte
|
|||
errored := 0
|
||||
bestScore := float64(0)
|
||||
var bestAggregateAttestation *spec.Attestation
|
||||
bestProvider := ""
|
||||
|
||||
for responded+errored != len(s.aggregateAttestationProviders) {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
|
@ -81,6 +85,7 @@ func (s *Service) AggregateAttestation(ctx context.Context, slot spec.Slot, atte
|
|||
if bestAggregateAttestation == nil || resp.score > bestScore {
|
||||
bestAggregateAttestation = resp.aggregate
|
||||
bestScore = resp.score
|
||||
bestProvider = resp.provider
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -90,6 +95,9 @@ func (s *Service) AggregateAttestation(ctx context.Context, slot spec.Slot, atte
|
|||
return nil, errors.New("no aggregate attestations received")
|
||||
}
|
||||
log.Trace().Stringer("aggregate_attestation", bestAggregateAttestation).Float64("score", bestScore).Msg("Selected best aggregate attestation")
|
||||
if bestProvider != "" {
|
||||
s.clientMonitor.StrategyOperation("best", bestProvider, "aggregate attestation", time.Since(started))
|
||||
}
|
||||
|
||||
return bestAggregateAttestation, nil
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ import (
|
|||
)
|
||||
|
||||
// attestationDataResponse bundles attestation data with its score and the name of the provider it is attributed to.
type attestationDataResponse struct {
|
||||
provider string
|
||||
attestationData *spec.AttestationData
|
||||
score float64
|
||||
}
|
||||
|
@ -53,6 +54,7 @@ func (s *Service) AttestationData(ctx context.Context, slot spec.Slot, committee
|
|||
|
||||
score := s.scoreAttestationData(ctx, provider, name, attestationData)
|
||||
respCh <- &attestationDataResponse{
|
||||
provider: name,
|
||||
attestationData: attestationData,
|
||||
score: score,
|
||||
}
|
||||
|
@ -64,6 +66,8 @@ func (s *Service) AttestationData(ctx context.Context, slot spec.Slot, committee
|
|||
errored := 0
|
||||
bestScore := float64(0)
|
||||
var bestAttestationData *spec.AttestationData
|
||||
bestProvider := ""
|
||||
|
||||
for responded+errored != len(s.attestationDataProviders) {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
|
@ -77,6 +81,7 @@ func (s *Service) AttestationData(ctx context.Context, slot spec.Slot, committee
|
|||
if bestAttestationData == nil || resp.score > bestScore {
|
||||
bestAttestationData = resp.attestationData
|
||||
bestScore = resp.score
|
||||
bestProvider = resp.provider
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -87,6 +92,9 @@ func (s *Service) AttestationData(ctx context.Context, slot spec.Slot, committee
|
|||
return nil, errors.New("no attestations received")
|
||||
}
|
||||
log.Trace().Stringer("attestation_data", bestAttestationData).Float64("score", bestScore).Msg("Selected best attestation")
|
||||
if bestProvider != "" {
|
||||
s.clientMonitor.StrategyOperation("best", bestProvider, "attestation data", time.Since(started))
|
||||
}
|
||||
|
||||
return bestAttestationData, nil
|
||||
}
|
||||
|
|
|
@ -29,6 +29,7 @@ func (s *Service) BeaconBlockProposal(ctx context.Context, slot spec.Slot, randa
|
|||
var mu sync.Mutex
|
||||
bestScore := float64(0)
|
||||
var bestProposal *spec.BeaconBlock
|
||||
bestProvider := ""
|
||||
|
||||
started := time.Now()
|
||||
sem := semaphore.NewWeighted(s.processConcurrency)
|
||||
|
@ -80,11 +81,16 @@ func (s *Service) BeaconBlockProposal(ctx context.Context, slot spec.Slot, randa
|
|||
if score > bestScore || bestProposal == nil {
|
||||
bestScore = score
|
||||
bestProposal = proposal
|
||||
bestProvider = name
|
||||
}
|
||||
mu.Unlock()
|
||||
}(ctx, sem, &wg, name, provider, &mu)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
if bestProvider != "" {
|
||||
s.clientMonitor.StrategyOperation("best", bestProvider, "beacon block proposal", time.Since(started))
|
||||
}
|
||||
|
||||
return bestProposal, nil
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue