Add `epoch_slot` to block receipt delay.

This adds a label `epoch_slot` to the prometheus metric
`vouch_block_receipt_delay_seconds`.  It has been noted that the receipt
delay is often significantly higher for the first (and, to a lesser
extent, second) slot in a given epoch.  This allows the receipt delay to
be examined for specific slots in a given epoch.
This commit is contained in:
Jim McDonald 2020-12-26 17:28:01 +00:00
parent fcab5aa757
commit 052bc332d4
No known key found for this signature in database
GPG Key ID: 89CEB61B2AD2A5E7
6 changed files with 13 additions and 10 deletions

View File

@ -49,7 +49,7 @@ Vouch's job scheduler provides a number of metrics. The specific metrics are:
- `vouch_scheduler_jobs_started_total` number of jobs started. This has a label `trigger` which can be "timer" if the job ran due to reaching its designated start time or "signal" if the job ran due to being triggered before its designated start time.
## Client operations
Client operations metrics provide information about the response time of beacon nodes, as well as if they returned
Client operations metrics provide information about the response time of beacon nodes, as well as whether the request to them succeeded or failed. This can be used to understand how quickly and how well beacon nodes are responding to requests; for example, if Vouch is using multiple beacon nodes in different data centres, this can be used to obtain data about their response times due to network latency.
`vouch_client_operation_duration_seconds` is provided as a histogram, with buckets in increments of 0.1 seconds up to 4 seconds. It has two labels:
@ -65,5 +65,5 @@ There is also a companion metric `vouch_client_operation_requests_total`, which
## Network
Network metrics provide information about the network from vouch's point of view. Although these are not under vouch's control, they have an impact on the performance of the validator. The specific metrics are:
- `vouch_block_receipt_delay_seconds` the delay between the start of a slot and the arrival of the block for that slot. This metric is provided as a histogram, with buckets in increments of 0.1 seconds up to 4 seconds.
- `vouch_block_receipt_delay_seconds` the delay between the start of a slot and the arrival of the block for that slot. This metric is provided as a histogram, with buckets in increments of 0.1 seconds up to 12 seconds. This has a label `epoch_slot` which is the position of the slot in the epoch (0 through 31, inclusive).
- `vouch_attestationaggregation_coverage_ratio` the ratio of the number of attestations included in the aggregate to the total number of attestations for the aggregate. This metric is provided as a histogram, with buckets in increments of 0.1 up to 1.

View File

@ -30,7 +30,9 @@ func (s *Service) HandleHeadEvent(event *api.Event) {
if data.Slot != s.chainTimeService.CurrentSlot() {
return
}
s.monitor.BlockDelay(time.Since(s.chainTimeService.StartOfSlot(data.Slot)))
epochSlot := uint(uint64(data.Slot) % s.slotsPerEpoch)
s.monitor.BlockDelay(epochSlot, time.Since(s.chainTimeService.StartOfSlot(data.Slot)))
// We give the block half a second to propagate around the rest of the network before
// kicking off attestations for the block's slot.

View File

@ -43,7 +43,7 @@ func (s *Service) JobStartedOnSignal() {}
func (s *Service) NewEpoch() {}
// BlockDelay provides the delay between the start of a slot and vouch receiving its block.
func (s *Service) BlockDelay(delay time.Duration) {}
func (s *Service) BlockDelay(epochSlot uint, delay time.Duration) {}
// BeaconBlockProposalCompleted is called when a block proposal process has completed.
func (s *Service) BeaconBlockProposalCompleted(started time.Time, result string) {}

View File

@ -14,6 +14,7 @@
package prometheus
import (
"fmt"
"time"
"github.com/prometheus/client_golang/prometheus"
@ -40,7 +41,7 @@ func (s *Service) setupControllerMetrics() error {
}
s.blockReceiptDelay =
prometheus.NewHistogram(prometheus.HistogramOpts{
prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "vouch",
Name: "block_receipt_delay_seconds",
Help: "The delay between the start of a slot and the time vouch receives it.",
@ -58,7 +59,7 @@ func (s *Service) setupControllerMetrics() error {
10.1, 10.2, 10.3, 10.4, 10.5, 10.6, 10.7, 10.8, 10.9, 11.0,
11.1, 11.2, 11.3, 11.4, 11.5, 11.6, 11.7, 11.8, 11.9, 12.0,
},
})
}, []string{"epoch_slot"})
if err := prometheus.Register(s.blockReceiptDelay); err != nil {
return err
}
@ -72,6 +73,6 @@ func (s *Service) NewEpoch() {
}
// BlockDelay provides the delay between the start of a slot and vouch receiving its block.
func (s *Service) BlockDelay(delay time.Duration) {
s.blockReceiptDelay.Observe(delay.Seconds())
func (s *Service) BlockDelay(epochSlot uint, delay time.Duration) {
s.blockReceiptDelay.WithLabelValues(fmt.Sprintf("%d", epochSlot)).Observe(delay.Seconds())
}

View File

@ -31,7 +31,7 @@ type Service struct {
schedulerJobsStarted *prometheus.CounterVec
epochsProcessed prometheus.Counter
blockReceiptDelay prometheus.Histogram
blockReceiptDelay *prometheus.HistogramVec
beaconBlockProposalProcessTimer prometheus.Histogram
beaconBlockProposalProcessRequests *prometheus.CounterVec

View File

@ -36,7 +36,7 @@ type ControllerMonitor interface {
// NewEpoch is called when vouch starts processing a new epoch.
NewEpoch()
// BlockDelay provides the delay between the start of a slot and vouch receiving its block.
BlockDelay(delay time.Duration)
BlockDelay(epochSlot uint, delay time.Duration)
}
// BeaconBlockProposalMonitor provides methods to monitor the block proposal process.