add metrics for tick producer and poh_recorder (#15931)

This commit is contained in:
Jeff Washington (jwash) 2021-03-17 10:38:26 -05:00 committed by GitHub
parent 5460fb10a2
commit 40997d0aef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 69 additions and 29 deletions

View File

@ -80,6 +80,7 @@ pub struct PohRecorder {
tick_lock_contention_us: u64,
tick_overhead_us: u64,
record_us: u64,
last_metric: Instant,
}
impl PohRecorder {
@ -262,14 +263,15 @@ impl PohRecorder {
) {
self.clear_bank();
let mut cache = vec![];
{
let poh_hash = {
let mut poh = self.poh.lock().unwrap();
info!(
"reset poh from: {},{},{} to: {},{}",
poh.hash, self.tick_height, self.start_slot, blockhash, start_slot
);
poh.reset(blockhash, self.poh_config.hashes_per_tick);
}
poh.hash
};
info!(
"reset poh from: {},{},{} to: {},{}",
poh_hash, self.tick_height, self.start_slot, blockhash, start_slot
);
std::mem::swap(&mut cache, &mut self.tick_cache);
@ -396,23 +398,26 @@ impl PohRecorder {
}
fn report_metrics(&mut self, bank_slot: Slot) {
datapoint_info!(
"poh_recorder",
("slot", bank_slot, i64),
("tick_lock_contention", self.tick_lock_contention_us, i64),
("record_us", self.record_us, i64),
("tick_overhead", self.tick_overhead_us, i64),
(
"record_lock_contention",
self.record_lock_contention_us,
i64
),
);
if self.last_metric.elapsed().as_millis() > 1000 {
datapoint_info!(
"poh_recorder",
("slot", bank_slot, i64),
("tick_lock_contention", self.tick_lock_contention_us, i64),
("record_us", self.record_us, i64),
("tick_overhead", self.tick_overhead_us, i64),
(
"record_lock_contention",
self.record_lock_contention_us,
i64
),
);
self.tick_lock_contention_us = 0;
self.record_us = 0;
self.tick_overhead_us = 0;
self.record_lock_contention_us = 0;
self.tick_lock_contention_us = 0;
self.record_us = 0;
self.tick_overhead_us = 0;
self.record_lock_contention_us = 0;
self.last_metric = Instant::now();
}
}
pub fn record(
@ -424,6 +429,7 @@ impl PohRecorder {
// Entries without transactions are used to track real-time passing in the ledger and
// cannot be generated by `record()`
assert!(!transactions.is_empty(), "No transactions provided");
self.report_metrics(bank_slot);
loop {
self.flush_cache(false)?;
@ -432,7 +438,6 @@ impl PohRecorder {
.as_ref()
.ok_or(PohRecorderError::MaxHeightReached)?;
if bank_slot != working_bank.bank.slot() {
self.report_metrics(bank_slot);
return Err(PohRecorderError::MaxHeightReached);
}
@ -504,6 +509,7 @@ impl PohRecorder {
tick_lock_contention_us: 0,
record_us: 0,
tick_overhead_us: 0,
last_metric: Instant::now(),
},
receiver,
)

View File

@ -1,6 +1,7 @@
//! The `poh_service` module implements a service that records the passing of
//! "ticks", a measure of time in the PoH stream
use crate::poh_recorder::PohRecorder;
use solana_measure::measure::Measure;
use solana_sdk::poh_config::PohConfig;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::{Arc, Mutex};
@ -117,11 +118,34 @@ impl PohService {
let mut num_ticks = 0;
let mut num_hashes = 0;
let mut total_sleep_us = 0;
let mut total_lock_time_ns = 0;
let mut total_hash_time_ns = 0;
let mut total_tick_time_ns = 0;
loop {
num_hashes += hashes_per_batch;
if poh.lock().unwrap().hash(hashes_per_batch) {
let should_tick = {
let mut lock_time = Measure::start("lock");
let mut poh_l = poh.lock().unwrap();
lock_time.stop();
total_lock_time_ns += lock_time.as_ns();
let mut hash_time = Measure::start("hash");
let r = poh_l.hash(hashes_per_batch);
hash_time.stop();
total_hash_time_ns += hash_time.as_ns();
r
};
if should_tick {
// Lock PohRecorder only for the final hash...
poh_recorder.lock().unwrap().tick();
{
let mut lock_time = Measure::start("lock");
let mut poh_recorder_l = poh_recorder.lock().unwrap();
lock_time.stop();
total_lock_time_ns += lock_time.as_ns();
let mut tick_time = Measure::start("tick");
poh_recorder_l.tick();
tick_time.stop();
total_tick_time_ns += tick_time.as_ns();
}
num_ticks += 1;
let elapsed_ns = now.elapsed().as_nanos() as u64;
// sleep is not accurate enough to get a predictable time.
@ -133,18 +157,24 @@ impl PohService {
now = Instant::now();
if last_metric.elapsed().as_millis() > 1000 {
let elapsed_ms = last_metric.elapsed().as_millis() as u64;
let ms_per_slot = (elapsed_ms * ticks_per_slot) / num_ticks;
let elapsed_us = last_metric.elapsed().as_micros() as u64;
let us_per_slot = (elapsed_us * ticks_per_slot) / num_ticks;
datapoint_info!(
"poh-service",
("ticks", num_ticks as i64, i64),
("hashes", num_hashes as i64, i64),
("elapsed_ms", ms_per_slot, i64),
("total_sleep_ms", total_sleep_us / 1000, i64),
("elapsed_us", us_per_slot, i64),
("total_sleep_us", total_sleep_us, i64),
("total_tick_time_us", total_tick_time_ns / 1000, i64),
("total_lock_time_us", total_lock_time_ns / 1000, i64),
("total_hash_time_us", total_hash_time_ns / 1000, i64),
);
total_sleep_us = 0;
num_ticks = 0;
num_hashes = 0;
total_tick_time_ns = 0;
total_lock_time_ns = 0;
total_hash_time_ns = 0;
last_metric = Instant::now();
}
if poh_exit.load(Ordering::Relaxed) {

View File

@ -20,6 +20,10 @@ impl Measure {
self.duration = duration_as_ns(&self.start.elapsed());
}
pub fn as_ns(&self) -> u64 {
self.duration
}
pub fn as_us(&self) -> u64 {
self.duration / 1000
}