add metrics for tick producer and poh_recorder (#15931)
This commit is contained in:
parent
5460fb10a2
commit
40997d0aef
|
@ -80,6 +80,7 @@ pub struct PohRecorder {
|
|||
tick_lock_contention_us: u64,
|
||||
tick_overhead_us: u64,
|
||||
record_us: u64,
|
||||
last_metric: Instant,
|
||||
}
|
||||
|
||||
impl PohRecorder {
|
||||
|
@ -262,14 +263,15 @@ impl PohRecorder {
|
|||
) {
|
||||
self.clear_bank();
|
||||
let mut cache = vec![];
|
||||
{
|
||||
let poh_hash = {
|
||||
let mut poh = self.poh.lock().unwrap();
|
||||
info!(
|
||||
"reset poh from: {},{},{} to: {},{}",
|
||||
poh.hash, self.tick_height, self.start_slot, blockhash, start_slot
|
||||
);
|
||||
poh.reset(blockhash, self.poh_config.hashes_per_tick);
|
||||
}
|
||||
poh.hash
|
||||
};
|
||||
info!(
|
||||
"reset poh from: {},{},{} to: {},{}",
|
||||
poh_hash, self.tick_height, self.start_slot, blockhash, start_slot
|
||||
);
|
||||
|
||||
std::mem::swap(&mut cache, &mut self.tick_cache);
|
||||
|
||||
|
@ -396,23 +398,26 @@ impl PohRecorder {
|
|||
}
|
||||
|
||||
fn report_metrics(&mut self, bank_slot: Slot) {
|
||||
datapoint_info!(
|
||||
"poh_recorder",
|
||||
("slot", bank_slot, i64),
|
||||
("tick_lock_contention", self.tick_lock_contention_us, i64),
|
||||
("record_us", self.record_us, i64),
|
||||
("tick_overhead", self.tick_overhead_us, i64),
|
||||
(
|
||||
"record_lock_contention",
|
||||
self.record_lock_contention_us,
|
||||
i64
|
||||
),
|
||||
);
|
||||
if self.last_metric.elapsed().as_millis() > 1000 {
|
||||
datapoint_info!(
|
||||
"poh_recorder",
|
||||
("slot", bank_slot, i64),
|
||||
("tick_lock_contention", self.tick_lock_contention_us, i64),
|
||||
("record_us", self.record_us, i64),
|
||||
("tick_overhead", self.tick_overhead_us, i64),
|
||||
(
|
||||
"record_lock_contention",
|
||||
self.record_lock_contention_us,
|
||||
i64
|
||||
),
|
||||
);
|
||||
|
||||
self.tick_lock_contention_us = 0;
|
||||
self.record_us = 0;
|
||||
self.tick_overhead_us = 0;
|
||||
self.record_lock_contention_us = 0;
|
||||
self.tick_lock_contention_us = 0;
|
||||
self.record_us = 0;
|
||||
self.tick_overhead_us = 0;
|
||||
self.record_lock_contention_us = 0;
|
||||
self.last_metric = Instant::now();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn record(
|
||||
|
@ -424,6 +429,7 @@ impl PohRecorder {
|
|||
// Entries without transactions are used to track real-time passing in the ledger and
|
||||
// cannot be generated by `record()`
|
||||
assert!(!transactions.is_empty(), "No transactions provided");
|
||||
self.report_metrics(bank_slot);
|
||||
loop {
|
||||
self.flush_cache(false)?;
|
||||
|
||||
|
@ -432,7 +438,6 @@ impl PohRecorder {
|
|||
.as_ref()
|
||||
.ok_or(PohRecorderError::MaxHeightReached)?;
|
||||
if bank_slot != working_bank.bank.slot() {
|
||||
self.report_metrics(bank_slot);
|
||||
return Err(PohRecorderError::MaxHeightReached);
|
||||
}
|
||||
|
||||
|
@ -504,6 +509,7 @@ impl PohRecorder {
|
|||
tick_lock_contention_us: 0,
|
||||
record_us: 0,
|
||||
tick_overhead_us: 0,
|
||||
last_metric: Instant::now(),
|
||||
},
|
||||
receiver,
|
||||
)
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
//! The `poh_service` module implements a service that records the passing of
|
||||
//! "ticks", a measure of time in the PoH stream
|
||||
use crate::poh_recorder::PohRecorder;
|
||||
use solana_measure::measure::Measure;
|
||||
use solana_sdk::poh_config::PohConfig;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
@ -117,11 +118,34 @@ impl PohService {
|
|||
let mut num_ticks = 0;
|
||||
let mut num_hashes = 0;
|
||||
let mut total_sleep_us = 0;
|
||||
let mut total_lock_time_ns = 0;
|
||||
let mut total_hash_time_ns = 0;
|
||||
let mut total_tick_time_ns = 0;
|
||||
loop {
|
||||
num_hashes += hashes_per_batch;
|
||||
if poh.lock().unwrap().hash(hashes_per_batch) {
|
||||
let should_tick = {
|
||||
let mut lock_time = Measure::start("lock");
|
||||
let mut poh_l = poh.lock().unwrap();
|
||||
lock_time.stop();
|
||||
total_lock_time_ns += lock_time.as_ns();
|
||||
let mut hash_time = Measure::start("hash");
|
||||
let r = poh_l.hash(hashes_per_batch);
|
||||
hash_time.stop();
|
||||
total_hash_time_ns += hash_time.as_ns();
|
||||
r
|
||||
};
|
||||
if should_tick {
|
||||
// Lock PohRecorder only for the final hash...
|
||||
poh_recorder.lock().unwrap().tick();
|
||||
{
|
||||
let mut lock_time = Measure::start("lock");
|
||||
let mut poh_recorder_l = poh_recorder.lock().unwrap();
|
||||
lock_time.stop();
|
||||
total_lock_time_ns += lock_time.as_ns();
|
||||
let mut tick_time = Measure::start("tick");
|
||||
poh_recorder_l.tick();
|
||||
tick_time.stop();
|
||||
total_tick_time_ns += tick_time.as_ns();
|
||||
}
|
||||
num_ticks += 1;
|
||||
let elapsed_ns = now.elapsed().as_nanos() as u64;
|
||||
// sleep is not accurate enough to get a predictable time.
|
||||
|
@ -133,18 +157,24 @@ impl PohService {
|
|||
now = Instant::now();
|
||||
|
||||
if last_metric.elapsed().as_millis() > 1000 {
|
||||
let elapsed_ms = last_metric.elapsed().as_millis() as u64;
|
||||
let ms_per_slot = (elapsed_ms * ticks_per_slot) / num_ticks;
|
||||
let elapsed_us = last_metric.elapsed().as_micros() as u64;
|
||||
let us_per_slot = (elapsed_us * ticks_per_slot) / num_ticks;
|
||||
datapoint_info!(
|
||||
"poh-service",
|
||||
("ticks", num_ticks as i64, i64),
|
||||
("hashes", num_hashes as i64, i64),
|
||||
("elapsed_ms", ms_per_slot, i64),
|
||||
("total_sleep_ms", total_sleep_us / 1000, i64),
|
||||
("elapsed_us", us_per_slot, i64),
|
||||
("total_sleep_us", total_sleep_us, i64),
|
||||
("total_tick_time_us", total_tick_time_ns / 1000, i64),
|
||||
("total_lock_time_us", total_lock_time_ns / 1000, i64),
|
||||
("total_hash_time_us", total_hash_time_ns / 1000, i64),
|
||||
);
|
||||
total_sleep_us = 0;
|
||||
num_ticks = 0;
|
||||
num_hashes = 0;
|
||||
total_tick_time_ns = 0;
|
||||
total_lock_time_ns = 0;
|
||||
total_hash_time_ns = 0;
|
||||
last_metric = Instant::now();
|
||||
}
|
||||
if poh_exit.load(Ordering::Relaxed) {
|
||||
|
|
|
@ -20,6 +20,10 @@ impl Measure {
|
|||
self.duration = duration_as_ns(&self.start.elapsed());
|
||||
}
|
||||
|
||||
pub fn as_ns(&self) -> u64 {
|
||||
self.duration
|
||||
}
|
||||
|
||||
pub fn as_us(&self) -> u64 {
|
||||
self.duration / 1000
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue