// solana/poh/src/poh_service.rs

//! The `poh_service` module implements a service that records the passing of
//! "ticks", a measure of time in the PoH stream.
use {
    crate::poh_recorder::{PohRecorder, Record},
    crossbeam_channel::Receiver,
    log::*,
    solana_entry::poh::Poh,
    solana_measure::{measure, measure::Measure},
    solana_sdk::poh_config::PohConfig,
    std::{
        sync::{
            atomic::{AtomicBool, Ordering},
            Arc, Mutex, RwLock,
        },
        thread::{self, Builder, JoinHandle},
        time::{Duration, Instant},
    },
};
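
/// Owns the tick-producer thread spawned by `new`; `join` blocks until that
/// thread exits.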
pub struct PohService {
    tick_producer: JoinHandle<()>,
}

// Number of hashes to batch together.
// * If this number is too small, PoH hash rate will suffer.
// * The further this number is above 1, the more transaction recording will
//   suffer from lock contention with the PoH hashing inside `tick_producer()`.
//
// Can use test_poh_service to calibrate this.
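// (test_poh_service honors a HASHES_PER_BATCH environment variable, so
// candidate values can be tried without editing this constant.)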
pub const DEFAULT_HASHES_PER_BATCH: u64 = 64;
pub const DEFAULT_PINNED_CPU_CORE: usize = 0;
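
// Amount of time (in ns) to carve out of each slot's tick budget for work
// other than PoH hashing; see `target_ns_per_tick` below.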
const TARGET_SLOT_ADJUSTMENT_NS: u64 = 50_000_000;

#[derive(Debug)]
struct PohTiming {
    num_ticks: u64,
    num_hashes: u64,
    total_sleep_us: u64,
    total_lock_time_ns: u64,
    total_hash_time_ns: u64,
    total_tick_time_ns: u64,
    last_metric: Instant,
    total_record_time_us: u64,
    total_send_record_result_us: u64,
}

impl PohTiming {
    fn new() -> Self {
        Self {
            num_ticks: 0,
            num_hashes: 0,
            total_sleep_us: 0,
            total_lock_time_ns: 0,
            total_hash_time_ns: 0,
            total_tick_time_ns: 0,
            last_metric: Instant::now(),
            total_record_time_us: 0,
            total_send_record_result_us: 0,
        }
    }

    fn report(&mut self, ticks_per_slot: u64) {
        if self.last_metric.elapsed().as_millis() > 1000 {
            let elapsed_us = self.last_metric.elapsed().as_micros() as u64;
            let us_per_slot = (elapsed_us * ticks_per_slot) / self.num_ticks;
            datapoint_info!(
                "poh-service",
                ("ticks", self.num_ticks as i64, i64),
                ("hashes", self.num_hashes as i64, i64),
                ("elapsed_us", us_per_slot, i64),
                ("total_sleep_us", self.total_sleep_us, i64),
                ("total_tick_time_us", self.total_tick_time_ns / 1000, i64),
                ("total_lock_time_us", self.total_lock_time_ns / 1000, i64),
                ("total_hash_time_us", self.total_hash_time_ns / 1000, i64),
                ("total_record_time_us", self.total_record_time_us, i64),
                (
                    "total_send_record_result_us",
                    self.total_send_record_result_us,
                    i64
                ),
            );
            self.total_sleep_us = 0;
            self.num_ticks = 0;
            self.num_hashes = 0;
            self.total_tick_time_ns = 0;
            self.total_lock_time_ns = 0;
            self.total_hash_time_ns = 0;
            self.last_metric = Instant::now();
            self.total_record_time_us = 0;
            self.total_send_record_result_us = 0;
        }
    }
}
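
// `PohService::new` spawns the tick-producer thread and picks one of three
// modes from the `PohConfig`:
// * `hashes_per_tick: None`, `target_tick_count: None` -> low_power_tick_producer
// * `hashes_per_tick: None`, `target_tick_count: Some(_)` -> short_lived_low_power_tick_producer
// * `hashes_per_tick: Some(_)` -> tick_producer, pinned to a core and hashing flat out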
impl PohService {
    pub fn new(
        poh_recorder: Arc<RwLock<PohRecorder>>,
        poh_config: &PohConfig,
        poh_exit: Arc<AtomicBool>,
        ticks_per_slot: u64,
        pinned_cpu_core: usize,
        hashes_per_batch: u64,
        record_receiver: Receiver<Record>,
    ) -> Self {
        let poh_config = poh_config.clone();
        let tick_producer = Builder::new()
            .name("solPohTickProd".to_string())
            .spawn(move || {
                if poh_config.hashes_per_tick.is_none() {
                    if poh_config.target_tick_count.is_none() {
                        Self::low_power_tick_producer(
                            poh_recorder,
                            &poh_config,
                            &poh_exit,
                            record_receiver,
                        );
                    } else {
                        Self::short_lived_low_power_tick_producer(
                            poh_recorder,
                            &poh_config,
                            &poh_exit,
                            record_receiver,
                        );
                    }
                } else {
                    // PoH service runs in a tight loop, generating hashes as fast as possible.
                    // Let's dedicate one of the CPU cores to this thread so that it can gain
                    // from cache performance.
                    if let Some(cores) = core_affinity::get_core_ids() {
                        core_affinity::set_for_current(cores[pinned_cpu_core]);
                    }
                    Self::tick_producer(
                        poh_recorder,
                        &poh_exit,
                        ticks_per_slot,
                        hashes_per_batch,
                        record_receiver,
                        Self::target_ns_per_tick(
                            ticks_per_slot,
                            poh_config.target_tick_duration.as_nanos() as u64,
                        ),
                    );
                }
                poh_exit.store(true, Ordering::Relaxed);
            })
            .unwrap();
        Self { tick_producer }
    }

    pub fn target_ns_per_tick(ticks_per_slot: u64, target_tick_duration_ns: u64) -> u64 {
        // Shave a little off each tick's budget to leave headroom for
        // processing that happens outside of PoH hash generation.
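        // Worked example: with 64 ticks per slot and a 6.25ms (6_250_000ns)
        // target tick duration, the 50ms TARGET_SLOT_ADJUSTMENT_NS spreads to
        // 781_250ns per tick, yielding a target of 5_468_750ns per tick.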
        let adjustment_per_tick = if ticks_per_slot > 0 {
            TARGET_SLOT_ADJUSTMENT_NS / ticks_per_slot
        } else {
            0
        };
        target_tick_duration_ns.saturating_sub(adjustment_per_tick)
    }

    fn low_power_tick_producer(
        poh_recorder: Arc<RwLock<PohRecorder>>,
        poh_config: &PohConfig,
        poh_exit: &AtomicBool,
        record_receiver: Receiver<Record>,
    ) {
        let mut last_tick = Instant::now();
        while !poh_exit.load(Ordering::Relaxed) {
            let remaining_tick_time = poh_config
                .target_tick_duration
                .saturating_sub(last_tick.elapsed());
            Self::read_record_receiver_and_process(
                &poh_recorder,
                &record_receiver,
                remaining_tick_time,
            );
            if remaining_tick_time.is_zero() {
                last_tick = Instant::now();
                poh_recorder.write().unwrap().tick();
            }
        }
    }
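
    /// Waits up to `timeout` for a record request, applies it to the
    /// recorder, and returns the result to the requester over the reply
    /// channel embedded in the `Record`.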
    pub fn read_record_receiver_and_process(
        poh_recorder: &Arc<RwLock<PohRecorder>>,
        record_receiver: &Receiver<Record>,
        timeout: Duration,
    ) {
        let record = record_receiver.recv_timeout(timeout);
        if let Ok(record) = record {
            if record
                .sender
                .send(poh_recorder.write().unwrap().record(
                    record.slot,
                    record.mixin,
                    record.transactions,
                ))
                .is_err()
            {
                panic!("Error returning mixin hash");
            }
        }
    }

    fn short_lived_low_power_tick_producer(
        poh_recorder: Arc<RwLock<PohRecorder>>,
        poh_config: &PohConfig,
        poh_exit: &AtomicBool,
        record_receiver: Receiver<Record>,
    ) {
        let mut warned = false;
        let mut elapsed_ticks = 0;
        let mut last_tick = Instant::now();
        let num_ticks = poh_config.target_tick_count.unwrap();
        while elapsed_ticks < num_ticks {
            let remaining_tick_time = poh_config
                .target_tick_duration
                .saturating_sub(last_tick.elapsed());
            Self::read_record_receiver_and_process(
                &poh_recorder,
                &record_receiver,
                Duration::from_millis(0),
            );
            if remaining_tick_time.is_zero() {
                last_tick = Instant::now();
                poh_recorder.write().unwrap().tick();
                elapsed_ticks += 1;
            }
            if poh_exit.load(Ordering::Relaxed) && !warned {
                warned = true;
                warn!("exit signal is ignored because PohService is scheduled to exit soon");
            }
        }
    }
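
    // Either drains queued record requests while holding the recorder's
    // write lock, or hashes in `hashes_per_batch` chunks on the Poh mutex
    // until a tick is due or a record request arrives.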
    // returns true if we need to tick
    fn record_or_hash(
        next_record: &mut Option<Record>,
        poh_recorder: &Arc<RwLock<PohRecorder>>,
        timing: &mut PohTiming,
        record_receiver: &Receiver<Record>,
        hashes_per_batch: u64,
        poh: &Arc<Mutex<Poh>>,
        target_ns_per_tick: u64,
    ) -> bool {
        match next_record.take() {
            Some(mut record) => {
                // received message to record
                // so, record for as long as we have queued up record requests
                let mut lock_time = Measure::start("lock");
                let mut poh_recorder_l = poh_recorder.write().unwrap();
                lock_time.stop();
                timing.total_lock_time_ns += lock_time.as_ns();
                let mut record_time = Measure::start("record");
                loop {
                    let res = poh_recorder_l.record(
                        record.slot,
                        record.mixin,
                        std::mem::take(&mut record.transactions),
                    );
                    // what do we do on failure here? Ignore for now.
                    let (_send_res, send_record_result_time) =
                        measure!(record.sender.send(res), "send_record_result");
                    timing.total_send_record_result_us += send_record_result_time.as_us();
                    timing.num_hashes += 1; // note: may have also ticked inside record
                    let new_record_result = record_receiver.try_recv();
                    match new_record_result {
                        Ok(new_record) => {
                            // we already have a second request to record, so record
                            // again while we still hold the lock
                            record = new_record;
                        }
                        Err(_) => {
                            break;
                        }
                    }
                }
                record_time.stop();
                timing.total_record_time_us += record_time.as_us();
                // PohRecorder.record would have ticked if it needed to, so should_tick is false
            }
            None => {
                // no record request pending, so hash until we notice we've been
                // asked to record (or we need to tick), then remember what to record
                let mut lock_time = Measure::start("lock");
                let mut poh_l = poh.lock().unwrap();
                lock_time.stop();
                timing.total_lock_time_ns += lock_time.as_ns();
                loop {
                    timing.num_hashes += hashes_per_batch;
                    let mut hash_time = Measure::start("hash");
                    let should_tick = poh_l.hash(hashes_per_batch);
                    let ideal_time = poh_l.target_poh_time(target_ns_per_tick);
                    hash_time.stop();
                    timing.total_hash_time_ns += hash_time.as_ns();
                    if should_tick {
                        // nothing else can be done. tick required.
                        return true;
                    }
                    // check to see if a record request has been sent
                    if let Ok(record) = record_receiver.try_recv() {
                        // remember the record we just received as the next record to occur
                        *next_record = Some(record);
                        break;
                    }
                    // check to see if we need to wait to catch up to ideal
                    let wait_start = Instant::now();
                    if ideal_time <= wait_start {
                        // no, keep hashing. We still hold the lock.
                        continue;
                    }
                    // busy wait, polling for new records; drop the poh lock first
                    // so that other operations (a reset, for example) can occur
                    drop(poh_l);
                    while ideal_time > Instant::now() {
                        // check to see if a record request has been sent
                        if let Ok(record) = record_receiver.try_recv() {
                            // remember the record we just received as the next record to occur
                            *next_record = Some(record);
                            break;
                        }
                    }
                    timing.total_sleep_us += wait_start.elapsed().as_micros() as u64;
                    break;
                }
            }
        };
        false // should_tick = false for all code that reaches here
    }

    fn tick_producer(
        poh_recorder: Arc<RwLock<PohRecorder>>,
        poh_exit: &AtomicBool,
        ticks_per_slot: u64,
        hashes_per_batch: u64,
        record_receiver: Receiver<Record>,
        target_ns_per_tick: u64,
    ) {
        let poh = poh_recorder.read().unwrap().poh.clone();
        let mut timing = PohTiming::new();
        let mut next_record = None;
        loop {
            let should_tick = Self::record_or_hash(
                &mut next_record,
                &poh_recorder,
                &mut timing,
                &record_receiver,
                hashes_per_batch,
                &poh,
                target_ns_per_tick,
            );
            if should_tick {
                // Lock PohRecorder only for the final hash; record_or_hash
                // locks PohRecorder for record calls but not for hashing.
                {
                    let mut lock_time = Measure::start("lock");
                    let mut poh_recorder_l = poh_recorder.write().unwrap();
                    lock_time.stop();
                    timing.total_lock_time_ns += lock_time.as_ns();
                    let mut tick_time = Measure::start("tick");
                    poh_recorder_l.tick();
                    tick_time.stop();
                    timing.total_tick_time_ns += tick_time.as_ns();
                }
                timing.num_ticks += 1;
                timing.report(ticks_per_slot);
                if poh_exit.load(Ordering::Relaxed) {
                    break;
                }
            }
        }
    }

    pub fn join(self) -> thread::Result<()> {
        self.tick_producer.join()
    }
}

#[cfg(test)]
mod tests {
    use {
        super::*,
        rand::{thread_rng, Rng},
        solana_ledger::{
            blockstore::Blockstore,
            genesis_utils::{create_genesis_config, GenesisConfigInfo},
            get_tmp_ledger_path_auto_delete,
            leader_schedule_cache::LeaderScheduleCache,
        },
        solana_measure::measure::Measure,
        solana_perf::test_tx::test_tx,
        solana_runtime::bank::Bank,
        solana_sdk::{
            clock, hash::hash, pubkey::Pubkey, timing, transaction::VersionedTransaction,
        },
        std::{thread::sleep, time::Duration},
    };

    #[test]
    #[ignore]
    fn test_poh_service() {
        solana_logger::setup();
        let GenesisConfigInfo { genesis_config, .. } = create_genesis_config(2);
        let bank = Bank::new_no_wallclock_throttle_for_tests(&genesis_config).0;
        let prev_hash = bank.last_blockhash();
        let ledger_path = get_tmp_ledger_path_auto_delete!();
        let blockstore = Blockstore::open(ledger_path.path())
            .expect("Expected to be able to open database ledger");
        let default_target_tick_duration =
            timing::duration_as_us(&PohConfig::default().target_tick_duration);
        let target_tick_duration = Duration::from_micros(default_target_tick_duration);
        let poh_config = PohConfig {
            hashes_per_tick: Some(clock::DEFAULT_HASHES_PER_TICK),
            target_tick_duration,
            target_tick_count: None,
        };
        let exit = Arc::new(AtomicBool::new(false));
        let ticks_per_slot = bank.ticks_per_slot();
        let leader_schedule_cache = Arc::new(LeaderScheduleCache::new_from_bank(&bank));
        let blockstore = Arc::new(blockstore);
        let (poh_recorder, entry_receiver, record_receiver) = PohRecorder::new(
            bank.tick_height(),
            prev_hash,
            bank.clone(),
            Some((4, 4)),
            ticks_per_slot,
            &Pubkey::default(),
            blockstore,
            &leader_schedule_cache,
            &poh_config,
            exit.clone(),
        );
        let poh_recorder = Arc::new(RwLock::new(poh_recorder));
        let ticks_per_slot = bank.ticks_per_slot();
        let bank_slot = bank.slot();

        // specify RUN_TIME to run in a benchmark-like mode
        // to calibrate batch size
        let run_time = std::env::var("RUN_TIME")
            .map(|x| x.parse().unwrap())
            .unwrap_or(0);
        let is_test_run = run_time == 0;

        let entry_producer = {
            let poh_recorder = poh_recorder.clone();
            let exit = exit.clone();
            Builder::new()
                .name("solPohEntryProd".to_string())
                .spawn(move || {
                    let now = Instant::now();
                    let mut total_us = 0;
                    let mut total_times = 0;
                    let h1 = hash(b"hello world!");
                    let tx = VersionedTransaction::from(test_tx());
                    loop {
                        // send some data
                        let mut time = Measure::start("record");
                        let _ = poh_recorder
                            .write()
                            .unwrap()
                            .record(bank_slot, h1, vec![tx.clone()]);
                        time.stop();
                        total_us += time.as_us();
                        total_times += 1;
                        if is_test_run && thread_rng().gen_ratio(1, 4) {
                            sleep(Duration::from_millis(200));
                        }
                        if exit.load(Ordering::Relaxed) {
                            info!(
                                "spent:{}ms record: {}ms entries recorded: {}",
                                now.elapsed().as_millis(),
                                total_us / 1000,
                                total_times,
                            );
                            break;
                        }
                    }
                })
                .unwrap()
        };

        let hashes_per_batch = std::env::var("HASHES_PER_BATCH")
            .map(|x| x.parse().unwrap())
            .unwrap_or(DEFAULT_HASHES_PER_BATCH);
        let poh_service = PohService::new(
            poh_recorder.clone(),
            &poh_config,
            exit.clone(),
            0,
            DEFAULT_PINNED_CPU_CORE,
            hashes_per_batch,
            record_receiver,
        );
        poh_recorder.write().unwrap().set_bank_for_test(bank);

        // get some events
        let mut hashes = 0;
        let mut need_tick = true;
        let mut need_entry = true;
        let mut need_partial = true;
        let mut num_ticks = 0;

        let time = Instant::now();
        while run_time != 0 || need_tick || need_entry || need_partial {
            let (_bank, (entry, _tick_height)) = entry_receiver.recv().unwrap();

            if entry.is_tick() {
                num_ticks += 1;
                assert!(
                    entry.num_hashes <= poh_config.hashes_per_tick.unwrap(),
                    "{} <= {}",
                    entry.num_hashes,
                    poh_config.hashes_per_tick.unwrap()
                );

                if entry.num_hashes == poh_config.hashes_per_tick.unwrap() {
                    need_tick = false;
                } else {
                    need_partial = false;
                }

                hashes += entry.num_hashes;

                assert_eq!(hashes, poh_config.hashes_per_tick.unwrap());

                hashes = 0;
            } else {
                assert!(entry.num_hashes >= 1);
                need_entry = false;
                hashes += entry.num_hashes;
            }

            if run_time != 0 {
                if time.elapsed().as_millis() > run_time {
                    break;
                }
            } else {
                assert!(
                    time.elapsed().as_secs() < 60,
                    "Test should not run for this long! {}s tick {} entry {} partial {}",
                    time.elapsed().as_secs(),
                    need_tick,
                    need_entry,
                    need_partial,
                );
            }
        }
        info!(
            "target_tick_duration: {} ticks_per_slot: {}",
            poh_config.target_tick_duration.as_nanos(),
            ticks_per_slot
        );
        let elapsed = time.elapsed();
        info!(
            "{} ticks in {}ms {}us/tick",
            num_ticks,
            elapsed.as_millis(),
            elapsed.as_micros() / num_ticks
        );

        exit.store(true, Ordering::Relaxed);
        poh_service.join().unwrap();
        entry_producer.join().unwrap();
    }
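
    // A minimal sanity check of the tick-target arithmetic in
    // `target_ns_per_tick`. This test is an illustrative addition; the 64
    // ticks/slot and 6.25ms tick duration used here mirror common defaults,
    // but any inputs would exercise the same math.
    #[test]
    fn test_target_ns_per_tick() {
        // 50ms of slot adjustment spread over 64 ticks is 781_250ns per tick,
        // subtracted from the 6_250_000ns target.
        assert_eq!(PohService::target_ns_per_tick(64, 6_250_000), 5_468_750);
        // With zero ticks per slot, no adjustment is applied.
        assert_eq!(PohService::target_ns_per_tick(0, 6_250_000), 6_250_000);
        // The subtraction saturates instead of underflowing.
        assert_eq!(PohService::target_ns_per_tick(1, 1_000), 0);
    }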
}