//! The `poh_service` module implements a service that records the passing of
//! "ticks", a measure of time in the PoH stream
use {
    crate::poh_recorder::{PohRecorder, Record},
    crossbeam_channel::Receiver,
    log::*,
    solana_entry::poh::Poh,
    solana_measure::{measure, measure::Measure},
    solana_sdk::poh_config::PohConfig,
    std::{
        sync::{
            atomic::{AtomicBool, Ordering},
            Arc, Mutex, RwLock,
        },
        thread::{self, Builder, JoinHandle},
        time::{Duration, Instant},
    },
};

pub struct PohService {
    tick_producer: JoinHandle<()>,
}

// Number of hashes to batch together.
// * If this number is too small, PoH hash rate will suffer.
// * The larger this number is, the more the speed of recording transactions will suffer due to
//   lock contention with the PoH hashing within `tick_producer()`.
//
// Can use test_poh_service to calibrate this
pub const DEFAULT_HASHES_PER_BATCH: u64 = 64;

pub const DEFAULT_PINNED_CPU_CORE: usize = 0;

const TARGET_SLOT_ADJUSTMENT_NS: u64 = 50_000_000;

#[derive(Debug)]
struct PohTiming {
    num_ticks: u64,
    num_hashes: u64,
    total_sleep_us: u64,
    total_lock_time_ns: u64,
    total_hash_time_ns: u64,
    total_tick_time_ns: u64,
    last_metric: Instant,
    total_record_time_us: u64,
    total_send_record_result_us: u64,
}

impl PohTiming {
    fn new() -> Self {
        Self {
            num_ticks: 0,
            num_hashes: 0,
            total_sleep_us: 0,
            total_lock_time_ns: 0,
            total_hash_time_ns: 0,
            total_tick_time_ns: 0,
            last_metric: Instant::now(),
            total_record_time_us: 0,
            total_send_record_result_us: 0,
        }
    }
    fn report(&mut self, ticks_per_slot: u64) {
        if self.last_metric.elapsed().as_millis() > 1000 {
            let elapsed_us = self.last_metric.elapsed().as_micros() as u64;
            let us_per_slot = (elapsed_us * ticks_per_slot) / self.num_ticks;
            datapoint_info!(
                "poh-service",
                ("ticks", self.num_ticks as i64, i64),
                ("hashes", self.num_hashes as i64, i64),
                ("elapsed_us", us_per_slot, i64),
                ("total_sleep_us", self.total_sleep_us, i64),
                ("total_tick_time_us", self.total_tick_time_ns / 1000, i64),
                ("total_lock_time_us", self.total_lock_time_ns / 1000, i64),
                ("total_hash_time_us", self.total_hash_time_ns / 1000, i64),
                ("total_record_time_us", self.total_record_time_us, i64),
                (
                    "total_send_record_result_us",
                    self.total_send_record_result_us,
                    i64
                ),
            );
            self.total_sleep_us = 0;
            self.num_ticks = 0;
            self.num_hashes = 0;
            self.total_tick_time_ns = 0;
            self.total_lock_time_ns = 0;
            self.total_hash_time_ns = 0;
            self.last_metric = Instant::now();
            self.total_record_time_us = 0;
            self.total_send_record_result_us = 0;
        }
    }
}

impl PohService {
    pub fn new(
        poh_recorder: Arc<RwLock<PohRecorder>>,
        poh_config: &PohConfig,
        poh_exit: Arc<AtomicBool>,
        ticks_per_slot: u64,
        pinned_cpu_core: usize,
        hashes_per_batch: u64,
        record_receiver: Receiver<Record>,
    ) -> Self {
        let poh_config = poh_config.clone();
        let tick_producer = Builder::new()
            .name("solPohTickProd".to_string())
            .spawn(move || {
                if poh_config.hashes_per_tick.is_none() {
                    if poh_config.target_tick_count.is_none() {
                        Self::low_power_tick_producer(
                            poh_recorder,
                            &poh_config,
                            &poh_exit,
                            record_receiver,
                        );
                    } else {
                        Self::short_lived_low_power_tick_producer(
                            poh_recorder,
                            &poh_config,
                            &poh_exit,
                            record_receiver,
                        );
                    }
                } else {
                    // PoH service runs in a tight loop, generating hashes as fast as possible.
                    // Let's dedicate one of the CPU cores to this thread so that it can gain
                    // from cache performance.
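                    // Note that `cores[pinned_cpu_core]` panics if the configured core index is
                    // not present, so callers overriding DEFAULT_PINNED_CPU_CORE must pick a
                    // core that exists on the machine.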
                    if let Some(cores) = core_affinity::get_core_ids() {
                        core_affinity::set_for_current(cores[pinned_cpu_core]);
                    }
                    Self::tick_producer(
                        poh_recorder,
                        &poh_exit,
                        ticks_per_slot,
                        hashes_per_batch,
                        record_receiver,
                        Self::target_ns_per_tick(
                            ticks_per_slot,
                            poh_config.target_tick_duration.as_nanos() as u64,
                        ),
                    );
                }
                poh_exit.store(true, Ordering::Relaxed);
            })
            .unwrap();

        Self { tick_producer }
    }

    pub fn target_ns_per_tick(ticks_per_slot: u64, target_tick_duration_ns: u64) -> u64 {
        // Shorten each tick's target slightly so that some time per slot is left over for
        // processing outside of PoH generation.
        let adjustment_per_tick = if ticks_per_slot > 0 {
            TARGET_SLOT_ADJUSTMENT_NS / ticks_per_slot
        } else {
            0
        };
        target_tick_duration_ns.saturating_sub(adjustment_per_tick)
    }

    fn low_power_tick_producer(
        poh_recorder: Arc<RwLock<PohRecorder>>,
        poh_config: &PohConfig,
        poh_exit: &AtomicBool,
        record_receiver: Receiver<Record>,
    ) {
        let mut last_tick = Instant::now();
        while !poh_exit.load(Ordering::Relaxed) {
            let remaining_tick_time = poh_config
                .target_tick_duration
                .saturating_sub(last_tick.elapsed());
            Self::read_record_receiver_and_process(
                &poh_recorder,
                &record_receiver,
                remaining_tick_time,
            );
            if remaining_tick_time.is_zero() {
                last_tick = Instant::now();
                poh_recorder.write().unwrap().tick();
            }
        }
    }

    pub fn read_record_receiver_and_process(
        poh_recorder: &Arc<RwLock<PohRecorder>>,
        record_receiver: &Receiver<Record>,
        timeout: Duration,
    ) {
        let record = record_receiver.recv_timeout(timeout);
        if let Ok(record) = record {
            if record
                .sender
                .send(poh_recorder.write().unwrap().record(
                    record.slot,
                    record.mixin,
                    record.transactions,
                ))
                .is_err()
            {
                panic!("Error returning mixin hash");
            }
        }
    }

    fn short_lived_low_power_tick_producer(
        poh_recorder: Arc<RwLock<PohRecorder>>,
        poh_config: &PohConfig,
        poh_exit: &AtomicBool,
        record_receiver: Receiver<Record>,
    ) {
        let mut warned = false;
        let mut elapsed_ticks = 0;
        let mut last_tick = Instant::now();
        let num_ticks = poh_config.target_tick_count.unwrap();
        while elapsed_ticks < num_ticks {
            let remaining_tick_time = poh_config
                .target_tick_duration
                .saturating_sub(last_tick.elapsed());
            Self::read_record_receiver_and_process(
                &poh_recorder,
                &record_receiver,
                Duration::from_millis(0),
            );
            if remaining_tick_time.is_zero() {
                last_tick = Instant::now();
                poh_recorder.write().unwrap().tick();
                elapsed_ticks += 1;
            }
            if poh_exit.load(Ordering::Relaxed) && !warned {
                warned = true;
                warn!("exit signal is ignored because PohService is scheduled to exit soon");
            }
        }
    }

    // Returns true if we need to tick.
    fn record_or_hash(
        next_record: &mut Option<Record>,
        poh_recorder: &Arc<RwLock<PohRecorder>>,
        timing: &mut PohTiming,
        record_receiver: &Receiver<Record>,
        hashes_per_batch: u64,
        poh: &Arc<Mutex<Poh>>,
        target_ns_per_tick: u64,
    ) -> bool {
        match next_record.take() {
            Some(mut record) => {
                // Received a message to record, so record for as long as we have queued up
                // record requests.
                let mut lock_time = Measure::start("lock");
                let mut poh_recorder_l = poh_recorder.write().unwrap();
                lock_time.stop();
                timing.total_lock_time_ns += lock_time.as_ns();
                let mut record_time = Measure::start("record");
                loop {
                    let res = poh_recorder_l.record(
                        record.slot,
                        record.mixin,
                        std::mem::take(&mut record.transactions),
                    );
                    // what do we do on failure here? Ignore for now.
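                    // Send the record result back to the requesting thread over `record.sender`;
                    // a failed send is ignored here (`_send_res`), unlike in
                    // `read_record_receiver_and_process`, which panics.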
                    let (_send_res, send_record_result_time) =
                        measure!(record.sender.send(res), "send_record_result");
                    timing.total_send_record_result_us += send_record_result_time.as_us();
                    timing.num_hashes += 1; // note: may have also ticked inside record

                    let new_record_result = record_receiver.try_recv();
                    match new_record_result {
                        Ok(new_record) => {
                            // We already have a second request to record, so record again while
                            // we still hold the mutex.
                            record = new_record;
                        }
                        Err(_) => {
                            break;
                        }
                    }
                }
                record_time.stop();
                timing.total_record_time_us += record_time.as_us();
                // PohRecorder.record would have ticked if it needed to, so should_tick will be false.
            }
            None => {
                // Did not receive instructions to record, so hash until we notice we've been
                // asked to record (or we need to tick) and then remember what to record.
                let mut lock_time = Measure::start("lock");
                let mut poh_l = poh.lock().unwrap();
                lock_time.stop();
                timing.total_lock_time_ns += lock_time.as_ns();
                loop {
                    timing.num_hashes += hashes_per_batch;
                    let mut hash_time = Measure::start("hash");
                    let should_tick = poh_l.hash(hashes_per_batch);
                    let ideal_time = poh_l.target_poh_time(target_ns_per_tick);
                    hash_time.stop();
                    timing.total_hash_time_ns += hash_time.as_ns();
                    if should_tick {
                        // Nothing else can be done. A tick is required.
                        return true;
                    }
                    // Check to see if a record request has been sent.
                    if let Ok(record) = record_receiver.try_recv() {
                        // Remember the record we just received as the next record to occur.
                        *next_record = Some(record);
                        break;
                    }
                    // Check to see if we need to wait to catch up to the ideal time.
                    let wait_start = Instant::now();
                    if ideal_time <= wait_start {
                        // No, keep hashing. We still hold the lock.
                        continue;
                    }

                    // Busy wait, polling for new records, after dropping the poh lock (so that,
                    // for example, a reset can occur).
                    drop(poh_l);
                    while ideal_time > Instant::now() {
                        // Check to see if a record request has been sent.
                        if let Ok(record) = record_receiver.try_recv() {
                            // Remember the record we just received as the next record to occur.
                            *next_record = Some(record);
                            break;
                        }
                    }
                    timing.total_sleep_us += wait_start.elapsed().as_micros() as u64;
                    break;
                }
            }
        };
        false // should_tick = false for all code that reaches here
    }

    fn tick_producer(
        poh_recorder: Arc<RwLock<PohRecorder>>,
        poh_exit: &AtomicBool,
        ticks_per_slot: u64,
        hashes_per_batch: u64,
        record_receiver: Receiver<Record>,
        target_ns_per_tick: u64,
    ) {
        let poh = poh_recorder.read().unwrap().poh.clone();
        let mut timing = PohTiming::new();
        let mut next_record = None;
        loop {
            let should_tick = Self::record_or_hash(
                &mut next_record,
                &poh_recorder,
                &mut timing,
                &record_receiver,
                hashes_per_batch,
                &poh,
                target_ns_per_tick,
            );
            if should_tick {
                // Lock PohRecorder only for the final hash. record_or_hash will lock PohRecorder
                // for record calls but not for hashing.
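                // The braces below scope the write lock so it is released before the metrics
                // report and the exit check.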
                {
                    let mut lock_time = Measure::start("lock");
                    let mut poh_recorder_l = poh_recorder.write().unwrap();
                    lock_time.stop();
                    timing.total_lock_time_ns += lock_time.as_ns();
                    let mut tick_time = Measure::start("tick");
                    poh_recorder_l.tick();
                    tick_time.stop();
                    timing.total_tick_time_ns += tick_time.as_ns();
                }
                timing.num_ticks += 1;

                timing.report(ticks_per_slot);
                if poh_exit.load(Ordering::Relaxed) {
                    break;
                }
            }
        }
    }

    pub fn join(self) -> thread::Result<()> {
        self.tick_producer.join()
    }
}

#[cfg(test)]
mod tests {
    use {
        super::*,
        rand::{thread_rng, Rng},
        solana_ledger::{
            blockstore::Blockstore,
            genesis_utils::{create_genesis_config, GenesisConfigInfo},
            get_tmp_ledger_path_auto_delete,
            leader_schedule_cache::LeaderScheduleCache,
        },
        solana_measure::measure::Measure,
        solana_perf::test_tx::test_tx,
        solana_runtime::bank::Bank,
        solana_sdk::{
            clock, hash::hash, pubkey::Pubkey, timing, transaction::VersionedTransaction,
        },
        std::{thread::sleep, time::Duration},
    };

    #[test]
    #[ignore]
    fn test_poh_service() {
        solana_logger::setup();
        let GenesisConfigInfo { genesis_config, .. } = create_genesis_config(2);
        let bank = Bank::new_no_wallclock_throttle_for_tests(&genesis_config).0;
        let prev_hash = bank.last_blockhash();
        let ledger_path = get_tmp_ledger_path_auto_delete!();
        let blockstore = Blockstore::open(ledger_path.path())
            .expect("Expected to be able to open database ledger");

        let default_target_tick_duration =
            timing::duration_as_us(&PohConfig::default().target_tick_duration);
        let target_tick_duration = Duration::from_micros(default_target_tick_duration);
        let poh_config = PohConfig {
            hashes_per_tick: Some(clock::DEFAULT_HASHES_PER_TICK),
            target_tick_duration,
            target_tick_count: None,
        };
        let exit = Arc::new(AtomicBool::new(false));

        let ticks_per_slot = bank.ticks_per_slot();
        let leader_schedule_cache = Arc::new(LeaderScheduleCache::new_from_bank(&bank));
        let blockstore = Arc::new(blockstore);
        let (poh_recorder, entry_receiver, record_receiver) = PohRecorder::new(
            bank.tick_height(),
            prev_hash,
            bank.clone(),
            Some((4, 4)),
            ticks_per_slot,
            &Pubkey::default(),
            blockstore,
            &leader_schedule_cache,
            &poh_config,
            exit.clone(),
        );
        let poh_recorder = Arc::new(RwLock::new(poh_recorder));
        let ticks_per_slot = bank.ticks_per_slot();
        let bank_slot = bank.slot();

        // Specify RUN_TIME to run in a benchmark-like mode to calibrate the batch size.
        let run_time = std::env::var("RUN_TIME")
            .map(|x| x.parse().unwrap())
            .unwrap_or(0);
        let is_test_run = run_time == 0;

        let entry_producer = {
            let poh_recorder = poh_recorder.clone();
            let exit = exit.clone();

            Builder::new()
                .name("solPohEntryProd".to_string())
                .spawn(move || {
                    let now = Instant::now();
                    let mut total_us = 0;
                    let mut total_times = 0;
                    let h1 = hash(b"hello world!");
                    let tx = VersionedTransaction::from(test_tx());
                    loop {
                        // send some data
                        let mut time = Measure::start("record");
                        let _ = poh_recorder
                            .write()
                            .unwrap()
                            .record(bank_slot, h1, vec![tx.clone()]);
                        time.stop();
                        total_us += time.as_us();
                        total_times += 1;
                        if is_test_run && thread_rng().gen_ratio(1, 4) {
                            sleep(Duration::from_millis(200));
                        }
                        if exit.load(Ordering::Relaxed) {
                            info!(
                                "spent:{}ms record: {}ms entries recorded: {}",
                                now.elapsed().as_millis(),
                                total_us / 1000,
                                total_times,
                            );
                            break;
                        }
                    }
                })
                .unwrap()
        };

        let hashes_per_batch = std::env::var("HASHES_PER_BATCH")
            .map(|x| x.parse().unwrap())
            .unwrap_or(DEFAULT_HASHES_PER_BATCH);
        let poh_service = PohService::new(
            poh_recorder.clone(),
            &poh_config,
            exit.clone(),
            0,
            DEFAULT_PINNED_CPU_CORE,
            hashes_per_batch,
            record_receiver,
        );
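        // Give the recorder a working bank so that recorded transactions and ticks flow to
        // `entry_receiver`; the loop below then consumes those entries.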
        poh_recorder.write().unwrap().set_bank_for_test(bank);

        // get some events
        let mut hashes = 0;
        let mut need_tick = true;
        let mut need_entry = true;
        let mut need_partial = true;
        let mut num_ticks = 0;

        let time = Instant::now();
        while run_time != 0 || need_tick || need_entry || need_partial {
            let (_bank, (entry, _tick_height)) = entry_receiver.recv().unwrap();

            if entry.is_tick() {
                num_ticks += 1;
                assert!(
                    entry.num_hashes <= poh_config.hashes_per_tick.unwrap(),
                    "{} <= {}",
                    entry.num_hashes,
                    poh_config.hashes_per_tick.unwrap()
                );

                if entry.num_hashes == poh_config.hashes_per_tick.unwrap() {
                    need_tick = false;
                } else {
                    need_partial = false;
                }

                hashes += entry.num_hashes;

                assert_eq!(hashes, poh_config.hashes_per_tick.unwrap());

                hashes = 0;
            } else {
                assert!(entry.num_hashes >= 1);
                need_entry = false;
                hashes += entry.num_hashes;
            }

            if run_time != 0 {
                if time.elapsed().as_millis() > run_time {
                    break;
                }
            } else {
                assert!(
                    time.elapsed().as_secs() < 60,
                    "Test should not run for this long! {}s tick {} entry {} partial {}",
                    time.elapsed().as_secs(),
                    need_tick,
                    need_entry,
                    need_partial,
                );
            }
        }
        info!(
            "target_tick_duration: {} ticks_per_slot: {}",
            poh_config.target_tick_duration.as_nanos(),
            ticks_per_slot
        );
        let elapsed = time.elapsed();
        info!(
            "{} ticks in {}ms {}us/tick",
            num_ticks,
            elapsed.as_millis(),
            elapsed.as_micros() / num_ticks
        );

        exit.store(true, Ordering::Relaxed);
        poh_service.join().unwrap();
        entry_producer.join().unwrap();
    }
}
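
// A minimal illustrative sketch (not part of the original file; the module and test names
// below are hypothetical) of the `target_ns_per_tick` arithmetic, assuming the common
// defaults of 64 ticks per slot and a 6.25 ms (6_250_000 ns) target tick duration:
// TARGET_SLOT_ADJUSTMENT_NS shaves 50_000_000 / 64 = 781_250 ns off each tick, reserving
// roughly 50 ms per slot for work outside of PoH hashing.
#[cfg(test)]
mod target_ns_per_tick_example {
    use super::*;

    #[test]
    fn example_target_ns_per_tick_arithmetic() {
        // Hypothetical inputs: 64 ticks per slot, 6.25 ms per tick, expressed in nanoseconds.
        let ticks_per_slot = 64;
        let target_tick_duration_ns = 6_250_000;
        assert_eq!(
            PohService::target_ns_per_tick(ticks_per_slot, target_tick_duration_ns),
            6_250_000 - 781_250
        );
    }
}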