From 398af132a5c949827b80ff9d1f7ea770c640bdfb Mon Sep 17 00:00:00 2001 From: sakridge Date: Mon, 15 Nov 2021 23:28:18 +0000 Subject: [PATCH] More set_root metrics (#21286) --- core/src/replay_stage.rs | 11 ++++- runtime/src/bank_forks.rs | 84 ++++++++++++++++++++++++++++----------- 2 files changed, 70 insertions(+), 25 deletions(-) diff --git a/core/src/replay_stage.rs b/core/src/replay_stage.rs index 98c9a7a7f..420a71037 100644 --- a/core/src/replay_stage.rs +++ b/core/src/replay_stage.rs @@ -2724,11 +2724,20 @@ impl ReplayStage { voted_signatures: &mut Vec, epoch_slots_frozen_slots: &mut EpochSlotsFrozenSlots, ) { - bank_forks.write().unwrap().set_root( + let removed_banks = bank_forks.write().unwrap().set_root( new_root, accounts_background_request_sender, highest_confirmed_root, ); + let mut dropped_banks_time = Measure::start("handle_new_root::drop_banks"); + drop(removed_banks); + dropped_banks_time.stop(); + if dropped_banks_time.as_ms() > 10 { + datapoint_info!( + "handle_new_root-dropped_banks", + ("elapsed_ms", dropped_banks_time.as_ms(), i64) + ); + } // Dropping the bank_forks write lock and reacquiring as a read lock is // safe because updates to bank_forks are only made by a single thread. let r_bank_forks = bank_forks.read().unwrap(); diff --git a/runtime/src/bank_forks.rs b/runtime/src/bank_forks.rs index d3820652a..f951e0ce0 100644 --- a/runtime/src/bank_forks.rs +++ b/runtime/src/bank_forks.rs @@ -16,13 +16,16 @@ use std::{ }; struct SetRootTimings { - total_banks: i64, + total_parent_banks: i64, total_squash_cache_ms: i64, total_squash_accounts_ms: i64, total_snapshot_ms: i64, tx_count: i64, prune_non_rooted_ms: i64, drop_parent_banks_ms: i64, + prune_slots_ms: i64, + prune_remove_ms: i64, + dropped_banks_len: i64, } pub struct BankForks { @@ -187,7 +190,7 @@ impl BankForks { root: Slot, accounts_background_request_sender: &AbsRequestSender, highest_confirmed_root: Option, - ) -> SetRootTimings { + ) -> (Vec>, SetRootTimings) { let old_epoch = self.root_bank().epoch(); self.root = root; let root_bank = self @@ -221,7 +224,7 @@ impl BankForks { let mut banks = vec![root_bank]; let parents = root_bank.parents(); banks.extend(parents.iter()); - let total_banks = banks.len(); + let total_parent_banks = banks.len(); let mut total_squash_accounts_ms = 0; let mut total_squash_cache_ms = 0; let mut total_snapshot_ms = 0; @@ -269,22 +272,30 @@ impl BankForks { } let new_tx_count = root_bank.transaction_count(); let mut prune_time = Measure::start("set_root::prune"); - self.prune_non_rooted(root, highest_confirmed_root); + let (removed_banks, prune_slots_ms, prune_remove_ms) = + self.prune_non_rooted(root, highest_confirmed_root); prune_time.stop(); + let dropped_banks_len = removed_banks.len(); let mut drop_parent_banks_time = Measure::start("set_root::drop_banks"); drop(parents); drop_parent_banks_time.stop(); - SetRootTimings { - total_banks: total_banks as i64, - total_squash_cache_ms, - total_squash_accounts_ms, - total_snapshot_ms, - tx_count: (new_tx_count - root_tx_count) as i64, - prune_non_rooted_ms: prune_time.as_ms() as i64, - drop_parent_banks_ms: drop_parent_banks_time.as_ms() as i64, - } + ( + removed_banks, + SetRootTimings { + total_parent_banks: total_parent_banks as i64, + total_squash_cache_ms, + total_squash_accounts_ms, + total_snapshot_ms, + tx_count: (new_tx_count - root_tx_count) as i64, + prune_non_rooted_ms: prune_time.as_ms() as i64, + drop_parent_banks_ms: drop_parent_banks_time.as_ms() as i64, + prune_slots_ms: prune_slots_ms as i64, + prune_remove_ms: prune_remove_ms as i64, + dropped_banks_len: dropped_banks_len as i64, + }, + ) } pub fn set_root( @@ -292,9 +303,9 @@ impl BankForks { root: Slot, accounts_background_request_sender: &AbsRequestSender, highest_confirmed_root: Option, - ) { + ) -> Vec> { let set_root_start = Instant::now(); - let set_root_metrics = self.do_set_root_return_metrics( + let (removed_banks, set_root_metrics) = self.do_set_root_return_metrics( root, accounts_background_request_sender, highest_confirmed_root, @@ -307,7 +318,12 @@ impl BankForks { i64 ), ("slot", root, i64), - ("total_banks", set_root_metrics.total_banks, i64), + ( + "total_parent_banks", + set_root_metrics.total_parent_banks, + i64 + ), + ("total_banks", self.banks.len(), i64), ( "total_squash_cache_ms", set_root_metrics.total_squash_cache_ms, @@ -330,7 +346,11 @@ impl BankForks { set_root_metrics.drop_parent_banks_ms, i64 ), + ("prune_slots_ms", set_root_metrics.prune_slots_ms, i64), + ("prune_remove_ms", set_root_metrics.prune_remove_ms, i64), + ("dropped_banks_len", set_root_metrics.dropped_banks_len, i64), ); + removed_banks } pub fn root(&self) -> Slot { @@ -387,7 +407,16 @@ impl BankForks { /// i.e. the cluster-confirmed root. This commitment is stronger than the local node's root. /// So (A) and (B) are kept to facilitate RPC at different commitment levels. Everything below /// the highest confirmed root can be pruned. - fn prune_non_rooted(&mut self, root: Slot, highest_confirmed_root: Option) { + fn prune_non_rooted( + &mut self, + root: Slot, + highest_confirmed_root: Option, + ) -> (Vec>, u64, u64) { + // Clippy doesn't like separating the two collects below, + // but we want to collect timing separately, and the 2nd requires + // a unique borrow to self which is already borrowed by self.banks + #![allow(clippy::needless_collect)] + let mut prune_slots_time = Measure::start("prune_slots"); let highest_confirmed_root = highest_confirmed_root.unwrap_or(root); let prune_slots: Vec<_> = self .banks @@ -402,13 +431,20 @@ impl BankForks { !keep }) .collect(); - for slot in prune_slots { - self.remove(slot); - } - datapoint_debug!( - "bank_forks_purge_non_root", - ("num_banks_retained", self.banks.len(), i64), - ); + prune_slots_time.stop(); + + let mut prune_remove_time = Measure::start("prune_slots"); + let removed_banks = prune_slots + .into_iter() + .filter_map(|slot| self.remove(slot)) + .collect(); + prune_remove_time.stop(); + + ( + removed_banks, + prune_slots_time.as_ms(), + prune_remove_time.as_ms(), + ) } pub fn set_snapshot_config(&mut self, snapshot_config: Option) {