Cleanup ReplayStage loop timing struct (#35361)

- Track loop_count in the struct
- Rename ReplayTiming ==> ReplayLoopTiming
- Make all metrics consistent to end with "_elapsed_us"
This commit is contained in:
steviez 2024-03-01 12:30:50 -06:00 committed by GitHub
parent 7399178c78
commit 7c878973e2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 133 additions and 117 deletions

View File

@ -297,86 +297,97 @@ pub struct ReplayStageConfig {
pub replay_slots_concurrently: bool,
}
/// Timing information for the ReplayStage main processing loop
#[derive(Default)]
pub struct ReplayTiming {
last_print: u64,
collect_frozen_banks_elapsed: u64,
compute_bank_stats_elapsed: u64,
select_vote_and_reset_forks_elapsed: u64,
start_leader_elapsed: u64,
reset_bank_elapsed: u64,
voting_elapsed: u64,
struct ReplayLoopTiming {
last_submit: u64,
loop_count: u64,
collect_frozen_banks_elapsed_us: u64,
compute_bank_stats_elapsed_us: u64,
select_vote_and_reset_forks_elapsed_us: u64,
start_leader_elapsed_us: u64,
reset_bank_elapsed_us: u64,
voting_elapsed_us: u64,
generate_vote_us: u64,
update_commitment_cache_us: u64,
select_forks_elapsed: u64,
compute_slot_stats_elapsed: u64,
generate_new_bank_forks_elapsed: u64,
replay_active_banks_elapsed: u64,
wait_receive_elapsed: u64,
heaviest_fork_failures_elapsed: u64,
select_forks_elapsed_us: u64,
compute_slot_stats_elapsed_us: u64,
generate_new_bank_forks_elapsed_us: u64,
replay_active_banks_elapsed_us: u64,
wait_receive_elapsed_us: u64,
heaviest_fork_failures_elapsed_us: u64,
bank_count: u64,
process_ancestor_hashes_duplicate_slots_elapsed: u64,
process_duplicate_confirmed_slots_elapsed: u64,
process_duplicate_slots_elapsed: u64,
process_unfrozen_gossip_verified_vote_hashes_elapsed: u64,
process_popular_pruned_forks_elapsed: u64,
repair_correct_slots_elapsed: u64,
retransmit_not_propagated_elapsed: u64,
process_ancestor_hashes_duplicate_slots_elapsed_us: u64,
process_duplicate_confirmed_slots_elapsed_us: u64,
process_duplicate_slots_elapsed_us: u64,
process_unfrozen_gossip_verified_vote_hashes_elapsed_us: u64,
process_popular_pruned_forks_elapsed_us: u64,
repair_correct_slots_elapsed_us: u64,
retransmit_not_propagated_elapsed_us: u64,
generate_new_bank_forks_read_lock_us: u64,
generate_new_bank_forks_get_slots_since_us: u64,
generate_new_bank_forks_loop_us: u64,
generate_new_bank_forks_write_lock_us: u64,
replay_blockstore_us: u64, //< When processing forks concurrently, only captures the longest fork
// When processing multiple forks concurrently, only captures the longest fork
replay_blockstore_us: u64,
}
impl ReplayTiming {
impl ReplayLoopTiming {
#[allow(clippy::too_many_arguments)]
fn update(
&mut self,
collect_frozen_banks_elapsed: u64,
compute_bank_stats_elapsed: u64,
select_vote_and_reset_forks_elapsed: u64,
start_leader_elapsed: u64,
reset_bank_elapsed: u64,
voting_elapsed: u64,
select_forks_elapsed: u64,
compute_slot_stats_elapsed: u64,
generate_new_bank_forks_elapsed: u64,
replay_active_banks_elapsed: u64,
wait_receive_elapsed: u64,
heaviest_fork_failures_elapsed: u64,
collect_frozen_banks_elapsed_us: u64,
compute_bank_stats_elapsed_us: u64,
select_vote_and_reset_forks_elapsed_us: u64,
start_leader_elapsed_us: u64,
reset_bank_elapsed_us: u64,
voting_elapsed_us: u64,
select_forks_elapsed_us: u64,
compute_slot_stats_elapsed_us: u64,
generate_new_bank_forks_elapsed_us: u64,
replay_active_banks_elapsed_us: u64,
wait_receive_elapsed_us: u64,
heaviest_fork_failures_elapsed_us: u64,
bank_count: u64,
process_ancestor_hashes_duplicate_slots_elapsed: u64,
process_duplicate_confirmed_slots_elapsed: u64,
process_unfrozen_gossip_verified_vote_hashes_elapsed: u64,
process_popular_pruned_forks_elapsed: u64,
process_duplicate_slots_elapsed: u64,
repair_correct_slots_elapsed: u64,
retransmit_not_propagated_elapsed: u64,
process_ancestor_hashes_duplicate_slots_elapsed_us: u64,
process_duplicate_confirmed_slots_elapsed_us: u64,
process_unfrozen_gossip_verified_vote_hashes_elapsed_us: u64,
process_popular_pruned_forks_elapsed_us: u64,
process_duplicate_slots_elapsed_us: u64,
repair_correct_slots_elapsed_us: u64,
retransmit_not_propagated_elapsed_us: u64,
) {
self.collect_frozen_banks_elapsed += collect_frozen_banks_elapsed;
self.compute_bank_stats_elapsed += compute_bank_stats_elapsed;
self.select_vote_and_reset_forks_elapsed += select_vote_and_reset_forks_elapsed;
self.start_leader_elapsed += start_leader_elapsed;
self.reset_bank_elapsed += reset_bank_elapsed;
self.voting_elapsed += voting_elapsed;
self.select_forks_elapsed += select_forks_elapsed;
self.compute_slot_stats_elapsed += compute_slot_stats_elapsed;
self.generate_new_bank_forks_elapsed += generate_new_bank_forks_elapsed;
self.replay_active_banks_elapsed += replay_active_banks_elapsed;
self.wait_receive_elapsed += wait_receive_elapsed;
self.heaviest_fork_failures_elapsed += heaviest_fork_failures_elapsed;
self.loop_count += 1;
self.collect_frozen_banks_elapsed_us += collect_frozen_banks_elapsed_us;
self.compute_bank_stats_elapsed_us += compute_bank_stats_elapsed_us;
self.select_vote_and_reset_forks_elapsed_us += select_vote_and_reset_forks_elapsed_us;
self.start_leader_elapsed_us += start_leader_elapsed_us;
self.reset_bank_elapsed_us += reset_bank_elapsed_us;
self.voting_elapsed_us += voting_elapsed_us;
self.select_forks_elapsed_us += select_forks_elapsed_us;
self.compute_slot_stats_elapsed_us += compute_slot_stats_elapsed_us;
self.generate_new_bank_forks_elapsed_us += generate_new_bank_forks_elapsed_us;
self.replay_active_banks_elapsed_us += replay_active_banks_elapsed_us;
self.wait_receive_elapsed_us += wait_receive_elapsed_us;
self.heaviest_fork_failures_elapsed_us += heaviest_fork_failures_elapsed_us;
self.bank_count += bank_count;
self.process_ancestor_hashes_duplicate_slots_elapsed +=
process_ancestor_hashes_duplicate_slots_elapsed;
self.process_duplicate_confirmed_slots_elapsed += process_duplicate_confirmed_slots_elapsed;
self.process_unfrozen_gossip_verified_vote_hashes_elapsed +=
process_unfrozen_gossip_verified_vote_hashes_elapsed;
self.process_popular_pruned_forks_elapsed += process_popular_pruned_forks_elapsed;
self.process_duplicate_slots_elapsed += process_duplicate_slots_elapsed;
self.repair_correct_slots_elapsed += repair_correct_slots_elapsed;
self.retransmit_not_propagated_elapsed += retransmit_not_propagated_elapsed;
self.process_ancestor_hashes_duplicate_slots_elapsed_us +=
process_ancestor_hashes_duplicate_slots_elapsed_us;
self.process_duplicate_confirmed_slots_elapsed_us +=
process_duplicate_confirmed_slots_elapsed_us;
self.process_unfrozen_gossip_verified_vote_hashes_elapsed_us +=
process_unfrozen_gossip_verified_vote_hashes_elapsed_us;
self.process_popular_pruned_forks_elapsed_us += process_popular_pruned_forks_elapsed_us;
self.process_duplicate_slots_elapsed_us += process_duplicate_slots_elapsed_us;
self.repair_correct_slots_elapsed_us += repair_correct_slots_elapsed_us;
self.retransmit_not_propagated_elapsed_us += retransmit_not_propagated_elapsed_us;
self.maybe_submit();
}
fn maybe_submit(&mut self) {
let now = timestamp();
let elapsed_ms = now - self.last_print;
let elapsed_ms = now - self.last_submit;
if elapsed_ms > 1000 {
datapoint_info!(
"replay-loop-voting-stats",
@ -389,93 +400,98 @@ impl ReplayTiming {
);
datapoint_info!(
"replay-loop-timing-stats",
("loop_count", self.loop_count as i64, i64),
("total_elapsed_us", elapsed_ms * 1000, i64),
(
"collect_frozen_banks_elapsed",
self.collect_frozen_banks_elapsed as i64,
"collect_frozen_banks_elapsed_us",
self.collect_frozen_banks_elapsed_us as i64,
i64
),
(
"compute_bank_stats_elapsed",
self.compute_bank_stats_elapsed as i64,
"compute_bank_stats_elapsed_us",
self.compute_bank_stats_elapsed_us as i64,
i64
),
(
"select_vote_and_reset_forks_elapsed",
self.select_vote_and_reset_forks_elapsed as i64,
"select_vote_and_reset_forks_elapsed_us",
self.select_vote_and_reset_forks_elapsed_us as i64,
i64
),
(
"start_leader_elapsed",
self.start_leader_elapsed as i64,
i64
),
("reset_bank_elapsed", self.reset_bank_elapsed as i64, i64),
("voting_elapsed", self.voting_elapsed as i64, i64),
(
"select_forks_elapsed",
self.select_forks_elapsed as i64,
"start_leader_elapsed_us",
self.start_leader_elapsed_us as i64,
i64
),
(
"compute_slot_stats_elapsed",
self.compute_slot_stats_elapsed as i64,
"reset_bank_elapsed_us",
self.reset_bank_elapsed_us as i64,
i64
),
("voting_elapsed_us", self.voting_elapsed_us as i64, i64),
(
"select_forks_elapsed_us",
self.select_forks_elapsed_us as i64,
i64
),
(
"generate_new_bank_forks_elapsed",
self.generate_new_bank_forks_elapsed as i64,
"compute_slot_stats_elapsed_us",
self.compute_slot_stats_elapsed_us as i64,
i64
),
(
"replay_active_banks_elapsed",
self.replay_active_banks_elapsed as i64,
"generate_new_bank_forks_elapsed_us",
self.generate_new_bank_forks_elapsed_us as i64,
i64
),
(
"process_ancestor_hashes_duplicate_slots_elapsed",
self.process_ancestor_hashes_duplicate_slots_elapsed as i64,
"replay_active_banks_elapsed_us",
self.replay_active_banks_elapsed_us as i64,
i64
),
(
"process_duplicate_confirmed_slots_elapsed",
self.process_duplicate_confirmed_slots_elapsed as i64,
"process_ancestor_hashes_duplicate_slots_elapsed_us",
self.process_ancestor_hashes_duplicate_slots_elapsed_us as i64,
i64
),
(
"process_unfrozen_gossip_verified_vote_hashes_elapsed",
self.process_unfrozen_gossip_verified_vote_hashes_elapsed as i64,
"process_duplicate_confirmed_slots_elapsed_us",
self.process_duplicate_confirmed_slots_elapsed_us as i64,
i64
),
(
"process_popular_pruned_forks_elapsed",
self.process_popular_pruned_forks_elapsed as i64,
"process_unfrozen_gossip_verified_vote_hashes_elapsed_us",
self.process_unfrozen_gossip_verified_vote_hashes_elapsed_us as i64,
i64
),
(
"wait_receive_elapsed",
self.wait_receive_elapsed as i64,
"process_popular_pruned_forks_elapsed_us",
self.process_popular_pruned_forks_elapsed_us as i64,
i64
),
(
"heaviest_fork_failures_elapsed",
self.heaviest_fork_failures_elapsed as i64,
"wait_receive_elapsed_us",
self.wait_receive_elapsed_us as i64,
i64
),
(
"heaviest_fork_failures_elapsed_us",
self.heaviest_fork_failures_elapsed_us as i64,
i64
),
("bank_count", self.bank_count as i64, i64),
(
"process_duplicate_slots_elapsed",
self.process_duplicate_slots_elapsed as i64,
"process_duplicate_slots_elapsed_us",
self.process_duplicate_slots_elapsed_us as i64,
i64
),
(
"repair_correct_slots_elapsed",
self.repair_correct_slots_elapsed as i64,
"repair_correct_slots_elapsed_us",
self.repair_correct_slots_elapsed_us as i64,
i64
),
(
"retransmit_not_propagated_elapsed",
self.retransmit_not_propagated_elapsed as i64,
"retransmit_not_propagated_elapsed_us",
self.retransmit_not_propagated_elapsed_us as i64,
i64
),
(
@ -504,8 +520,8 @@ impl ReplayTiming {
i64
),
);
*self = ReplayTiming::default();
self.last_print = now;
*self = ReplayLoopTiming::default();
self.last_submit = now;
}
}
}
@ -615,7 +631,7 @@ impl ReplayStage {
let mut last_reset = Hash::default();
let mut partition_info = PartitionInfo::new();
let mut skipped_slots_info = SkippedSlotsInfo::default();
let mut replay_timing = ReplayTiming::default();
let mut replay_timing = ReplayLoopTiming::default();
let mut duplicate_slots_tracker = DuplicateSlotsTracker::default();
let mut duplicate_confirmed_slots: DuplicateConfirmedSlots =
DuplicateConfirmedSlots::default();
@ -2272,7 +2288,7 @@ impl ReplayStage {
unfrozen_gossip_verified_vote_hashes: &mut UnfrozenGossipVerifiedVoteHashes,
vote_signatures: &mut Vec<Signature>,
has_new_vote_been_rooted: &mut bool,
replay_timing: &mut ReplayTiming,
replay_timing: &mut ReplayLoopTiming,
voting_sender: &Sender<VoteOp>,
epoch_slots_frozen_slots: &mut EpochSlotsFrozenSlots,
drop_bank_sender: &Sender<Vec<Arc<Bank>>>,
@ -2604,7 +2620,7 @@ impl ReplayStage {
switch_fork_decision: &SwitchForkDecision,
vote_signatures: &mut Vec<Signature>,
has_new_vote_been_rooted: bool,
replay_timing: &mut ReplayTiming,
replay_timing: &mut ReplayLoopTiming,
voting_sender: &Sender<VoteOp>,
wait_to_vote_slot: Option<Slot>,
) {
@ -2697,7 +2713,7 @@ impl ReplayStage {
entry_notification_sender: Option<&EntryNotifierSender>,
verify_recyclers: &VerifyRecyclers,
replay_vote_sender: &ReplayVoteSender,
replay_timing: &mut ReplayTiming,
replay_timing: &mut ReplayLoopTiming,
log_messages_bytes_limit: Option<usize>,
active_bank_slots: &[Slot],
prioritization_fee_cache: &PrioritizationFeeCache,
@ -2810,7 +2826,7 @@ impl ReplayStage {
entry_notification_sender: Option<&EntryNotifierSender>,
verify_recyclers: &VerifyRecyclers,
replay_vote_sender: &ReplayVoteSender,
replay_timing: &mut ReplayTiming,
replay_timing: &mut ReplayLoopTiming,
log_messages_bytes_limit: Option<usize>,
bank_slot: Slot,
prioritization_fee_cache: &PrioritizationFeeCache,
@ -3157,7 +3173,7 @@ impl ReplayStage {
duplicate_slots_to_repair: &mut DuplicateSlotsToRepair,
ancestor_hashes_replay_update_sender: &AncestorHashesReplayUpdateSender,
block_metadata_notifier: Option<BlockMetadataNotifierArc>,
replay_timing: &mut ReplayTiming,
replay_timing: &mut ReplayLoopTiming,
log_messages_bytes_limit: Option<usize>,
replay_slots_concurrently: bool,
prioritization_fee_cache: &PrioritizationFeeCache,
@ -4140,7 +4156,7 @@ impl ReplayStage {
leader_schedule_cache: &Arc<LeaderScheduleCache>,
rpc_subscriptions: &Arc<RpcSubscriptions>,
progress: &mut ProgressMap,
replay_timing: &mut ReplayTiming,
replay_timing: &mut ReplayLoopTiming,
) {
// Find the next slot that chains to the old slot
let mut generate_new_bank_forks_read_lock =
@ -4530,7 +4546,7 @@ pub(crate) mod tests {
.unwrap()
.get(NUM_CONSECUTIVE_LEADER_SLOTS)
.is_none());
let mut replay_timing = ReplayTiming::default();
let mut replay_timing = ReplayLoopTiming::default();
ReplayStage::generate_new_bank_forks(
&blockstore,
&bank_forks,
@ -6351,7 +6367,7 @@ pub(crate) mod tests {
..
} = vote_simulator;
let mut replay_timing = ReplayTiming::default();
let mut replay_timing = ReplayLoopTiming::default();
// Create bank 7 and insert to blockstore and bank forks
let root_bank = bank_forks.read().unwrap().root_bank();
@ -7543,7 +7559,7 @@ pub(crate) mod tests {
&SwitchForkDecision::SameFork,
&mut voted_signatures,
has_new_vote_been_rooted,
&mut ReplayTiming::default(),
&mut ReplayLoopTiming::default(),
&voting_sender,
None,
);
@ -7618,7 +7634,7 @@ pub(crate) mod tests {
&SwitchForkDecision::SameFork,
&mut voted_signatures,
has_new_vote_been_rooted,
&mut ReplayTiming::default(),
&mut ReplayLoopTiming::default(),
&voting_sender,
None,
);
@ -7819,7 +7835,7 @@ pub(crate) mod tests {
&SwitchForkDecision::SameFork,
voted_signatures,
has_new_vote_been_rooted,
&mut ReplayTiming::default(),
&mut ReplayLoopTiming::default(),
voting_sender,
None,
);