From ae557a9eb5d8c8451cf7615858f09b2fd47609a0 Mon Sep 17 00:00:00 2001 From: Ashwin Sekar Date: Thu, 27 Oct 2022 20:06:06 -0700 Subject: [PATCH] Exit when stuck in an unrecoverable repair/purge loop (#28596) * Exit when stuck in an unrecoverable repair/purge loop * add tests --- core/src/ancestor_hashes_service.rs | 3 +- core/src/cluster_slot_state_verifier.rs | 36 +++++++++++++++++ core/src/replay_stage.rs | 54 ++++++++++++++++++++++--- 3 files changed, 87 insertions(+), 6 deletions(-) diff --git a/core/src/ancestor_hashes_service.rs b/core/src/ancestor_hashes_service.rs index f8e3eb1dcb..658882802d 100644 --- a/core/src/ancestor_hashes_service.rs +++ b/core/src/ancestor_hashes_service.rs @@ -760,7 +760,7 @@ mod test { use { super::*, crate::{ - cluster_slot_state_verifier::DuplicateSlotsToRepair, + cluster_slot_state_verifier::{DuplicateSlotsToRepair, PurgeRepairSlotCounter}, repair_service::DuplicateSlotsResetReceiver, replay_stage::{ tests::{replay_blockstore_components, ReplayBlockstoreComponents}, @@ -1542,6 +1542,7 @@ mod test { &bank_forks, &requester_blockstore, None, + &mut PurgeRepairSlotCounter::default(), ); // Simulate making a request diff --git a/core/src/cluster_slot_state_verifier.rs b/core/src/cluster_slot_state_verifier.rs index e082c44fac..2b334cd3ac 100644 --- a/core/src/cluster_slot_state_verifier.rs +++ b/core/src/cluster_slot_state_verifier.rs @@ -11,6 +11,7 @@ use { pub(crate) type DuplicateSlotsTracker = BTreeSet; pub(crate) type DuplicateSlotsToRepair = HashMap; +pub(crate) type PurgeRepairSlotCounter = BTreeMap; pub(crate) type EpochSlotsFrozenSlots = BTreeMap; pub(crate) type GossipDuplicateConfirmedSlots = BTreeMap; @@ -694,6 +695,7 @@ fn apply_state_changes( duplicate_slots_to_repair: &mut DuplicateSlotsToRepair, blockstore: &Blockstore, ancestor_hashes_replay_update_sender: &AncestorHashesReplayUpdateSender, + purge_repair_slot_counter: &mut PurgeRepairSlotCounter, state_changes: Vec, ) { // Handle cases where the bank is frozen, 
but not duplicate confirmed @@ -728,6 +730,7 @@ fn apply_state_changes( ) .unwrap(); duplicate_slots_to_repair.remove(&slot); + purge_repair_slot_counter.remove(&slot); } ResultingStateChange::SendAncestorHashesReplayUpdate(ancestor_hashes_replay_update) => { let _ = ancestor_hashes_replay_update_sender.send(ancestor_hashes_replay_update); @@ -750,6 +753,7 @@ pub(crate) fn check_slot_agrees_with_cluster( fork_choice: &mut HeaviestSubtreeForkChoice, duplicate_slots_to_repair: &mut DuplicateSlotsToRepair, ancestor_hashes_replay_update_sender: &AncestorHashesReplayUpdateSender, + purge_repair_slot_counter: &mut PurgeRepairSlotCounter, slot_state_update: SlotStateUpdate, ) { info!( @@ -839,6 +843,7 @@ pub(crate) fn check_slot_agrees_with_cluster( duplicate_slots_to_repair, blockstore, ancestor_hashes_replay_update_sender, + purge_repair_slot_counter, state_changes, ); } @@ -1396,6 +1401,7 @@ mod test { } = setup(); let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); + let mut purge_repair_slot_counter = PurgeRepairSlotCounter::default(); // MarkSlotDuplicate should mark progress map and remove // the slot from fork choice @@ -1414,6 +1420,7 @@ mod test { &mut duplicate_slots_to_repair, &blockstore, &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter, vec![ResultingStateChange::MarkSlotDuplicate(duplicate_slot_hash)], ); assert!(!heaviest_subtree_fork_choice @@ -1436,6 +1443,7 @@ mod test { ); } assert!(duplicate_slots_to_repair.is_empty()); + assert!(purge_repair_slot_counter.is_empty()); // Simulate detecting another hash that is the correct version, // RepairDuplicateConfirmedVersion should add the slot to repair @@ -1448,6 +1456,7 @@ mod test { &mut duplicate_slots_to_repair, &blockstore, &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter, vec![ResultingStateChange::RepairDuplicateConfirmedVersion( correct_hash, )], @@ -1457,6 +1466,7 @@ mod test { *duplicate_slots_to_repair.get(&duplicate_slot).unwrap(), 
correct_hash ); + assert!(purge_repair_slot_counter.is_empty()); } #[test] @@ -1470,6 +1480,7 @@ mod test { } = setup(); let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); + let mut purge_repair_slot_counter = PurgeRepairSlotCounter::default(); let duplicate_slot = bank_forks.read().unwrap().root() + 1; let duplicate_slot_hash = bank_forks @@ -1490,6 +1501,7 @@ mod test { &mut duplicate_slots_to_repair, &blockstore, &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter, vec![ResultingStateChange::BankFrozen(duplicate_slot_hash)], ); assert_eq!( @@ -1513,6 +1525,7 @@ mod test { &mut duplicate_slots_to_repair, &blockstore, &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter, vec![ResultingStateChange::BankFrozen(new_bank_hash)], ); assert_eq!( @@ -1535,6 +1548,7 @@ mod test { } = setup(); let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); + let mut purge_repair_slot_counter = PurgeRepairSlotCounter::default(); let duplicate_slot = bank_forks.read().unwrap().root() + 1; let our_duplicate_slot_hash = bank_forks @@ -1546,6 +1560,7 @@ mod test { // Setup and check the state that is about to change. 
duplicate_slots_to_repair.insert(duplicate_slot, Hash::new_unique()); + purge_repair_slot_counter.insert(duplicate_slot, 1); assert!(blockstore.get_bank_hash(duplicate_slot).is_none()); assert!(!blockstore.is_duplicate_confirmed(duplicate_slot)); @@ -1553,6 +1568,7 @@ mod test { // 1) Re-enable fork choice // 2) Clear any pending repairs from `duplicate_slots_to_repair` since we have the // right version now + // (likewise clear the slot from `purge_repair_slot_counter`) // 3) Set the status to duplicate confirmed in Blockstore let mut state_changes = vec![ResultingStateChange::DuplicateConfirmedSlotMatchesCluster( our_duplicate_slot_hash, @@ -1566,6 +1582,7 @@ mod test { &mut duplicate_slots_to_repair, &blockstore, &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter, state_changes, ); for child_slot in descendants @@ -1585,6 +1602,7 @@ mod test { .is_candidate(&(duplicate_slot, our_duplicate_slot_hash)) .unwrap()); assert!(duplicate_slots_to_repair.is_empty()); + assert!(purge_repair_slot_counter.is_empty()); assert_eq!( blockstore.get_bank_hash(duplicate_slot).unwrap(), our_duplicate_slot_hash @@ -1627,6 +1645,7 @@ mod test { let gossip_duplicate_confirmed_slots = GossipDuplicateConfirmedSlots::default(); let mut epoch_slots_frozen_slots = EpochSlotsFrozenSlots::default(); let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); + let mut purge_repair_slot_counter = PurgeRepairSlotCounter::default(); let duplicate_slot = 2; let duplicate_state = DuplicateState::new_from_state( duplicate_slot, @@ -1646,6 +1665,7 @@ mod test { &mut heaviest_subtree_fork_choice, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter, SlotStateUpdate::Duplicate(duplicate_state), ); assert!(duplicate_slots_tracker.contains(&duplicate_slot)); @@ -1681,6 +1701,7 @@ mod test { &mut heaviest_subtree_fork_choice, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter,
SlotStateUpdate::BankFrozen(bank_frozen_state), ); @@ -1730,6 +1751,7 @@ mod test { ); let root = 0; let mut duplicate_slots_tracker = DuplicateSlotsTracker::default(); + let mut purge_repair_slot_counter = PurgeRepairSlotCounter::default(); let mut gossip_duplicate_confirmed_slots = GossipDuplicateConfirmedSlots::default(); // Mark slot 2 as duplicate confirmed @@ -1751,6 +1773,7 @@ mod test { &mut heaviest_subtree_fork_choice, &mut DuplicateSlotsToRepair::default(), &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter, SlotStateUpdate::DuplicateConfirmed(duplicate_confirmed_state), ); assert!(heaviest_subtree_fork_choice @@ -1788,6 +1811,7 @@ mod test { &mut heaviest_subtree_fork_choice, &mut DuplicateSlotsToRepair::default(), &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter, SlotStateUpdate::Duplicate(duplicate_state), ); assert!(duplicate_slots_tracker.contains(&3)); @@ -1837,6 +1861,7 @@ mod test { let root = 0; let mut duplicate_slots_tracker = DuplicateSlotsTracker::default(); let mut gossip_duplicate_confirmed_slots = GossipDuplicateConfirmedSlots::default(); + let mut purge_repair_slot_counter = PurgeRepairSlotCounter::default(); // Mark 2 as duplicate let slot2_hash = bank_forks.read().unwrap().get(2).unwrap().hash(); @@ -1858,6 +1883,7 @@ mod test { &mut heaviest_subtree_fork_choice, &mut DuplicateSlotsToRepair::default(), &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter, SlotStateUpdate::Duplicate(duplicate_state), ); assert!(duplicate_slots_tracker.contains(&2)); @@ -1893,6 +1919,7 @@ mod test { &mut heaviest_subtree_fork_choice, &mut DuplicateSlotsToRepair::default(), &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter, SlotStateUpdate::DuplicateConfirmed(duplicate_confirmed_state), ); for slot in 0..=3 { @@ -1954,6 +1981,7 @@ mod test { let mut gossip_duplicate_confirmed_slots = GossipDuplicateConfirmedSlots::default(); let mut epoch_slots_frozen_slots = 
EpochSlotsFrozenSlots::default(); let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); + let mut purge_repair_slot_counter = PurgeRepairSlotCounter::default(); // Mark 3 as duplicate confirmed gossip_duplicate_confirmed_slots.insert(3, slot3_hash); @@ -1973,6 +2001,7 @@ mod test { &mut heaviest_subtree_fork_choice, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter, SlotStateUpdate::DuplicateConfirmed(duplicate_confirmed_state), ); verify_all_slots_duplicate_confirmed(&bank_forks, &heaviest_subtree_fork_choice, 3, true); @@ -2001,6 +2030,7 @@ mod test { &mut heaviest_subtree_fork_choice, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter, SlotStateUpdate::Duplicate(duplicate_state), ); assert!(duplicate_slots_tracker.contains(&1)); @@ -2032,6 +2062,7 @@ mod test { let mut gossip_duplicate_confirmed_slots = GossipDuplicateConfirmedSlots::default(); let mut epoch_slots_frozen_slots = EpochSlotsFrozenSlots::default(); let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); + let mut purge_repair_slot_counter = PurgeRepairSlotCounter::default(); // Mark 3 as only epoch slots frozen, matching our `slot3_hash`, should not duplicate // confirm the slot @@ -2055,6 +2086,7 @@ mod test { &mut heaviest_subtree_fork_choice, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter, SlotStateUpdate::EpochSlotsFrozen(epoch_slots_frozen_state), ); verify_all_slots_duplicate_confirmed( @@ -2084,6 +2116,7 @@ mod test { &mut heaviest_subtree_fork_choice, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter, SlotStateUpdate::DuplicateConfirmed(duplicate_confirmed_state), ); assert_eq!(*epoch_slots_frozen_slots.get(&3).unwrap(), slot3_hash); @@ -2120,6 +2153,7 @@ mod test { let mut gossip_duplicate_confirmed_slots = 
GossipDuplicateConfirmedSlots::default(); let mut epoch_slots_frozen_slots = EpochSlotsFrozenSlots::default(); let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); + let mut purge_repair_slot_counter = PurgeRepairSlotCounter::default(); // Mark 3 as only epoch slots frozen with different hash than the our // locally replayed `slot3_hash`. This should not duplicate confirm the slot, @@ -2145,6 +2179,7 @@ mod test { &mut heaviest_subtree_fork_choice, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter, SlotStateUpdate::EpochSlotsFrozen(epoch_slots_frozen_state), ); assert_eq!(*duplicate_slots_to_repair.get(&3).unwrap(), mismatched_hash); @@ -2175,6 +2210,7 @@ mod test { &mut heaviest_subtree_fork_choice, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter, SlotStateUpdate::DuplicateConfirmed(duplicate_confirmed_state), ); assert!(duplicate_slots_to_repair.is_empty()); diff --git a/core/src/replay_stage.rs b/core/src/replay_stage.rs index 8b03da7546..85407ba64f 100644 --- a/core/src/replay_stage.rs +++ b/core/src/replay_stage.rs @@ -94,6 +94,7 @@ const MAX_VOTE_REFRESH_INTERVAL_MILLIS: usize = 5000; // Expect this number to be small enough to minimize thread pool overhead while large enough // to be able to replay all active forks at the same time in most cases. const MAX_CONCURRENT_FORKS_TO_REPLAY: usize = 4; +const MAX_REPAIR_RETRY_LOOP_ATTEMPTS: usize = 10; lazy_static! 
{ static ref PAR_THREAD_POOL: ThreadPool = rayon::ThreadPoolBuilder::new() @@ -463,6 +464,7 @@ impl ReplayStage { let mut gossip_duplicate_confirmed_slots: GossipDuplicateConfirmedSlots = GossipDuplicateConfirmedSlots::default(); let mut epoch_slots_frozen_slots: EpochSlotsFrozenSlots = EpochSlotsFrozenSlots::default(); let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); + let mut purge_repair_slot_counter = PurgeRepairSlotCounter::default(); let mut unfrozen_gossip_verified_vote_hashes: UnfrozenGossipVerifiedVoteHashes = UnfrozenGossipVerifiedVoteHashes::default(); let mut latest_validator_votes_for_frozen_banks: LatestValidatorVotesForFrozenBanks = LatestValidatorVotesForFrozenBanks::default(); let mut voted_signatures = Vec::new(); @@ -535,6 +537,7 @@ impl ReplayStage { log_messages_bytes_limit, replay_slots_concurrently, &prioritization_fee_cache, + &mut purge_repair_slot_counter, ); replay_active_banks_time.stop(); @@ -554,7 +557,8 @@ impl ReplayStage { &mut heaviest_subtree_fork_choice, &bank_forks, &mut duplicate_slots_to_repair, - &ancestor_hashes_replay_update_sender + &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter, ); purge_dead_slots_time.stop(); @@ -571,6 +575,7 @@ impl ReplayStage { &mut heaviest_subtree_fork_choice, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter, ); process_gossip_duplicate_confirmed_slots_time.stop(); @@ -603,6 +608,7 @@ impl ReplayStage { &mut heaviest_subtree_fork_choice, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter, ); } process_duplicate_slots_time.stop(); @@ -644,7 +650,7 @@ impl ReplayStage { &bank_forks, ); - Self::mark_slots_confirmed(&confirmed_forks, &blockstore, &bank_forks, &mut progress, &mut duplicate_slots_tracker, &mut heaviest_subtree_fork_choice, &mut epoch_slots_frozen_slots, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender); + 
Self::mark_slots_confirmed(&confirmed_forks, &blockstore, &bank_forks, &mut progress, &mut duplicate_slots_tracker, &mut heaviest_subtree_fork_choice, &mut epoch_slots_frozen_slots, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender, &mut purge_repair_slot_counter); } compute_slot_stats_time.stop(); @@ -867,7 +873,7 @@ impl ReplayStage { // // Has to be before `maybe_start_leader()`. Otherwise, `ancestors` and `descendants` // will be outdated, and we cannot assume `poh_bank` will be in either of these maps. - Self::dump_then_repair_correct_slots(&mut duplicate_slots_to_repair, &mut ancestors, &mut descendants, &mut progress, &bank_forks, &blockstore, poh_bank.map(|bank| bank.slot())); + Self::dump_then_repair_correct_slots(&mut duplicate_slots_to_repair, &mut ancestors, &mut descendants, &mut progress, &bank_forks, &blockstore, poh_bank.map(|bank| bank.slot()), &mut purge_repair_slot_counter); dump_then_repair_correct_slots_time.stop(); let mut retransmit_not_propagated_time = Measure::start("retransmit_not_propagated_time"); @@ -1113,6 +1119,7 @@ impl ReplayStage { bank_forks: &RwLock, blockstore: &Blockstore, poh_bank_slot: Option, + purge_repair_slot_counter: &mut PurgeRepairSlotCounter, ) { if duplicate_slots_to_repair.is_empty() { return; @@ -1181,9 +1188,16 @@ impl ReplayStage { bank_forks, blockstore, ); + let attempt_no = purge_repair_slot_counter + .entry(*duplicate_slot) + .and_modify(|x| *x += 1) + .or_insert(1); + if *attempt_no > MAX_REPAIR_RETRY_LOOP_ATTEMPTS { + panic!("We have tried to repair duplicate slot: {} more than {} times and are unable to freeze a block with bankhash {}, instead we have a block with bankhash {:?}. This is most likely a bug in the runtime. At this point manual intervention is needed to make progress. 
Exiting", *duplicate_slot, MAX_REPAIR_RETRY_LOOP_ATTEMPTS, *correct_hash, frozen_hash); + } warn!( - "Notifying repair service to repair duplicate slot: {}", - *duplicate_slot, + "Notifying repair service to repair duplicate slot: {}, attempt {}", + *duplicate_slot, *attempt_no, ); true // TODO: Send signal to repair to repair the correct version of @@ -1216,6 +1230,7 @@ impl ReplayStage { bank_forks: &RwLock, duplicate_slots_to_repair: &mut DuplicateSlotsToRepair, ancestor_hashes_replay_update_sender: &AncestorHashesReplayUpdateSender, + purge_repair_slot_counter: &mut PurgeRepairSlotCounter, ) { let root = bank_forks.read().unwrap().root(); for maybe_purgeable_duplicate_slots in epoch_slots_frozen_receiver.try_iter() { @@ -1249,6 +1264,7 @@ impl ReplayStage { fork_choice, duplicate_slots_to_repair, ancestor_hashes_replay_update_sender, + purge_repair_slot_counter, SlotStateUpdate::EpochSlotsFrozen(epoch_slots_frozen_state), ); } @@ -1392,6 +1408,7 @@ impl ReplayStage { fork_choice: &mut HeaviestSubtreeForkChoice, duplicate_slots_to_repair: &mut DuplicateSlotsToRepair, ancestor_hashes_replay_update_sender: &AncestorHashesReplayUpdateSender, + purge_repair_slot_counter: &mut PurgeRepairSlotCounter, ) { let root = bank_forks.read().unwrap().root(); for new_confirmed_slots in gossip_duplicate_confirmed_slots_receiver.try_iter() { @@ -1420,6 +1437,7 @@ impl ReplayStage { fork_choice, duplicate_slots_to_repair, ancestor_hashes_replay_update_sender, + purge_repair_slot_counter, SlotStateUpdate::DuplicateConfirmed(duplicate_confirmed_state), ); } @@ -1458,6 +1476,7 @@ impl ReplayStage { fork_choice: &mut HeaviestSubtreeForkChoice, duplicate_slots_to_repair: &mut DuplicateSlotsToRepair, ancestor_hashes_replay_update_sender: &AncestorHashesReplayUpdateSender, + purge_repair_slot_counter: &mut PurgeRepairSlotCounter, ) { let new_duplicate_slots: Vec = duplicate_slots_receiver.try_iter().collect(); let (root_slot, bank_hashes) = { @@ -1489,6 +1508,7 @@ impl ReplayStage { 
fork_choice, duplicate_slots_to_repair, ancestor_hashes_replay_update_sender, + purge_repair_slot_counter, SlotStateUpdate::Duplicate(duplicate_state), ); } @@ -1760,6 +1780,7 @@ impl ReplayStage { heaviest_subtree_fork_choice: &mut HeaviestSubtreeForkChoice, duplicate_slots_to_repair: &mut DuplicateSlotsToRepair, ancestor_hashes_replay_update_sender: &AncestorHashesReplayUpdateSender, + purge_repair_slot_counter: &mut PurgeRepairSlotCounter, ) { // Do not remove from progress map when marking dead! Needed by // `process_gossip_duplicate_confirmed_slots()` @@ -1813,6 +1834,7 @@ impl ReplayStage { heaviest_subtree_fork_choice, duplicate_slots_to_repair, ancestor_hashes_replay_update_sender, + purge_repair_slot_counter, SlotStateUpdate::Dead(dead_state), ); } @@ -2426,6 +2448,7 @@ impl ReplayStage { block_metadata_notifier: Option, replay_result_vec: &[ReplaySlotFromBlockstore], prioritization_fee_cache: &PrioritizationFeeCache, + purge_repair_slot_counter: &mut PurgeRepairSlotCounter, ) -> bool { // TODO: See if processing of blockstore replay results and bank completion can be made thread safe. 
let mut did_complete_bank = false; @@ -2456,6 +2479,7 @@ impl ReplayStage { heaviest_subtree_fork_choice, duplicate_slots_to_repair, ancestor_hashes_replay_update_sender, + purge_repair_slot_counter, ); // If the bank was corrupted, don't try to run the below logic to check if the // bank is completed @@ -2527,6 +2551,7 @@ impl ReplayStage { heaviest_subtree_fork_choice, duplicate_slots_to_repair, ancestor_hashes_replay_update_sender, + purge_repair_slot_counter, SlotStateUpdate::BankFrozen(bank_frozen_state), ); if let Some(sender) = bank_notification_sender { @@ -2621,6 +2646,7 @@ impl ReplayStage { log_messages_bytes_limit: Option, replay_slots_concurrently: bool, prioritization_fee_cache: &PrioritizationFeeCache, + purge_repair_slot_counter: &mut PurgeRepairSlotCounter, ) -> bool /* completed a bank */ { let active_bank_slots = bank_forks.read().unwrap().active_bank_slots(); let num_active_banks = active_bank_slots.len(); @@ -2689,6 +2715,7 @@ impl ReplayStage { block_metadata_notifier, &replay_result_vec, prioritization_fee_cache, + purge_repair_slot_counter, ) } else { false @@ -3228,6 +3255,7 @@ impl ReplayStage { did_newly_reach_threshold } + #[allow(clippy::too_many_arguments)] fn mark_slots_confirmed( confirmed_forks: &[(Slot, Hash)], blockstore: &Blockstore, @@ -3238,6 +3266,7 @@ impl ReplayStage { epoch_slots_frozen_slots: &mut EpochSlotsFrozenSlots, duplicate_slots_to_repair: &mut DuplicateSlotsToRepair, ancestor_hashes_replay_update_sender: &AncestorHashesReplayUpdateSender, + purge_repair_slot_counter: &mut PurgeRepairSlotCounter, ) { let root_slot = bank_forks.read().unwrap().root(); for (slot, frozen_hash) in confirmed_forks.iter() { @@ -3265,6 +3294,7 @@ impl ReplayStage { fork_choice, duplicate_slots_to_repair, ancestor_hashes_replay_update_sender, + purge_repair_slot_counter, SlotStateUpdate::DuplicateConfirmed(duplicate_confirmed_state), ); } @@ -4253,6 +4283,7 @@ pub(crate) mod tests { &mut heaviest_subtree_fork_choice, &mut 
DuplicateSlotsToRepair::default(), &ancestor_hashes_replay_update_sender, + &mut PurgeRepairSlotCounter::default(), ); } @@ -5687,6 +5718,7 @@ pub(crate) mod tests { // Mark 5 as duplicate blockstore.store_duplicate_slot(5, vec![], vec![]).unwrap(); let mut duplicate_slots_tracker = DuplicateSlotsTracker::default(); + let mut purge_repair_slot_counter = PurgeRepairSlotCounter::default(); let mut gossip_duplicate_confirmed_slots = GossipDuplicateConfirmedSlots::default(); let mut epoch_slots_frozen_slots = EpochSlotsFrozenSlots::default(); let bank5_hash = bank_forks.read().unwrap().bank_hash(5).unwrap(); @@ -5709,6 +5741,7 @@ pub(crate) mod tests { &mut vote_simulator.heaviest_subtree_fork_choice, &mut DuplicateSlotsToRepair::default(), &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter, SlotStateUpdate::Duplicate(duplicate_state), ); @@ -5741,6 +5774,7 @@ pub(crate) mod tests { &mut vote_simulator.heaviest_subtree_fork_choice, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender, + &mut purge_repair_slot_counter, SlotStateUpdate::DuplicateConfirmed(duplicate_confirmed_state), ); // The confirmed hash is detected in `progress`, which means @@ -5841,6 +5875,7 @@ pub(crate) mod tests { &mut vote_simulator.heaviest_subtree_fork_choice, &mut DuplicateSlotsToRepair::default(), &ancestor_hashes_replay_update_sender, + &mut PurgeRepairSlotCounter::default(), SlotStateUpdate::Duplicate(duplicate_state), ); @@ -5874,6 +5909,7 @@ pub(crate) mod tests { &mut vote_simulator.heaviest_subtree_fork_choice, &mut DuplicateSlotsToRepair::default(), &ancestor_hashes_replay_update_sender, + &mut PurgeRepairSlotCounter::default(), SlotStateUpdate::Duplicate(duplicate_state), ); @@ -5908,6 +5944,7 @@ pub(crate) mod tests { &mut vote_simulator.heaviest_subtree_fork_choice, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender, + &mut PurgeRepairSlotCounter::default(), SlotStateUpdate::DuplicateConfirmed(duplicate_confirmed_state), ); 
// The confirmed hash is detected in `progress`, which means @@ -5955,6 +5992,7 @@ pub(crate) mod tests { let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); duplicate_slots_to_repair.insert(1, Hash::new_unique()); duplicate_slots_to_repair.insert(2, Hash::new_unique()); + let mut purge_repair_slot_counter = PurgeRepairSlotCounter::default(); ReplayStage::dump_then_repair_correct_slots( &mut duplicate_slots_to_repair, @@ -5964,6 +6002,7 @@ pub(crate) mod tests { bank_forks, blockstore, None, + &mut purge_repair_slot_counter, ); let r_bank_forks = bank_forks.read().unwrap(); @@ -5981,6 +6020,9 @@ pub(crate) mod tests { assert!(descendants_result.is_none()); } } + assert_eq!(2, purge_repair_slot_counter.len()); + assert_eq!(1, *purge_repair_slot_counter.get(&1).unwrap()); + assert_eq!(1, *purge_repair_slot_counter.get(&2).unwrap()); } fn setup_vote_then_rollback( @@ -6057,6 +6099,7 @@ pub(crate) mod tests { heaviest_subtree_fork_choice, &mut duplicate_slots_to_repair, &ancestor_hashes_replay_update_sender, + &mut PurgeRepairSlotCounter::default(), SlotStateUpdate::DuplicateConfirmed(duplicate_confirmed_state), ); assert_eq!( @@ -6075,6 +6118,7 @@ pub(crate) mod tests { bank_forks, blockstore, None, + &mut PurgeRepairSlotCounter::default(), ); // Check everything was purged properly