From c2bb2b8e60ccbf176fc753963a98cc2cf95f53f7 Mon Sep 17 00:00:00 2001 From: Justin Starry Date: Mon, 3 Oct 2022 16:49:47 +0800 Subject: [PATCH] Allow validators to reset to the slot which matches their last voted slot (#28172) * Add failing test * Allow resetting to duplicate was confirmed * feedback * feedback * bump * simplify change * Revert "simplify change" This reverts commit 72e5de3e5bdac595f71dc7fc01650ca3bc7da98e. * update comment * Update core/src/replay_stage.rs --- core/src/heaviest_subtree_fork_choice.rs | 60 +++++---- core/src/replay_stage.rs | 151 ++++++++++++++++++++++- 2 files changed, 174 insertions(+), 37 deletions(-) diff --git a/core/src/heaviest_subtree_fork_choice.rs b/core/src/heaviest_subtree_fork_choice.rs index 96167eebe7..f9c97f4adb 100644 --- a/core/src/heaviest_subtree_fork_choice.rs +++ b/core/src/heaviest_subtree_fork_choice.rs @@ -858,37 +858,32 @@ impl HeaviestSubtreeForkChoice { tower .last_voted_slot_hash() .and_then(|last_voted_slot_hash| { - let heaviest_slot_hash_on_same_voted_fork = self.best_slot(&last_voted_slot_hash); - if heaviest_slot_hash_on_same_voted_fork.is_none() { - if !tower.is_stray_last_vote() { - // Unless last vote is stray and stale, self.bast_slot(last_voted_slot) must return - // Some(_), justifying to panic! here. - // Also, adjust_lockouts_after_replay() correctly makes last_voted_slot None, - // if all saved votes are ancestors of replayed_root_slot. So this code shouldn't be - // touched in that case as well. - // In other words, except being stray, all other slots have been voted on while this - // validator has been running, so we must be able to fetch best_slots for all of - // them. - panic!( - "a bank at last_voted_slot({:?}) is a frozen bank so must have been \ - added to heaviest_subtree_fork_choice at time of freezing", - last_voted_slot_hash, - ) - } else { - // fork_infos doesn't have corresponding data for the stale stray last vote, - // meaning some inconsistency between saved tower and ledger. - // (newer snapshot, or only a saved tower is moved over to new setup?) - return None; + match self.is_candidate(&last_voted_slot_hash) { + Some(true) => self.best_slot(&last_voted_slot_hash), + Some(false) => None, + None => { + if !tower.is_stray_last_vote() { + // Unless last vote is stray and stale, self.is_candidate(last_voted_slot_hash) must return + // Some(_), justifying to panic! here. + // Also, adjust_lockouts_after_replay() correctly makes last_voted_slot None, + // if all saved votes are ancestors of replayed_root_slot. So this code shouldn't be + // touched in that case as well. + // In other words, except being stray, all other slots have been voted on while this + // validator has been running, so we must be able to fetch best_slots for all of + // them. + panic!( + "a bank at last_voted_slot({:?}) is a frozen bank so must have been \ + added to heaviest_subtree_fork_choice at time of freezing", + last_voted_slot_hash, + ) + } else { + // fork_infos doesn't have corresponding data for the stale stray last vote, + // meaning some inconsistency between saved tower and ledger. + // (newer snapshot, or only a saved tower is moved over to new setup?) + None + } } } - let heaviest_slot_hash_on_same_voted_fork = - heaviest_slot_hash_on_same_voted_fork.unwrap(); - - if heaviest_slot_hash_on_same_voted_fork == last_voted_slot_hash { - None - } else { - Some(heaviest_slot_hash_on_same_voted_fork) - } }) } @@ -2957,9 +2952,10 @@ mod test { // After marking the last vote in the tower as invalid, `heaviest_slot_on_same_voted_fork()` // should disregard all descendants of that invalid vote - assert!(heaviest_subtree_fork_choice - .heaviest_slot_on_same_voted_fork(&tower) - .is_none()); + assert_eq!( + heaviest_subtree_fork_choice.heaviest_slot_on_same_voted_fork(&tower), + None + ); // Adding another descendant to the invalid candidate won't // update the best slot, even if it contains votes diff --git a/core/src/replay_stage.rs b/core/src/replay_stage.rs index b9359d62b5..cf4d5451c0 100644 --- a/core/src/replay_stage.rs +++ b/core/src/replay_stage.rs @@ -2901,9 +2901,18 @@ impl ReplayStage { ); } - // Given a heaviest bank, `heaviest_bank` and the next votable bank - // `heaviest_bank_on_same_voted_fork` as the validator's last vote, return - // a bank to vote on, a bank to reset to, + /// Given a `heaviest_bank` and a `heaviest_bank_on_same_voted_fork`, return + /// a bank to vote on, a bank to reset to, and a list of switch failure + /// reasons. + /// + /// If `heaviest_bank_on_same_voted_fork` is `None` due to that fork no + /// longer being valid to vote on, it's possible that a validator will not + /// be able to reset away from the invalid fork that they last voted on. To + /// resolve this scenario, validators need to wait until they can create a + /// switch proof for another fork or until the invalid fork is be marked + /// valid again if it was confirmed by the cluster. + /// Until this is resolved, leaders will build each of their + /// blocks from the last reset bank on the invalid fork. pub fn select_vote_and_reset_forks( heaviest_bank: &Arc, // Should only be None if there was no previous vote @@ -5605,6 +5614,139 @@ pub(crate) mod tests { ); } + #[test] + fn test_unconfirmed_duplicate_slots_and_lockouts_for_non_heaviest_fork() { + /* + Build fork structure: + + slot 0 + | + slot 1 + / \ + slot 2 | + | | + slot 3 | + | | + slot 4 | + slot 5 + */ + let forks = tr(0) / (tr(1) / (tr(2) / (tr(3) / (tr(4)))) / tr(5)); + + let mut vote_simulator = VoteSimulator::new(1); + vote_simulator.fill_bank_forks(forks, &HashMap::>::new(), true); + let (bank_forks, mut progress) = (vote_simulator.bank_forks, vote_simulator.progress); + let ledger_path = get_tmp_ledger_path!(); + let blockstore = Arc::new( + Blockstore::open(&ledger_path).expect("Expected to be able to open database ledger"), + ); + let mut tower = Tower::new_for_tests(8, 2.0 / 3.0); + + // All forks have same weight so heaviest bank to vote/reset on should be the tip of + // the fork with the lower slot + let (vote_fork, reset_fork) = run_compute_and_select_forks( + &bank_forks, + &mut progress, + &mut tower, + &mut vote_simulator.heaviest_subtree_fork_choice, + &mut vote_simulator.latest_validator_votes_for_frozen_banks, + ); + assert_eq!(vote_fork.unwrap(), 4); + assert_eq!(reset_fork.unwrap(), 4); + + // Record the vote for 5 which is not on the heaviest fork. + tower.record_bank_vote( + &bank_forks.read().unwrap().get(5).unwrap(), + &Pubkey::default(), + ); + + // 4 should be the heaviest slot, but should not be votable + // because of lockout. 5 is the heaviest slot on the same fork as the last vote. + let (vote_fork, reset_fork) = run_compute_and_select_forks( + &bank_forks, + &mut progress, + &mut tower, + &mut vote_simulator.heaviest_subtree_fork_choice, + &mut vote_simulator.latest_validator_votes_for_frozen_banks, + ); + assert!(vote_fork.is_none()); + assert_eq!(reset_fork, Some(5)); + + // Mark 5 as duplicate + blockstore.store_duplicate_slot(5, vec![], vec![]).unwrap(); + let mut duplicate_slots_tracker = DuplicateSlotsTracker::default(); + let mut gossip_duplicate_confirmed_slots = GossipDuplicateConfirmedSlots::default(); + let mut epoch_slots_frozen_slots = EpochSlotsFrozenSlots::default(); + let bank5_hash = bank_forks.read().unwrap().bank_hash(5).unwrap(); + assert_ne!(bank5_hash, Hash::default()); + let duplicate_state = DuplicateState::new_from_state( + 5, + &gossip_duplicate_confirmed_slots, + &mut vote_simulator.heaviest_subtree_fork_choice, + || progress.is_dead(5).unwrap_or(false), + || Some(bank5_hash), + ); + let (ancestor_hashes_replay_update_sender, _ancestor_hashes_replay_update_receiver) = + unbounded(); + check_slot_agrees_with_cluster( + 5, + bank_forks.read().unwrap().root(), + &blockstore, + &mut duplicate_slots_tracker, + &mut epoch_slots_frozen_slots, + &mut vote_simulator.heaviest_subtree_fork_choice, + &mut DuplicateSlotsToRepair::default(), + &ancestor_hashes_replay_update_sender, + SlotStateUpdate::Duplicate(duplicate_state), + ); + + // 4 should be the heaviest slot, but should not be votable + // because of lockout. 5 is no longer valid due to it being a duplicate. + let (vote_fork, reset_fork) = run_compute_and_select_forks( + &bank_forks, + &mut progress, + &mut tower, + &mut vote_simulator.heaviest_subtree_fork_choice, + &mut vote_simulator.latest_validator_votes_for_frozen_banks, + ); + assert!(vote_fork.is_none()); + assert!(reset_fork.is_none()); + + // If slot 5 is marked as confirmed, it becomes the heaviest bank on same slot again + let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default(); + gossip_duplicate_confirmed_slots.insert(5, bank5_hash); + let duplicate_confirmed_state = DuplicateConfirmedState::new_from_state( + bank5_hash, + || progress.is_dead(5).unwrap_or(false), + || Some(bank5_hash), + ); + check_slot_agrees_with_cluster( + 5, + bank_forks.read().unwrap().root(), + &blockstore, + &mut duplicate_slots_tracker, + &mut epoch_slots_frozen_slots, + &mut vote_simulator.heaviest_subtree_fork_choice, + &mut duplicate_slots_to_repair, + &ancestor_hashes_replay_update_sender, + SlotStateUpdate::DuplicateConfirmed(duplicate_confirmed_state), + ); + // The confirmed hash is detected in `progress`, which means + // it's confirmation on the replayed block. This means we have + // the right version of the block, so `duplicate_slots_to_repair` + // should be empty + assert!(duplicate_slots_to_repair.is_empty()); + let (vote_fork, reset_fork) = run_compute_and_select_forks( + &bank_forks, + &mut progress, + &mut tower, + &mut vote_simulator.heaviest_subtree_fork_choice, + &mut vote_simulator.latest_validator_votes_for_frozen_banks, + ); + // Should now pick 5 as the heaviest fork from last vote again. + assert!(vote_fork.is_none()); + assert_eq!(reset_fork.unwrap(), 5); + } + #[test] fn test_unconfirmed_duplicate_slots_and_lockouts() { /* @@ -5697,7 +5839,7 @@ pub(crate) mod tests { &mut vote_simulator.latest_validator_votes_for_frozen_banks, ); assert!(vote_fork.is_none()); - assert_eq!(reset_fork.unwrap(), 3); + assert_eq!(reset_fork, Some(3)); // Now mark 2, an ancestor of 4, as duplicate blockstore.store_duplicate_slot(2, vec![], vec![]).unwrap(); @@ -6778,7 +6920,6 @@ pub(crate) mod tests { ); let (heaviest_bank, heaviest_bank_on_same_fork) = heaviest_subtree_fork_choice .select_forks(&frozen_banks, tower, progress, ancestors, bank_forks); - assert!(heaviest_bank_on_same_fork.is_none()); let SelectVoteAndResetForkResult { vote_bank, reset_bank,