Allow validators to reset to the slot which matches their last voted slot (#28172)

* Add failing test

* Allow resetting to duplicate was confirmed

* feedback

* feedback

* bump

* simplify change

* Revert "simplify change"

This reverts commit 72e5de3e5bdac595f71dc7fc01650ca3bc7da98e.

* update comment

* Update core/src/replay_stage.rs
Author: Justin Starry, 2022-10-03 16:49:47 +08:00 (committed by GitHub)
parent 47b3ca5811
commit c2bb2b8e60
2 changed files with 174 additions and 37 deletions

core/src/heaviest_subtree_fork_choice.rs

@@ -858,37 +858,32 @@ impl HeaviestSubtreeForkChoice {
         tower
             .last_voted_slot_hash()
             .and_then(|last_voted_slot_hash| {
-                let heaviest_slot_hash_on_same_voted_fork = self.best_slot(&last_voted_slot_hash);
-                if heaviest_slot_hash_on_same_voted_fork.is_none() {
-                    if !tower.is_stray_last_vote() {
-                        // Unless last vote is stray and stale, self.best_slot(last_voted_slot) must return
-                        // Some(_), justifying to panic! here.
-                        // Also, adjust_lockouts_after_replay() correctly makes last_voted_slot None,
-                        // if all saved votes are ancestors of replayed_root_slot. So this code shouldn't be
-                        // touched in that case as well.
-                        // In other words, except being stray, all other slots have been voted on while this
-                        // validator has been running, so we must be able to fetch best_slots for all of
-                        // them.
-                        panic!(
-                            "a bank at last_voted_slot({:?}) is a frozen bank so must have been \
-                             added to heaviest_subtree_fork_choice at time of freezing",
-                            last_voted_slot_hash,
-                        )
-                    } else {
-                        // fork_infos doesn't have corresponding data for the stale stray last vote,
-                        // meaning some inconsistency between saved tower and ledger.
-                        // (newer snapshot, or only a saved tower is moved over to new setup?)
-                        return None;
+                match self.is_candidate(&last_voted_slot_hash) {
+                    Some(true) => self.best_slot(&last_voted_slot_hash),
+                    Some(false) => None,
+                    None => {
+                        if !tower.is_stray_last_vote() {
+                            // Unless last vote is stray and stale, self.is_candidate(last_voted_slot_hash) must return
+                            // Some(_), justifying to panic! here.
+                            // Also, adjust_lockouts_after_replay() correctly makes last_voted_slot None,
+                            // if all saved votes are ancestors of replayed_root_slot. So this code shouldn't be
+                            // touched in that case as well.
+                            // In other words, except being stray, all other slots have been voted on while this
+                            // validator has been running, so we must be able to fetch best_slots for all of
+                            // them.
+                            panic!(
+                                "a bank at last_voted_slot({:?}) is a frozen bank so must have been \
+                                 added to heaviest_subtree_fork_choice at time of freezing",
+                                last_voted_slot_hash,
+                            )
+                        } else {
+                            // fork_infos doesn't have corresponding data for the stale stray last vote,
+                            // meaning some inconsistency between saved tower and ledger.
+                            // (newer snapshot, or only a saved tower is moved over to new setup?)
+                            None
+                        }
                     }
                 }
-
-                let heaviest_slot_hash_on_same_voted_fork =
-                    heaviest_slot_hash_on_same_voted_fork.unwrap();
-                if heaviest_slot_hash_on_same_voted_fork == last_voted_slot_hash {
-                    None
-                } else {
-                    Some(heaviest_slot_hash_on_same_voted_fork)
-                }
             })
     }
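The effect of the change is easier to see in isolation: a last-voted slot that is still a valid candidate now yields a reset target even when it is itself the heaviest slot on its fork (the removed equality check used to force None there), while an invalidated duplicate candidate yields None. Below is a minimal self-contained sketch of that rule; ForkChoiceSketch and its fields are hypothetical stand-ins, not the real HeaviestSubtreeForkChoice internals, and the panic branch for non-stray votes is reduced to a comment.

use std::collections::HashMap;

type Slot = u64;

struct ForkChoiceSketch {
    // slot -> (is still a valid candidate?, heaviest slot in its subtree)
    fork_infos: HashMap<Slot, (bool, Slot)>,
}

impl ForkChoiceSketch {
    fn is_candidate(&self, slot: &Slot) -> Option<bool> {
        self.fork_infos.get(slot).map(|(valid, _)| *valid)
    }

    fn best_slot(&self, slot: &Slot) -> Option<Slot> {
        self.fork_infos.get(slot).map(|(_, best)| *best)
    }

    fn heaviest_slot_on_same_voted_fork(&self, last_vote: Option<Slot>) -> Option<Slot> {
        last_vote.and_then(|slot| match self.is_candidate(&slot) {
            // Valid candidate: return the heaviest slot on the voted fork,
            // even if it equals the last-voted slot itself.
            Some(true) => self.best_slot(&slot),
            // Fork marked invalid (e.g. unconfirmed duplicate): nothing to reset to.
            Some(false) => None,
            // No fork info: the real code panics here unless the vote is stray.
            None => None,
        })
    }
}

fn main() {
    let fc = ForkChoiceSketch {
        fork_infos: HashMap::from([(5, (true, 5))]),
    };
    // New behavior: resetting to the slot matching the last vote is allowed.
    assert_eq!(fc.heaviest_slot_on_same_voted_fork(Some(5)), Some(5));

    let fc = ForkChoiceSketch {
        fork_infos: HashMap::from([(5, (false, 5))]),
    };
    // Duplicate (invalid) fork: no reset target on the voted fork.
    assert_eq!(fc.heaviest_slot_on_same_voted_fork(Some(5)), None);
}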
@@ -2957,9 +2952,10 @@ mod test {
         // After marking the last vote in the tower as invalid, `heaviest_slot_on_same_voted_fork()`
         // should disregard all descendants of that invalid vote
-        assert!(heaviest_subtree_fork_choice
-            .heaviest_slot_on_same_voted_fork(&tower)
-            .is_none());
+        assert_eq!(
+            heaviest_subtree_fork_choice.heaviest_slot_on_same_voted_fork(&tower),
+            None
+        );
         // Adding another descendant to the invalid candidate won't
         // update the best slot, even if it contains votes

core/src/replay_stage.rs

@@ -2901,9 +2901,18 @@ impl ReplayStage {
         );
     }

-    // Given a heaviest bank, `heaviest_bank` and the next votable bank
-    // `heaviest_bank_on_same_voted_fork` as the validator's last vote, return
-    // a bank to vote on, a bank to reset to,
+    /// Given a `heaviest_bank` and a `heaviest_bank_on_same_voted_fork`, return
+    /// a bank to vote on, a bank to reset to, and a list of switch failure
+    /// reasons.
+    ///
+    /// If `heaviest_bank_on_same_voted_fork` is `None` due to that fork no
+    /// longer being valid to vote on, it's possible that a validator will not
+    /// be able to reset away from the invalid fork that they last voted on. To
+    /// resolve this scenario, validators need to wait until they can create a
+    /// switch proof for another fork, or until the invalid fork is marked
+    /// valid again if it was confirmed by the cluster.
+    /// Until this is resolved, leaders will build each of their
+    /// blocks from the last reset bank on the invalid fork.
     pub fn select_vote_and_reset_forks(
         heaviest_bank: &Arc<Bank>,
         // Should only be None if there was no previous vote
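A rough sketch of the reset decision this doc comment describes, under a deliberately simplified signature: `switch_allowed` is a hypothetical stand-in for the real switch-threshold machinery, and the actual select_vote_and_reset_forks also produces a vote bank and the switch failure reasons.

type Slot = u64;

// Hedged sketch only: the real function weighs lockouts, switch thresholds,
// and failure reasons; here the outcome of all of that is `switch_allowed`.
fn select_reset_slot(
    heaviest: Slot,
    heaviest_on_voted_fork: Option<Slot>,
    switch_allowed: bool,
) -> Option<Slot> {
    if switch_allowed {
        // A switch proof exists, so the validator can move to the heaviest fork.
        Some(heaviest)
    } else {
        // Locked out without a switch proof: stay on the last-voted fork.
        // If that fork is invalid (None), there is nothing to reset to until a
        // switch proof appears or the fork is duplicate-confirmed again, and
        // in the meantime leaders keep building on the last reset bank.
        heaviest_on_voted_fork
    }
}

fn main() {
    assert_eq!(select_reset_slot(4, Some(5), false), Some(5)); // stay on voted fork
    assert_eq!(select_reset_slot(4, None, false), None); // stuck until resolved
    assert_eq!(select_reset_slot(4, None, true), Some(4)); // switch proof frees us
}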
@@ -5605,6 +5614,139 @@ pub(crate) mod tests {
         );
     }

+    #[test]
+    fn test_unconfirmed_duplicate_slots_and_lockouts_for_non_heaviest_fork() {
+        /*
+            Build fork structure:
+                 slot 0
+                   |
+                 slot 1
+                 /    \
+            slot 2    |
+               |      |
+            slot 3    |
+               |      |
+            slot 4    |
+                    slot 5
+        */
+        let forks = tr(0) / (tr(1) / (tr(2) / (tr(3) / (tr(4)))) / tr(5));
+
+        let mut vote_simulator = VoteSimulator::new(1);
+        vote_simulator.fill_bank_forks(forks, &HashMap::<Pubkey, Vec<u64>>::new(), true);
+        let (bank_forks, mut progress) = (vote_simulator.bank_forks, vote_simulator.progress);
+        let ledger_path = get_tmp_ledger_path!();
+        let blockstore = Arc::new(
+            Blockstore::open(&ledger_path).expect("Expected to be able to open database ledger"),
+        );
+        let mut tower = Tower::new_for_tests(8, 2.0 / 3.0);
+
+        // All forks have same weight so heaviest bank to vote/reset on should be the tip of
+        // the fork with the lower slot
+        let (vote_fork, reset_fork) = run_compute_and_select_forks(
+            &bank_forks,
+            &mut progress,
+            &mut tower,
+            &mut vote_simulator.heaviest_subtree_fork_choice,
+            &mut vote_simulator.latest_validator_votes_for_frozen_banks,
+        );
+        assert_eq!(vote_fork.unwrap(), 4);
+        assert_eq!(reset_fork.unwrap(), 4);
+
+        // Record the vote for 5 which is not on the heaviest fork.
+        tower.record_bank_vote(
+            &bank_forks.read().unwrap().get(5).unwrap(),
+            &Pubkey::default(),
+        );
+
+        // 4 should be the heaviest slot, but should not be votable
+        // because of lockout. 5 is the heaviest slot on the same fork as the last vote.
+        let (vote_fork, reset_fork) = run_compute_and_select_forks(
+            &bank_forks,
+            &mut progress,
+            &mut tower,
+            &mut vote_simulator.heaviest_subtree_fork_choice,
+            &mut vote_simulator.latest_validator_votes_for_frozen_banks,
+        );
+        assert!(vote_fork.is_none());
+        assert_eq!(reset_fork, Some(5));
+
+        // Mark 5 as duplicate
+        blockstore.store_duplicate_slot(5, vec![], vec![]).unwrap();
+        let mut duplicate_slots_tracker = DuplicateSlotsTracker::default();
+        let mut gossip_duplicate_confirmed_slots = GossipDuplicateConfirmedSlots::default();
+        let mut epoch_slots_frozen_slots = EpochSlotsFrozenSlots::default();
+        let bank5_hash = bank_forks.read().unwrap().bank_hash(5).unwrap();
+        assert_ne!(bank5_hash, Hash::default());
+        let duplicate_state = DuplicateState::new_from_state(
+            5,
+            &gossip_duplicate_confirmed_slots,
+            &mut vote_simulator.heaviest_subtree_fork_choice,
+            || progress.is_dead(5).unwrap_or(false),
+            || Some(bank5_hash),
+        );
+        let (ancestor_hashes_replay_update_sender, _ancestor_hashes_replay_update_receiver) =
+            unbounded();
+        check_slot_agrees_with_cluster(
+            5,
+            bank_forks.read().unwrap().root(),
+            &blockstore,
+            &mut duplicate_slots_tracker,
+            &mut epoch_slots_frozen_slots,
+            &mut vote_simulator.heaviest_subtree_fork_choice,
+            &mut DuplicateSlotsToRepair::default(),
+            &ancestor_hashes_replay_update_sender,
+            SlotStateUpdate::Duplicate(duplicate_state),
+        );
+
+        // 4 should be the heaviest slot, but should not be votable
+        // because of lockout. 5 is no longer valid due to it being a duplicate.
+        let (vote_fork, reset_fork) = run_compute_and_select_forks(
+            &bank_forks,
+            &mut progress,
+            &mut tower,
+            &mut vote_simulator.heaviest_subtree_fork_choice,
+            &mut vote_simulator.latest_validator_votes_for_frozen_banks,
+        );
+        assert!(vote_fork.is_none());
+        assert!(reset_fork.is_none());
+
+        // If slot 5 is marked as duplicate confirmed, it becomes the heaviest bank
+        // on the same voted fork again
+        let mut duplicate_slots_to_repair = DuplicateSlotsToRepair::default();
+        gossip_duplicate_confirmed_slots.insert(5, bank5_hash);
+        let duplicate_confirmed_state = DuplicateConfirmedState::new_from_state(
+            bank5_hash,
+            || progress.is_dead(5).unwrap_or(false),
+            || Some(bank5_hash),
+        );
+        check_slot_agrees_with_cluster(
+            5,
+            bank_forks.read().unwrap().root(),
+            &blockstore,
+            &mut duplicate_slots_tracker,
+            &mut epoch_slots_frozen_slots,
+            &mut vote_simulator.heaviest_subtree_fork_choice,
+            &mut duplicate_slots_to_repair,
+            &ancestor_hashes_replay_update_sender,
+            SlotStateUpdate::DuplicateConfirmed(duplicate_confirmed_state),
+        );
+
+        // The confirmed hash is detected in `progress`, which means
+        // it's confirmation on the replayed block. This means we have
+        // the right version of the block, so `duplicate_slots_to_repair`
+        // should be empty
+        assert!(duplicate_slots_to_repair.is_empty());
+
+        let (vote_fork, reset_fork) = run_compute_and_select_forks(
+            &bank_forks,
+            &mut progress,
+            &mut tower,
+            &mut vote_simulator.heaviest_subtree_fork_choice,
+            &mut vote_simulator.latest_validator_votes_for_frozen_banks,
+        );
+
+        // Should now pick 5 as the heaviest fork from last vote again.
+        assert!(vote_fork.is_none());
+        assert_eq!(reset_fork.unwrap(), 5);
+    }
+
     #[test]
     fn test_unconfirmed_duplicate_slots_and_lockouts() {
         /*
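The new test walks slot 5 through three validity states. As a toy model only (the enum and function below are illustrative names; the real transitions live in check_slot_agrees_with_cluster and fork choice), the reset eligibility it asserts looks like this:

// Illustrative only: models the three states the test drives slot 5 through
// and whether the last-voted slot can serve as the reset bank in each.
#[derive(Clone, Copy, Debug, PartialEq)]
enum SlotValidity {
    Valid,              // replayed, no duplicate proof seen
    Duplicate,          // duplicate proof stored, not yet confirmed
    DuplicateConfirmed, // cluster confirmed the locally replayed version
}

fn can_reset_to(validity: SlotValidity) -> bool {
    matches!(validity, SlotValidity::Valid | SlotValidity::DuplicateConfirmed)
}

fn main() {
    // Mirrors the three run_compute_and_select_forks checks on slot 5:
    assert!(can_reset_to(SlotValidity::Valid)); // reset_fork == Some(5)
    assert!(!can_reset_to(SlotValidity::Duplicate)); // reset_fork == None
    assert!(can_reset_to(SlotValidity::DuplicateConfirmed)); // Some(5) again
}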
@@ -5697,7 +5839,7 @@ pub(crate) mod tests {
             &mut vote_simulator.latest_validator_votes_for_frozen_banks,
         );
         assert!(vote_fork.is_none());
-        assert_eq!(reset_fork.unwrap(), 3);
+        assert_eq!(reset_fork, Some(3));

         // Now mark 2, an ancestor of 4, as duplicate
         blockstore.store_duplicate_slot(2, vec![], vec![]).unwrap();
@@ -6778,7 +6920,6 @@ pub(crate) mod tests {
         );
         let (heaviest_bank, heaviest_bank_on_same_fork) = heaviest_subtree_fork_choice
             .select_forks(&frozen_banks, tower, progress, ancestors, bank_forks);
-        assert!(heaviest_bank_on_same_fork.is_none());
         let SelectVoteAndResetForkResult {
             vote_bank,
             reset_bank,