Vote refresh fix when outside slothash (#29948)

* When there are too many pubkeys in one slot, kick the one with lowest
stake out.

* Cache last_root to reduce read locks we need.

* Use slots_in_epoch to limit number of slots in the map.

* Fix lint errors.

* Only cache stake and slots per epoch once per epoch.

* Revert "Only cache stake and slots per epoch once per epoch."

This reverts commit 8658aad0083456794b4c4403adaf9c74d1a71d09.

* Vote at the tip of the current fork if the last vote is outside the SlotHashes
of the tip and the last vote has expired.

* Add a unit test verifying that when the last vote is outside slot hashes, we vote
at the tip of the current fork.

* Revert "Use slots_in_epoch to limit number of slots in the map."

This reverts commit 93574f57a48d2a70fbbc0f62fa8810d3b6bee0af.

* Revert "Cache last_root to reduce read locks we need."

This reverts commit bb114ec2b62cb9c0207328b19c415f6116be0f1c.

* Revert "When there are too many pubkeys in one slot, kick the one with lowest"

This reverts commit 711e29a6a025fd4f11fbc97dcbbe90e4832be04c.

* Move new vote generation into the main path for the case where the last vote
is outside slot hashes. This makes more sense because we no longer select
where to vote in two different places, and all the vote generation logic
is inherited seamlessly.

* - Move vote refresh behind vote selection, and do not refresh the vote if a new
  vote is selected.
- Check whether the last vote is inside slot hashes within select_vote_and_reset_forks.
- Rename slot_within_slothash to is_in_slothashes_history.
- Remove one unit test for now; more tests will be added in a separate CL.

* Remove the new test; it will be in another file.

* Add is_in_slot_hashes_history test in the new file.

* Add a unit test for the case where the last vote is outside slot hashes.

* Small improvements and more unit tests.

* Fix bad merge.

* Update docs/src/terminology.md

Co-authored-by: mvines <mvines@gmail.com>

* Put SwitchForkDecision::FailedSwitchThreshold logic into a separate function.

* Make linter happy.

---------

Co-authored-by: mvines <mvines@gmail.com>
Commit 6f72258e3e (parent 1e12a18e01)
Wen, 2023-06-26 18:21:24 -07:00; committed by GitHub
3 changed files with 425 additions and 58 deletions
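
In outline, the change below decides whether the last vote can ever land again;
if not, a fresh vote is generated at the tip of the current fork instead of
refreshing the stale vote. Here is a minimal sketch of that predicate, with
simplified stand-ins rather than the actual ReplayStage API: last_voted_slot and
latest_landed_vote model the tower and progress map, tip_slot models the
heaviest bank on the same voted fork, and slot_hashes models the tip's
SlotHashes sysvar.

// Hypothetical sketch: decide whether a stale last vote can ever land again.
fn last_vote_unable_to_land(
    last_voted_slot: Option<u64>,
    latest_landed_vote: Option<u64>,
    tip_slot: u64,
    slot_hashes: &[u64],
) -> bool {
    match (last_voted_slot, latest_landed_vote) {
        (Some(voted), Some(landed)) => {
            // The last vote did not land
            landed < voted
                // We are not already voting at the tip of the fork
                && voted < tip_slot
                // The voted slot fell out of the tip's SlotHashes history
                && !slot_hashes.contains(&voted)
        }
        _ => false,
    }
}

When this predicate holds and the switch threshold has failed,
select_vote_and_reset_forks returns SwitchForkDecision::SameFork, so the normal
vote generation path produces a vote at the fork tip.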


@@ -806,28 +806,6 @@ impl ReplayStage {
&bank_forks,
);
if let Some(heaviest_bank_on_same_voted_fork) =
heaviest_bank_on_same_voted_fork.as_ref()
{
if let Some(my_latest_landed_vote) =
progress.my_latest_landed_vote(heaviest_bank_on_same_voted_fork.slot())
{
Self::refresh_last_vote(
&mut tower,
heaviest_bank_on_same_voted_fork,
my_latest_landed_vote,
&vote_account,
&identity_keypair,
&authorized_voter_keypairs.read().unwrap(),
&mut voted_signatures,
has_new_vote_been_rooted,
&mut last_vote_refresh_time,
&voting_sender,
wait_to_vote_slot,
);
}
}
let mut select_vote_and_reset_forks_time =
Measure::start("select_vote_and_reset_forks");
let SelectVoteAndResetForkResult {
@@ -846,6 +824,30 @@
);
select_vote_and_reset_forks_time.stop();
if vote_bank.is_none() {
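// Only refresh the last vote when no new vote was selected above.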
if let Some(heaviest_bank_on_same_voted_fork) =
heaviest_bank_on_same_voted_fork.as_ref()
{
if let Some(my_latest_landed_vote) =
progress.my_latest_landed_vote(heaviest_bank_on_same_voted_fork.slot())
{
Self::refresh_last_vote(
&mut tower,
heaviest_bank_on_same_voted_fork,
my_latest_landed_vote,
&vote_account,
&identity_keypair,
&authorized_voter_keypairs.read().unwrap(),
&mut voted_signatures,
has_new_vote_been_rooted,
&mut last_vote_refresh_time,
&voting_sender,
wait_to_vote_slot,
);
}
}
}
let mut heaviest_fork_failures_time = Measure::start("heaviest_fork_failures_time");
if tower.is_recent(heaviest_bank.slot()) && !heaviest_fork_failures.is_empty() {
info!(
@@ -3199,6 +3201,97 @@
);
}
fn select_forks_failed_switch_threshold(
reset_bank: Option<&Arc<Bank>>,
progress: &ProgressMap,
tower: &mut Tower,
heaviest_bank_slot: Slot,
failure_reasons: &mut Vec<HeaviestForkFailures>,
switch_proof_stake: u64,
total_stake: u64,
switch_fork_decision: SwitchForkDecision,
) -> SwitchForkDecision {
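// Determine whether the last vote has become hopeless: it did not land, we
// are not already voting at the tip, and the voted slot has fallen out of
// the SlotHashes history of the fork tip, so a refresh could never land.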
let last_vote_unable_to_land = match reset_bank {
Some(heaviest_bank_on_same_voted_fork) => {
match tower.last_voted_slot() {
Some(last_voted_slot) => {
match progress
.my_latest_landed_vote(heaviest_bank_on_same_voted_fork.slot())
{
Some(my_latest_landed_vote) =>
// Last vote did not land
{
my_latest_landed_vote < last_voted_slot
// If we are already voting at the tip, there is nothing we can do.
&& last_voted_slot < heaviest_bank_on_same_voted_fork.slot()
// Last vote outside slot hashes of the tip of fork
&& !heaviest_bank_on_same_voted_fork
.is_in_slot_hashes_history(&last_voted_slot)
}
None => false,
}
}
None => false,
}
}
None => false,
};
if last_vote_unable_to_land {
// If we reach here, these assumptions are true:
// 1. We can't switch because of threshold
// 2. Our last vote was on a non-duplicate/confirmed slot
// 3. Our last vote is now outside slot hashes history of the tip of fork
// So, there was no hope of this last vote ever landing again.
// In this case, we do want to obey threshold, yet try to register our vote on
// the current fork, so we choose to vote at the tip of current fork instead.
// This will not cause a longer lockout because lockout doesn't double after 512
// slots; it might be enough to get a majority vote.
SwitchForkDecision::SameFork
} else {
// If we can't switch and our last vote was on a non-duplicate/confirmed slot, then
// reset to the next votable bank on the same fork as our last vote,
// but don't vote.
// We don't just reset to the heaviest fork when switch threshold fails because
// a situation like this can occur:
/* Figure 1:
slot 0
|
slot 1
/ \
slot 2 (last vote) |
| slot 8 (10%)
slot 4 (9%)
*/
// Imagine 90% of validators voted on slot 4, but only 9% landed. If everybody that fails
// the switch threshold abandons slot 4 to build on slot 8 (because it's *currently* heavier),
// then there will be no blocks to include the votes for slot 4, and the network halts
// because 90% of validators can't vote
info!(
"Waiting to switch vote to {},
resetting to slot {:?} for now,
switch proof stake: {},
threshold stake: {},
total stake: {}",
heaviest_bank_slot,
reset_bank.as_ref().map(|b| b.slot()),
switch_proof_stake,
total_stake as f64 * SWITCH_FORK_THRESHOLD,
total_stake
);
failure_reasons.push(HeaviestForkFailures::FailedSwitchThreshold(
heaviest_bank_slot,
switch_proof_stake,
total_stake,
));
switch_fork_decision
}
}
/// Given a `heaviest_bank` and a `heaviest_bank_on_same_voted_fork`, return
/// a bank to vote on, a bank to reset to, and a list of switch failure
/// reasons.
@@ -3253,45 +3346,17 @@
match switch_fork_decision {
SwitchForkDecision::FailedSwitchThreshold(switch_proof_stake, total_stake) => {
let reset_bank = heaviest_bank_on_same_voted_fork;
// If we can't switch and our last vote was on a non-duplicate/confirmed slot, then
// reset to the next votable bank on the same fork as our last vote,
// but don't vote.
// We don't just reset to the heaviest fork when switch threshold fails because
// a situation like this can occur:
/* Figure 1:
slot 0
|
slot 1
/ \
slot 2 (last vote) |
| slot 8 (10%)
slot 4 (9%)
*/
// Imagine 90% of validators voted on slot 4, but only 9% landed. If everybody that fails
// the switch threshold abandons slot 4 to build on slot 8 (because it's *currently* heavier),
// then there will be no blocks to include the votes for slot 4, and the network halts
// because 90% of validators can't vote
info!(
"Waiting to switch vote to {},
resetting to slot {:?} for now,
switch proof stake: {},
threshold stake: {},
total stake: {}",
heaviest_bank.slot(),
reset_bank.as_ref().map(|b| b.slot()),
switch_proof_stake,
total_stake as f64 * SWITCH_FORK_THRESHOLD,
total_stake
);
failure_reasons.push(HeaviestForkFailures::FailedSwitchThreshold(
let final_switch_fork_decision = Self::select_forks_failed_switch_threshold(
reset_bank,
progress,
tower,
heaviest_bank.slot(),
&mut failure_reasons,
switch_proof_stake,
total_stake,
));
reset_bank.map(|b| (b, switch_fork_decision))
switch_fork_decision,
);
reset_bank.map(|b| (b, final_switch_fork_decision))
}
SwitchForkDecision::FailedSwitchDuplicateRollback(latest_duplicate_ancestor) => {
// If we can't switch and our last vote was on an unconfirmed, duplicate slot,
@@ -7201,6 +7266,280 @@ pub(crate) mod tests {
assert_eq!(tower.last_voted_slot().unwrap(), 1);
}
#[allow(clippy::too_many_arguments)]
fn send_vote_in_new_bank(
parent_bank: &Arc<Bank>,
my_slot: Slot,
my_vote_keypair: &[Arc<Keypair>],
tower: &mut Tower,
identity_keypair: &Keypair,
voted_signatures: &mut Vec<Signature>,
has_new_vote_been_rooted: bool,
voting_sender: &Sender<VoteOp>,
voting_receiver: &Receiver<VoteOp>,
cluster_info: &ClusterInfo,
poh_recorder: &RwLock<PohRecorder>,
tower_storage: &dyn TowerStorage,
make_it_landing: bool,
cursor: &mut Cursor,
bank_forks: &RwLock<BankForks>,
progress: &mut ProgressMap,
) -> Arc<Bank> {
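// Test helper: vote on `parent_bank`, push the vote through the voting
// service, then build a child bank at `my_slot` that includes (lands) the
// vote only if `make_it_landing` is true, freeze it, and insert it into
// `bank_forks`.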
let my_vote_pubkey = &my_vote_keypair[0].pubkey();
tower.record_bank_vote(parent_bank, my_vote_pubkey);
ReplayStage::push_vote(
parent_bank,
my_vote_pubkey,
identity_keypair,
my_vote_keypair,
tower,
&SwitchForkDecision::SameFork,
voted_signatures,
has_new_vote_been_rooted,
&mut ReplayTiming::default(),
voting_sender,
None,
);
let vote_info = voting_receiver
.recv_timeout(Duration::from_secs(1))
.unwrap();
crate::voting_service::VotingService::handle_vote(
cluster_info,
poh_recorder,
tower_storage,
vote_info,
);
let votes = cluster_info.get_votes(cursor);
assert_eq!(votes.len(), 1);
let vote_tx = &votes[0];
assert_eq!(
vote_tx.message.recent_blockhash,
parent_bank.last_blockhash()
);
assert_eq!(tower.last_vote_tx_blockhash(), parent_bank.last_blockhash());
assert_eq!(tower.last_voted_slot().unwrap(), parent_bank.slot());
let bank = Bank::new_from_parent(parent_bank, &Pubkey::default(), my_slot);
bank.fill_bank_with_ticks_for_tests();
if make_it_landing {
bank.process_transaction(vote_tx).unwrap();
}
bank.freeze();
progress.entry(my_slot).or_insert_with(|| {
ForkProgress::new_from_bank(
&bank,
&identity_keypair.pubkey(),
my_vote_pubkey,
None,
0,
0,
)
});
bank_forks.write().unwrap().insert(bank);
bank_forks.read().unwrap().get(my_slot).unwrap()
}
#[test]
fn test_replay_stage_last_vote_outside_slot_hashes() {
solana_logger::setup();
let ReplayBlockstoreComponents {
cluster_info,
poh_recorder,
mut tower,
my_pubkey,
vote_simulator,
..
} = replay_blockstore_components(None, 10, None::<GenerateVotes>);
let tower_storage = crate::tower_storage::NullTowerStorage::default();
let VoteSimulator {
mut validator_keypairs,
bank_forks,
mut heaviest_subtree_fork_choice,
mut latest_validator_votes_for_frozen_banks,
mut progress,
..
} = vote_simulator;
let has_new_vote_been_rooted = false;
let mut voted_signatures = vec![];
let identity_keypair = cluster_info.keypair().clone();
let my_vote_keypair = vec![Arc::new(
validator_keypairs.remove(&my_pubkey).unwrap().vote_keypair,
)];
let my_vote_pubkey = my_vote_keypair[0].pubkey();
let bank0 = bank_forks.read().unwrap().get(0).unwrap();
bank0.set_initial_accounts_hash_verification_completed();
// Add a new fork starting from 0 with a bigger slot number; we assume it has a
// bigger weight, but we cannot switch because of lockout.
let other_fork_slot = 1;
let other_fork_bank = Bank::new_from_parent(&bank0, &Pubkey::default(), other_fork_slot);
other_fork_bank.fill_bank_with_ticks_for_tests();
other_fork_bank.freeze();
progress.entry(other_fork_slot).or_insert_with(|| {
ForkProgress::new_from_bank(
&other_fork_bank,
&identity_keypair.pubkey(),
&my_vote_keypair[0].pubkey(),
None,
0,
0,
)
});
bank_forks.write().unwrap().insert(other_fork_bank);
let (voting_sender, voting_receiver) = unbounded();
let mut cursor = Cursor::default();
let mut new_bank = send_vote_in_new_bank(
&bank0,
2,
&my_vote_keypair,
&mut tower,
&identity_keypair,
&mut voted_signatures,
has_new_vote_been_rooted,
&voting_sender,
&voting_receiver,
&cluster_info,
&poh_recorder,
&tower_storage,
true,
&mut cursor,
&bank_forks,
&mut progress,
);
new_bank = send_vote_in_new_bank(
&new_bank,
new_bank.slot() + 1,
&my_vote_keypair,
&mut tower,
&identity_keypair,
&mut voted_signatures,
has_new_vote_been_rooted,
&voting_sender,
&voting_receiver,
&cluster_info,
&poh_recorder,
&tower_storage,
false,
&mut cursor,
&bank_forks,
&mut progress,
);
// Create enough banks on the fork that the last vote falls outside SlotHashes,
// then make sure we now vote at the tip of the fork.
let last_voted_slot = tower.last_voted_slot().unwrap();
while new_bank.is_in_slot_hashes_history(&last_voted_slot) {
let new_slot = new_bank.slot() + 1;
let bank = Bank::new_from_parent(&new_bank, &Pubkey::default(), new_slot);
bank.fill_bank_with_ticks_for_tests();
bank.freeze();
progress.entry(new_slot).or_insert_with(|| {
ForkProgress::new_from_bank(
&bank,
&identity_keypair.pubkey(),
&my_vote_keypair[0].pubkey(),
None,
0,
0,
)
});
bank_forks.write().unwrap().insert(bank);
new_bank = bank_forks.read().unwrap().get(new_slot).unwrap();
}
let tip_of_voted_fork = new_bank.slot();
let mut frozen_banks: Vec<_> = bank_forks
.read()
.unwrap()
.frozen_banks()
.values()
.cloned()
.collect();
ReplayStage::compute_bank_stats(
&my_vote_pubkey,
&bank_forks.read().unwrap().ancestors(),
&mut frozen_banks,
&mut tower,
&mut progress,
&VoteTracker::default(),
&ClusterSlots::default(),
&bank_forks,
&mut heaviest_subtree_fork_choice,
&mut latest_validator_votes_for_frozen_banks,
);
assert_eq!(tower.last_voted_slot(), Some(last_voted_slot));
assert_eq!(progress.my_latest_landed_vote(tip_of_voted_fork), Some(0));
let other_fork_bank = &bank_forks.read().unwrap().get(other_fork_slot).unwrap();
let SelectVoteAndResetForkResult { vote_bank, .. } =
ReplayStage::select_vote_and_reset_forks(
other_fork_bank,
Some(&new_bank),
&bank_forks.read().unwrap().ancestors(),
&bank_forks.read().unwrap().descendants(),
&progress,
&mut tower,
&latest_validator_votes_for_frozen_banks,
&heaviest_subtree_fork_choice,
);
assert!(vote_bank.is_some());
assert_eq!(vote_bank.unwrap().0.slot(), tip_of_voted_fork);
// If last vote is already equal to heaviest_bank_on_same_voted_fork,
// we should not vote.
let last_voted_bank = &bank_forks.read().unwrap().get(last_voted_slot).unwrap();
let SelectVoteAndResetForkResult { vote_bank, .. } =
ReplayStage::select_vote_and_reset_forks(
other_fork_bank,
Some(last_voted_bank),
&bank_forks.read().unwrap().ancestors(),
&bank_forks.read().unwrap().descendants(),
&progress,
&mut tower,
&latest_validator_votes_for_frozen_banks,
&heaviest_subtree_fork_choice,
);
assert!(vote_bank.is_none());
// If last vote is still inside slot hashes history of heaviest_bank_on_same_voted_fork,
// we should not vote.
let last_voted_bank_plus_1 = &bank_forks.read().unwrap().get(last_voted_slot + 1).unwrap();
let SelectVoteAndResetForkResult { vote_bank, .. } =
ReplayStage::select_vote_and_reset_forks(
other_fork_bank,
Some(last_voted_bank_plus_1),
&bank_forks.read().unwrap().ancestors(),
&bank_forks.read().unwrap().descendants(),
&progress,
&mut tower,
&latest_validator_votes_for_frozen_banks,
&heaviest_subtree_fork_choice,
);
assert!(vote_bank.is_none());
// Make last_voted_slot land by updating the progress map; we should not vote.
progress
.entry(new_bank.slot())
.and_modify(|s| s.fork_stats.my_latest_landed_vote = Some(last_voted_slot));
assert!(!new_bank.is_in_slot_hashes_history(&last_voted_slot));
let SelectVoteAndResetForkResult { vote_bank, .. } =
ReplayStage::select_vote_and_reset_forks(
other_fork_bank,
Some(&new_bank),
&bank_forks.read().unwrap().ancestors(),
&bank_forks.read().unwrap().descendants(),
&progress,
&mut tower,
&latest_validator_votes_for_frozen_banks,
&heaviest_subtree_fork_choice,
);
assert!(vote_bank.is_none());
}
#[test]
fn test_retransmit_latest_unpropagated_leader_slot() {
let ReplayBlockstoreComponents {


@@ -8760,6 +8760,17 @@ impl Bank {
&mut error_counters,
)
}
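/// Returns true if `slot` is older than this bank's slot and is still
/// present in this bank's SlotHashes sysvar history.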
pub fn is_in_slot_hashes_history(&self, slot: &Slot) -> bool {
if slot < &self.slot {
if let Ok(sysvar_cache) = self.sysvar_cache.read() {
if let Ok(slot_hashes) = sysvar_cache.get_slot_hashes() {
return slot_hashes.get(slot).is_some();
}
}
}
false
}
}
/// Compute how much an account has changed size. This function is useful when the data size delta


@@ -12322,6 +12322,23 @@ fn test_calculate_fee_with_request_heap_frame_flag() {
);
}
#[test]
fn test_is_in_slot_hashes_history() {
use solana_sdk::slot_hashes::MAX_ENTRIES;
let bank0 = create_simple_test_arc_bank(1);
assert!(!bank0.is_in_slot_hashes_history(&0));
assert!(!bank0.is_in_slot_hashes_history(&1));
let mut last_bank = bank0;
for _ in 0..MAX_ENTRIES {
let new_bank = Arc::new(new_from_parent(&last_bank));
assert!(new_bank.is_in_slot_hashes_history(&0));
last_bank = new_bank;
}
let new_bank = Arc::new(new_from_parent(&last_bank));
assert!(!new_bank.is_in_slot_hashes_history(&0));
}
#[test]
fn test_runtime_feature_enable_with_program_cache() {
solana_logger::setup();