diff --git a/core/src/consensus.rs b/core/src/consensus.rs
index 214e5f8d34..95f5625e8e 100644
--- a/core/src/consensus.rs
+++ b/core/src/consensus.rs
@@ -30,11 +30,11 @@ use std::{
 };
 use thiserror::Error;
 
-#[derive(PartialEq, Clone, Debug)]
+#[derive(PartialEq, Clone, Debug, AbiExample)]
 pub enum SwitchForkDecision {
     SwitchProof(Hash),
-    NoSwitch,
-    FailedSwitchThreshold,
+    SameFork,
+    FailedSwitchThreshold(u64, u64),
 }
 
 impl SwitchForkDecision {
@@ -45,8 +45,11 @@ impl SwitchForkDecision {
         authorized_voter_pubkey: &Pubkey,
     ) -> Option<Instruction> {
         match self {
-            SwitchForkDecision::FailedSwitchThreshold => None,
-            SwitchForkDecision::NoSwitch => Some(vote_instruction::vote(
+            SwitchForkDecision::FailedSwitchThreshold(_, total_stake) => {
+                assert_ne!(*total_stake, 0);
+                None
+            }
+            SwitchForkDecision::SameFork => Some(vote_instruction::vote(
                 vote_account_pubkey,
                 authorized_voter_pubkey,
                 vote,
@@ -61,6 +64,10 @@ impl SwitchForkDecision {
             }
         }
     }
+
+    pub fn can_vote(&self) -> bool {
+        !matches!(self, SwitchForkDecision::FailedSwitchThreshold(_, _))
+    }
 }
 
 pub const VOTE_THRESHOLD_DEPTH: usize = 8;
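A minimal standalone sketch of the reshaped enum and its new helper (not the real
solana code: the u64 in SwitchProof stands in for the Hash payload and the derive
list is trimmed). The point is that can_vote() is a pattern match, not an equality
test, so a failure with any stake payload blocks voting:

    #[derive(PartialEq, Clone, Debug)]
    enum SwitchForkDecision {
        SwitchProof(u64),                // stand-in for SwitchProof(Hash)
        SameFork,                        // renamed from NoSwitch
        FailedSwitchThreshold(u64, u64), // (locked_out_stake, total_stake)
    }

    impl SwitchForkDecision {
        fn can_vote(&self) -> bool {
            // Matches every FailedSwitchThreshold regardless of its payload.
            !matches!(self, SwitchForkDecision::FailedSwitchThreshold(_, _))
        }
    }

    fn main() {
        assert!(SwitchForkDecision::SameFork.can_vote());
        assert!(SwitchForkDecision::SwitchProof(42).can_vote());
        assert!(!SwitchForkDecision::FailedSwitchThreshold(0, 20_000).can_vote());
    }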
@@ -101,6 +108,8 @@ pub struct Tower {
     // This could be emptied after some time; but left intact indefinitely for easier
     // implementation
     stray_restored_slot: Option<Slot>,
+    #[serde(skip)]
+    pub last_switch_threshold_check: Option<(Slot, SwitchForkDecision)>,
 }
 
 impl Default for Tower {
@@ -115,6 +124,7 @@
             path: PathBuf::default(),
             tmp_path: PathBuf::default(),
             stray_restored_slot: Option::default(),
+            last_switch_threshold_check: Option::default(),
         };
         // VoteState::root_slot is ensured to be Some in Tower
         tower.lockouts.root_slot = Some(Slot::default());
@@ -493,7 +503,7 @@ impl Tower {
         false
     }
 
-    pub(crate) fn check_switch_threshold(
+    fn make_check_switch_threshold_decision(
         &self,
         switch_slot: u64,
         ancestors: &HashMap<Slot, HashSet<u64>>,
@@ -520,9 +530,34 @@ impl Tower {
                 // all of them.
                 panic!("no ancestors found with slot: {}", last_voted_slot);
             } else {
-                // bank_forks doesn't have corresponding data for the stray restored last vote,
-                // meaning some inconsistency between saved tower and ledger.
-                // (newer snapshot, or only a saved tower is moved over to new setup?)
+                // This condition shouldn't occur under normal validator operation, indicating
+                // something unusual happened.
+                // Possible causes include: OS/HW crash, validator process crash, or only a
+                // saved tower being moved over to a new setup, etc...
+
+                // However, returning empty ancestors as a fallback here shouldn't result in
+                // slashing by itself (note that we can't fully preclude any kind of slashing
+                // if the failure was at the OS or HW level).
+
+                // Firstly, lockout is ensured elsewhere.
+
+                // Also, there is no risk of optimistic confirmation violation. Although empty
+                // ancestors could result in an incorrect (= larger than actual) locked_out_stake
+                // and a false-positive SwitchProof later in this function, there should be no
+                // such heavier fork candidate in the first place if the last vote (or any of its
+                // unavailable ancestors) was already optimistically confirmed.
+                // The only exception is if another validator is already violating it...
+                if self.is_first_switch_check() && switch_slot < last_voted_slot {
+                    // `switch < last` is needed so that we don't emit this warning merely
+                    // because a newer snapshot was used on validator restart
+                    let message = format!(
+                        "bank_forks doesn't have corresponding data for the stray restored \
+                         last vote({}), meaning some inconsistency between saved tower and ledger.",
+                        last_voted_slot
+                    );
+                    warn!("{}", message);
+                    datapoint_warn!("tower_warn", ("warn", message, String));
+                }
+                &empty_ancestors
             }
         });
@@ -532,7 +567,7 @@
         if switch_slot == last_voted_slot || switch_slot_ancestors.contains(&last_voted_slot) {
             // If the `switch_slot` is a descendant of the last vote,
             // no switching proof is necessary
-            return SwitchForkDecision::NoSwitch;
+            return SwitchForkDecision::SameFork;
         }
 
         // Should never consider switching to an ancestor
@@ -598,7 +633,7 @@
             }
 
             // Only count lockouts on slots that are:
-            // 1) Not ancestors of `last_vote`
+            // 1) Not ancestors of `last_vote`, i.e. on a different fork
             // 2) Not from before the current root as we can't determine if
             // anything before the root was an ancestor of `last_vote` or not
             if !last_vote_ancestors.contains(lockout_interval_start)
@@ -622,10 +657,43 @@
                 if (locked_out_stake as f64 / total_stake as f64) > SWITCH_FORK_THRESHOLD {
                     SwitchForkDecision::SwitchProof(switch_proof)
                 } else {
-                    SwitchForkDecision::FailedSwitchThreshold
+                    SwitchForkDecision::FailedSwitchThreshold(locked_out_stake, total_stake)
                 }
             })
-            .unwrap_or(SwitchForkDecision::NoSwitch)
+            .unwrap_or(SwitchForkDecision::SameFork)
+    }
+
+    pub(crate) fn check_switch_threshold(
+        &mut self,
+        switch_slot: u64,
+        ancestors: &HashMap<Slot, HashSet<u64>>,
+        descendants: &HashMap<Slot, HashSet<u64>>,
+        progress: &ProgressMap,
+        total_stake: u64,
+        epoch_vote_accounts: &HashMap<Pubkey, (u64, ArcVoteAccount)>,
+    ) -> SwitchForkDecision {
+        let decision = self.make_check_switch_threshold_decision(
+            switch_slot,
+            ancestors,
+            descendants,
+            progress,
+            total_stake,
+            epoch_vote_accounts,
+        );
+        let new_check = Some((switch_slot, decision.clone()));
+        if new_check != self.last_switch_threshold_check {
+            trace!(
+                "new switch threshold check: slot {}: {:?}",
+                switch_slot,
+                decision,
+            );
+            self.last_switch_threshold_check = new_check;
+        }
+        decision
+    }
+
+    fn is_first_switch_check(&self) -> bool {
+        self.last_switch_threshold_check.is_none()
+    }
 
     pub fn check_vote_stake_threshold(
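check_switch_threshold recomputes the decision on every call; the cached
(slot, decision) pair exists only so an unchanged result isn't re-logged, and so
the stray-tower warning above fires at most once (via is_first_switch_check). A
self-contained sketch of that memoization pattern, where compute() is a
hypothetical stand-in for make_check_switch_threshold_decision:

    #[derive(Clone, PartialEq, Debug)]
    enum Decision {
        SameFork,
        FailedSwitchThreshold(u64, u64),
    }

    #[derive(Default)]
    struct Checker {
        last_check: Option<(u64, Decision)>,
    }

    impl Checker {
        // Hypothetical stand-in for make_check_switch_threshold_decision().
        fn compute(&self, slot: u64) -> Decision {
            if slot % 2 == 0 {
                Decision::SameFork
            } else {
                Decision::FailedSwitchThreshold(0, 1)
            }
        }

        fn check(&mut self, slot: u64) -> Decision {
            let decision = self.compute(slot);
            let new_check = Some((slot, decision.clone()));
            if new_check != self.last_check {
                // Log only when the (slot, decision) pair actually changes.
                println!("new switch threshold check: slot {}: {:?}", slot, decision);
                self.last_check = new_check;
            }
            decision
        }

        fn is_first_check(&self) -> bool {
            self.last_check.is_none()
        }
    }

    fn main() {
        let mut checker = Checker::default();
        assert!(checker.is_first_check()); // one-shot warnings key off this
        checker.check(2); // logs
        checker.check(2); // silent: same (slot, decision) pair as last time
        checker.check(3); // logs again: the pair changed
        assert!(!checker.is_first_check());
    }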
} + trace!("persisted votes: {:?}", self.voted_slots()); fs::rename(&new_filename, &filename)?; // self.path.parent().sync_all() hurts performance same as the above sync @@ -1047,6 +1116,16 @@ pub enum TowerError { FatallyInconsistent(&'static str), } +impl TowerError { + pub fn is_file_missing(&self) -> bool { + if let TowerError::IOError(io_err) = &self { + io_err.kind() == std::io::ErrorKind::NotFound + } else { + false + } + } +} + #[frozen_abi(digest = "Gaxfwvx5MArn52mKZQgzHmDCyn5YfCuTHvp5Et3rFfpp")] #[derive(Default, Clone, Serialize, Deserialize, Debug, PartialEq, AbiExample)] pub struct SavedTower { @@ -1267,7 +1346,7 @@ pub mod test { &ancestors, &descendants, &self.progress, - &tower, + tower, ); // Make sure this slot isn't locked out or failing threshold @@ -1464,11 +1543,11 @@ pub mod test { #[test] fn test_to_vote_instruction() { let vote = Vote::default(); - let mut decision = SwitchForkDecision::FailedSwitchThreshold; + let mut decision = SwitchForkDecision::FailedSwitchThreshold(0, 1); assert!(decision .to_vote_instruction(vote.clone(), &Pubkey::default(), &Pubkey::default()) .is_none()); - decision = SwitchForkDecision::NoSwitch; + decision = SwitchForkDecision::SameFork; assert_eq!( decision.to_vote_instruction(vote.clone(), &Pubkey::default(), &Pubkey::default()), Some(vote_instruction::vote( @@ -1571,7 +1650,7 @@ pub mod test { total_stake, bank0.epoch_vote_accounts(0).unwrap(), ), - SwitchForkDecision::NoSwitch + SwitchForkDecision::SameFork ); // Trying to switch to another fork at 110 should fail @@ -1584,7 +1663,7 @@ pub mod test { total_stake, bank0.epoch_vote_accounts(0).unwrap(), ), - SwitchForkDecision::FailedSwitchThreshold + SwitchForkDecision::FailedSwitchThreshold(0, 20000) ); // Adding another validator lockout on a descendant of last vote should @@ -1599,7 +1678,7 @@ pub mod test { total_stake, bank0.epoch_vote_accounts(0).unwrap(), ), - SwitchForkDecision::FailedSwitchThreshold + SwitchForkDecision::FailedSwitchThreshold(0, 20000) ); // Adding another validator lockout on an ancestor of last vote should @@ -1614,7 +1693,7 @@ pub mod test { total_stake, bank0.epoch_vote_accounts(0).unwrap(), ), - SwitchForkDecision::FailedSwitchThreshold + SwitchForkDecision::FailedSwitchThreshold(0, 20000) ); // Adding another validator lockout on a different fork, but the lockout @@ -1629,7 +1708,7 @@ pub mod test { total_stake, bank0.epoch_vote_accounts(0).unwrap(), ), - SwitchForkDecision::FailedSwitchThreshold + SwitchForkDecision::FailedSwitchThreshold(0, 20000) ); // Adding another validator lockout on a different fork, and the lockout @@ -1646,7 +1725,7 @@ pub mod test { total_stake, bank0.epoch_vote_accounts(0).unwrap(), ), - SwitchForkDecision::FailedSwitchThreshold + SwitchForkDecision::FailedSwitchThreshold(0, 20000) ); // Adding another validator lockout on a different fork, and the lockout @@ -1697,7 +1776,7 @@ pub mod test { total_stake, bank0.epoch_vote_accounts(0).unwrap(), ), - SwitchForkDecision::FailedSwitchThreshold + SwitchForkDecision::FailedSwitchThreshold(0, 20000) ); } @@ -2365,7 +2444,7 @@ pub mod test { total_stake, bank0.epoch_vote_accounts(0).unwrap(), ), - SwitchForkDecision::NoSwitch + SwitchForkDecision::SameFork ); // Trying to switch to another fork at 110 should fail @@ -2378,7 +2457,7 @@ pub mod test { total_stake, bank0.epoch_vote_accounts(0).unwrap(), ), - SwitchForkDecision::FailedSwitchThreshold + SwitchForkDecision::FailedSwitchThreshold(0, 20000) ); vote_simulator.simulate_lockout_interval(111, (10, 49), &other_vote_account); @@ 
@@ -1267,7 +1346,7 @@ pub mod test {
                 &ancestors,
                 &descendants,
                 &self.progress,
-                &tower,
+                tower,
             );
 
             // Make sure this slot isn't locked out or failing threshold
@@ -1464,11 +1543,11 @@ pub mod test {
     #[test]
     fn test_to_vote_instruction() {
         let vote = Vote::default();
-        let mut decision = SwitchForkDecision::FailedSwitchThreshold;
+        let mut decision = SwitchForkDecision::FailedSwitchThreshold(0, 1);
         assert!(decision
             .to_vote_instruction(vote.clone(), &Pubkey::default(), &Pubkey::default())
             .is_none());
-        decision = SwitchForkDecision::NoSwitch;
+        decision = SwitchForkDecision::SameFork;
         assert_eq!(
             decision.to_vote_instruction(vote.clone(), &Pubkey::default(), &Pubkey::default()),
             Some(vote_instruction::vote(
@@ -1571,7 +1650,7 @@
                 total_stake,
                 bank0.epoch_vote_accounts(0).unwrap(),
             ),
-            SwitchForkDecision::NoSwitch
+            SwitchForkDecision::SameFork
         );
 
         // Trying to switch to another fork at 110 should fail
@@ -1584,7 +1663,7 @@
                 total_stake,
                 bank0.epoch_vote_accounts(0).unwrap(),
             ),
-            SwitchForkDecision::FailedSwitchThreshold
+            SwitchForkDecision::FailedSwitchThreshold(0, 20000)
         );
 
         // Adding another validator lockout on a descendant of last vote should
@@ -1599,7 +1678,7 @@
                 total_stake,
                 bank0.epoch_vote_accounts(0).unwrap(),
             ),
-            SwitchForkDecision::FailedSwitchThreshold
+            SwitchForkDecision::FailedSwitchThreshold(0, 20000)
         );
 
         // Adding another validator lockout on an ancestor of last vote should
@@ -1614,7 +1693,7 @@
                 total_stake,
                 bank0.epoch_vote_accounts(0).unwrap(),
             ),
-            SwitchForkDecision::FailedSwitchThreshold
+            SwitchForkDecision::FailedSwitchThreshold(0, 20000)
         );
 
         // Adding another validator lockout on a different fork, but the lockout
@@ -1629,7 +1708,7 @@
                 total_stake,
                 bank0.epoch_vote_accounts(0).unwrap(),
             ),
-            SwitchForkDecision::FailedSwitchThreshold
+            SwitchForkDecision::FailedSwitchThreshold(0, 20000)
         );
 
         // Adding another validator lockout on a different fork, and the lockout
@@ -1646,7 +1725,7 @@
                 total_stake,
                 bank0.epoch_vote_accounts(0).unwrap(),
             ),
-            SwitchForkDecision::FailedSwitchThreshold
+            SwitchForkDecision::FailedSwitchThreshold(0, 20000)
         );
 
         // Adding another validator lockout on a different fork, and the lockout
@@ -1697,7 +1776,7 @@
                 total_stake,
                 bank0.epoch_vote_accounts(0).unwrap(),
             ),
-            SwitchForkDecision::FailedSwitchThreshold
+            SwitchForkDecision::FailedSwitchThreshold(0, 20000)
         );
     }
 
@@ -2365,7 +2444,7 @@
                 total_stake,
                 bank0.epoch_vote_accounts(0).unwrap(),
             ),
-            SwitchForkDecision::NoSwitch
+            SwitchForkDecision::SameFork
         );
 
         // Trying to switch to another fork at 110 should fail
@@ -2378,7 +2457,7 @@
                 total_stake,
                 bank0.epoch_vote_accounts(0).unwrap(),
             ),
-            SwitchForkDecision::FailedSwitchThreshold
+            SwitchForkDecision::FailedSwitchThreshold(0, 20000)
         );
 
         vote_simulator.simulate_lockout_interval(111, (10, 49), &other_vote_account);
@@ -2456,7 +2535,7 @@ pub mod test {
                 total_stake,
                 bank0.epoch_vote_accounts(0).unwrap(),
             ),
-            SwitchForkDecision::FailedSwitchThreshold
+            SwitchForkDecision::FailedSwitchThreshold(0, 20000)
         );
 
         // Add lockout_interval which should be excluded
@@ -2470,7 +2549,7 @@
                 total_stake,
                 bank0.epoch_vote_accounts(0).unwrap(),
             ),
-            SwitchForkDecision::FailedSwitchThreshold
+            SwitchForkDecision::FailedSwitchThreshold(0, 20000)
         );
 
         // Add lockout_interval which should not be excluded
diff --git a/core/src/replay_stage.rs b/core/src/replay_stage.rs
index 83be7431c1..b53e734755 100644
--- a/core/src/replay_stage.rs
+++ b/core/src/replay_stage.rs
@@ -408,7 +408,7 @@
                         &ancestors,
                         &descendants,
                         &progress,
-                        &tower,
+                        &mut tower,
                     );
                 select_vote_and_reset_forks_time.stop();
@@ -1525,7 +1525,7 @@
         ancestors: &HashMap<u64, HashSet<u64>>,
         descendants: &HashMap<u64, HashSet<u64>>,
         progress: &ProgressMap,
-        tower: &Tower,
+        tower: &mut Tower,
     ) -> SelectVoteAndResetForkResult {
         // Try to vote on the actual heaviest fork. If the heaviest bank is
         // locked out or fails the threshold check, the validator will:
@@ -1552,7 +1552,7 @@
                 .epoch_vote_accounts(heaviest_bank.epoch())
                 .expect("Bank epoch vote accounts must contain entry for the bank's own epoch"),
             );
-            if switch_fork_decision == SwitchForkDecision::FailedSwitchThreshold {
+            if let SwitchForkDecision::FailedSwitchThreshold(_, _) = switch_fork_decision {
                 // If we can't switch, then reset to the next votable
                 // bank on the same fork as our last vote, but don't vote
                 info!(
@@ -1601,7 +1601,7 @@
             if !is_locked_out
                 && vote_threshold
                 && propagation_confirmed
-                && switch_fork_decision != SwitchForkDecision::FailedSwitchThreshold
+                && switch_fork_decision.can_vote()
             {
                 info!("voting: {} {}", bank.slot(), fork_weight);
                 SelectVoteAndResetForkResult {
diff --git a/core/src/validator.rs b/core/src/validator.rs
index 6b751a2b6c..7d032a074e 100644
--- a/core/src/validator.rs
+++ b/core/src/validator.rs
@@ -6,7 +6,7 @@ use crate::{
     cluster_info::{ClusterInfo, Node},
     cluster_info_vote_listener::VoteTracker,
     completed_data_sets_service::CompletedDataSetsService,
-    consensus::{reconcile_blockstore_roots_with_tower, Tower, TowerError},
+    consensus::{reconcile_blockstore_roots_with_tower, Tower},
     contact_info::ContactInfo,
     gossip_service::GossipService,
     optimistically_confirmed_bank_tracker::{
@@ -730,12 +730,7 @@ fn post_process_restored_tower(
         .unwrap_or_else(|err| {
             let voting_has_been_active =
                 active_vote_account_exists_in_bank(&bank_forks.working_bank(), &vote_account);
-            let saved_tower_is_missing = if let TowerError::IOError(io_err) = &err {
-                io_err.kind() == std::io::ErrorKind::NotFound
-            } else {
-                false
-            };
-            if !saved_tower_is_missing {
+            if !err.is_file_missing() {
                 datapoint_error!(
                     "tower_error",
                     (
@@ -753,7 +748,7 @@ fn post_process_restored_tower(
                 );
                 process::exit(1);
             }
-            if saved_tower_is_missing && !voting_has_been_active {
+            if err.is_file_missing() && !voting_has_been_active {
                 // Currently, don't protect against spoofed snapshots with no tower at all
                 info!(
                     "Ignoring expected failed tower restore because this is the initial \
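A note on why the replay_stage comparisons change shape above: once
FailedSwitchThreshold carries (locked_out_stake, total_stake), an equality test
can only name one concrete payload, so the gate uses pattern matching (if let,
or the can_vote() helper) instead. A small illustration with stand-in values:

    #[allow(dead_code)] // SwitchProof/SameFork exist only to mirror the real enum
    #[derive(PartialEq, Clone, Debug)]
    enum SwitchForkDecision {
        SwitchProof(u64), // stand-in for the real Hash payload
        SameFork,
        FailedSwitchThreshold(u64, u64),
    }

    impl SwitchForkDecision {
        fn can_vote(&self) -> bool {
            !matches!(self, SwitchForkDecision::FailedSwitchThreshold(_, _))
        }
    }

    fn main() {
        let decision = SwitchForkDecision::FailedSwitchThreshold(5, 20_000);
        // An equality test against one concrete failure value misses this one...
        assert!(decision != SwitchForkDecision::FailedSwitchThreshold(0, 20_000));
        // ...while the pattern-based helper still blocks the vote.
        assert!(!decision.can_vote());
    }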