Persistent tower (#10718)
* Save/restore Tower * Avoid unwrap() * Rebase cleanups * Forcibly pass test * Correct reconciliation of votes after validator resume * d b g * Add more tests * fsync and fix test * Add test * Fix fmt * Debug * Fix tests... * save * Clarify error message and code cleaning around it * Move most of code out of tower save hot codepath * Proper comment for the lack of fsync on tower * Clean up * Clean up * Simpler type alias * Manage tower-restored ancestor slots without banks * Add comment * Extract long code blocks... * Add comment * Simplify returned tuple... * Tweak too aggressive log * Fix typo... * Add test * Update comment * Improve test to require non-empty stray restored slots * Measure tower save and dump all tower contents * Log adjust and add threshold related assertions * cleanup adjust * Properly lower stray restored slots priority... * Rust fmt * Fix test.... * Clarify comments a bit and add TowerError::TooNew * Further clean-up around TowerError * Truly create ancestors by excluding last vote slot * Add comment for stray_restored_slots * Add comment for stray_restored_slots * Use BTreeSet * Consider root_slot into post-replay adjustment * Tweak logging * Add test for stray_restored_ancestors * Reorder some code * Better names for unit tests * Add frozen_abi to SavedTower * Fold long lines * Tweak stray ancestors and too old slot history * Re-adjust error condition of too old slot history * Test normal ancestors is checked before stray ones * Fix conflict, update tests, adjust behavior a bit * Fix test * Address review comments * Last touch! * Immediately after creating cleaning pr * Revert stray slots * Revert comment... * Report error as metrics * Revert not to panic! and ignore unfixable test... * Normalize lockouts.root_slot more strictly * Add comments for panic! 
and more assertions * Proper initialize root without vote account * Clarify code and comments based on review feedback * Fix rebase * Further simplify based on assured tower root * Reorder code for more readability Co-authored-by: Michael Vines <mvines@gmail.com>
This commit is contained in:
parent
28f2c15597
commit
cb8661bd49
|
@ -0,0 +1,36 @@
|
||||||
|
#![feature(test)]
|
||||||
|
|
||||||
|
extern crate solana_core;
|
||||||
|
extern crate test;
|
||||||
|
|
||||||
|
use solana_core::consensus::Tower;
|
||||||
|
use solana_runtime::bank::Bank;
|
||||||
|
use solana_runtime::bank_forks::BankForks;
|
||||||
|
use solana_sdk::{
|
||||||
|
pubkey::Pubkey,
|
||||||
|
signature::{Keypair, Signer},
|
||||||
|
};
|
||||||
|
use std::sync::Arc;
|
||||||
|
use tempfile::TempDir;
|
||||||
|
use test::Bencher;
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn bench_save_tower(bench: &mut Bencher) {
|
||||||
|
let dir = TempDir::new().unwrap();
|
||||||
|
let path = dir.path();
|
||||||
|
|
||||||
|
let vote_account_pubkey = &Pubkey::default();
|
||||||
|
let node_keypair = Arc::new(Keypair::new());
|
||||||
|
let heaviest_bank = BankForks::new(Bank::default()).working_bank();
|
||||||
|
let tower = Tower::new(
|
||||||
|
&node_keypair.pubkey(),
|
||||||
|
&vote_account_pubkey,
|
||||||
|
0,
|
||||||
|
&heaviest_bank,
|
||||||
|
&path,
|
||||||
|
);
|
||||||
|
|
||||||
|
bench.iter(move || {
|
||||||
|
tower.save(&node_keypair).unwrap();
|
||||||
|
});
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
|
@ -491,6 +491,44 @@ impl HeaviestSubtreeForkChoice {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the slot that `best_slot()` reports for the tower's last voted slot,
/// i.e. the heaviest slot on the fork this validator last voted on.
///
/// Returns `None` when:
/// * the tower has no last voted slot,
/// * the best slot for the last voted slot is the last voted slot itself, or
/// * the last vote is a stray vote (restored from a saved tower) for which
///   `best_slot()` has no data.
///
/// # Panics
///
/// Panics if `best_slot()` returns `None` for a last voted slot that is *not*
/// stray, because that indicates a broken invariant (see the comment below).
fn heaviest_slot_on_same_voted_fork(&self, tower: &Tower) -> Option<Slot> {
    let last_voted_slot = tower.last_voted_slot()?;

    match self.best_slot(last_voted_slot) {
        // The last voted slot is already the heaviest one on its fork.
        Some(heaviest_slot) if heaviest_slot == last_voted_slot => None,
        Some(heaviest_slot) => Some(heaviest_slot),
        None if tower.is_stray_last_vote() => {
            // fork_infos doesn't have corresponding data for the stray restored last vote,
            // meaning some inconsistency between saved tower and ledger.
            // (newer snapshot, or only a saved tower is moved over to new setup?)
            None
        }
        None => {
            // Unless last vote is stray, self.best_slot(last_voted_slot) must return
            // Some(_), justifying to panic! here.
            // Also, adjust_lockouts_after_replay() correctly makes last_voted_slot None,
            // if all saved votes are ancestors of replayed_root_slot. So this code shouldn't be
            // touched in that case as well.
            // In other words, except being stray, all other slots have been voted on while this
            // validator has been running, so we must be able to fetch best_slots for all of
            // them.
            //
            // Note: a `\` line continuation drops the newline and all leading whitespace
            // of the next line, so the separating space must precede the backslash.
            panic!(
                "a bank at last_voted_slot({}) is a frozen bank so must have been \
                 added to heaviest_subtree_fork_choice at time of freezing",
                last_voted_slot,
            )
        }
    }
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
fn set_stake_voted_at(&mut self, slot: Slot, stake_voted_at: u64) {
|
fn set_stake_voted_at(&mut self, slot: Slot, stake_voted_at: u64) {
|
||||||
self.fork_infos.get_mut(&slot).unwrap().stake_voted_at = stake_voted_at;
|
self.fork_infos.get_mut(&slot).unwrap().stake_voted_at = stake_voted_at;
|
||||||
|
@ -550,26 +588,17 @@ impl ForkChoice for HeaviestSubtreeForkChoice {
|
||||||
_ancestors: &HashMap<u64, HashSet<u64>>,
|
_ancestors: &HashMap<u64, HashSet<u64>>,
|
||||||
bank_forks: &RwLock<BankForks>,
|
bank_forks: &RwLock<BankForks>,
|
||||||
) -> (Arc<Bank>, Option<Arc<Bank>>) {
|
) -> (Arc<Bank>, Option<Arc<Bank>>) {
|
||||||
let last_voted_slot = tower.last_voted_slot();
|
|
||||||
let heaviest_slot_on_same_voted_fork = last_voted_slot.map(|last_voted_slot| {
|
|
||||||
let heaviest_slot_on_same_voted_fork =
|
|
||||||
self.best_slot(last_voted_slot).expect("a bank at last_voted_slot is a frozen bank so must have been added to heaviest_subtree_fork_choice at time of freezing");
|
|
||||||
if heaviest_slot_on_same_voted_fork == last_voted_slot {
|
|
||||||
None
|
|
||||||
} else {
|
|
||||||
Some(heaviest_slot_on_same_voted_fork)
|
|
||||||
}
|
|
||||||
}).unwrap_or(None);
|
|
||||||
let heaviest_slot = self.best_overall_slot();
|
|
||||||
let r_bank_forks = bank_forks.read().unwrap();
|
let r_bank_forks = bank_forks.read().unwrap();
|
||||||
|
|
||||||
(
|
(
|
||||||
r_bank_forks.get(heaviest_slot).unwrap().clone(),
|
r_bank_forks.get(self.best_overall_slot()).unwrap().clone(),
|
||||||
heaviest_slot_on_same_voted_fork.map(|heaviest_slot_on_same_voted_fork| {
|
self.heaviest_slot_on_same_voted_fork(tower)
|
||||||
r_bank_forks
|
.map(|heaviest_slot_on_same_voted_fork| {
|
||||||
.get(heaviest_slot_on_same_voted_fork)
|
r_bank_forks
|
||||||
.unwrap()
|
.get(heaviest_slot_on_same_voted_fork)
|
||||||
.clone()
|
.unwrap()
|
||||||
}),
|
.clone()
|
||||||
|
}),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -611,6 +640,7 @@ mod test {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::consensus::test::VoteSimulator;
|
use crate::consensus::test::VoteSimulator;
|
||||||
use solana_runtime::{bank::Bank, bank_utils};
|
use solana_runtime::{bank::Bank, bank_utils};
|
||||||
|
use solana_sdk::{hash::Hash, slot_history::SlotHistory};
|
||||||
use std::{collections::HashSet, ops::Range};
|
use std::{collections::HashSet, ops::Range};
|
||||||
use trees::tr;
|
use trees::tr;
|
||||||
|
|
||||||
|
@ -1490,6 +1520,48 @@ mod test {
|
||||||
assert!(heaviest_subtree_fork_choice.subtree_diff(0, 6).is_empty());
|
assert!(heaviest_subtree_fork_choice.subtree_diff(0, 6).is_empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checks `heaviest_slot_on_same_voted_fork()` for a normal last vote, for a
/// stray restored last vote that fork choice knows about, and for a stray
/// restored last vote that fork choice has no data for.
#[test]
fn test_stray_restored_slot() {
    let fork_choice = HeaviestSubtreeForkChoice::new_from_tree(tr(0) / (tr(1) / tr(2)));

    let mut tower = Tower::new_for_tests(10, 0.9);
    tower.record_vote(1, Hash::default());

    // A vote recorded while running is not stray.
    assert!(!tower.is_stray_last_vote());
    assert_eq!(
        fork_choice.heaviest_slot_on_same_voted_fork(&tower),
        Some(2)
    );

    // Make slot 1 (existing in bank_forks) a restored stray slot
    let mut slot_history = SlotHistory::default();
    slot_history.add(0);
    // Work around TooOldSlotHistory
    slot_history.add(999);
    tower = tower
        .adjust_lockouts_after_replay(0, &slot_history)
        .unwrap();

    // The vote is stray now, but fork choice still has data for slot 1.
    assert!(tower.is_stray_last_vote());
    assert_eq!(
        fork_choice.heaviest_slot_on_same_voted_fork(&tower),
        Some(2)
    );

    // Make slot 3 (NOT existing in bank_forks) a restored stray slot
    tower.record_vote(3, Hash::default());
    tower = tower
        .adjust_lockouts_after_replay(0, &slot_history)
        .unwrap();

    // No fork-choice data for the stray slot 3: the result is None, not a panic.
    assert!(tower.is_stray_last_vote());
    assert_eq!(
        fork_choice.heaviest_slot_on_same_voted_fork(&tower),
        None
    );
}
|
||||||
|
|
||||||
fn setup_forks() -> HeaviestSubtreeForkChoice {
|
fn setup_forks() -> HeaviestSubtreeForkChoice {
|
||||||
/*
|
/*
|
||||||
Build fork structure:
|
Build fork structure:
|
||||||
|
|
|
@ -219,6 +219,7 @@ impl ReplayStage {
|
||||||
cluster_info: Arc<ClusterInfo>,
|
cluster_info: Arc<ClusterInfo>,
|
||||||
ledger_signal_receiver: Receiver<bool>,
|
ledger_signal_receiver: Receiver<bool>,
|
||||||
poh_recorder: Arc<Mutex<PohRecorder>>,
|
poh_recorder: Arc<Mutex<PohRecorder>>,
|
||||||
|
mut tower: Tower,
|
||||||
vote_tracker: Arc<VoteTracker>,
|
vote_tracker: Arc<VoteTracker>,
|
||||||
cluster_slots: Arc<ClusterSlots>,
|
cluster_slots: Arc<ClusterSlots>,
|
||||||
retransmit_slots_sender: RetransmitSlotsSender,
|
retransmit_slots_sender: RetransmitSlotsSender,
|
||||||
|
@ -255,53 +256,16 @@ impl ReplayStage {
|
||||||
let mut all_pubkeys = PubkeyReferences::default();
|
let mut all_pubkeys = PubkeyReferences::default();
|
||||||
let verify_recyclers = VerifyRecyclers::default();
|
let verify_recyclers = VerifyRecyclers::default();
|
||||||
let _exit = Finalizer::new(exit.clone());
|
let _exit = Finalizer::new(exit.clone());
|
||||||
let mut progress = ProgressMap::default();
|
let (
|
||||||
let mut frozen_banks: Vec<_> = bank_forks
|
mut progress,
|
||||||
.read()
|
mut heaviest_subtree_fork_choice,
|
||||||
.unwrap()
|
unlock_heaviest_subtree_fork_choice_slot,
|
||||||
.frozen_banks()
|
) = Self::initialize_progress_and_fork_choice_with_locked_bank_forks(
|
||||||
.values()
|
&bank_forks,
|
||||||
.cloned()
|
&my_pubkey,
|
||||||
.collect();
|
&vote_account,
|
||||||
|
);
|
||||||
frozen_banks.sort_by_key(|bank| bank.slot());
|
|
||||||
|
|
||||||
// Initialize progress map with any root banks
|
|
||||||
for bank in &frozen_banks {
|
|
||||||
let prev_leader_slot = progress.get_bank_prev_leader_slot(bank);
|
|
||||||
progress.insert(
|
|
||||||
bank.slot(),
|
|
||||||
ForkProgress::new_from_bank(
|
|
||||||
bank,
|
|
||||||
&my_pubkey,
|
|
||||||
&vote_account,
|
|
||||||
prev_leader_slot,
|
|
||||||
0,
|
|
||||||
0,
|
|
||||||
),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
let root_bank = bank_forks.read().unwrap().root_bank().clone();
|
|
||||||
let root = root_bank.slot();
|
|
||||||
let unlock_heaviest_subtree_fork_choice_slot =
|
|
||||||
Self::get_unlock_heaviest_subtree_fork_choice(root_bank.cluster_type());
|
|
||||||
let mut heaviest_subtree_fork_choice =
|
|
||||||
HeaviestSubtreeForkChoice::new_from_frozen_banks(root, &frozen_banks);
|
|
||||||
let mut bank_weight_fork_choice = BankWeightForkChoice::default();
|
let mut bank_weight_fork_choice = BankWeightForkChoice::default();
|
||||||
let heaviest_bank = if root > unlock_heaviest_subtree_fork_choice_slot {
|
|
||||||
bank_forks
|
|
||||||
.read()
|
|
||||||
.unwrap()
|
|
||||||
.get(heaviest_subtree_fork_choice.best_overall_slot())
|
|
||||||
.expect(
|
|
||||||
"The best overall slot must be one of `frozen_banks` which all
|
|
||||||
exist in bank_forks",
|
|
||||||
)
|
|
||||||
.clone()
|
|
||||||
} else {
|
|
||||||
Tower::find_heaviest_bank(&bank_forks, &my_pubkey).unwrap_or(root_bank)
|
|
||||||
};
|
|
||||||
let mut tower = Tower::new(&my_pubkey, &vote_account, root, &heaviest_bank);
|
|
||||||
let mut current_leader = None;
|
let mut current_leader = None;
|
||||||
let mut last_reset = Hash::default();
|
let mut last_reset = Hash::default();
|
||||||
let mut partition_exists = false;
|
let mut partition_exists = false;
|
||||||
|
@ -652,6 +616,65 @@ impl ReplayStage {
|
||||||
.unwrap_or(true)
|
.unwrap_or(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn initialize_progress_and_fork_choice_with_locked_bank_forks(
|
||||||
|
bank_forks: &RwLock<BankForks>,
|
||||||
|
my_pubkey: &Pubkey,
|
||||||
|
vote_account: &Pubkey,
|
||||||
|
) -> (ProgressMap, HeaviestSubtreeForkChoice, Slot) {
|
||||||
|
let (root_bank, frozen_banks) = {
|
||||||
|
let bank_forks = bank_forks.read().unwrap();
|
||||||
|
(
|
||||||
|
bank_forks.root_bank().clone(),
|
||||||
|
bank_forks.frozen_banks().values().cloned().collect(),
|
||||||
|
)
|
||||||
|
};
|
||||||
|
|
||||||
|
Self::initialize_progress_and_fork_choice(
|
||||||
|
&root_bank,
|
||||||
|
frozen_banks,
|
||||||
|
&my_pubkey,
|
||||||
|
&vote_account,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn initialize_progress_and_fork_choice(
|
||||||
|
root_bank: &Arc<Bank>,
|
||||||
|
mut frozen_banks: Vec<Arc<Bank>>,
|
||||||
|
my_pubkey: &Pubkey,
|
||||||
|
vote_account: &Pubkey,
|
||||||
|
) -> (ProgressMap, HeaviestSubtreeForkChoice, Slot) {
|
||||||
|
let mut progress = ProgressMap::default();
|
||||||
|
|
||||||
|
frozen_banks.sort_by_key(|bank| bank.slot());
|
||||||
|
|
||||||
|
// Initialize progress map with any root banks
|
||||||
|
for bank in &frozen_banks {
|
||||||
|
let prev_leader_slot = progress.get_bank_prev_leader_slot(bank);
|
||||||
|
progress.insert(
|
||||||
|
bank.slot(),
|
||||||
|
ForkProgress::new_from_bank(
|
||||||
|
bank,
|
||||||
|
&my_pubkey,
|
||||||
|
&vote_account,
|
||||||
|
prev_leader_slot,
|
||||||
|
0,
|
||||||
|
0,
|
||||||
|
),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let root = root_bank.slot();
|
||||||
|
let unlock_heaviest_subtree_fork_choice_slot =
|
||||||
|
Self::get_unlock_heaviest_subtree_fork_choice(root_bank.cluster_type());
|
||||||
|
let heaviest_subtree_fork_choice =
|
||||||
|
HeaviestSubtreeForkChoice::new_from_frozen_banks(root, &frozen_banks);
|
||||||
|
|
||||||
|
(
|
||||||
|
progress,
|
||||||
|
heaviest_subtree_fork_choice,
|
||||||
|
unlock_heaviest_subtree_fork_choice_slot,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
fn report_memory(
|
fn report_memory(
|
||||||
allocated: &solana_measure::thread_mem_usage::Allocatedp,
|
allocated: &solana_measure::thread_mem_usage::Allocatedp,
|
||||||
name: &'static str,
|
name: &'static str,
|
||||||
|
@ -1015,7 +1038,15 @@ impl ReplayStage {
|
||||||
}
|
}
|
||||||
trace!("handle votable bank {}", bank.slot());
|
trace!("handle votable bank {}", bank.slot());
|
||||||
let (vote, tower_index) = tower.new_vote_from_bank(bank, vote_account_pubkey);
|
let (vote, tower_index) = tower.new_vote_from_bank(bank, vote_account_pubkey);
|
||||||
if let Some(new_root) = tower.record_bank_vote(vote) {
|
let new_root = tower.record_bank_vote(vote);
|
||||||
|
let last_vote = tower.last_vote_and_timestamp();
|
||||||
|
|
||||||
|
if let Err(err) = tower.save(&cluster_info.keypair) {
|
||||||
|
error!("Unable to save tower: {:?}", err);
|
||||||
|
std::process::exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(new_root) = new_root {
|
||||||
// get the root bank before squash
|
// get the root bank before squash
|
||||||
let root_bank = bank_forks
|
let root_bank = bank_forks
|
||||||
.read()
|
.read()
|
||||||
|
@ -1075,7 +1106,7 @@ impl ReplayStage {
|
||||||
bank,
|
bank,
|
||||||
vote_account_pubkey,
|
vote_account_pubkey,
|
||||||
authorized_voter_keypairs,
|
authorized_voter_keypairs,
|
||||||
tower.last_vote_and_timestamp(),
|
last_vote,
|
||||||
tower_index,
|
tower_index,
|
||||||
switch_fork_decision,
|
switch_fork_decision,
|
||||||
);
|
);
|
||||||
|
|
|
@ -10,6 +10,7 @@ use crate::{
|
||||||
cluster_info_vote_listener::{VerifiedVoteReceiver, VoteTracker},
|
cluster_info_vote_listener::{VerifiedVoteReceiver, VoteTracker},
|
||||||
cluster_slots::ClusterSlots,
|
cluster_slots::ClusterSlots,
|
||||||
completed_data_sets_service::CompletedDataSetsSender,
|
completed_data_sets_service::CompletedDataSetsSender,
|
||||||
|
consensus::Tower,
|
||||||
ledger_cleanup_service::LedgerCleanupService,
|
ledger_cleanup_service::LedgerCleanupService,
|
||||||
poh_recorder::PohRecorder,
|
poh_recorder::PohRecorder,
|
||||||
replay_stage::{ReplayStage, ReplayStageConfig},
|
replay_stage::{ReplayStage, ReplayStageConfig},
|
||||||
|
@ -90,6 +91,7 @@ impl Tvu {
|
||||||
ledger_signal_receiver: Receiver<bool>,
|
ledger_signal_receiver: Receiver<bool>,
|
||||||
subscriptions: &Arc<RpcSubscriptions>,
|
subscriptions: &Arc<RpcSubscriptions>,
|
||||||
poh_recorder: &Arc<Mutex<PohRecorder>>,
|
poh_recorder: &Arc<Mutex<PohRecorder>>,
|
||||||
|
tower: Tower,
|
||||||
leader_schedule_cache: &Arc<LeaderScheduleCache>,
|
leader_schedule_cache: &Arc<LeaderScheduleCache>,
|
||||||
exit: &Arc<AtomicBool>,
|
exit: &Arc<AtomicBool>,
|
||||||
completed_slots_receiver: CompletedSlotsReceiver,
|
completed_slots_receiver: CompletedSlotsReceiver,
|
||||||
|
@ -203,6 +205,7 @@ impl Tvu {
|
||||||
cluster_info.clone(),
|
cluster_info.clone(),
|
||||||
ledger_signal_receiver,
|
ledger_signal_receiver,
|
||||||
poh_recorder.clone(),
|
poh_recorder.clone(),
|
||||||
|
tower,
|
||||||
vote_tracker,
|
vote_tracker,
|
||||||
cluster_slots,
|
cluster_slots,
|
||||||
retransmit_slots_sender,
|
retransmit_slots_sender,
|
||||||
|
@ -301,6 +304,7 @@ pub mod tests {
|
||||||
let (replay_vote_sender, _replay_vote_receiver) = unbounded();
|
let (replay_vote_sender, _replay_vote_receiver) = unbounded();
|
||||||
let (completed_data_sets_sender, _completed_data_sets_receiver) = unbounded();
|
let (completed_data_sets_sender, _completed_data_sets_receiver) = unbounded();
|
||||||
let bank_forks = Arc::new(RwLock::new(bank_forks));
|
let bank_forks = Arc::new(RwLock::new(bank_forks));
|
||||||
|
let tower = Tower::new_with_key(&target1_keypair.pubkey());
|
||||||
let tvu = Tvu::new(
|
let tvu = Tvu::new(
|
||||||
&vote_keypair.pubkey(),
|
&vote_keypair.pubkey(),
|
||||||
vec![Arc::new(vote_keypair)],
|
vec![Arc::new(vote_keypair)],
|
||||||
|
@ -322,6 +326,7 @@ pub mod tests {
|
||||||
block_commitment_cache.clone(),
|
block_commitment_cache.clone(),
|
||||||
)),
|
)),
|
||||||
&poh_recorder,
|
&poh_recorder,
|
||||||
|
tower,
|
||||||
&leader_schedule_cache,
|
&leader_schedule_cache,
|
||||||
&exit,
|
&exit,
|
||||||
completed_slots_receiver,
|
completed_slots_receiver,
|
||||||
|
|
|
@ -6,6 +6,7 @@ use crate::{
|
||||||
cluster_info::{ClusterInfo, Node},
|
cluster_info::{ClusterInfo, Node},
|
||||||
cluster_info_vote_listener::VoteTracker,
|
cluster_info_vote_listener::VoteTracker,
|
||||||
completed_data_sets_service::CompletedDataSetsService,
|
completed_data_sets_service::CompletedDataSetsService,
|
||||||
|
consensus::{reconcile_blockstore_roots_with_tower, Tower, TowerError},
|
||||||
contact_info::ContactInfo,
|
contact_info::ContactInfo,
|
||||||
gossip_service::{discover_cluster, GossipService},
|
gossip_service::{discover_cluster, GossipService},
|
||||||
poh_recorder::{PohRecorder, GRACE_TICKS_FACTOR, MAX_GRACE_SLOTS},
|
poh_recorder::{PohRecorder, GRACE_TICKS_FACTOR, MAX_GRACE_SLOTS},
|
||||||
|
@ -95,6 +96,7 @@ pub struct ValidatorConfig {
|
||||||
pub accounts_hash_interval_slots: u64,
|
pub accounts_hash_interval_slots: u64,
|
||||||
pub max_genesis_archive_unpacked_size: u64,
|
pub max_genesis_archive_unpacked_size: u64,
|
||||||
pub wal_recovery_mode: Option<BlockstoreRecoveryMode>,
|
pub wal_recovery_mode: Option<BlockstoreRecoveryMode>,
|
||||||
|
pub require_tower: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for ValidatorConfig {
|
impl Default for ValidatorConfig {
|
||||||
|
@ -125,6 +127,7 @@ impl Default for ValidatorConfig {
|
||||||
accounts_hash_interval_slots: std::u64::MAX,
|
accounts_hash_interval_slots: std::u64::MAX,
|
||||||
max_genesis_archive_unpacked_size: MAX_GENESIS_ARCHIVE_UNPACKED_SIZE,
|
max_genesis_archive_unpacked_size: MAX_GENESIS_ARCHIVE_UNPACKED_SIZE,
|
||||||
wal_recovery_mode: None,
|
wal_recovery_mode: None,
|
||||||
|
require_tower: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -253,7 +256,8 @@ impl Validator {
|
||||||
cache_block_time_sender,
|
cache_block_time_sender,
|
||||||
cache_block_time_service,
|
cache_block_time_service,
|
||||||
},
|
},
|
||||||
) = new_banks_from_ledger(config, ledger_path, poh_verify, &exit);
|
tower,
|
||||||
|
) = new_banks_from_ledger(&id, vote_account, config, ledger_path, poh_verify, &exit);
|
||||||
|
|
||||||
let leader_schedule_cache = Arc::new(leader_schedule_cache);
|
let leader_schedule_cache = Arc::new(leader_schedule_cache);
|
||||||
let bank = bank_forks.working_bank();
|
let bank = bank_forks.working_bank();
|
||||||
|
@ -475,6 +479,7 @@ impl Validator {
|
||||||
ledger_signal_receiver,
|
ledger_signal_receiver,
|
||||||
&subscriptions,
|
&subscriptions,
|
||||||
&poh_recorder,
|
&poh_recorder,
|
||||||
|
tower,
|
||||||
&leader_schedule_cache,
|
&leader_schedule_cache,
|
||||||
&exit,
|
&exit,
|
||||||
completed_slots_receiver,
|
completed_slots_receiver,
|
||||||
|
@ -613,8 +618,81 @@ impl Validator {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn active_vote_account_exists_in_bank(bank: &Arc<Bank>, vote_account: &Pubkey) -> bool {
|
||||||
|
if let Some(account) = &bank.get_account(vote_account) {
|
||||||
|
if let Some(vote_state) = VoteState::from(&account) {
|
||||||
|
return !vote_state.votes.is_empty();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
|
||||||
|
fn post_process_restored_tower(
|
||||||
|
restored_tower: crate::consensus::Result<Tower>,
|
||||||
|
validator_identity: &Pubkey,
|
||||||
|
vote_account: &Pubkey,
|
||||||
|
config: &ValidatorConfig,
|
||||||
|
ledger_path: &Path,
|
||||||
|
bank_forks: &BankForks,
|
||||||
|
) -> Tower {
|
||||||
|
restored_tower
|
||||||
|
.and_then(|tower| {
|
||||||
|
let root_bank = bank_forks.root_bank();
|
||||||
|
let slot_history = root_bank.get_slot_history();
|
||||||
|
tower.adjust_lockouts_after_replay(root_bank.slot(), &slot_history)
|
||||||
|
})
|
||||||
|
.unwrap_or_else(|err| {
|
||||||
|
let voting_has_been_active =
|
||||||
|
active_vote_account_exists_in_bank(&bank_forks.working_bank(), &vote_account);
|
||||||
|
let saved_tower_is_missing = if let TowerError::IOError(io_err) = &err {
|
||||||
|
io_err.kind() == std::io::ErrorKind::NotFound
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
};
|
||||||
|
if !saved_tower_is_missing {
|
||||||
|
datapoint_error!(
|
||||||
|
"tower_error",
|
||||||
|
(
|
||||||
|
"error",
|
||||||
|
format!("Unable to restore tower: {}", err),
|
||||||
|
String
|
||||||
|
),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if config.require_tower && voting_has_been_active {
|
||||||
|
error!("Requested mandatory tower restore failed: {}", err);
|
||||||
|
error!(
|
||||||
|
"And there is an existing vote_account containing actual votes. \
|
||||||
|
Aborting due to possible conflicting duplicate votes"
|
||||||
|
);
|
||||||
|
process::exit(1);
|
||||||
|
}
|
||||||
|
if saved_tower_is_missing && !voting_has_been_active {
|
||||||
|
// Currently, don't protect against spoofed snapshots with no tower at all
|
||||||
|
info!(
|
||||||
|
"Ignoring expected failed tower restore because this is the initial \
|
||||||
|
validator start with the vote account..."
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
error!(
|
||||||
|
"Rebuilding a new tower from the latest vote account due to failed tower restore: {}",
|
||||||
|
err
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
Tower::new_from_bankforks(
|
||||||
|
&bank_forks,
|
||||||
|
&ledger_path,
|
||||||
|
&validator_identity,
|
||||||
|
&vote_account,
|
||||||
|
)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(clippy::type_complexity)]
|
#[allow(clippy::type_complexity)]
|
||||||
fn new_banks_from_ledger(
|
fn new_banks_from_ledger(
|
||||||
|
validator_identity: &Pubkey,
|
||||||
|
vote_account: &Pubkey,
|
||||||
config: &ValidatorConfig,
|
config: &ValidatorConfig,
|
||||||
ledger_path: &Path,
|
ledger_path: &Path,
|
||||||
poh_verify: bool,
|
poh_verify: bool,
|
||||||
|
@ -628,6 +706,7 @@ fn new_banks_from_ledger(
|
||||||
LeaderScheduleCache,
|
LeaderScheduleCache,
|
||||||
Option<(Slot, Hash)>,
|
Option<(Slot, Hash)>,
|
||||||
TransactionHistoryServices,
|
TransactionHistoryServices,
|
||||||
|
Tower,
|
||||||
) {
|
) {
|
||||||
info!("loading ledger from {:?}...", ledger_path);
|
info!("loading ledger from {:?}...", ledger_path);
|
||||||
let genesis_config = open_genesis_config(ledger_path, config.max_genesis_archive_unpacked_size);
|
let genesis_config = open_genesis_config(ledger_path, config.max_genesis_archive_unpacked_size);
|
||||||
|
@ -659,6 +738,14 @@ fn new_banks_from_ledger(
|
||||||
.expect("Failed to open ledger database");
|
.expect("Failed to open ledger database");
|
||||||
blockstore.set_no_compaction(config.no_rocksdb_compaction);
|
blockstore.set_no_compaction(config.no_rocksdb_compaction);
|
||||||
|
|
||||||
|
let restored_tower = Tower::restore(ledger_path, &validator_identity);
|
||||||
|
if let Ok(tower) = &restored_tower {
|
||||||
|
reconcile_blockstore_roots_with_tower(&tower, &blockstore).unwrap_or_else(|err| {
|
||||||
|
error!("Failed to reconcile blockstore with tower: {:?}", err);
|
||||||
|
std::process::exit(1);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
let process_options = blockstore_processor::ProcessOptions {
|
let process_options = blockstore_processor::ProcessOptions {
|
||||||
poh_verify,
|
poh_verify,
|
||||||
dev_halt_at_slot: config.dev_halt_at_slot,
|
dev_halt_at_slot: config.dev_halt_at_slot,
|
||||||
|
@ -690,6 +777,17 @@ fn new_banks_from_ledger(
|
||||||
process::exit(1);
|
process::exit(1);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
let tower = post_process_restored_tower(
|
||||||
|
restored_tower,
|
||||||
|
&validator_identity,
|
||||||
|
&vote_account,
|
||||||
|
&config,
|
||||||
|
&ledger_path,
|
||||||
|
&bank_forks,
|
||||||
|
);
|
||||||
|
|
||||||
|
info!("Tower state: {:?}", tower);
|
||||||
|
|
||||||
leader_schedule_cache.set_fixed_leader_schedule(config.fixed_leader_schedule.clone());
|
leader_schedule_cache.set_fixed_leader_schedule(config.fixed_leader_schedule.clone());
|
||||||
|
|
||||||
bank_forks.set_snapshot_config(config.snapshot_config.clone());
|
bank_forks.set_snapshot_config(config.snapshot_config.clone());
|
||||||
|
@ -704,6 +802,7 @@ fn new_banks_from_ledger(
|
||||||
leader_schedule_cache,
|
leader_schedule_cache,
|
||||||
snapshot_hash,
|
snapshot_hash,
|
||||||
transaction_history_services,
|
transaction_history_services,
|
||||||
|
tower,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -20,6 +20,13 @@ impl<'a> AncestorIterator<'a> {
|
||||||
blockstore,
|
blockstore,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn new_inclusive(start_slot: Slot, blockstore: &'a Blockstore) -> Self {
|
||||||
|
Self {
|
||||||
|
current: blockstore.meta(start_slot).unwrap().map(|_| start_slot),
|
||||||
|
blockstore,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
impl<'a> Iterator for AncestorIterator<'a> {
|
impl<'a> Iterator for AncestorIterator<'a> {
|
||||||
type Item = Slot;
|
type Item = Slot;
|
||||||
|
@ -111,4 +118,33 @@ mod tests {
|
||||||
vec![2, 1, 0]
|
vec![2, 1, 0]
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Verifies that `new_inclusive()` yields the start slot in addition to its
/// ancestors, and yields nothing for a start slot missing from the blockstore.
#[test]
fn test_ancestor_iterator_inclusive() {
    let ledger_path = get_tmp_ledger_path!();
    let blockstore = Blockstore::open(&ledger_path).unwrap();

    // (slot, parent slot) pairs, inserted in ascending slot order.
    for &(slot, parent_slot) in &[(0, 0), (1, 0), (2, 1)] {
        let (shreds, _) = make_slot_entries(slot, parent_slot, 42);
        blockstore.insert_shreds(shreds, None, false).unwrap();
    }

    // The exclusive iterator skips the start slot.
    let exclusive: Vec<Slot> = AncestorIterator::new(2, &blockstore).collect();
    assert_eq!(exclusive, vec![1, 0]);

    // existing start_slot
    let inclusive: Vec<Slot> = AncestorIterator::new_inclusive(2, &blockstore).collect();
    assert_eq!(inclusive, vec![2, 1, 0]);

    // non-existing start_slot
    let missing: Vec<Slot> = AncestorIterator::new_inclusive(3, &blockstore).collect();
    assert_eq!(missing, Vec::<Slot>::new());
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,7 +9,7 @@ use solana_client::{
|
||||||
use solana_core::{
|
use solana_core::{
|
||||||
broadcast_stage::BroadcastStageType,
|
broadcast_stage::BroadcastStageType,
|
||||||
cluster_info::VALIDATOR_PORT_RANGE,
|
cluster_info::VALIDATOR_PORT_RANGE,
|
||||||
consensus::{SWITCH_FORK_THRESHOLD, VOTE_THRESHOLD_DEPTH},
|
consensus::{Tower, SWITCH_FORK_THRESHOLD, VOTE_THRESHOLD_DEPTH},
|
||||||
gossip_service::discover_cluster,
|
gossip_service::discover_cluster,
|
||||||
validator::ValidatorConfig,
|
validator::ValidatorConfig,
|
||||||
};
|
};
|
||||||
|
@ -1370,18 +1370,19 @@ fn test_no_voting() {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_optimistic_confirmation_violation() {
|
#[serial]
|
||||||
|
fn test_optimistic_confirmation_violation_with_no_tower() {
|
||||||
solana_logger::setup();
|
solana_logger::setup();
|
||||||
let mut buf = BufferRedirect::stderr().unwrap();
|
let mut buf = BufferRedirect::stderr().unwrap();
|
||||||
// First set up the cluster with 2 nodes
|
// First set up the cluster with 2 nodes
|
||||||
let slots_per_epoch = 2048;
|
let slots_per_epoch = 2048;
|
||||||
let node_stakes = vec![50, 51];
|
let node_stakes = vec![51, 50];
|
||||||
let validator_keys: Vec<_> = iter::repeat_with(|| (Arc::new(Keypair::new()), true))
|
let validator_keys: Vec<_> = iter::repeat_with(|| (Arc::new(Keypair::new()), true))
|
||||||
.take(node_stakes.len())
|
.take(node_stakes.len())
|
||||||
.collect();
|
.collect();
|
||||||
let config = ClusterConfig {
|
let config = ClusterConfig {
|
||||||
cluster_lamports: 100_000,
|
cluster_lamports: 100_000,
|
||||||
node_stakes: vec![51, 50],
|
node_stakes: node_stakes.clone(),
|
||||||
validator_configs: vec![ValidatorConfig::default(); node_stakes.len()],
|
validator_configs: vec![ValidatorConfig::default(); node_stakes.len()],
|
||||||
validator_keys: Some(validator_keys),
|
validator_keys: Some(validator_keys),
|
||||||
slots_per_epoch,
|
slots_per_epoch,
|
||||||
|
@ -1415,7 +1416,9 @@ fn test_optimistic_confirmation_violation() {
|
||||||
|
|
||||||
// Mark fork as dead on the heavier validator, this should make the fork effectively
|
// Mark fork as dead on the heavier validator, this should make the fork effectively
|
||||||
// dead, even though it was optimistically confirmed. The smaller validator should
|
// dead, even though it was optimistically confirmed. The smaller validator should
|
||||||
// jump over to the new fork
|
// create and jump over to a new fork
|
||||||
|
// Also, remove the saved tower to intentionally make the restarted validator violate the
|
||||||
|
// optimistic confirmation
|
||||||
{
|
{
|
||||||
let blockstore = Blockstore::open_with_access_type(
|
let blockstore = Blockstore::open_with_access_type(
|
||||||
&exited_validator_info.info.ledger_path,
|
&exited_validator_info.info.ledger_path,
|
||||||
|
@ -1433,6 +1436,12 @@ fn test_optimistic_confirmation_violation() {
|
||||||
prev_voted_slot
|
prev_voted_slot
|
||||||
);
|
);
|
||||||
blockstore.set_dead_slot(prev_voted_slot).unwrap();
|
blockstore.set_dead_slot(prev_voted_slot).unwrap();
|
||||||
|
|
||||||
|
std::fs::remove_file(Tower::get_filename(
|
||||||
|
&exited_validator_info.info.ledger_path,
|
||||||
|
&entry_point_id,
|
||||||
|
))
|
||||||
|
.unwrap();
|
||||||
}
|
}
|
||||||
cluster.restart_node(&entry_point_id, exited_validator_info);
|
cluster.restart_node(&entry_point_id, exited_validator_info);
|
||||||
|
|
||||||
|
@ -1465,6 +1474,220 @@ fn test_optimistic_confirmation_violation() {
|
||||||
assert!(output.contains(&expected_log));
|
assert!(output.contains(&expected_log));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[serial]
|
||||||
|
#[ignore]
|
||||||
|
fn test_no_optimistic_confirmation_violation_with_tower() {
|
||||||
|
solana_logger::setup();
|
||||||
|
let mut buf = BufferRedirect::stderr().unwrap();
|
||||||
|
|
||||||
|
// First set up the cluster with 2 nodes
|
||||||
|
let slots_per_epoch = 2048;
|
||||||
|
let node_stakes = vec![51, 50];
|
||||||
|
let validator_keys: Vec<_> = iter::repeat_with(|| (Arc::new(Keypair::new()), true))
|
||||||
|
.take(node_stakes.len())
|
||||||
|
.collect();
|
||||||
|
let config = ClusterConfig {
|
||||||
|
cluster_lamports: 100_000,
|
||||||
|
node_stakes: node_stakes.clone(),
|
||||||
|
validator_configs: vec![ValidatorConfig::default(); node_stakes.len()],
|
||||||
|
validator_keys: Some(validator_keys),
|
||||||
|
slots_per_epoch,
|
||||||
|
stakers_slot_offset: slots_per_epoch,
|
||||||
|
skip_warmup_slots: true,
|
||||||
|
..ClusterConfig::default()
|
||||||
|
};
|
||||||
|
let mut cluster = LocalCluster::new(&config);
|
||||||
|
let entry_point_id = cluster.entry_point_info.id;
|
||||||
|
// Let the nodes run for a while. Wait for validators to vote on slot `S`
|
||||||
|
// so that the vote on `S-1` is definitely in gossip and optimistic confirmation is
|
||||||
|
// detected on slot `S-1` for sure, then stop the heavier of the two
|
||||||
|
// validators
|
||||||
|
let client = cluster.get_validator_client(&entry_point_id).unwrap();
|
||||||
|
let mut prev_voted_slot = 0;
|
||||||
|
loop {
|
||||||
|
let last_voted_slot = client
|
||||||
|
.get_slot_with_commitment(CommitmentConfig::recent())
|
||||||
|
.unwrap();
|
||||||
|
if last_voted_slot > 50 {
|
||||||
|
if prev_voted_slot == 0 {
|
||||||
|
prev_voted_slot = last_voted_slot;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sleep(Duration::from_millis(100));
|
||||||
|
}
|
||||||
|
|
||||||
|
let exited_validator_info = cluster.exit_node(&entry_point_id);
|
||||||
|
|
||||||
|
// Mark fork as dead on the heavier validator, this should make the fork effectively
|
||||||
|
// dead, even though it was optimistically confirmed. The smaller validator should
|
||||||
|
// create and jump over to a new fork
|
||||||
|
{
|
||||||
|
let blockstore = Blockstore::open_with_access_type(
|
||||||
|
&exited_validator_info.info.ledger_path,
|
||||||
|
AccessType::PrimaryOnly,
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.unwrap_or_else(|e| {
|
||||||
|
panic!(
|
||||||
|
"Failed to open ledger at {:?}, err: {}",
|
||||||
|
exited_validator_info.info.ledger_path, e
|
||||||
|
);
|
||||||
|
});
|
||||||
|
info!(
|
||||||
|
"Setting slot: {} on main fork as dead, should cause fork",
|
||||||
|
prev_voted_slot
|
||||||
|
);
|
||||||
|
blockstore.set_dead_slot(prev_voted_slot).unwrap();
|
||||||
|
}
|
||||||
|
cluster.restart_node(&entry_point_id, exited_validator_info);
|
||||||
|
|
||||||
|
cluster.check_no_new_roots(400, "test_no_optimistic_confirmation_violation_with_tower");
|
||||||
|
|
||||||
|
// Check that the validator didn't detect that optimistic confirmation for
|
||||||
|
// `prev_voted_slot` failed
|
||||||
|
let expected_log = format!("Optimistic slot {} was not rooted", prev_voted_slot);
|
||||||
|
let mut output = String::new();
|
||||||
|
buf.read_to_string(&mut output).unwrap();
|
||||||
|
assert!(!output.contains(&expected_log));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[serial]
|
||||||
|
fn test_validator_saves_tower() {
|
||||||
|
solana_logger::setup();
|
||||||
|
|
||||||
|
let validator_config = ValidatorConfig {
|
||||||
|
require_tower: true,
|
||||||
|
..ValidatorConfig::default()
|
||||||
|
};
|
||||||
|
let validator_identity_keypair = Arc::new(Keypair::new());
|
||||||
|
let validator_id = validator_identity_keypair.pubkey();
|
||||||
|
let config = ClusterConfig {
|
||||||
|
cluster_lamports: 10_000,
|
||||||
|
node_stakes: vec![100],
|
||||||
|
validator_configs: vec![validator_config],
|
||||||
|
validator_keys: Some(vec![(validator_identity_keypair.clone(), true)]),
|
||||||
|
..ClusterConfig::default()
|
||||||
|
};
|
||||||
|
let mut cluster = LocalCluster::new(&config);
|
||||||
|
|
||||||
|
let validator_client = cluster.get_validator_client(&validator_id).unwrap();
|
||||||
|
|
||||||
|
let ledger_path = cluster
|
||||||
|
.validators
|
||||||
|
.get(&validator_id)
|
||||||
|
.unwrap()
|
||||||
|
.info
|
||||||
|
.ledger_path
|
||||||
|
.clone();
|
||||||
|
|
||||||
|
// Wait for some votes to be generated
|
||||||
|
let mut last_replayed_root;
|
||||||
|
loop {
|
||||||
|
if let Ok(slot) = validator_client.get_slot_with_commitment(CommitmentConfig::recent()) {
|
||||||
|
trace!("current slot: {}", slot);
|
||||||
|
if slot > 2 {
|
||||||
|
// this will be the root next time a validator starts
|
||||||
|
last_replayed_root = slot;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sleep(Duration::from_millis(10));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stop validator and check saved tower
|
||||||
|
let validator_info = cluster.exit_node(&validator_id);
|
||||||
|
let tower1 = Tower::restore(&ledger_path, &validator_id).unwrap();
|
||||||
|
trace!("tower1: {:?}", tower1);
|
||||||
|
assert_eq!(tower1.root(), Some(0));
|
||||||
|
|
||||||
|
// Restart the validator and wait for a new root
|
||||||
|
cluster.restart_node(&validator_id, validator_info);
|
||||||
|
let validator_client = cluster.get_validator_client(&validator_id).unwrap();
|
||||||
|
|
||||||
|
// Wait for the first root
|
||||||
|
loop {
|
||||||
|
if let Ok(root) = validator_client.get_slot_with_commitment(CommitmentConfig::root()) {
|
||||||
|
trace!("current root: {}", root);
|
||||||
|
if root > last_replayed_root + 1 {
|
||||||
|
last_replayed_root = root;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sleep(Duration::from_millis(50));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stop validator, and check saved tower
|
||||||
|
let recent_slot = validator_client
|
||||||
|
.get_slot_with_commitment(CommitmentConfig::recent())
|
||||||
|
.unwrap();
|
||||||
|
let validator_info = cluster.exit_node(&validator_id);
|
||||||
|
let tower2 = Tower::restore(&ledger_path, &validator_id).unwrap();
|
||||||
|
trace!("tower2: {:?}", tower2);
|
||||||
|
assert_eq!(tower2.root(), Some(last_replayed_root));
|
||||||
|
last_replayed_root = recent_slot;
|
||||||
|
|
||||||
|
// Roll back the saved tower to `tower1` to simulate a validator starting from a newer snapshot
|
||||||
|
// without having to wait for that snapshot to be generated in this test
|
||||||
|
tower1.save(&validator_identity_keypair).unwrap();
|
||||||
|
|
||||||
|
cluster.restart_node(&validator_id, validator_info);
|
||||||
|
let validator_client = cluster.get_validator_client(&validator_id).unwrap();
|
||||||
|
|
||||||
|
// Wait for a new root, demonstrating the validator was able to make progress from the older `tower1`
|
||||||
|
loop {
|
||||||
|
if let Ok(root) = validator_client.get_slot_with_commitment(CommitmentConfig::root()) {
|
||||||
|
trace!(
|
||||||
|
"current root: {}, last_replayed_root: {}",
|
||||||
|
root,
|
||||||
|
last_replayed_root
|
||||||
|
);
|
||||||
|
if root > last_replayed_root {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sleep(Duration::from_millis(50));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check the new root is reflected in the saved tower state
|
||||||
|
let mut validator_info = cluster.exit_node(&validator_id);
|
||||||
|
let tower3 = Tower::restore(&ledger_path, &validator_id).unwrap();
|
||||||
|
trace!("tower3: {:?}", tower3);
|
||||||
|
assert!(tower3.root().unwrap() > last_replayed_root);
|
||||||
|
|
||||||
|
// Remove the tower file entirely and allow the validator to start without a tower. It will
|
||||||
|
// rebuild tower from its vote account contents
|
||||||
|
fs::remove_file(Tower::get_filename(&ledger_path, &validator_id)).unwrap();
|
||||||
|
validator_info.config.require_tower = false;
|
||||||
|
|
||||||
|
cluster.restart_node(&validator_id, validator_info);
|
||||||
|
let validator_client = cluster.get_validator_client(&validator_id).unwrap();
|
||||||
|
|
||||||
|
// Wait for a couple more slots to pass so another vote occurs
|
||||||
|
let current_slot = validator_client
|
||||||
|
.get_slot_with_commitment(CommitmentConfig::recent())
|
||||||
|
.unwrap();
|
||||||
|
loop {
|
||||||
|
if let Ok(slot) = validator_client.get_slot_with_commitment(CommitmentConfig::recent()) {
|
||||||
|
trace!("current_slot: {}, slot: {}", current_slot, slot);
|
||||||
|
if slot > current_slot + 1 {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sleep(Duration::from_millis(50));
|
||||||
|
}
|
||||||
|
|
||||||
|
cluster.close_preserve_ledgers();
|
||||||
|
|
||||||
|
let tower4 = Tower::restore(&ledger_path, &validator_id).unwrap();
|
||||||
|
trace!("tower4: {:?}", tower4);
|
||||||
|
// should tower4 advance 1 slot compared to tower3????
|
||||||
|
assert_eq!(tower4.root(), tower3.root().map(|s| s + 1));
|
||||||
|
}
|
||||||
|
|
||||||
fn wait_for_next_snapshot(
|
fn wait_for_next_snapshot(
|
||||||
cluster: &LocalCluster,
|
cluster: &LocalCluster,
|
||||||
snapshot_package_output_path: &Path,
|
snapshot_package_output_path: &Path,
|
||||||
|
|
|
@ -93,6 +93,7 @@ ledger_dir="$SOLANA_CONFIG_DIR"/bootstrap-validator
|
||||||
args+=(
|
args+=(
|
||||||
--enable-rpc-exit
|
--enable-rpc-exit
|
||||||
--enable-rpc-set-log-filter
|
--enable-rpc-set-log-filter
|
||||||
|
--require-tower
|
||||||
--ledger "$ledger_dir"
|
--ledger "$ledger_dir"
|
||||||
--rpc-port 8899
|
--rpc-port 8899
|
||||||
--snapshot-interval-slots 200
|
--snapshot-interval-slots 200
|
||||||
|
|
|
@ -228,6 +228,7 @@ default_arg --ledger "$ledger_dir"
|
||||||
default_arg --log -
|
default_arg --log -
|
||||||
default_arg --enable-rpc-exit
|
default_arg --enable-rpc-exit
|
||||||
default_arg --enable-rpc-set-log-filter
|
default_arg --enable-rpc-set-log-filter
|
||||||
|
default_arg --require-tower
|
||||||
|
|
||||||
if [[ -n $SOLANA_CUDA ]]; then
|
if [[ -n $SOLANA_CUDA ]]; then
|
||||||
program=$solana_validator_cuda
|
program=$solana_validator_cuda
|
||||||
|
|
|
@ -163,6 +163,9 @@ pub struct VoteState {
|
||||||
pub commission: u8,
|
pub commission: u8,
|
||||||
|
|
||||||
pub votes: VecDeque<Lockout>,
|
pub votes: VecDeque<Lockout>,
|
||||||
|
|
||||||
|
// This is usually the last Lockout which was popped from self.votes.
|
||||||
|
// However, it can be an arbitrary slot when used inside Tower
|
||||||
pub root_slot: Option<Slot>,
|
pub root_slot: Option<Slot>,
|
||||||
|
|
||||||
/// the signer for vote transactions
|
/// the signer for vote transactions
|
||||||
|
|
1
run.sh
1
run.sh
|
@ -104,6 +104,7 @@ args=(
|
||||||
--enable-rpc-exit
|
--enable-rpc-exit
|
||||||
--enable-rpc-transaction-history
|
--enable-rpc-transaction-history
|
||||||
--init-complete-file "$dataDir"/init-completed
|
--init-complete-file "$dataDir"/init-completed
|
||||||
|
--require-tower
|
||||||
)
|
)
|
||||||
solana-validator "${args[@]}" &
|
solana-validator "${args[@]}" &
|
||||||
validator=$!
|
validator=$!
|
||||||
|
|
|
@ -913,6 +913,10 @@ impl Bank {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn get_slot_history(&self) -> SlotHistory {
|
||||||
|
SlotHistory::from_account(&self.get_account(&sysvar::slot_history::id()).unwrap()).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
fn update_epoch_stakes(&mut self, leader_schedule_epoch: Epoch) {
|
fn update_epoch_stakes(&mut self, leader_schedule_epoch: Epoch) {
|
||||||
// update epoch_stakes cache
|
// update epoch_stakes cache
|
||||||
// if my parent didn't populate for this staker's epoch, we've
|
// if my parent didn't populate for this staker's epoch, we've
|
||||||
|
|
|
@ -63,7 +63,7 @@ impl SlotHistory {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn check(&self, slot: Slot) -> Check {
|
pub fn check(&self, slot: Slot) -> Check {
|
||||||
if slot >= self.next_slot {
|
if slot > self.newest() {
|
||||||
Check::Future
|
Check::Future
|
||||||
} else if slot < self.oldest() {
|
} else if slot < self.oldest() {
|
||||||
Check::TooOld
|
Check::TooOld
|
||||||
|
@ -77,6 +77,10 @@ impl SlotHistory {
|
||||||
pub fn oldest(&self) -> Slot {
|
pub fn oldest(&self) -> Slot {
|
||||||
self.next_slot.saturating_sub(MAX_ENTRIES)
|
self.next_slot.saturating_sub(MAX_ENTRIES)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn newest(&self) -> Slot {
|
||||||
|
self.next_slot - 1
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|
|
@ -793,6 +793,12 @@ pub fn main() {
|
||||||
.takes_value(false)
|
.takes_value(false)
|
||||||
.help("Use CUDA"),
|
.help("Use CUDA"),
|
||||||
)
|
)
|
||||||
|
.arg(
|
||||||
|
clap::Arg::with_name("require_tower")
|
||||||
|
.long("require-tower")
|
||||||
|
.takes_value(false)
|
||||||
|
.help("Refuse to start if saved tower state is not found"),
|
||||||
|
)
|
||||||
.arg(
|
.arg(
|
||||||
Arg::with_name("expected_genesis_hash")
|
Arg::with_name("expected_genesis_hash")
|
||||||
.long("expected-genesis-hash")
|
.long("expected-genesis-hash")
|
||||||
|
@ -1015,6 +1021,7 @@ pub fn main() {
|
||||||
let restricted_repair_only_mode = matches.is_present("restricted_repair_only_mode");
|
let restricted_repair_only_mode = matches.is_present("restricted_repair_only_mode");
|
||||||
|
|
||||||
let mut validator_config = ValidatorConfig {
|
let mut validator_config = ValidatorConfig {
|
||||||
|
require_tower: matches.is_present("require_tower"),
|
||||||
dev_halt_at_slot: value_t!(matches, "dev_halt_at_slot", Slot).ok(),
|
dev_halt_at_slot: value_t!(matches, "dev_halt_at_slot", Slot).ok(),
|
||||||
expected_genesis_hash: matches
|
expected_genesis_hash: matches
|
||||||
.value_of("expected_genesis_hash")
|
.value_of("expected_genesis_hash")
|
||||||
|
|
Loading…
Reference in New Issue