diff --git a/core/src/consensus.rs b/core/src/consensus.rs
index b40b4fa3e..059284f30 100644
--- a/core/src/consensus.rs
+++ b/core/src/consensus.rs
@@ -105,7 +105,7 @@ pub(crate) struct ComputedBankState {
 #[frozen_abi(digest = "GMs1FxKteU7K4ZFRofMBqNhBpM4xkPVxfYod6R8DQmpT")]
 #[derive(Clone, Serialize, Deserialize, Debug, PartialEq, AbiExample)]
 pub struct Tower {
-    pub(crate) node_pubkey: Pubkey,
+    pub node_pubkey: Pubkey,
     threshold_depth: usize,
     threshold_size: f64,
     vote_state: VoteState,
@@ -432,6 +432,13 @@ impl Tower {
         self.record_bank_vote_and_update_lockouts(slot, hash, self.last_voted_slot())
     }
 
+    /// Used for tests
+    pub fn increase_lockout(&mut self, confirmation_count_increase: u32) {
+        for vote in self.vote_state.votes.iter_mut() {
+            vote.confirmation_count += confirmation_count_increase;
+        }
+    }
+
     pub fn last_voted_slot(&self) -> Option<Slot> {
         self.last_vote.slots.last().copied()
     }
diff --git a/local-cluster/src/local_cluster.rs b/local-cluster/src/local_cluster.rs
index 748a03a71..6b254ffcb 100644
--- a/local-cluster/src/local_cluster.rs
+++ b/local-cluster/src/local_cluster.rs
@@ -66,6 +66,8 @@ pub struct ClusterConfig {
     pub validator_keys: Option<Vec<(Arc<Keypair>, bool)>>,
     /// The stakes of each node
     pub node_stakes: Vec<u64>,
+    /// Optional vote keypairs to use for each node
+    pub node_vote_keys: Option<Vec<Arc<Keypair>>>,
     /// The total lamports available to the cluster
     pub cluster_lamports: u64,
     pub ticks_per_slot: u64,
@@ -85,6 +87,7 @@ impl Default for ClusterConfig {
             num_listeners: 0,
             validator_keys: None,
             node_stakes: vec![],
+            node_vote_keys: None,
             cluster_lamports: 0,
             ticks_per_slot: DEFAULT_TICKS_PER_SLOT,
             slots_per_epoch: DEFAULT_DEV_SLOTS_PER_EPOCH,
@@ -129,6 +132,7 @@ impl LocalCluster {
     pub fn new(config: &mut ClusterConfig, socket_addr_space: SocketAddrSpace) -> Self {
         assert_eq!(config.validator_configs.len(), config.node_stakes.len());
+
         let mut validator_keys = {
             if let Some(ref keys) = config.validator_keys {
                 assert_eq!(config.validator_configs.len(), keys.len());
@@ -140,16 +144,29 @@ impl LocalCluster {
             }
         };
 
+        let vote_keys = {
+            if let Some(ref node_vote_keys) = config.node_vote_keys {
+                assert_eq!(config.validator_configs.len(), node_vote_keys.len());
+                node_vote_keys.clone()
+            } else {
+                iter::repeat_with(|| Arc::new(Keypair::new()))
+                    .take(config.validator_configs.len())
+                    .collect()
+            }
+        };
+
         // Bootstrap leader should always be in genesis block
         validator_keys[0].1 = true;
         let (keys_in_genesis, stakes_in_genesis): (Vec<ValidatorVoteKeypairs>, Vec<u64>) =
             validator_keys
                 .iter()
                 .zip(&config.node_stakes)
-                .filter_map(|((node_keypair, in_genesis), stake)| {
+                .zip(&vote_keys)
+                .filter_map(|(((node_keypair, in_genesis), stake), vote_keypair)| {
                     info!(
-                        "STARTING LOCAL CLUSTER: key {} has {} stake",
+                        "STARTING LOCAL CLUSTER: key {} vote_key {} has {} stake",
                         node_keypair.pubkey(),
+                        vote_keypair.pubkey(),
                         stake
                     );
                     if *in_genesis {
@@ -157,7 +174,8 @@
                         ValidatorVoteKeypairs {
                             node_keypair: Keypair::from_bytes(&node_keypair.to_bytes())
                                 .unwrap(),
-                            vote_keypair: Keypair::new(),
+                            vote_keypair: Keypair::from_bytes(&vote_keypair.to_bytes())
+                                .unwrap(),
                             stake_keypair: Keypair::new(),
                         },
                         stake,
@@ -171,6 +189,7 @@
         let leader_vote_keypair = &keys_in_genesis[0].vote_keypair;
         let leader_pubkey = leader_keypair.pubkey();
        let leader_node = Node::new_localhost_with_pubkey(&leader_pubkey);
+
         let GenesisConfigInfo {
             mut genesis_config,
             mint_keypair,
diff --git a/local-cluster/tests/local_cluster.rs b/local-cluster/tests/local_cluster.rs
index 4152d6ff6..d49dcc488 100644
--- a/local-cluster/tests/local_cluster.rs
+++ b/local-cluster/tests/local_cluster.rs
@@ -2,9 +2,10 @@
 use {
     assert_matches::assert_matches,
     common::{
-        create_custom_leader_schedule, last_vote_in_tower, ms_for_n_slots, open_blockstore,
-        purge_slots, remove_tower, restore_tower, run_cluster_partition,
-        run_kill_partition_switch_threshold, test_faulty_node, RUST_LOG_FILTER,
+        copy_blocks, create_custom_leader_schedule, last_vote_in_tower, ms_for_n_slots,
+        open_blockstore, purge_slots, remove_tower, restore_tower, run_cluster_partition,
+        run_kill_partition_switch_threshold, test_faulty_node,
+        wait_for_last_vote_in_tower_to_land_in_ledger, RUST_LOG_FILTER,
     },
     crossbeam_channel::{unbounded, Receiver},
     gag::BufferRedirect,
@@ -1920,6 +1921,206 @@ fn root_in_tower(tower_path: &Path, node_pubkey: &Pubkey) -> Option<Slot> {
     restore_tower(tower_path, node_pubkey).map(|tower| tower.root())
 }
 
+// This test verifies that even if votes from a validator end up taking too long to land, and thus
+// some of the referenced slots are no longer present in the slot hashes sysvar, consensus can
+// still be attained.
+//
+// Validator A (60%)
+// Validator B (40%)
+//
+//                                     / --- 10 --- [..] --- 16 (B is voting, due to network issues it is initially not able to see the other fork at all)
+//                                    /
+// 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 (A votes 1 - 9; the votes land normally. B does the same, however its votes do not land)
+//                                    \
+//                                     \--[..]-- 73 (majority fork)
+//
+// A is voting on the majority fork and B wants to switch to this fork. However, on this majority
+// fork the earlier votes for B (1 - 9) never landed, so when B eventually goes to vote on 73, the
+// slots in its local vote state are no longer present in slot hashes.
+//
+// 1. Wait for B's tower to see that its local vote state was updated to the new fork
+// 2. Wait X blocks, check that B's vote state on chain has been properly updated
+//
+// NOTE: it is not reliable for B's vote on slot 1 to organically reach 2^16 lockout, so we
+// simulate the 6 consecutive votes on the minor fork by manually incrementing the confirmation
+// levels for the common ancestor votes in tower.
+//
+// To allow this test to run in a reasonable time we change the slot_hash expiry to 64 slots.
+
+#[test]
+#[ignore]
+fn test_slot_hash_expiry() {
+    solana_logger::setup_with_default(RUST_LOG_FILTER);
+    solana_sdk::slot_hashes::set_entries_for_tests_only(64);
+
+    let slots_per_epoch = 2048;
+    let node_stakes = vec![60, 40];
+    let validator_keys = vec![
+        "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
+        "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
+    ]
+    .iter()
+    .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
+    .collect::<Vec<_>>();
+    let node_vote_keys = vec![
+        "3NDQ3ud86RTVg8hTy2dDWnS4P8NfjhZ2gDgQAJbr3heaKaUVS1FW3sTLKA1GmDrY9aySzsa4QxpDkbLv47yHxzr3",
+        "46ZHpHE6PEvXYPu3hf9iQqjBk2ZNDaJ9ejqKWHEjxaQjpAGasKaWKbKHbP3646oZhfgDRzx95DH9PCBKKsoCVngk",
+    ]
+    .iter()
+    .map(|s| Arc::new(Keypair::from_base58_string(s)))
+    .collect::<Vec<_>>();
+    let vs = validator_keys
+        .iter()
+        .map(|(kp, _)| kp.pubkey())
+        .collect::<Vec<_>>();
+    let (a_pubkey, b_pubkey) = (vs[0], vs[1]);
+
+    // We want B to not vote (we are trying to simulate its votes not landing until it gets to the
+    // minority fork)
+    let mut validator_configs =
+        make_identical_validator_configs(&ValidatorConfig::default(), node_stakes.len());
+    validator_configs[1].voting_disabled = true;
+
+    let mut config = ClusterConfig {
+        cluster_lamports: 100_000,
+        node_stakes,
+        validator_configs,
+        validator_keys: Some(validator_keys),
+        node_vote_keys: Some(node_vote_keys),
+        slots_per_epoch,
+        stakers_slot_offset: slots_per_epoch,
+        skip_warmup_slots: true,
+        ..ClusterConfig::default()
+    };
+    let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
+
+    let mut common_ancestor_slot = 8;
+
+    let a_ledger_path = cluster.ledger_path(&a_pubkey);
+    let b_ledger_path = cluster.ledger_path(&b_pubkey);
+
+    // Immediately kill B (we just needed it for the initial stake distribution)
+    info!("Killing B");
+    let mut b_info = cluster.exit_node(&b_pubkey);
+
+    // Let A run for a while until we get to the common ancestor
+    info!("Letting A run until common_ancestor_slot");
+    loop {
+        if let Some((last_vote, _)) = last_vote_in_tower(&a_ledger_path, &a_pubkey) {
+            if last_vote >= common_ancestor_slot {
+                break;
+            }
+        }
+        sleep(Duration::from_millis(100));
+    }
+
+    // Keep A running, but set up B so that it thinks it has voted up until the common ancestor
+    // (but doesn't know anything past that)
+    {
+        info!("Copying A's ledger to B");
+        std::fs::remove_dir_all(&b_info.info.ledger_path).unwrap();
+        let mut opt = fs_extra::dir::CopyOptions::new();
+        opt.copy_inside = true;
+        fs_extra::dir::copy(&a_ledger_path, &b_ledger_path, &opt).unwrap();
+
+        // Remove A's tower in B's new copied ledger
+        info!("Removing A's tower in B's ledger dir");
+        remove_tower(&b_ledger_path, &a_pubkey);
+
+        // Load A's tower and save it as B's tower
+        info!("Loading A's tower");
+        if let Some(mut a_tower) = restore_tower(&a_ledger_path, &a_pubkey) {
+            a_tower.node_pubkey = b_pubkey;
+            // Update common_ancestor_slot because A is still running
+            if let Some(s) = a_tower.last_voted_slot() {
+                common_ancestor_slot = s;
+                info!("New common_ancestor_slot {}", common_ancestor_slot);
+            } else {
+                panic!("A's tower has no votes");
+            }
+            info!("Increase lockout by 6 confirmation levels and save as B's tower");
+            a_tower.increase_lockout(6);
+            save_tower(&b_ledger_path, &a_tower, &b_info.info.keypair);
+            info!("B's new tower: {:?}", a_tower.tower_slots());
+        } else {
+            panic!("A's tower is missing");
+        }
+
+        // Get rid of any slots past common_ancestor_slot
+        info!("Removing extra slots from B's blockstore");
+        let blockstore = open_blockstore(&b_ledger_path);
+        purge_slots(&blockstore, common_ancestor_slot + 1, 100);
+    }
+
+    info!(
+        "Run A on majority fork until it reaches slot hash expiry {}",
+        solana_sdk::slot_hashes::get_entries()
+    );
+    let mut last_vote_on_a;
+    // Keep A running for a while longer so the majority fork has some decent size
+    loop {
+        last_vote_on_a = wait_for_last_vote_in_tower_to_land_in_ledger(&a_ledger_path, &a_pubkey);
+        if last_vote_on_a
+            >= common_ancestor_slot + 2 * (solana_sdk::slot_hashes::get_entries() as u64)
+        {
+            let blockstore = open_blockstore(&a_ledger_path);
+            info!(
+                "A majority fork: {:?}",
+                AncestorIterator::new(last_vote_on_a, &blockstore).collect::<Vec<Slot>>()
+            );
+            break;
+        }
+        sleep(Duration::from_millis(100));
+    }
+
+    // Kill A and restart B with voting. B should now fork off
+    info!("Killing A");
+    let a_info = cluster.exit_node(&a_pubkey);
+
+    info!("Restarting B");
+    b_info.config.voting_disabled = false;
+    cluster.restart_node(&b_pubkey, b_info, SocketAddrSpace::Unspecified);
+
+    // B will fork off and accumulate enough lockout
+    info!("Allowing B to fork");
+    loop {
+        let blockstore = open_blockstore(&b_ledger_path);
+        let last_vote = wait_for_last_vote_in_tower_to_land_in_ledger(&b_ledger_path, &b_pubkey);
+        let mut ancestors = AncestorIterator::new(last_vote, &blockstore);
+        if let Some(index) = ancestors.position(|x| x == common_ancestor_slot) {
+            if index > 7 {
+                info!(
+                    "B has forked for enough lockout: {:?}",
+                    AncestorIterator::new(last_vote, &blockstore).collect::<Vec<Slot>>()
+                );
+                break;
+            }
+        }
+        sleep(Duration::from_millis(1000));
+    }
+
+    info!("Kill B");
+    b_info = cluster.exit_node(&b_pubkey);
+
+    info!("Resolve the partition");
+    {
+        // Here we let B know about the missing blocks that A had produced on its partition
+        let a_blockstore = open_blockstore(&a_ledger_path);
+        let b_blockstore = open_blockstore(&b_ledger_path);
+        copy_blocks(last_vote_on_a, &a_blockstore, &b_blockstore);
+    }
+
+    // Now restart A and B and see if B is able to eventually switch onto the majority fork
+    info!("Restarting A & B");
+    cluster.restart_node(&a_pubkey, a_info, SocketAddrSpace::Unspecified);
+    cluster.restart_node(&b_pubkey, b_info, SocketAddrSpace::Unspecified);
+
+    info!("Waiting for B to switch to majority fork and make a root");
+    cluster_tests::check_for_new_roots(
+        16,
+        &[cluster.get_contact_info(&a_pubkey).unwrap().clone()],
+        "test_slot_hashes_expiry",
+    );
+}
+
 enum ClusterMode {
     MasterOnly,
     MasterSlave,
diff --git a/sdk/program/src/slot_hashes.rs b/sdk/program/src/slot_hashes.rs
index ae9efd7c5..6fafbd7ee 100644
--- a/sdk/program/src/slot_hashes.rs
+++ b/sdk/program/src/slot_hashes.rs
@@ -7,9 +7,23 @@ use {
     std::{iter::FromIterator, ops::Deref},
 };
 
+pub use crate::clock::Slot;
+
 pub const MAX_ENTRIES: usize = 512; // about 2.5 minutes to get your vote in
 
-pub use crate::clock::Slot;
+// This is to allow tests with custom slot hash expiry to avoid having to generate
+// 512 blocks for such tests.
+static mut NUM_ENTRIES: usize = MAX_ENTRIES;
+
+pub fn get_entries() -> usize {
+    unsafe { NUM_ENTRIES }
+}
+
+pub fn set_entries_for_tests_only(_entries: usize) {
+    unsafe {
+        NUM_ENTRIES = _entries;
+    }
+}
 
 pub type SlotHash = (Slot, Hash);
 
@@ -23,7 +37,7 @@ impl SlotHashes {
             Ok(index) => (self.0)[index] = (slot, hash),
             Err(index) => (self.0).insert(index, (slot, hash)),
         }
-        (self.0).truncate(MAX_ENTRIES);
+        (self.0).truncate(get_entries());
     }
     pub fn position(&self, slot: &Slot) -> Option<usize> {
         self.binary_search_by(|(probe, _)| slot.cmp(probe)).ok()
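
As a usage note on the slot_hashes change above: set_entries_for_tests_only mutates a process-wide static that SlotHashes::add reads through get_entries, so it has to run before any SlotHashes are populated and only from test code. The sketch below is a hypothetical standalone unit test, not part of this diff; it assumes the solana-sdk crate and Hash::default() for placeholder hashes, and otherwise uses only the APIs shown above.

use solana_sdk::{
    hash::Hash,
    slot_hashes::{self, SlotHashes},
};

#[test]
fn shrinks_slot_hashes_window() {
    // Shrink the expiry window before any SlotHashes are populated; the setter
    // writes a process-wide static, so ordering matters and tests relying on the
    // default 512-entry window must not share this process.
    slot_hashes::set_entries_for_tests_only(64);
    assert_eq!(slot_hashes::get_entries(), 64);

    // Add more slots than the shrunken window can hold.
    let mut slot_hashes = SlotHashes::new(&[]);
    for slot in 0..128 {
        slot_hashes.add(slot, Hash::default());
    }

    // SlotHashes keeps entries sorted newest-first and truncates to get_entries(),
    // so only the 64 most recent slots survive.
    assert_eq!(slot_hashes.len(), 64);
    assert_eq!(slot_hashes.first().map(|(slot, _)| *slot), Some(127));
}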
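
For context on why the test calls increase_lockout(6): under tower BFT each additional confirmation doubles a vote's lockout (2^confirmation_count slots), so bumping every vote's confirmation_count by 6 multiplies its lockout by 64, which is how the test approximates the 2^16 lockout mentioned in the comment without producing six real consecutive votes on the minor fork. Below is a minimal arithmetic sketch of that doubling rule; the lockout() helper is hypothetical and assumes the standard 2^confirmation_count formula, with no Solana APIs involved.

// Hypothetical sketch: the lockout-doubling rule that increase_lockout leans on.
fn lockout(confirmation_count: u32) -> u64 {
    // Tower BFT: each confirmation doubles the lockout, i.e. 2^confirmation_count slots.
    2u64.pow(confirmation_count)
}

fn main() {
    // A vote at confirmation_count 10 is locked out for 1024 slots; after
    // increase_lockout(6) it behaves as if it had 16 confirmations: 65536 slots.
    assert_eq!(lockout(10), 1_024);
    assert_eq!(lockout(10 + 6), 65_536);
    assert_eq!(lockout(16) / lockout(10), 64); // the factor gained from +6 levels
}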