Remove banks in locktower not in bank_forks (#5837)

* Remove unnecessary calculations from collect_vote_lockouts

* Add test for locktower startup from snapshot
carllin 2019-09-10 13:58:27 -07:00 committed by GitHub
parent 294d531e0b
commit ee4266bc59
4 changed files with 93 additions and 86 deletions


@@ -58,8 +58,14 @@ impl BankForks
/// Create a map of bank slot id to the set of ancestors for the bank slot.
pub fn ancestors(&self) -> HashMap<u64, HashSet<u64>> {
let mut ancestors = HashMap::new();
let root = self.root;
for bank in self.banks.values() {
let mut set: HashSet<u64> = bank.ancestors.keys().cloned().collect();
let mut set: HashSet<u64> = bank
.ancestors
.keys()
.filter(|k| **k >= root)
.cloned()
.collect();
set.remove(&bank.slot());
ancestors.insert(bank.slot(), set);
}
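Not part of the diff: a minimal standalone sketch of the pruning idea above, using made-up slot numbers and a hypothetical pruned_ancestors helper rather than the real BankForks method. Filtering at the root drops every ancestor below it, and the root bank itself is left with an empty ancestor set, which matters for the Tower changes further down.

use std::collections::HashSet;

// Hypothetical helper mirroring the filtered collection above (not the BankForks API).
fn pruned_ancestors(slot: u64, full_ancestors: &HashSet<u64>, root: u64) -> HashSet<u64> {
    let mut set: HashSet<u64> = full_ancestors
        .iter()
        .filter(|k| **k >= root)
        .cloned()
        .collect();
    set.remove(&slot);
    set
}

fn main() {
    // Bank 4 descends from the root at slot 3: only slot 3 survives the filter.
    let full: HashSet<u64> = [0u64, 1, 2, 3].iter().cloned().collect();
    let expected: HashSet<u64> = [3u64].iter().cloned().collect();
    assert_eq!(pruned_ancestors(4, &full, 3), expected);

    // The root bank itself is left with an empty ancestor set.
    let root_full: HashSet<u64> = [0u64, 1, 2].iter().cloned().collect();
    assert!(pruned_ancestors(3, &root_full, 3).is_empty());
}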


@@ -74,6 +74,9 @@ impl ForkConfidenceCache
}
pub fn prune_confidence_cache(&mut self, ancestors: &HashMap<u64, HashSet<u64>>, root: u64) {
// For every slot `s` in this cache there must exist some bank `b` in BankForks with
// `b.slot() == s`, and because `ancestors` has an entry for every bank in BankForks,
// there must be an entry in `ancestors` for every slot in `self.confidence`
self.confidence
.retain(|slot, _| slot == &root || ancestors[&slot].contains(&root));
}
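Not part of the diff: a small self-contained sketch of what this retain predicate does, with made-up slots and plain integers standing in for the real confidence entries. The root and every slot whose pruned ancestor set contains the root are kept; a fork that branched off below the root is dropped.

use std::collections::{HashMap, HashSet};

fn main() {
    let root = 3u64;
    // Hypothetical pruned ancestors for banks still in BankForks (only slots >= root survive).
    let mut ancestors: HashMap<u64, HashSet<u64>> = HashMap::new();
    ancestors.insert(3, HashSet::new());                    // the root bank
    ancestors.insert(4, [3u64].iter().cloned().collect());  // descends from the root
    ancestors.insert(5, HashSet::new());                    // fork that branched off below the root

    // Stand-in for self.confidence: one entry per slot above.
    let mut confidence: HashMap<u64, u64> = [3u64, 4, 5].iter().map(|s| (*s, 1)).collect();

    // Same shape as prune_confidence_cache: keep the root and its descendants.
    confidence.retain(|slot, _| slot == &root || ancestors[slot].contains(&root));

    assert!(confidence.contains_key(&3) && confidence.contains_key(&4));
    assert!(!confidence.contains_key(&5));
}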


@@ -275,7 +275,9 @@ impl Tower
if let Some(root) = lockouts.root_slot {
// This case should never happen because bank forks purges all
// non-descendants of the root every time root is set
assert!(ancestors[&slot].contains(&root));
if slot != root {
assert!(ancestors[&slot].contains(&root));
}
}
false
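Not part of the diff, and the reasoning here is inferred from the ancestors change above rather than stated in the commit: once ancestors are filtered at the root, the root's own entry is an empty set, so ancestors[&root].contains(&root) is false and an unguarded assertion would trip when slot == root. A tiny sketch of the guarded check:

use std::collections::{HashMap, HashSet};

fn main() {
    let root = 3u64;
    // Hypothetical pruned ancestors map: the root maps to an empty set.
    let mut ancestors: HashMap<u64, HashSet<u64>> = HashMap::new();
    ancestors.insert(root, HashSet::new());
    ancestors.insert(4, [root].iter().cloned().collect());

    for slot in &[root, 4] {
        // Mirrors the guarded assertion above: only non-root slots are checked.
        if *slot != root {
            assert!(ancestors[slot].contains(&root));
        }
    }
}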
@@ -328,8 +330,15 @@
vote: &Lockout,
ancestors: &HashMap<u64, HashSet<u64>>,
) {
// If there are no ancestors, this slot must be from before the current root, in
// which case the lockouts won't be calculated in bank_weight anyway, so ignore
// this slot
let vote_slot_ancestors = ancestors.get(&vote.slot);
if vote_slot_ancestors.is_none() {
return;
}
let mut slot_with_ancestors = vec![vote.slot];
slot_with_ancestors.extend(ancestors.get(&vote.slot).unwrap_or(&HashSet::new()));
slot_with_ancestors.extend(vote_slot_ancestors.unwrap());
for slot in slot_with_ancestors {
let entry = &mut stake_lockouts.entry(slot).or_default();
entry.lockout += vote.lockout();
@@ -344,8 +353,15 @@
lamports: u64,
ancestors: &HashMap<u64, HashSet<u64>>,
) {
// If there are no ancestors, this slot must be from before the current root, in
// which case the lockouts won't be calculated in bank_weight anyway, so ignore
// this slot
let vote_slot_ancestors = ancestors.get(&slot);
if vote_slot_ancestors.is_none() {
return;
}
let mut slot_with_ancestors = vec![slot];
slot_with_ancestors.extend(ancestors.get(&slot).unwrap_or(&HashSet::new()));
slot_with_ancestors.extend(vote_slot_ancestors.unwrap());
for slot in slot_with_ancestors {
let entry = &mut stake_lockouts.entry(slot).or_default();
entry.stake += lamports;
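Not part of the diff: a standalone sketch of the early-return pattern shared by both functions above, with a hypothetical accumulate_stake helper and made-up slots. A vote on a slot below the root has no entry in the pruned ancestors map, so it is skipped rather than panicking on an unwrap.

use std::collections::{HashMap, HashSet};

// Hypothetical helper (not the Tower API): add `lamports` to the vote slot and each of
// its ancestors, skipping slots that are below the root and therefore absent from the map.
fn accumulate_stake(
    stake_lockouts: &mut HashMap<u64, u64>,
    vote_slot: u64,
    lamports: u64,
    ancestors: &HashMap<u64, HashSet<u64>>,
) {
    let vote_slot_ancestors = match ancestors.get(&vote_slot) {
        Some(a) => a,
        None => return, // pre-root slot: it doesn't contribute to bank_weight anyway
    };
    let mut slot_with_ancestors = vec![vote_slot];
    slot_with_ancestors.extend(vote_slot_ancestors);
    for slot in slot_with_ancestors {
        *stake_lockouts.entry(slot).or_default() += lamports;
    }
}

fn main() {
    let mut stakes: HashMap<u64, u64> = HashMap::new();
    let mut ancestors: HashMap<u64, HashSet<u64>> = HashMap::new();
    ancestors.insert(4, [3u64].iter().cloned().collect());

    accumulate_stake(&mut stakes, 4, 10, &ancestors); // counted for slot 4 and its ancestor 3
    accumulate_stake(&mut stakes, 1, 10, &ancestors); // slot below the root: ignored

    assert_eq!(stakes.get(&4), Some(&10));
    assert_eq!(stakes.get(&3), Some(&10));
    assert_eq!(stakes.get(&1), None);
}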
@@ -381,9 +397,12 @@
vote_account_pubkey: &Pubkey,
) {
if let Some(bank) = self.find_heaviest_bank(bank_forks) {
let root = bank_forks.root();
if let Some((_stake, vote_account)) = bank.vote_accounts().get(vote_account_pubkey) {
let vote_state = VoteState::deserialize(&vote_account.data)
let mut vote_state = VoteState::deserialize(&vote_account.data)
.expect("vote_account isn't a VoteState?");
vote_state.root_slot = Some(root);
vote_state.votes.retain(|v| v.slot > root);
trace!(
"{} lockouts initialized to {:?}",
self.node_pubkey,

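Not part of the diff: a standalone sketch of the clamping added above, using a simplified stand-in struct rather than the real VoteState type. After deserializing lockouts from the snapshot's vote account, the node adopts the bank_forks root and drops any votes at or below it.

// Simplified, hypothetical stand-ins for illustration only.
#[derive(Debug)]
struct Vote {
    slot: u64,
}

#[derive(Debug)]
struct VoteStateSketch {
    root_slot: Option<u64>,
    votes: Vec<Vote>,
}

fn main() {
    let root = 10u64;
    let mut vote_state = VoteStateSketch {
        root_slot: Some(5),
        votes: vec![Vote { slot: 7 }, Vote { slot: 9 }, Vote { slot: 12 }],
    };

    // Same shape as the change above: adopt the bank_forks root, drop votes at or below it.
    vote_state.root_slot = Some(root);
    vote_state.votes.retain(|v| v.slot > root);

    assert_eq!(vote_state.root_slot, Some(10));
    assert_eq!(vote_state.votes.len(), 1);
    println!("{:?}", vote_state);
}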

@@ -16,7 +16,7 @@ use solana_runtime::{
epoch_schedule::{EpochSchedule, MINIMUM_SLOTS_PER_EPOCH},
};
use solana_sdk::{client::SyncClient, clock, poh_config::PohConfig};
use std::path::PathBuf;
use std::path::{Path, PathBuf};
use std::{
collections::{HashMap, HashSet},
fs,
@@ -301,7 +301,7 @@ fn test_listener_startup() {
assert_eq!(cluster_nodes.len(), 4);
}
/*#[allow(unused_attributes)]
#[allow(unused_attributes)]
#[test]
#[serial]
fn test_snapshot_restart_locktower() {
@@ -314,17 +314,13 @@ fn test_snapshot_restart_locktower() {
let validator_snapshot_test_config =
setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
let snapshot_package_output_path = &leader_snapshot_test_config
.validator_config
.snapshot_config
.as_ref()
.unwrap()
.snapshot_package_output_path;
let config = ClusterConfig {
node_stakes: vec![10000],
node_stakes: vec![10000, 10],
cluster_lamports: 100000,
validator_configs: vec![leader_snapshot_test_config.validator_config.clone()],
validator_configs: vec![
leader_snapshot_test_config.validator_config.clone(),
validator_snapshot_test_config.validator_config.clone(),
],
..ClusterConfig::default()
};
@@ -332,65 +328,44 @@ fn test_snapshot_restart_locktower() {
// Let the nodes run for a while, then stop one of the validators
sleep(Duration::from_millis(5000));
cluster_tests::fullnode_exit(&local.entry_point_info, num_nodes);
trace!("Waiting for snapshot tar to be generated with slot",);
let tar = snapshot_utils::get_snapshot_tar_path(&snapshot_package_output_path);
loop {
if tar.exists() {
trace!("snapshot tar exists");
break;
}
sleep(Duration::from_millis(5000));
}
// Copy tar to validator's snapshot output directory
let validator_tar_path =
snapshot_utils::get_snapshot_tar_path(&validator_snapshot_test_config.snapshot_output_path);
fs::hard_link(tar, &validator_tar_path).unwrap();
let slot_floor = snapshot_utils::bank_slot_from_archive(&validator_tar_path).unwrap();
// Start up a new node from a snapshot
let validator_stake = 5;
cluster.add_validator(
&validator_snapshot_test_config.validator_config,
validator_stake,
);
let all_pubkeys = cluster.get_node_pubkeys();
let validator_id = all_pubkeys
.into_iter()
.find(|x| *x != cluster.entry_point_info.id)
.unwrap();
let validator_client = cluster.get_validator_client(&validator_id).unwrap();
let mut current_slot = 0;
let validator_info = cluster.exit_node(&validator_id);
// Let this validator run a while with repair
let target_slot = slot_floor + 40;
while current_slot <= target_slot {
trace!("current_slot: {}", current_slot);
if let Ok(slot) = validator_client.get_slot() {
current_slot = slot;
} else {
continue;
}
sleep(Duration::from_secs(1));
}
// Get slot after which this was generated
let snapshot_package_output_path = &leader_snapshot_test_config
.validator_config
.snapshot_config
.as_ref()
.unwrap()
.snapshot_package_output_path;
let tar = snapshot_utils::get_snapshot_tar_path(&snapshot_package_output_path);
wait_for_next_snapshot(&cluster, &tar);
// Check the validator ledger doesn't contain any slots < slot_floor
cluster.close_preserve_ledgers();
let validator_ledger_path = &cluster.fullnode_infos[&validator_id];
let blocktree = Blocktree::open(&validator_ledger_path.info.ledger_path).unwrap();
// Copy tar to validator's snapshot output directory
let validator_tar_path =
snapshot_utils::get_snapshot_tar_path(&validator_snapshot_test_config.snapshot_output_path);
fs::hard_link(tar, &validator_tar_path).unwrap();
// Skip the zeroth slot in blocktree that the ledger is initialized with
let (first_slot, _) = blocktree.slot_meta_iterator(1).unwrap().next().unwrap();
// Restart validator from snapshot, the validator's locktower state in this snapshot
// will contain slots < the root bank of the snapshot. Validator should not panic.
cluster.restart_node(&validator_id, validator_info);
assert_eq!(first_slot, slot_floor);
}*/
// Test cluster can still make progress and get confirmations in locktower
cluster_tests::spend_and_verify_all_nodes(
&cluster.entry_point_info,
&cluster.funding_keypair,
1,
HashSet::new(),
);
}
#[allow(unused_attributes)]
#[test]
#[serial]
#[ignore]
fn test_snapshots_blocktree_floor() {
// First set up the cluster with 1 snapshotting leader
let snapshot_interval_slots = 10;
@@ -519,30 +494,8 @@ fn test_snapshots_restart_validity() {
expected_balances.extend(new_balances);
// Get slot after which this was generated
let client = cluster
.get_validator_client(&cluster.entry_point_info.id)
.unwrap();
let last_slot = client.get_slot().expect("Couldn't get slot");
// Wait for a snapshot for a bank >= last_slot to be made so we know that the snapshot
// must include the transactions just pushed
let tar = snapshot_utils::get_snapshot_tar_path(&snapshot_package_output_path);
trace!(
"Waiting for snapshot tar to be generated with slot > {}",
last_slot
);
loop {
if tar.exists() {
trace!("snapshot tar exists");
let slot = snapshot_utils::bank_slot_from_archive(&tar).unwrap();
if slot >= last_slot {
break;
}
trace!("snapshot tar slot {} < last_slot {}", slot, last_slot);
}
sleep(Duration::from_millis(5000));
}
wait_for_next_snapshot(&cluster, &tar);
// Create new account paths since fullnode exit is not guaranteed to cleanup RPC threads,
// which may delete the old accounts on exit at any point
@@ -551,7 +504,7 @@ fn test_snapshots_restart_validity() {
all_account_storage_dirs.push(new_account_storage_dirs);
snapshot_test_config.validator_config.account_paths = Some(new_account_storage_paths);
// Restart a node
// Restart node
trace!("Restarting cluster from snapshot");
let nodes = cluster.get_node_pubkeys();
cluster.exit_restart_node(&nodes[0], snapshot_test_config.validator_config.clone());
@@ -735,6 +688,32 @@ fn run_repairman_catchup(num_repairmen: u64) {
}
}
fn wait_for_next_snapshot<P: AsRef<Path>>(cluster: &LocalCluster, tar: P) {
// Get slot after which this was generated
let client = cluster
.get_validator_client(&cluster.entry_point_info.id)
.unwrap();
let last_slot = client.get_slot().expect("Couldn't get slot");
// Wait for a snapshot for a bank >= last_slot to be made so we know that the snapshot
// must include the transactions just pushed
trace!(
"Waiting for snapshot tar to be generated with slot > {}",
last_slot
);
loop {
if tar.as_ref().exists() {
trace!("snapshot tar exists");
let slot = snapshot_utils::bank_slot_from_archive(&tar).unwrap();
if slot >= last_slot {
break;
}
trace!("snapshot tar slot {} < last_slot {}", slot, last_slot);
}
sleep(Duration::from_millis(5000));
}
}
fn generate_account_paths(num_account_paths: usize) -> (Vec<TempDir>, String) {
let account_storage_dirs: Vec<TempDir> = (0..num_account_paths)
.map(|_| TempDir::new().unwrap())