Refactor restart function in local cluster to support separate exit and restart functions (#5845)

carllin authored 2019-09-08 17:53:34 -07:00 (committed by GitHub)
parent b35c022629
commit 7607800d47
6 changed files with 158 additions and 53 deletions

core/src/cluster.rs (deleted)

@@ -1,9 +0,0 @@
-use crate::validator::ValidatorConfig;
-use solana_client::thin_client::ThinClient;
-use solana_sdk::pubkey::Pubkey;
-
-pub trait Cluster {
-    fn get_node_pubkeys(&self) -> Vec<Pubkey>;
-    fn get_validator_client(&self, pubkey: &Pubkey) -> Option<ThinClient>;
-    fn restart_node(&mut self, pubkey: Pubkey, config: &ValidatorConfig);
-}

core/src/lib.rs

@@ -28,7 +28,6 @@ pub mod blocktree;
 pub mod blockstream;
 pub mod blockstream_service;
 pub mod blocktree_processor;
-pub mod cluster;
 pub mod cluster_info;
 pub mod cluster_info_repair_listener;
 pub mod consensus;

local_cluster/src/cluster.rs (new)

@@ -0,0 +1,37 @@
+use solana_client::thin_client::ThinClient;
+use solana_core::contact_info::ContactInfo;
+use solana_core::validator::ValidatorConfig;
+use solana_sdk::pubkey::Pubkey;
+use solana_sdk::signature::Keypair;
+use std::path::PathBuf;
+use std::sync::Arc;
+
+pub struct ValidatorInfo {
+    pub keypair: Arc<Keypair>,
+    pub voting_keypair: Arc<Keypair>,
+    pub storage_keypair: Arc<Keypair>,
+    pub ledger_path: PathBuf,
+    pub contact_info: ContactInfo,
+}
+
+pub struct ClusterValidatorInfo {
+    pub info: ValidatorInfo,
+    pub config: ValidatorConfig,
+}
+
+impl ClusterValidatorInfo {
+    pub fn new(validator_info: ValidatorInfo, config: ValidatorConfig) -> Self {
+        Self {
+            info: validator_info,
+            config,
+        }
+    }
+}
+
+pub trait Cluster {
+    fn get_node_pubkeys(&self) -> Vec<Pubkey>;
+    fn get_validator_client(&self, pubkey: &Pubkey) -> Option<ThinClient>;
+    fn exit_node(&mut self, pubkey: &Pubkey) -> ClusterValidatorInfo;
+    fn restart_node(&mut self, pubkey: &Pubkey, cluster_validator_info: ClusterValidatorInfo);
+    fn exit_restart_node(&mut self, pubkey: &Pubkey, config: ValidatorConfig);
+}

local_cluster/src/lib.rs

@@ -1,3 +1,4 @@
+pub mod cluster;
 pub mod cluster_tests;
 pub mod local_cluster;

local_cluster/src/local_cluster.rs

@@ -1,7 +1,7 @@
+use crate::cluster::{Cluster, ClusterValidatorInfo, ValidatorInfo};
 use solana_client::thin_client::{create_client, ThinClient};
 use solana_core::{
     blocktree::create_new_tmp_ledger,
-    cluster::Cluster,
     cluster_info::{Node, FULLNODE_PORT_RANGE},
     contact_info::ContactInfo,
     genesis_utils::{create_genesis_block_with_leader, GenesisBlockInfo},
@@ -33,14 +33,6 @@ use std::{
     sync::Arc,
 };
 
-pub struct ValidatorInfo {
-    pub keypair: Arc<Keypair>,
-    pub voting_keypair: Arc<Keypair>,
-    pub storage_keypair: Arc<Keypair>,
-    pub ledger_path: PathBuf,
-    pub contact_info: ContactInfo,
-}
-
 pub struct ReplicatorInfo {
     pub replicator_storage_pubkey: Pubkey,
     pub ledger_path: PathBuf,
@@ -55,20 +47,6 @@ impl ReplicatorInfo {
     }
 }
 
-pub struct ClusterValidatorInfo {
-    pub info: ValidatorInfo,
-    pub config: ValidatorConfig,
-}
-
-impl ClusterValidatorInfo {
-    pub fn new(validator_info: ValidatorInfo, config: ValidatorConfig) -> Self {
-        Self {
-            info: validator_info,
-            config,
-        }
-    }
-}
-
 #[derive(Clone, Debug)]
 pub struct ClusterConfig {
     /// The fullnode config that should be applied to every node in the cluster
@@ -585,27 +563,32 @@ impl Cluster for LocalCluster {
         })
     }
 
-    fn restart_node(&mut self, pubkey: Pubkey, config: &ValidatorConfig) {
-        // Shut down the fullnode
+    fn exit_node(&mut self, pubkey: &Pubkey) -> ClusterValidatorInfo {
         let mut node = self.fullnodes.remove(&pubkey).unwrap();
+
+        // Shut down the fullnode
         node.exit();
         node.join().unwrap();
+
+        self.fullnode_infos.remove(&pubkey).unwrap()
+    }
+
+    fn restart_node(&mut self, pubkey: &Pubkey, mut cluster_validator_info: ClusterValidatorInfo) {
         // Update the stored ContactInfo for this node
-        let node_pubkey = &self.fullnode_infos[&pubkey].info.keypair.pubkey();
-        let node = Node::new_localhost_with_pubkey(&node_pubkey);
-        self.fullnode_infos
-            .get_mut(&pubkey)
-            .unwrap()
-            .info
-            .contact_info = node.info.clone();
-        if pubkey == self.entry_point_info.id {
-            self.entry_point_info = node.info.clone();
-        }
+        let node = Node::new_localhost_with_pubkey(&pubkey);
+        cluster_validator_info.info.contact_info = node.info.clone();
+
+        let entry_point_info = {
+            if *pubkey == self.entry_point_info.id {
+                self.entry_point_info = node.info.clone();
+                None
+            } else {
+                Some(&self.entry_point_info)
+            }
+        };
 
         // Restart the node
-        self.fullnode_infos.get_mut(&pubkey).unwrap().config = config.clone();
-        let fullnode_info = &self.fullnode_infos[&pubkey].info;
+        let fullnode_info = &cluster_validator_info.info;
 
         let restarted_node = Validator::new(
             node,
@@ -614,12 +597,19 @@ impl Cluster for LocalCluster {
             &fullnode_info.voting_keypair.pubkey(),
             &fullnode_info.voting_keypair,
             &fullnode_info.storage_keypair,
-            None,
+            entry_point_info,
             true,
-            config,
+            &cluster_validator_info.config,
         );
 
-        self.fullnodes.insert(pubkey, restarted_node);
+        self.fullnodes.insert(*pubkey, restarted_node);
+        self.fullnode_infos.insert(*pubkey, cluster_validator_info);
+    }
+
+    fn exit_restart_node(&mut self, pubkey: &Pubkey, validator_config: ValidatorConfig) {
+        let mut cluster_validator_info = self.exit_node(pubkey);
+        cluster_validator_info.config = validator_config;
+        self.restart_node(pubkey, cluster_validator_info);
+    }
 }
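One thing the split enables, suggested by the commented-out snapshot test further below: a test can do work against a node's ledger or snapshot directory while the node is down, then hand its ClusterValidatorInfo back to restart_node. A rough sketch under that assumption (this function and its use of Blocktree are illustrative, not part of this commit):

use solana_core::blocktree::Blocktree;
use solana_local_cluster::{cluster::Cluster, local_cluster::LocalCluster};
use solana_sdk::pubkey::Pubkey;

// Illustrative only: exit a node, inspect its ledger while it is offline,
// then restart it with the same identity and config.
fn inspect_ledger_then_restart(cluster: &mut LocalCluster, pubkey: &Pubkey) {
    let info = cluster.exit_node(pubkey);
    {
        // Safe to open now that the node has released the ledger.
        let blocktree = Blocktree::open(&info.info.ledger_path).unwrap();
        let slots = blocktree.slot_meta_iterator(0).unwrap().count();
        println!("node {} has {} slots in its ledger", pubkey, slots);
    }
    cluster.restart_node(pubkey, info);
}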

local_cluster/tests/local_cluster.rs

@@ -4,8 +4,9 @@ use log::*;
 use serial_test_derive::serial;
 use solana_core::{
     bank_forks::SnapshotConfig, blocktree::Blocktree, broadcast_stage::BroadcastStageType,
-    cluster::Cluster, gossip_service::discover_cluster, snapshot_utils, validator::ValidatorConfig,
+    gossip_service::discover_cluster, snapshot_utils, validator::ValidatorConfig,
 };
+use solana_local_cluster::cluster::Cluster;
 use solana_local_cluster::{
     cluster_tests,
     local_cluster::{ClusterConfig, LocalCluster},
@@ -270,7 +271,7 @@ fn test_restart_node() {
         clock::DEFAULT_TICKS_PER_SLOT,
         slots_per_epoch,
     );
-    cluster.restart_node(nodes[0], &validator_config);
+    cluster.exit_restart_node(&nodes[0], validator_config);
     cluster_tests::sleep_n_epochs(
         0.5,
         &cluster.genesis_block.poh_config,
@@ -300,6 +301,92 @@ fn test_listener_startup() {
     assert_eq!(cluster_nodes.len(), 4);
 }
 
+/*#[allow(unused_attributes)]
+#[test]
+#[serial]
+fn test_snapshot_restart_locktower() {
+    // First set up the cluster with 1 node
+    let snapshot_interval_slots = 10;
+    let num_account_paths = 4;
+
+    let leader_snapshot_test_config =
+        setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
+    let validator_snapshot_test_config =
+        setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
+
+    let snapshot_package_output_path = &leader_snapshot_test_config
+        .validator_config
+        .snapshot_config
+        .as_ref()
+        .unwrap()
+        .snapshot_package_output_path;
+
+    let config = ClusterConfig {
+        node_stakes: vec![10000],
+        cluster_lamports: 100000,
+        validator_configs: vec![leader_snapshot_test_config.validator_config.clone()],
+        ..ClusterConfig::default()
+    };
+
+    let mut cluster = LocalCluster::new(&config);
+
+    // Let the nodes run for a while, then stop one of the validators
+    sleep(Duration::from_millis(5000));
+    cluster_tests::fullnode_exit(&cluster.entry_point_info, 1);
+
+    trace!("Waiting for snapshot tar to be generated");
+    let tar = snapshot_utils::get_snapshot_tar_path(&snapshot_package_output_path);
+    loop {
+        if tar.exists() {
+            trace!("snapshot tar exists");
+            break;
+        }
+        sleep(Duration::from_millis(5000));
+    }
+
+    // Copy tar to validator's snapshot output directory
+    let validator_tar_path =
+        snapshot_utils::get_snapshot_tar_path(&validator_snapshot_test_config.snapshot_output_path);
+    fs::hard_link(tar, &validator_tar_path).unwrap();
+    let slot_floor = snapshot_utils::bank_slot_from_archive(&validator_tar_path).unwrap();
+
+    // Start up a new node from a snapshot
+    let validator_stake = 5;
+    cluster.add_validator(
+        &validator_snapshot_test_config.validator_config,
+        validator_stake,
+    );
+
+    let all_pubkeys = cluster.get_node_pubkeys();
+    let validator_id = all_pubkeys
+        .into_iter()
+        .find(|x| *x != cluster.entry_point_info.id)
+        .unwrap();
+    let validator_client = cluster.get_validator_client(&validator_id).unwrap();
+    let mut current_slot = 0;
+
+    // Let this validator run a while with repair
+    let target_slot = slot_floor + 40;
+    while current_slot <= target_slot {
+        trace!("current_slot: {}", current_slot);
+        if let Ok(slot) = validator_client.get_slot() {
+            current_slot = slot;
+        } else {
+            continue;
+        }
+        sleep(Duration::from_secs(1));
+    }
+
+    // Check the validator ledger doesn't contain any slots < slot_floor
+    cluster.close_preserve_ledgers();
+    let validator_ledger_path = &cluster.fullnode_infos[&validator_id];
+    let blocktree = Blocktree::open(&validator_ledger_path.info.ledger_path).unwrap();
+
+    // Skip the zeroth slot in blocktree that the ledger is initialized with
+    let (first_slot, _) = blocktree.slot_meta_iterator(1).unwrap().next().unwrap();
+    assert_eq!(first_slot, slot_floor);
+}*/
+
 #[allow(unused_attributes)]
 #[test]
 #[serial]
@@ -467,7 +554,7 @@ fn test_snapshots_restart_validity() {
     // Restart a node
     trace!("Restarting cluster from snapshot");
     let nodes = cluster.get_node_pubkeys();
-    cluster.restart_node(nodes[0], &snapshot_test_config.validator_config);
+    cluster.exit_restart_node(&nodes[0], snapshot_test_config.validator_config.clone());
 
     // Verify account balances on validator
     trace!("Verifying balances");