Enable more fine-grained control in partition tests (#10418)

Co-authored-by: Carl <carl@solana.com>
carllin authored 2020-06-04 23:32:53 -07:00 (committed by GitHub)
parent aa6832964c
commit db82d9e914
3 changed files with 80 additions and 71 deletions
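Before this change, `run_cluster_partition` encoded per-node behavior as `(stake_weight, should_exit)` tuples and hardcoded the post-partition checks. It now takes plain stake weights plus two `Fn(&mut LocalCluster)` callbacks, `on_partition_start` and `on_partition_resolved`, so each test decides what happens at those two points. A minimal sketch of the new call shape, assuming only the signatures introduced in the diffs below (the `"MY_TEST"` label is an illustrative placeholder, not from this commit):

    // Sketch only: call shapes taken from this commit's diff, not code from it.
    let on_partition_start = |_cluster: &mut LocalCluster| {
        // nothing to do while the partition is active
    };
    let on_partition_resolved = |cluster: &mut LocalCluster| {
        // e.g. kill a node here first, then verify the cluster still makes roots;
        // the label is used to prefix the progress log lines
        cluster.check_for_new_roots(16, &"MY_TEST");
    };
    run_cluster_partition(&[&[1], &[1, 1]], None, on_partition_start, on_partition_resolved);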


@@ -284,7 +284,7 @@ pub fn kill_entry_and_spend_and_verify_rest(
     }
 }
 
-pub fn check_for_new_roots(num_new_roots: usize, contact_infos: &[ContactInfo]) {
+pub fn check_for_new_roots(num_new_roots: usize, contact_infos: &[ContactInfo], test_name: &str) {
     let mut roots = vec![HashSet::new(); contact_infos.len()];
     let mut done = false;
     let mut last_print = Instant::now();
@@ -295,7 +295,7 @@ pub fn check_for_new_roots(num_new_roots: usize, contact_infos: &[ContactInfo])
             roots[i].insert(slot);
             let min_node = roots.iter().map(|r| r.len()).min().unwrap_or(0);
             if last_print.elapsed().as_secs() > 3 {
-                info!("PARTITION_TEST min observed roots {}/16", min_node);
+                info!("{} min observed roots {}/16", test_name, min_node);
                 last_print = Instant::now();
             }
             done = min_node >= num_new_roots;


@@ -1,4 +1,7 @@
-use crate::cluster::{Cluster, ClusterValidatorInfo, ValidatorInfo};
+use crate::{
+    cluster::{Cluster, ClusterValidatorInfo, ValidatorInfo},
+    cluster_tests,
+};
 use itertools::izip;
 use log::*;
 use solana_client::thin_client::{create_client, ThinClient};
@@ -137,7 +140,7 @@ impl LocalCluster {
             OperatingMode::Stable | OperatingMode::Preview => {
                 genesis_config.native_instruction_processors =
                     solana_genesis_programs::get_programs(genesis_config.operating_mode, 0)
-                        .unwrap_or_else(|| vec![])
+                        .unwrap_or_default()
             }
             _ => (),
         }
@@ -336,6 +339,25 @@ impl LocalCluster {
         Self::transfer_with_client(&client, source_keypair, dest_pubkey, lamports)
     }
 
+    pub fn check_for_new_roots(&self, num_new_roots: usize, test_name: &str) {
+        let alive_node_contact_infos: Vec<_> = self
+            .validators
+            .values()
+            .map(|v| v.info.contact_info.clone())
+            .collect();
+        assert!(!alive_node_contact_infos.is_empty());
+        info!("{} discovering nodes", test_name);
+        let cluster_nodes = discover_cluster(
+            &alive_node_contact_infos[0].gossip,
+            alive_node_contact_infos.len(),
+        )
+        .unwrap();
+        info!("{} discovered {} nodes", test_name, cluster_nodes.len());
+        info!("{} looking for new roots on all nodes", test_name);
+        cluster_tests::check_for_new_roots(num_new_roots, &alive_node_contact_infos, test_name);
+        info!("{} done waiting for roots", test_name);
+    }
+
     fn transfer_with_client(
         client: &ThinClient,
         source_keypair: &Keypair,

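The new `LocalCluster::check_for_new_roots` method above wraps the node discovery and root-watching that each test previously inlined. A hedged usage sketch, assuming only that method (the count and label are illustrative):

    // Sketch: after a disruption, assert every live validator roots 16 new slots.
    // `cluster` is a LocalCluster; the &str label prefixes the progress logs.
    cluster.check_for_new_roots(16, &"MY_TEST");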

@@ -207,16 +207,21 @@ fn test_leader_failure_4() {
 /// * `leader_schedule` - An option that specifies whether the cluster should
 ///   run with a fixed, predetermined leader schedule
 #[allow(clippy::cognitive_complexity)]
-fn run_cluster_partition(
-    partitions: &[&[(usize, bool)]],
+fn run_cluster_partition<E, F>(
+    partitions: &[&[usize]],
     leader_schedule: Option<(LeaderSchedule, Vec<Arc<Keypair>>)>,
-) {
+    on_partition_start: E,
+    on_partition_resolved: F,
+) where
+    E: Fn(&mut LocalCluster) -> (),
+    F: Fn(&mut LocalCluster) -> (),
+{
     solana_logger::setup();
     info!("PARTITION_TEST!");
     let num_nodes = partitions.len();
     let node_stakes: Vec<_> = partitions
         .iter()
-        .flat_map(|p| p.iter().map(|(stake_weight, _)| 100 * *stake_weight as u64))
+        .flat_map(|p| p.iter().map(|stake_weight| 100 * *stake_weight as u64))
         .collect();
     assert_eq!(node_stakes.len(), num_nodes);
     let cluster_lamports = node_stakes.iter().sum::<u64>() * 2;
@@ -226,7 +231,7 @@ fn run_cluster_partition(
     validator_config.enable_partition = Some(enable_partition.clone());
 
     // Returns:
-    // 1) The keys for the validiators
+    // 1) The keys for the validators
     // 2) The amount of time it would take to iterate through one full iteration of the given
     //    leader schedule
     let (validator_keys, leader_schedule_time): (Vec<_>, u64) = {
@@ -252,7 +257,6 @@ fn run_cluster_partition(
         }
     };
 
-    let validator_pubkeys: Vec<_> = validator_keys.iter().map(|v| v.pubkey()).collect();
     let config = ClusterConfig {
         cluster_lamports,
         node_stakes,
@@ -287,7 +291,8 @@ fn run_cluster_partition(
         if reached_epoch {
             info!("PARTITION_TEST start partition");
-            enable_partition.clone().store(false, Ordering::Relaxed);
+            enable_partition.store(false, Ordering::Relaxed);
+            on_partition_start(&mut cluster);
             break;
         } else {
             sleep(Duration::from_millis(100));
@@ -298,56 +303,22 @@ fn run_cluster_partition(
     info!("PARTITION_TEST remove partition");
     enable_partition.store(true, Ordering::Relaxed);
 
-    let mut dead_nodes = HashSet::new();
-    let mut alive_node_contact_infos = vec![];
-    let should_exits: Vec<_> = partitions
-        .iter()
-        .flat_map(|p| p.iter().map(|(_, should_exit)| should_exit))
-        .collect();
-    assert_eq!(should_exits.len(), validator_pubkeys.len());
-
-    let timeout = 10;
-    if timeout > 0 {
-        // Give partitions time to propagate their blocks from during the partition
-        // after the partition resolves
-        let propagation_time = leader_schedule_time;
-        info!("PARTITION_TEST resolving partition. sleeping {}ms", timeout);
-        sleep(Duration::from_millis(10_000));
-        info!(
-            "PARTITION_TEST waiting for blocks to propagate after partition {}ms",
-            propagation_time
-        );
-        sleep(Duration::from_millis(propagation_time));
-        info!("PARTITION_TEST resuming normal operation");
-        for (pubkey, should_exit) in validator_pubkeys.iter().zip(should_exits) {
-            if *should_exit {
-                info!("Killing validator with id: {}", pubkey);
-                cluster.exit_node(pubkey);
-                dead_nodes.insert(*pubkey);
-            } else {
-                alive_node_contact_infos.push(
-                    cluster
-                        .validators
-                        .get(pubkey)
-                        .unwrap()
-                        .info
-                        .contact_info
-                        .clone(),
-                );
-            }
-        }
-    }
-
-    assert_eq!(alive_node_contact_infos.is_empty(), false);
-    info!("PARTITION_TEST discovering nodes");
-    let cluster_nodes = discover_cluster(
-        &alive_node_contact_infos[0].gossip,
-        alive_node_contact_infos.len(),
-    )
-    .unwrap();
-    info!("PARTITION_TEST discovered {} nodes", cluster_nodes.len());
-    info!("PARTITION_TEST looking for new roots on all nodes");
-    cluster_tests::check_for_new_roots(16, &alive_node_contact_infos);
-    info!("PARTITION_TEST done waiting for roots");
+    // Give partitions time to propagate their blocks from during the partition
+    // after the partition resolves
+    let timeout = 10_000;
+    let propagation_time = leader_schedule_time;
+    info!(
+        "PARTITION_TEST resolving partition. sleeping {} ms",
+        timeout
+    );
+    sleep(Duration::from_millis(timeout));
+    info!(
+        "PARTITION_TEST waiting for blocks to propagate after partition {}ms",
+        propagation_time
+    );
+    sleep(Duration::from_millis(propagation_time));
+    info!("PARTITION_TEST resuming normal operation");
+    on_partition_resolved(&mut cluster);
 }
 
 #[allow(unused_attributes)]
@@ -355,7 +326,11 @@ fn run_cluster_partition(
 #[test]
 #[serial]
 fn test_cluster_partition_1_2() {
-    run_cluster_partition(&[&[(1, false)], &[(1, false), (1, false)]], None)
+    let empty = |_: &mut LocalCluster| {};
+    let on_partition_resolved = |cluster: &mut LocalCluster| {
+        cluster.check_for_new_roots(16, &"PARTITION_TEST");
+    };
+    run_cluster_partition(&[&[1], &[1, 1]], None, empty, on_partition_resolved)
 }
 
 #[allow(unused_attributes)]
@@ -363,13 +338,21 @@ fn test_cluster_partition_1_2() {
 #[test]
 #[serial]
 fn test_cluster_partition_1_1() {
-    run_cluster_partition(&[&[(1, false)], &[(1, false)]], None)
+    let empty = |_: &mut LocalCluster| {};
+    let on_partition_resolved = |cluster: &mut LocalCluster| {
+        cluster.check_for_new_roots(16, &"PARTITION_TEST");
+    };
+    run_cluster_partition(&[&[1], &[1]], None, empty, on_partition_resolved)
 }
 
 #[test]
 #[serial]
 fn test_cluster_partition_1_1_1() {
-    run_cluster_partition(&[&[(1, false)], &[(1, false)], &[(1, false)]], None)
+    let empty = |_: &mut LocalCluster| {};
+    let on_partition_resolved = |cluster: &mut LocalCluster| {
+        cluster.check_for_new_roots(16, &"PARTITION_TEST");
+    };
+    run_cluster_partition(&[&[1], &[1], &[1]], None, empty, on_partition_resolved)
 }
 
 #[test]
@@ -387,7 +370,7 @@ fn test_kill_partition() {
     // 5) Check for recovery
     let mut leader_schedule = vec![];
     let num_slots_per_validator = 8;
-    let partitions: [&[(usize, bool)]; 3] = [&[(9, true)], &[(10, false)], &[(10, false)]];
+    let partitions: [&[usize]; 3] = [&[9], &[10], &[10]];
     let validator_keys: Vec<_> = iter::repeat_with(|| Arc::new(Keypair::new()))
         .take(partitions.len())
         .collect();
@@ -406,12 +389,21 @@ fn test_kill_partition() {
     }
 
     info!("leader_schedule: {}", leader_schedule.len());
+    let empty = |_: &mut LocalCluster| {};
+    let validator_to_kill = validator_keys[0].pubkey();
+    let on_partition_resolved = |cluster: &mut LocalCluster| {
+        info!("Killing validator with id: {}", validator_to_kill);
+        cluster.exit_node(&validator_to_kill);
+        cluster.check_for_new_roots(16, &"PARTITION_TEST");
+    };
     run_cluster_partition(
         &partitions,
         Some((
             LeaderSchedule::new_from_schedule(leader_schedule),
             validator_keys,
         )),
+        empty,
+        on_partition_resolved,
     )
 }
@@ -1113,12 +1105,7 @@ fn test_faulty_node(faulty_node_type: BroadcastStageType) {
     let cluster = LocalCluster::new(&cluster_config);
 
     // Check for new roots
-    let alive_node_contact_infos: Vec<_> = cluster
-        .validators
-        .values()
-        .map(|v| v.info.contact_info.clone())
-        .collect();
-    cluster_tests::check_for_new_roots(16, &alive_node_contact_infos);
+    cluster.check_for_new_roots(16, &"test_faulty_node");
 }
 
 #[test]