Fixup flaky tests (#21617)

* Fixup flaky tests

* Fixup listeners
This commit is contained in:
carllin 2021-12-06 17:14:38 -05:00 committed by GitHub
parent e123883b26
commit f493a88258
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 114 additions and 38 deletions

View File

@ -281,7 +281,7 @@ impl LocalCluster {
let mut listener_config = safe_clone_config(&config.validator_configs[0]); let mut listener_config = safe_clone_config(&config.validator_configs[0]);
listener_config.voting_disabled = true; listener_config.voting_disabled = true;
(0..config.num_listeners).for_each(|_| { (0..config.num_listeners).for_each(|_| {
cluster.add_validator( cluster.add_validator_listener(
&listener_config, &listener_config,
0, 0,
Arc::new(Keypair::new()), Arc::new(Keypair::new()),
@ -324,11 +324,50 @@ impl LocalCluster {
} }
} }
/// Set up validator without voting or staking accounts
/// (a non-voting "listener" node that joins the cluster but casts no votes).
///
/// Thin wrapper over `do_add_validator` with `is_listener = true`; when the
/// listener flag is set, the vote/stake account funding step is skipped, so
/// `stake` and `voting_keypair` are passed through but effectively unused
/// for listeners — TODO confirm against `do_add_validator`'s full body.
///
/// Returns the identity `Pubkey` of the newly added node.
pub fn add_validator_listener(
&mut self,
validator_config: &ValidatorConfig,
stake: u64,
validator_keypair: Arc<Keypair>,
voting_keypair: Option<Arc<Keypair>>,
socket_addr_space: SocketAddrSpace,
) -> Pubkey {
self.do_add_validator(
validator_config,
// is_listener: disables the vote/stake account setup path
true,
stake,
validator_keypair,
voting_keypair,
socket_addr_space,
)
}
/// Set up validator with voting and staking accounts
pub fn add_validator( pub fn add_validator(
&mut self, &mut self,
validator_config: &ValidatorConfig, validator_config: &ValidatorConfig,
stake: u64, stake: u64,
validator_keypair: Arc<Keypair>, validator_keypair: Arc<Keypair>,
voting_keypair: Option<Arc<Keypair>>,
socket_addr_space: SocketAddrSpace,
) -> Pubkey {
self.do_add_validator(
validator_config,
false,
stake,
validator_keypair,
voting_keypair,
socket_addr_space,
)
}
fn do_add_validator(
&mut self,
validator_config: &ValidatorConfig,
is_listener: bool,
stake: u64,
validator_keypair: Arc<Keypair>,
mut voting_keypair: Option<Arc<Keypair>>, mut voting_keypair: Option<Arc<Keypair>>,
socket_addr_space: SocketAddrSpace, socket_addr_space: SocketAddrSpace,
) -> Pubkey { ) -> Pubkey {
@ -347,30 +386,28 @@ impl LocalCluster {
let contact_info = validator_node.info.clone(); let contact_info = validator_node.info.clone();
let (ledger_path, _blockhash) = create_new_tmp_ledger!(&self.genesis_config); let (ledger_path, _blockhash) = create_new_tmp_ledger!(&self.genesis_config);
if validator_config.voting_disabled { // Give the validator some lamports to setup vote accounts
if is_listener {
// setup as a listener // setup as a listener
info!("listener {} ", validator_pubkey,); info!("listener {} ", validator_pubkey,);
} else { } else if should_create_vote_pubkey {
// Give the validator some lamports to setup vote accounts let validator_balance = Self::transfer_with_client(
if should_create_vote_pubkey { &client,
let validator_balance = Self::transfer_with_client( &self.funding_keypair,
&client, &validator_pubkey,
&self.funding_keypair, stake * 2 + 2,
&validator_pubkey, );
stake * 2 + 2, info!(
); "validator {} balance {}",
info!( validator_pubkey, validator_balance
"validator {} balance {}", );
validator_pubkey, validator_balance Self::setup_vote_and_stake_accounts(
); &client,
Self::setup_vote_and_stake_accounts( voting_keypair.as_ref().unwrap(),
&client, &validator_keypair,
voting_keypair.as_ref().unwrap(), stake,
&validator_keypair, )
stake, .unwrap();
)
.unwrap();
}
} }
let mut config = safe_clone_config(validator_config); let mut config = safe_clone_config(validator_config);

View File

@ -3190,13 +3190,27 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
let (validator_a_pubkey, validator_b_pubkey, validator_c_pubkey) = let (validator_a_pubkey, validator_b_pubkey, validator_c_pubkey) =
(validators[0], validators[1], validators[2]); (validators[0], validators[1], validators[2]);
// Disable voting on all validators other than validator B to ensure neither of the below two
// scenarios occur:
// 1. If the cluster immediately forks on restart while we're killing validators A and C,
// with Validator B on one side, and `A` and `C` on a heavier fork, it's possible that the lockouts
// on `A` and `C`'s latest votes do not extend past validator B's latest vote. Then validator B
// will be stuck unable to vote, but also unable to generate a switching proof to the heavier fork.
//
// 2. Validator A doesn't vote past `next_slot_on_a` before we can kill it. This is essential
// because if validator A votes past `next_slot_on_a`, and then we copy over validator B's ledger
// below only for slots <= `next_slot_on_a`, validator A will not know how its last vote chains
// to the other forks, and may violate switching proofs on restart.
let mut validator_configs =
make_identical_validator_configs(&ValidatorConfig::default(), node_stakes.len());
validator_configs[0].voting_disabled = true;
validator_configs[2].voting_disabled = true;
let mut config = ClusterConfig { let mut config = ClusterConfig {
cluster_lamports: 100_000, cluster_lamports: 100_000,
node_stakes: node_stakes.clone(), node_stakes,
validator_configs: make_identical_validator_configs( validator_configs,
&ValidatorConfig::default(),
node_stakes.len(),
),
validator_keys: Some(validator_keys), validator_keys: Some(validator_keys),
slots_per_epoch, slots_per_epoch,
stakers_slot_offset: slots_per_epoch, stakers_slot_offset: slots_per_epoch,
@ -3213,9 +3227,23 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
let val_b_ledger_path = cluster.ledger_path(&validator_b_pubkey); let val_b_ledger_path = cluster.ledger_path(&validator_b_pubkey);
let val_c_ledger_path = cluster.ledger_path(&validator_c_pubkey); let val_c_ledger_path = cluster.ledger_path(&validator_c_pubkey);
info!(
"val_a {} ledger path {:?}",
validator_a_pubkey, val_a_ledger_path
);
info!(
"val_b {} ledger path {:?}",
validator_b_pubkey, val_b_ledger_path
);
info!(
"val_c {} ledger path {:?}",
validator_c_pubkey, val_c_ledger_path
);
// Immediately kill validator A, and C // Immediately kill validator A, and C
let validator_a_info = cluster.exit_node(&validator_a_pubkey); info!("Exiting validators A and C");
let validator_c_info = cluster.exit_node(&validator_c_pubkey); let mut validator_a_info = cluster.exit_node(&validator_a_pubkey);
let mut validator_c_info = cluster.exit_node(&validator_c_pubkey);
// Step 1: // Step 1:
// Let validator B, (D) run for a while. // Let validator B, (D) run for a while.
@ -3224,7 +3252,8 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
let elapsed = now.elapsed(); let elapsed = now.elapsed();
assert!( assert!(
elapsed <= Duration::from_secs(30), elapsed <= Duration::from_secs(30),
"LocalCluster nodes failed to log enough tower votes in {} secs", "Validator B failed to vote on any slot >= {} in {} secs",
next_slot_on_a,
elapsed.as_secs() elapsed.as_secs()
); );
sleep(Duration::from_millis(100)); sleep(Duration::from_millis(100));
@ -3269,29 +3298,38 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
} }
// Step 3: // Step 3:
// Restart A so that it can vote for the slots in B's fork // Restart A with voting enabled so that it can vote on B's fork
// up to `next_slot_on_a`, thereby optimistically confirming `next_slot_on_a`
info!("Restarting A"); info!("Restarting A");
validator_a_info.config.voting_disabled = false;
cluster.restart_node( cluster.restart_node(
&validator_a_pubkey, &validator_a_pubkey,
validator_a_info, validator_a_info,
SocketAddrSpace::Unspecified, SocketAddrSpace::Unspecified,
); );
info!("Waiting for A to vote"); info!("Waiting for A to vote on slot descended from slot `next_slot_on_a`");
let mut last_print = Instant::now(); let now = Instant::now();
loop { loop {
if let Some((last_vote_slot, _)) = if let Some((last_vote_slot, _)) =
last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey) last_vote_in_tower(&val_a_ledger_path, &validator_a_pubkey)
{ {
if last_vote_slot >= next_slot_on_a { if last_vote_slot >= next_slot_on_a {
info!("Validator A has caught up: {}", last_vote_slot); info!(
"Validator A has caught up and voted on slot: {}",
last_vote_slot
);
break; break;
} else if last_print.elapsed().as_secs() >= 10 {
info!("Validator A latest vote: {}", last_vote_slot);
last_print = Instant::now();
} }
} }
if now.elapsed().as_secs() >= 30 {
panic!(
"Validator A has not seen optimistic confirmation slot > {} in 30 seconds",
next_slot_on_a
);
}
sleep(Duration::from_millis(20)); sleep(Duration::from_millis(20));
} }
@ -3319,6 +3357,7 @@ fn do_test_optimistic_confirmation_violation_with_or_without_tower(with_tower: b
// Step 4: // Step 4:
// Run validator C only to make it produce and vote on its own fork. // Run validator C only to make it produce and vote on its own fork.
info!("Restart validator C again!!!"); info!("Restart validator C again!!!");
validator_c_info.config.voting_disabled = false;
cluster.restart_node( cluster.restart_node(
&validator_c_pubkey, &validator_c_pubkey,
validator_c_info, validator_c_info,