wait-for-restart-window now indicates how far away the next restart window is

This commit is contained in:
Michael Vines 2021-04-02 18:19:15 -07:00
parent 4e5ef6bce2
commit c8c89dd5f7
1 changed file with 68 additions and 24 deletions

View File

@ -121,6 +121,7 @@ fn wait_for_restart_window(
let mut current_epoch = None; let mut current_epoch = None;
let mut leader_schedule = VecDeque::new(); let mut leader_schedule = VecDeque::new();
let mut restart_snapshot = None; let mut restart_snapshot = None;
let mut upcoming_idle_windows = vec![]; // Vec<(starting slot, idle window length in slots)>
let progress_bar = new_spinner_progress_bar(); let progress_bar = new_spinner_progress_bar();
let monitor_start_time = SystemTime::now(); let monitor_start_time = SystemTime::now();
@ -166,7 +167,32 @@ fn wait_for_restart_window(
.unwrap_or_default() .unwrap_or_default()
.into_iter() .into_iter()
.map(|slot_index| first_slot_in_epoch.saturating_add(slot_index as u64)) .map(|slot_index| first_slot_in_epoch.saturating_add(slot_index as u64))
.filter(|slot| *slot > epoch_info.absolute_slot)
.collect::<VecDeque<_>>(); .collect::<VecDeque<_>>();
upcoming_idle_windows.clear();
{
let mut leader_schedule = leader_schedule.clone();
let mut max_idle_window = 0;
let mut idle_window_start_slot = epoch_info.absolute_slot;
while let Some(next_leader_slot) = leader_schedule.pop_front() {
let idle_window = next_leader_slot - idle_window_start_slot;
max_idle_window = max_idle_window.max(idle_window);
if idle_window > min_idle_slots {
upcoming_idle_windows.push((idle_window_start_slot, idle_window));
}
idle_window_start_slot = next_leader_slot;
}
if upcoming_idle_windows.is_empty() {
return Err(format!(
"Validator has no idle window of at least {} slots. Largest idle window for epoch {} is {} slots",
min_idle_slots, epoch_info.epoch, max_idle_window
)
.into());
}
}
current_epoch = Some(epoch_info.epoch); current_epoch = Some(epoch_info.epoch);
} }
@ -175,17 +201,26 @@ fn wait_for_restart_window(
style("Node is unhealthy").red().to_string() style("Node is unhealthy").red().to_string()
} else { } else {
// Wait until a hole in the leader schedule before restarting the node // Wait until a hole in the leader schedule before restarting the node
let in_leader_schedule_hole = let in_leader_schedule_hole = if epoch_info.slot_index + min_idle_slots as u64
if epoch_info.slot_index + min_idle_slots as u64 > epoch_info.slots_in_epoch { > epoch_info.slots_in_epoch
{
Err("Current epoch is almost complete".to_string()) Err("Current epoch is almost complete".to_string())
} else { } else {
while leader_schedule while leader_schedule
.get(0) .get(0)
.map(|slot_index| *slot_index < epoch_info.absolute_slot) .map(|slot| *slot < epoch_info.absolute_slot)
.unwrap_or(false) .unwrap_or(false)
{ {
leader_schedule.pop_front(); leader_schedule.pop_front();
} }
while upcoming_idle_windows
.get(0)
.map(|(slot, _)| *slot < epoch_info.absolute_slot)
.unwrap_or(false)
{
upcoming_idle_windows.pop();
}
match leader_schedule.get(0) { match leader_schedule.get(0) {
None => { None => {
Ok(()) // Validator has no leader slots Ok(()) // Validator has no leader slots
@ -196,10 +231,19 @@ fn wait_for_restart_window(
if idle_slots >= min_idle_slots { if idle_slots >= min_idle_slots {
Ok(()) Ok(())
} else { } else {
Err(format!( Err(match upcoming_idle_windows.get(0) {
Some((starting_slot, length_in_slots)) => {
format!(
"Next idle window in {} slots, for {} slots",
starting_slot.saturating_sub(epoch_info.absolute_slot),
length_in_slots
)
}
None => format!(
"Validator will be leader soon. Next leader slot is {}", "Validator will be leader soon. Next leader slot is {}",
next_leader_slot next_leader_slot
)) ),
})
} }
} }
} }