stops nodes from broadcasting slots twice (#30681)

https://github.com/solana-labs/solana/blob/94ef881de/core/src/progress_map.rs#L178
always returns true the first time around because retry_time is None.
So every slot is broadcasted twice.
This commit is contained in:
behzad nouri 2023-03-11 02:46:08 +00:00 committed by GitHub
parent 17b48edd7b
commit f9805b6fbb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 31 additions and 37 deletions

View File

@ -163,26 +163,22 @@ impl ValidatorStakeInfo {
pub const RETRANSMIT_BASE_DELAY_MS: u64 = 5_000;
pub const RETRANSMIT_BACKOFF_CAP: u32 = 6;
#[derive(Debug, Default)]
#[derive(Debug)]
pub struct RetransmitInfo {
pub retry_time: Option<Instant>,
pub retry_iteration: u32,
pub(crate) retry_time: Instant,
pub(crate) retry_iteration: u32,
}
impl RetransmitInfo {
pub fn reached_retransmit_threshold(&self) -> bool {
let backoff = std::cmp::min(self.retry_iteration, RETRANSMIT_BACKOFF_CAP);
let backoff_duration_ms = (1_u64 << backoff) * RETRANSMIT_BASE_DELAY_MS;
self.retry_time
.map(|time| time.elapsed().as_millis() > backoff_duration_ms.into())
.unwrap_or(true)
self.retry_time.elapsed().as_millis() > u128::from(backoff_duration_ms)
}
pub fn increment_retry_iteration(&mut self) {
if self.retry_time.is_some() {
self.retry_iteration += 1;
}
self.retry_time = Some(Instant::now());
self.retry_iteration = self.retry_iteration.saturating_add(1);
self.retry_time = Instant::now();
}
}
@ -250,7 +246,10 @@ impl ForkProgress {
total_epoch_stake,
..PropagatedStats::default()
},
retransmit_info: RetransmitInfo::default(),
retransmit_info: RetransmitInfo {
retry_time: Instant::now(),
retry_iteration: 0u32,
},
}
}

View File

@ -6840,7 +6840,7 @@ pub(crate) mod tests {
&mut progress,
);
let res = retransmit_slots_receiver.recv_timeout(Duration::from_millis(10));
assert!(res.is_ok(), "retry_iteration=0, retry_time=None");
assert_matches!(res, Err(_));
assert_eq!(
progress.get_retransmit_info(0).unwrap().retry_iteration,
0,
@ -6858,11 +6858,9 @@ pub(crate) mod tests {
"retry_iteration=0, elapsed < 2^0 * RETRANSMIT_BASE_DELAY_MS"
);
progress.get_retransmit_info_mut(0).unwrap().retry_time = Some(
Instant::now()
.checked_sub(Duration::from_millis(RETRANSMIT_BASE_DELAY_MS + 1))
.unwrap(),
);
progress.get_retransmit_info_mut(0).unwrap().retry_time = Instant::now()
.checked_sub(Duration::from_millis(RETRANSMIT_BASE_DELAY_MS + 1))
.unwrap();
ReplayStage::retransmit_latest_unpropagated_leader_slot(
&poh_recorder,
&retransmit_slots_sender,
@ -6890,11 +6888,9 @@ pub(crate) mod tests {
"retry_iteration=1, elapsed < 2^1 * RETRY_BASE_DELAY_MS"
);
progress.get_retransmit_info_mut(0).unwrap().retry_time = Some(
Instant::now()
.checked_sub(Duration::from_millis(RETRANSMIT_BASE_DELAY_MS + 1))
.unwrap(),
);
progress.get_retransmit_info_mut(0).unwrap().retry_time = Instant::now()
.checked_sub(Duration::from_millis(RETRANSMIT_BASE_DELAY_MS + 1))
.unwrap();
ReplayStage::retransmit_latest_unpropagated_leader_slot(
&poh_recorder,
&retransmit_slots_sender,
@ -6906,11 +6902,9 @@ pub(crate) mod tests {
"retry_iteration=1, elapsed < 2^1 * RETRANSMIT_BASE_DELAY_MS"
);
progress.get_retransmit_info_mut(0).unwrap().retry_time = Some(
Instant::now()
.checked_sub(Duration::from_millis(2 * RETRANSMIT_BASE_DELAY_MS + 1))
.unwrap(),
);
progress.get_retransmit_info_mut(0).unwrap().retry_time = Instant::now()
.checked_sub(Duration::from_millis(2 * RETRANSMIT_BASE_DELAY_MS + 1))
.unwrap();
ReplayStage::retransmit_latest_unpropagated_leader_slot(
&poh_recorder,
&retransmit_slots_sender,
@ -6933,11 +6927,9 @@ pub(crate) mod tests {
.unwrap()
.increment_retry_iteration();
progress.get_retransmit_info_mut(0).unwrap().retry_time = Some(
Instant::now()
.checked_sub(Duration::from_millis(2 * RETRANSMIT_BASE_DELAY_MS + 1))
.unwrap(),
);
progress.get_retransmit_info_mut(0).unwrap().retry_time = Instant::now()
.checked_sub(Duration::from_millis(2 * RETRANSMIT_BASE_DELAY_MS + 1))
.unwrap();
ReplayStage::retransmit_latest_unpropagated_leader_slot(
&poh_recorder,
&retransmit_slots_sender,
@ -6949,11 +6941,9 @@ pub(crate) mod tests {
"retry_iteration=3, elapsed < 2^3 * RETRANSMIT_BASE_DELAY_MS"
);
progress.get_retransmit_info_mut(0).unwrap().retry_time = Some(
Instant::now()
.checked_sub(Duration::from_millis(8 * RETRANSMIT_BASE_DELAY_MS + 1))
.unwrap(),
);
progress.get_retransmit_info_mut(0).unwrap().retry_time = Instant::now()
.checked_sub(Duration::from_millis(8 * RETRANSMIT_BASE_DELAY_MS + 1))
.unwrap();
ReplayStage::retransmit_latest_unpropagated_leader_slot(
&poh_recorder,
&retransmit_slots_sender,
@ -6995,6 +6985,10 @@ pub(crate) mod tests {
} = vote_simulator;
let (retransmit_slots_sender, retransmit_slots_receiver) = unbounded();
let retry_time = Instant::now()
.checked_sub(Duration::from_millis(RETRANSMIT_BASE_DELAY_MS + 1))
.unwrap();
progress.get_retransmit_info_mut(0).unwrap().retry_time = retry_time;
let mut prev_index = 0;
for i in (1..10).chain(11..15) {
@ -7020,6 +7014,7 @@ pub(crate) mod tests {
bank.freeze();
bank_forks.write().unwrap().insert(bank);
prev_index = i;
progress.get_retransmit_info_mut(i).unwrap().retry_time = retry_time;
}
// expect single slot when latest_leader_slot is the start of a consecutive range