Add stake breakdown to metrics for HeaviestForkFailures (#31067)

This commit is contained in:
Ashwin Sekar 2023-04-05 20:35:12 -06:00 committed by GitHub
parent 0ff8a09041
commit 85dbd3d94d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 103 additions and 37 deletions

View File

@ -39,11 +39,27 @@ use {
thiserror::Error,
};
/// Outcome of the vote stake threshold check for a slot.
#[derive(PartialEq, Eq, Clone, Copy, Debug, Default)]
pub enum ThresholdDecision {
    /// The check passed. This is the `Default` value.
    #[default]
    PassedThreshold,
    /// The check failed; the payload is the stake that was observed.
    FailedThreshold(/* Observed stake */ u64),
}

impl ThresholdDecision {
    /// Returns `true` when the decision is [`ThresholdDecision::PassedThreshold`]
    /// and `false` for any failure, regardless of the observed stake.
    pub fn passed(&self) -> bool {
        // Exhaustive match so a future variant forces a deliberate decision here.
        match self {
            Self::PassedThreshold => true,
            Self::FailedThreshold(_) => false,
        }
    }
}
/// Result of deciding whether the validator may switch to (or stay on) a fork.
/// NOTE(review): the per-variant notes below are inferred from the variant names
/// and inline field comments visible here — confirm against the switch check itself.
#[derive(PartialEq, Eq, Clone, Debug, AbiExample)]
pub enum SwitchForkDecision {
// Presumably: a switch is allowed, carrying the hash of the switch proof.
SwitchProof(Hash),
// Presumably: the candidate is on the same fork as the last vote, so no switch is needed.
SameFork,
// The switch threshold was not met; fields are (switch proof stake, total stake).
FailedSwitchThreshold(u64, u64),
FailedSwitchThreshold(
/* Switch proof stake */ u64,
/* Total stake */ u64,
),
// Presumably: switching failed because it would roll back to a duplicate slot; carries that slot.
FailedSwitchDuplicateRollback(Slot),
}
@ -978,12 +994,15 @@ impl Tower {
self.last_switch_threshold_check.is_none()
}
/// Performs threshold check for `slot`
///
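/// Returns `ThresholdDecision::PassedThreshold` if the check passes, otherwise
/// `ThresholdDecision::FailedThreshold(fork_stake)` carrying the stake observed on
/// the fork (0 if no votes have been seen on the fork yet)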
pub fn check_vote_stake_threshold(
&self,
slot: Slot,
voted_stakes: &VotedStakes,
total_stake: Stake,
) -> bool {
) -> ThresholdDecision {
let mut vote_state = self.vote_state.clone();
process_slot_vote_unchecked(&mut vote_state, slot);
let vote = vote_state.nth_recent_vote(self.threshold_depth);
@ -999,16 +1018,20 @@ impl Tower {
if old_vote.slot() == vote.slot()
&& old_vote.confirmation_count() == vote.confirmation_count()
{
return true;
return ThresholdDecision::PassedThreshold;
}
}
}
lockout > self.threshold_size
if lockout > self.threshold_size {
return ThresholdDecision::PassedThreshold;
}
ThresholdDecision::FailedThreshold(*fork_stake)
} else {
false
// We haven't seen any votes on this fork yet, so no stake
ThresholdDecision::FailedThreshold(0)
}
} else {
true
ThresholdDecision::PassedThreshold
}
}
@ -2009,16 +2032,17 @@ pub mod test {
&node_pubkey,
&mut tower,
);
for slot in 46..=48 {
if slot == 48 {
assert!(results.get(&slot).unwrap().is_empty());
} else {
assert_eq!(
*results.get(&slot).unwrap(),
vec![HeaviestForkFailures::FailedSwitchThreshold(slot)]
);
}
}
assert_eq!(
*results.get(&46).unwrap(),
vec![HeaviestForkFailures::FailedSwitchThreshold(46, 0, 40000)]
);
assert_eq!(
*results.get(&47).unwrap(),
vec![HeaviestForkFailures::FailedSwitchThreshold(
47, 10000, 40000
)]
);
assert!(results.get(&48).unwrap().is_empty());
}
#[test]
@ -2202,7 +2226,7 @@ pub mod test {
fn test_check_vote_threshold_without_votes() {
// A freshly built tower that has recorded no votes is expected to pass the
// threshold check for slot 0.
let tower = Tower::new_for_tests(1, 0.67);
// Single entry 0 -> 1 (presumably slot -> voted stake); total stake passed below is 2.
let stakes = vec![(0, 1)].into_iter().collect();
assert!(tower.check_vote_stake_threshold(0, &stakes, 2));
assert!(tower.check_vote_stake_threshold(0, &stakes, 2).passed());
}
#[test]
@ -2214,7 +2238,9 @@ pub mod test {
stakes.insert(i, 1);
tower.record_vote(i, Hash::default());
}
assert!(!tower.check_vote_stake_threshold(MAX_LOCKOUT_HISTORY as u64 + 1, &stakes, 2,));
assert!(!tower
.check_vote_stake_threshold(MAX_LOCKOUT_HISTORY as u64 + 1, &stakes, 2,)
.passed());
}
#[test]
@ -2329,14 +2355,14 @@ pub mod test {
let mut tower = Tower::new_for_tests(1, 0.67);
let stakes = vec![(0, 1)].into_iter().collect();
tower.record_vote(0, Hash::default());
assert!(!tower.check_vote_stake_threshold(1, &stakes, 2));
assert!(!tower.check_vote_stake_threshold(1, &stakes, 2).passed());
}
#[test]
fn test_check_vote_threshold_above_threshold() {
// With one vote recorded on slot 0 and the stakes entry for 0 equal to the
// total stake (2 of 2), the threshold check for slot 1 is expected to pass.
let mut tower = Tower::new_for_tests(1, 0.67);
// Single entry 0 -> 2 (presumably slot -> voted stake).
let stakes = vec![(0, 2)].into_iter().collect();
tower.record_vote(0, Hash::default());
assert!(tower.check_vote_stake_threshold(1, &stakes, 2));
assert!(tower.check_vote_stake_threshold(1, &stakes, 2).passed());
}
#[test]
@ -2346,7 +2372,7 @@ pub mod test {
tower.record_vote(0, Hash::default());
tower.record_vote(1, Hash::default());
tower.record_vote(2, Hash::default());
assert!(tower.check_vote_stake_threshold(6, &stakes, 2));
assert!(tower.check_vote_stake_threshold(6, &stakes, 2).passed());
}
#[test]
@ -2354,7 +2380,7 @@ pub mod test {
let mut tower = Tower::new_for_tests(1, 0.67);
let stakes = HashMap::new();
tower.record_vote(0, Hash::default());
assert!(!tower.check_vote_stake_threshold(1, &stakes, 2));
assert!(!tower.check_vote_stake_threshold(1, &stakes, 2).passed());
}
#[test]
@ -2365,7 +2391,7 @@ pub mod test {
tower.record_vote(0, Hash::default());
tower.record_vote(1, Hash::default());
tower.record_vote(2, Hash::default());
assert!(tower.check_vote_stake_threshold(6, &stakes, 2,));
assert!(tower.check_vote_stake_threshold(6, &stakes, 2,).passed());
}
#[test]
@ -2483,7 +2509,9 @@ pub mod test {
|_| None,
&mut LatestValidatorVotesForFrozenBanks::default(),
);
assert!(tower.check_vote_stake_threshold(vote_to_evaluate, &voted_stakes, total_stake,));
assert!(tower
.check_vote_stake_threshold(vote_to_evaluate, &voted_stakes, total_stake,)
.passed());
// CASE 2: Now we want to evaluate a vote for slot VOTE_THRESHOLD_DEPTH + 1. This slot
// will expire the vote in one of the vote accounts, so we should have insufficient
@ -2501,7 +2529,9 @@ pub mod test {
|_| None,
&mut LatestValidatorVotesForFrozenBanks::default(),
);
assert!(!tower.check_vote_stake_threshold(vote_to_evaluate, &voted_stakes, total_stake,));
assert!(!tower
.check_vote_stake_threshold(vote_to_evaluate, &voted_stakes, total_stake,)
.passed());
}
fn vote_and_check_recent(num_votes: usize) {

View File

@ -2,7 +2,7 @@ use {
crate::{
cluster_info_vote_listener::SlotVoteTracker,
cluster_slots::SlotPubkeys,
consensus::{Stake, VotedStakes},
consensus::{Stake, ThresholdDecision, VotedStakes},
replay_stage::SUPERMINORITY_THRESHOLD,
},
solana_ledger::blockstore_processor::{ConfirmationProgress, ConfirmationTiming},
@ -299,7 +299,7 @@ pub struct ForkStats {
pub has_voted: bool,
pub is_recent: bool,
pub is_empty: bool,
pub vote_threshold: bool,
pub vote_threshold: ThresholdDecision,
pub is_locked_out: bool,
pub voted_stakes: VotedStakes,
pub is_supermajority_confirmed: bool,

View File

@ -14,7 +14,8 @@ use {
cluster_slots_service::ClusterSlotsUpdateSender,
commitment_service::{AggregateCommitmentService, CommitmentAggregationData},
consensus::{
ComputedBankState, Stake, SwitchForkDecision, Tower, VotedStakes, SWITCH_FORK_THRESHOLD,
ComputedBankState, Stake, SwitchForkDecision, ThresholdDecision, Tower, VotedStakes,
SWITCH_FORK_THRESHOLD,
},
cost_update_service::CostUpdate,
fork_choice::{ForkChoice, SelectVoteAndResetForkResult},
@ -108,9 +109,21 @@ lazy_static! {
/// Reasons pushed onto `failure_reasons` by `ReplayStage` when a candidate bank
/// cannot be voted on. Every variant carries the slot of the bank concerned.
#[derive(PartialEq, Eq, Debug)]
pub enum HeaviestForkFailures {
// The bank's fork stats report it as locked out; carries the bank's slot.
LockedOut(u64),
FailedThreshold(u64),
FailedSwitchThreshold(u64),
NoPropagatedConfirmation(u64),
// The vote stake threshold check failed: (slot, fork stake observed by the
// check, total stake taken from the fork stats).
FailedThreshold(
Slot,
/* Observed stake */ u64,
/* Total stake */ u64,
),
// The switch threshold was not met: (slot, switch proof stake, total stake).
// Both stakes are reported as 0 when the switch check was never actually performed.
FailedSwitchThreshold(
Slot,
/* Observed stake */ u64,
/* Total stake */ u64,
),
// Propagation was not confirmed: (slot, propagated validators stake, total epoch stake).
NoPropagatedConfirmation(
Slot,
/* Observed stake */ u64,
/* Total stake */ u64,
),
}
// Implement a destructor for the ReplayStage thread to signal it exited
@ -800,7 +813,7 @@ impl ReplayStage {
);
for r in &heaviest_fork_failures {
if let HeaviestForkFailures::NoPropagatedConfirmation(slot) = r {
if let HeaviestForkFailures::NoPropagatedConfirmation(slot, ..) = r {
if let Some(latest_leader_slot) =
progress.get_latest_leader_slot_must_exist(*slot)
{
@ -3150,6 +3163,8 @@ impl ReplayStage {
);
failure_reasons.push(HeaviestForkFailures::FailedSwitchThreshold(
heaviest_bank.slot(),
switch_proof_stake,
total_stake,
));
reset_bank.map(|b| (b, switch_fork_decision))
}
@ -3198,6 +3213,8 @@ impl ReplayStage {
);
failure_reasons.push(HeaviestForkFailures::FailedSwitchThreshold(
heaviest_bank.slot(),
0, // In this case we never actually performed the switch check, 0 for now
0,
));
reset_bank.map(|b| (b, switch_fork_decision))
}
@ -3206,14 +3223,25 @@ impl ReplayStage {
};
if let Some((bank, switch_fork_decision)) = selected_fork {
let (is_locked_out, vote_threshold, is_leader_slot, fork_weight) = {
let (
is_locked_out,
vote_threshold,
propagated_stake,
is_leader_slot,
fork_weight,
total_threshold_stake,
total_epoch_stake,
) = {
let fork_stats = progress.get_fork_stats(bank.slot()).unwrap();
let propagated_stats = &progress.get_propagated_stats(bank.slot()).unwrap();
(
fork_stats.is_locked_out,
fork_stats.vote_threshold,
propagated_stats.propagated_validators_stake,
propagated_stats.is_leader_slot,
fork_stats.weight,
fork_stats.total_stake,
propagated_stats.total_epoch_stake,
)
};
@ -3225,15 +3253,23 @@ impl ReplayStage {
if is_locked_out {
failure_reasons.push(HeaviestForkFailures::LockedOut(bank.slot()));
}
if !vote_threshold {
failure_reasons.push(HeaviestForkFailures::FailedThreshold(bank.slot()));
if let ThresholdDecision::FailedThreshold(fork_stake) = vote_threshold {
failure_reasons.push(HeaviestForkFailures::FailedThreshold(
bank.slot(),
fork_stake,
total_threshold_stake,
));
}
if !propagation_confirmed {
failure_reasons.push(HeaviestForkFailures::NoPropagatedConfirmation(bank.slot()));
failure_reasons.push(HeaviestForkFailures::NoPropagatedConfirmation(
bank.slot(),
propagated_stake,
total_epoch_stake,
));
}
if !is_locked_out
&& vote_threshold
&& vote_threshold.passed()
&& propagation_confirmed
&& switch_fork_decision.can_vote()
{