Add stake breakdown to metrics for HeaviestForkFailures (#31067)
This commit is contained in:
parent
0ff8a09041
commit
85dbd3d94d
|
@ -39,11 +39,27 @@ use {
|
||||||
thiserror::Error,
|
thiserror::Error,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Outcome of a vote stake threshold check.
///
/// `PassedThreshold` is the default value. `FailedThreshold` carries the
/// stake that was observed when the check did not pass.
#[derive(PartialEq, Eq, Clone, Copy, Debug, Default)]
|
||||||
|
pub enum ThresholdDecision {
|
||||||
|
#[default]
|
||||||
|
PassedThreshold,
|
||||||
|
FailedThreshold(/* Observed stake */ u64),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ThresholdDecision {
|
||||||
|
/// Returns `true` only when this decision is `Self::PassedThreshold`;
/// any `FailedThreshold(_)` value yields `false`.
pub fn passed(&self) -> bool {
|
||||||
|
matches!(self, Self::PassedThreshold)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(PartialEq, Eq, Clone, Debug, AbiExample)]
|
#[derive(PartialEq, Eq, Clone, Debug, AbiExample)]
|
||||||
pub enum SwitchForkDecision {
|
pub enum SwitchForkDecision {
|
||||||
SwitchProof(Hash),
|
SwitchProof(Hash),
|
||||||
SameFork,
|
SameFork,
|
||||||
FailedSwitchThreshold(u64, u64),
|
FailedSwitchThreshold(
|
||||||
|
/* Switch proof stake */ u64,
|
||||||
|
/* Total stake */ u64,
|
||||||
|
),
|
||||||
FailedSwitchDuplicateRollback(Slot),
|
FailedSwitchDuplicateRollback(Slot),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -978,12 +994,15 @@ impl Tower {
|
||||||
self.last_switch_threshold_check.is_none()
|
self.last_switch_threshold_check.is_none()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Performs threshold check for `slot`
|
||||||
|
///
|
||||||
|
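/// Returns `ThresholdDecision::PassedThreshold` if the check passes, otherwise `ThresholdDecision::FailedThreshold(fork_stake)` (with a stake of 0 when no votes have been seen on the fork)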
|
||||||
pub fn check_vote_stake_threshold(
|
pub fn check_vote_stake_threshold(
|
||||||
&self,
|
&self,
|
||||||
slot: Slot,
|
slot: Slot,
|
||||||
voted_stakes: &VotedStakes,
|
voted_stakes: &VotedStakes,
|
||||||
total_stake: Stake,
|
total_stake: Stake,
|
||||||
) -> bool {
|
) -> ThresholdDecision {
|
||||||
let mut vote_state = self.vote_state.clone();
|
let mut vote_state = self.vote_state.clone();
|
||||||
process_slot_vote_unchecked(&mut vote_state, slot);
|
process_slot_vote_unchecked(&mut vote_state, slot);
|
||||||
let vote = vote_state.nth_recent_vote(self.threshold_depth);
|
let vote = vote_state.nth_recent_vote(self.threshold_depth);
|
||||||
|
@ -999,16 +1018,20 @@ impl Tower {
|
||||||
if old_vote.slot() == vote.slot()
|
if old_vote.slot() == vote.slot()
|
||||||
&& old_vote.confirmation_count() == vote.confirmation_count()
|
&& old_vote.confirmation_count() == vote.confirmation_count()
|
||||||
{
|
{
|
||||||
return true;
|
return ThresholdDecision::PassedThreshold;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
lockout > self.threshold_size
|
if lockout > self.threshold_size {
|
||||||
|
return ThresholdDecision::PassedThreshold;
|
||||||
|
}
|
||||||
|
ThresholdDecision::FailedThreshold(*fork_stake)
|
||||||
} else {
|
} else {
|
||||||
false
|
// We haven't seen any votes on this fork yet, so no stake
|
||||||
|
ThresholdDecision::FailedThreshold(0)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
true
|
ThresholdDecision::PassedThreshold
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2009,16 +2032,17 @@ pub mod test {
|
||||||
&node_pubkey,
|
&node_pubkey,
|
||||||
&mut tower,
|
&mut tower,
|
||||||
);
|
);
|
||||||
for slot in 46..=48 {
|
|
||||||
if slot == 48 {
|
|
||||||
assert!(results.get(&slot).unwrap().is_empty());
|
|
||||||
} else {
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
*results.get(&slot).unwrap(),
|
*results.get(&46).unwrap(),
|
||||||
vec![HeaviestForkFailures::FailedSwitchThreshold(slot)]
|
vec![HeaviestForkFailures::FailedSwitchThreshold(46, 0, 40000)]
|
||||||
);
|
);
|
||||||
}
|
assert_eq!(
|
||||||
}
|
*results.get(&47).unwrap(),
|
||||||
|
vec![HeaviestForkFailures::FailedSwitchThreshold(
|
||||||
|
47, 10000, 40000
|
||||||
|
)]
|
||||||
|
);
|
||||||
|
assert!(results.get(&48).unwrap().is_empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -2202,7 +2226,7 @@ pub mod test {
|
||||||
fn test_check_vote_threshold_without_votes() {
|
fn test_check_vote_threshold_without_votes() {
|
||||||
let tower = Tower::new_for_tests(1, 0.67);
|
let tower = Tower::new_for_tests(1, 0.67);
|
||||||
let stakes = vec![(0, 1)].into_iter().collect();
|
let stakes = vec![(0, 1)].into_iter().collect();
|
||||||
assert!(tower.check_vote_stake_threshold(0, &stakes, 2));
|
assert!(tower.check_vote_stake_threshold(0, &stakes, 2).passed());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -2214,7 +2238,9 @@ pub mod test {
|
||||||
stakes.insert(i, 1);
|
stakes.insert(i, 1);
|
||||||
tower.record_vote(i, Hash::default());
|
tower.record_vote(i, Hash::default());
|
||||||
}
|
}
|
||||||
assert!(!tower.check_vote_stake_threshold(MAX_LOCKOUT_HISTORY as u64 + 1, &stakes, 2,));
|
assert!(!tower
|
||||||
|
.check_vote_stake_threshold(MAX_LOCKOUT_HISTORY as u64 + 1, &stakes, 2,)
|
||||||
|
.passed());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -2329,14 +2355,14 @@ pub mod test {
|
||||||
let mut tower = Tower::new_for_tests(1, 0.67);
|
let mut tower = Tower::new_for_tests(1, 0.67);
|
||||||
let stakes = vec![(0, 1)].into_iter().collect();
|
let stakes = vec![(0, 1)].into_iter().collect();
|
||||||
tower.record_vote(0, Hash::default());
|
tower.record_vote(0, Hash::default());
|
||||||
assert!(!tower.check_vote_stake_threshold(1, &stakes, 2));
|
assert!(!tower.check_vote_stake_threshold(1, &stakes, 2).passed());
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
fn test_check_vote_threshold_above_threshold() {
|
fn test_check_vote_threshold_above_threshold() {
|
||||||
let mut tower = Tower::new_for_tests(1, 0.67);
|
let mut tower = Tower::new_for_tests(1, 0.67);
|
||||||
let stakes = vec![(0, 2)].into_iter().collect();
|
let stakes = vec![(0, 2)].into_iter().collect();
|
||||||
tower.record_vote(0, Hash::default());
|
tower.record_vote(0, Hash::default());
|
||||||
assert!(tower.check_vote_stake_threshold(1, &stakes, 2));
|
assert!(tower.check_vote_stake_threshold(1, &stakes, 2).passed());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -2346,7 +2372,7 @@ pub mod test {
|
||||||
tower.record_vote(0, Hash::default());
|
tower.record_vote(0, Hash::default());
|
||||||
tower.record_vote(1, Hash::default());
|
tower.record_vote(1, Hash::default());
|
||||||
tower.record_vote(2, Hash::default());
|
tower.record_vote(2, Hash::default());
|
||||||
assert!(tower.check_vote_stake_threshold(6, &stakes, 2));
|
assert!(tower.check_vote_stake_threshold(6, &stakes, 2).passed());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -2354,7 +2380,7 @@ pub mod test {
|
||||||
let mut tower = Tower::new_for_tests(1, 0.67);
|
let mut tower = Tower::new_for_tests(1, 0.67);
|
||||||
let stakes = HashMap::new();
|
let stakes = HashMap::new();
|
||||||
tower.record_vote(0, Hash::default());
|
tower.record_vote(0, Hash::default());
|
||||||
assert!(!tower.check_vote_stake_threshold(1, &stakes, 2));
|
assert!(!tower.check_vote_stake_threshold(1, &stakes, 2).passed());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -2365,7 +2391,7 @@ pub mod test {
|
||||||
tower.record_vote(0, Hash::default());
|
tower.record_vote(0, Hash::default());
|
||||||
tower.record_vote(1, Hash::default());
|
tower.record_vote(1, Hash::default());
|
||||||
tower.record_vote(2, Hash::default());
|
tower.record_vote(2, Hash::default());
|
||||||
assert!(tower.check_vote_stake_threshold(6, &stakes, 2,));
|
assert!(tower.check_vote_stake_threshold(6, &stakes, 2,).passed());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -2483,7 +2509,9 @@ pub mod test {
|
||||||
|_| None,
|
|_| None,
|
||||||
&mut LatestValidatorVotesForFrozenBanks::default(),
|
&mut LatestValidatorVotesForFrozenBanks::default(),
|
||||||
);
|
);
|
||||||
assert!(tower.check_vote_stake_threshold(vote_to_evaluate, &voted_stakes, total_stake,));
|
assert!(tower
|
||||||
|
.check_vote_stake_threshold(vote_to_evaluate, &voted_stakes, total_stake,)
|
||||||
|
.passed());
|
||||||
|
|
||||||
// CASE 2: Now we want to evaluate a vote for slot VOTE_THRESHOLD_DEPTH + 1. This slot
|
// CASE 2: Now we want to evaluate a vote for slot VOTE_THRESHOLD_DEPTH + 1. This slot
|
||||||
// will expire the vote in one of the vote accounts, so we should have insufficient
|
// will expire the vote in one of the vote accounts, so we should have insufficient
|
||||||
|
@ -2501,7 +2529,9 @@ pub mod test {
|
||||||
|_| None,
|
|_| None,
|
||||||
&mut LatestValidatorVotesForFrozenBanks::default(),
|
&mut LatestValidatorVotesForFrozenBanks::default(),
|
||||||
);
|
);
|
||||||
assert!(!tower.check_vote_stake_threshold(vote_to_evaluate, &voted_stakes, total_stake,));
|
assert!(!tower
|
||||||
|
.check_vote_stake_threshold(vote_to_evaluate, &voted_stakes, total_stake,)
|
||||||
|
.passed());
|
||||||
}
|
}
|
||||||
|
|
||||||
fn vote_and_check_recent(num_votes: usize) {
|
fn vote_and_check_recent(num_votes: usize) {
|
||||||
|
|
|
@ -2,7 +2,7 @@ use {
|
||||||
crate::{
|
crate::{
|
||||||
cluster_info_vote_listener::SlotVoteTracker,
|
cluster_info_vote_listener::SlotVoteTracker,
|
||||||
cluster_slots::SlotPubkeys,
|
cluster_slots::SlotPubkeys,
|
||||||
consensus::{Stake, VotedStakes},
|
consensus::{Stake, ThresholdDecision, VotedStakes},
|
||||||
replay_stage::SUPERMINORITY_THRESHOLD,
|
replay_stage::SUPERMINORITY_THRESHOLD,
|
||||||
},
|
},
|
||||||
solana_ledger::blockstore_processor::{ConfirmationProgress, ConfirmationTiming},
|
solana_ledger::blockstore_processor::{ConfirmationProgress, ConfirmationTiming},
|
||||||
|
@ -299,7 +299,7 @@ pub struct ForkStats {
|
||||||
pub has_voted: bool,
|
pub has_voted: bool,
|
||||||
pub is_recent: bool,
|
pub is_recent: bool,
|
||||||
pub is_empty: bool,
|
pub is_empty: bool,
|
||||||
pub vote_threshold: bool,
|
pub vote_threshold: ThresholdDecision,
|
||||||
pub is_locked_out: bool,
|
pub is_locked_out: bool,
|
||||||
pub voted_stakes: VotedStakes,
|
pub voted_stakes: VotedStakes,
|
||||||
pub is_supermajority_confirmed: bool,
|
pub is_supermajority_confirmed: bool,
|
||||||
|
|
|
@ -14,7 +14,8 @@ use {
|
||||||
cluster_slots_service::ClusterSlotsUpdateSender,
|
cluster_slots_service::ClusterSlotsUpdateSender,
|
||||||
commitment_service::{AggregateCommitmentService, CommitmentAggregationData},
|
commitment_service::{AggregateCommitmentService, CommitmentAggregationData},
|
||||||
consensus::{
|
consensus::{
|
||||||
ComputedBankState, Stake, SwitchForkDecision, Tower, VotedStakes, SWITCH_FORK_THRESHOLD,
|
ComputedBankState, Stake, SwitchForkDecision, ThresholdDecision, Tower, VotedStakes,
|
||||||
|
SWITCH_FORK_THRESHOLD,
|
||||||
},
|
},
|
||||||
cost_update_service::CostUpdate,
|
cost_update_service::CostUpdate,
|
||||||
fork_choice::{ForkChoice, SelectVoteAndResetForkResult},
|
fork_choice::{ForkChoice, SelectVoteAndResetForkResult},
|
||||||
|
@ -108,9 +109,21 @@ lazy_static! {
|
||||||
#[derive(PartialEq, Eq, Debug)]
|
#[derive(PartialEq, Eq, Debug)]
|
||||||
pub enum HeaviestForkFailures {
|
pub enum HeaviestForkFailures {
|
||||||
LockedOut(u64),
|
LockedOut(u64),
|
||||||
FailedThreshold(u64),
|
FailedThreshold(
|
||||||
FailedSwitchThreshold(u64),
|
Slot,
|
||||||
NoPropagatedConfirmation(u64),
|
/* Observed stake */ u64,
|
||||||
|
/* Total stake */ u64,
|
||||||
|
),
|
||||||
|
FailedSwitchThreshold(
|
||||||
|
Slot,
|
||||||
|
/* Observed stake */ u64,
|
||||||
|
/* Total stake */ u64,
|
||||||
|
),
|
||||||
|
NoPropagatedConfirmation(
|
||||||
|
Slot,
|
||||||
|
/* Observed stake */ u64,
|
||||||
|
/* Total stake */ u64,
|
||||||
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
// Implement a destructor for the ReplayStage thread to signal it exited
|
// Implement a destructor for the ReplayStage thread to signal it exited
|
||||||
|
@ -800,7 +813,7 @@ impl ReplayStage {
|
||||||
);
|
);
|
||||||
|
|
||||||
for r in &heaviest_fork_failures {
|
for r in &heaviest_fork_failures {
|
||||||
if let HeaviestForkFailures::NoPropagatedConfirmation(slot) = r {
|
if let HeaviestForkFailures::NoPropagatedConfirmation(slot, ..) = r {
|
||||||
if let Some(latest_leader_slot) =
|
if let Some(latest_leader_slot) =
|
||||||
progress.get_latest_leader_slot_must_exist(*slot)
|
progress.get_latest_leader_slot_must_exist(*slot)
|
||||||
{
|
{
|
||||||
|
@ -3150,6 +3163,8 @@ impl ReplayStage {
|
||||||
);
|
);
|
||||||
failure_reasons.push(HeaviestForkFailures::FailedSwitchThreshold(
|
failure_reasons.push(HeaviestForkFailures::FailedSwitchThreshold(
|
||||||
heaviest_bank.slot(),
|
heaviest_bank.slot(),
|
||||||
|
switch_proof_stake,
|
||||||
|
total_stake,
|
||||||
));
|
));
|
||||||
reset_bank.map(|b| (b, switch_fork_decision))
|
reset_bank.map(|b| (b, switch_fork_decision))
|
||||||
}
|
}
|
||||||
|
@ -3198,6 +3213,8 @@ impl ReplayStage {
|
||||||
);
|
);
|
||||||
failure_reasons.push(HeaviestForkFailures::FailedSwitchThreshold(
|
failure_reasons.push(HeaviestForkFailures::FailedSwitchThreshold(
|
||||||
heaviest_bank.slot(),
|
heaviest_bank.slot(),
|
||||||
|
0, // In this case we never actually performed the switch check, 0 for now
|
||||||
|
0,
|
||||||
));
|
));
|
||||||
reset_bank.map(|b| (b, switch_fork_decision))
|
reset_bank.map(|b| (b, switch_fork_decision))
|
||||||
}
|
}
|
||||||
|
@ -3206,14 +3223,25 @@ impl ReplayStage {
|
||||||
};
|
};
|
||||||
|
|
||||||
if let Some((bank, switch_fork_decision)) = selected_fork {
|
if let Some((bank, switch_fork_decision)) = selected_fork {
|
||||||
let (is_locked_out, vote_threshold, is_leader_slot, fork_weight) = {
|
let (
|
||||||
|
is_locked_out,
|
||||||
|
vote_threshold,
|
||||||
|
propagated_stake,
|
||||||
|
is_leader_slot,
|
||||||
|
fork_weight,
|
||||||
|
total_threshold_stake,
|
||||||
|
total_epoch_stake,
|
||||||
|
) = {
|
||||||
let fork_stats = progress.get_fork_stats(bank.slot()).unwrap();
|
let fork_stats = progress.get_fork_stats(bank.slot()).unwrap();
|
||||||
let propagated_stats = &progress.get_propagated_stats(bank.slot()).unwrap();
|
let propagated_stats = &progress.get_propagated_stats(bank.slot()).unwrap();
|
||||||
(
|
(
|
||||||
fork_stats.is_locked_out,
|
fork_stats.is_locked_out,
|
||||||
fork_stats.vote_threshold,
|
fork_stats.vote_threshold,
|
||||||
|
propagated_stats.propagated_validators_stake,
|
||||||
propagated_stats.is_leader_slot,
|
propagated_stats.is_leader_slot,
|
||||||
fork_stats.weight,
|
fork_stats.weight,
|
||||||
|
fork_stats.total_stake,
|
||||||
|
propagated_stats.total_epoch_stake,
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -3225,15 +3253,23 @@ impl ReplayStage {
|
||||||
if is_locked_out {
|
if is_locked_out {
|
||||||
failure_reasons.push(HeaviestForkFailures::LockedOut(bank.slot()));
|
failure_reasons.push(HeaviestForkFailures::LockedOut(bank.slot()));
|
||||||
}
|
}
|
||||||
if !vote_threshold {
|
if let ThresholdDecision::FailedThreshold(fork_stake) = vote_threshold {
|
||||||
failure_reasons.push(HeaviestForkFailures::FailedThreshold(bank.slot()));
|
failure_reasons.push(HeaviestForkFailures::FailedThreshold(
|
||||||
|
bank.slot(),
|
||||||
|
fork_stake,
|
||||||
|
total_threshold_stake,
|
||||||
|
));
|
||||||
}
|
}
|
||||||
if !propagation_confirmed {
|
if !propagation_confirmed {
|
||||||
failure_reasons.push(HeaviestForkFailures::NoPropagatedConfirmation(bank.slot()));
|
failure_reasons.push(HeaviestForkFailures::NoPropagatedConfirmation(
|
||||||
|
bank.slot(),
|
||||||
|
propagated_stake,
|
||||||
|
total_epoch_stake,
|
||||||
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
if !is_locked_out
|
if !is_locked_out
|
||||||
&& vote_threshold
|
&& vote_threshold.passed()
|
||||||
&& propagation_confirmed
|
&& propagation_confirmed
|
||||||
&& switch_fork_decision.can_vote()
|
&& switch_fork_decision.can_vote()
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in New Issue