disambiguate the matches then mismatches case for ancestor samples (#31617)

This commit is contained in:
Ashwin Sekar 2023-05-13 11:12:21 -07:00 committed by GitHub
parent ef75f1cb4e
commit c85b057cc8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 91 additions and 15 deletions

View File

@ -324,20 +324,69 @@ impl AncestorRequestStatus {
}
let our_frozen_hash = blockstore.get_bank_hash(*ancestor_slot);
if let Some(our_frozen_hash) = our_frozen_hash {
if earliest_erroring_ancestor.is_some() && our_frozen_hash == *agreed_upon_hash {
// It's impossible have a different version of an earlier ancestor, but
// then also have the same version of a later ancestor.
info!("{} mismatches then matches", self.requested_mismatched_slot);
return DuplicateAncestorDecision::InvalidSample;
} else if our_frozen_hash != *agreed_upon_hash
&& earliest_erroring_ancestor.is_none()
{
earliest_erroring_ancestor = Some((
agreed_response.len() - i - 1,
DuplicateAncestorDecision::EarliestMismatchFound(
DuplicateSlotRepairStatus::default(),
),
));
match (
&earliest_erroring_ancestor,
our_frozen_hash == *agreed_upon_hash,
) {
(
Some((mismatch_i, DuplicateAncestorDecision::EarliestMismatchFound(_))),
true,
) => {
// It's impossible have a different version of an earlier ancestor, but
// then also have the same version of a later ancestor.
let (mismatch_slot, mismatch_agreed_upon_hash) =
agreed_response[*mismatch_i];
let mismatch_our_frozen_hash = blockstore.get_bank_hash(mismatch_slot);
info!(
"When processing the ancestor sample for {}, there was a mismatch
for {mismatch_slot}: we had frozen hash {:?} and the cluster agreed upon
{mismatch_agreed_upon_hash}. However for a later ancestor {ancestor_slot}
we have agreement on {our_frozen_hash} as the bank hash. This should never
be possible, something is wrong or the cluster sample is invalid.
Rejecting and queuing the ancestor hashes request for retry",
self.requested_mismatched_slot,
mismatch_our_frozen_hash
);
return DuplicateAncestorDecision::InvalidSample;
}
(
Some((mismatch_i, DuplicateAncestorDecision::EarliestAncestorNotFrozen(_))),
true,
) => {
// In this case an earlier ancestor was not frozen in our blockstore,
// however this later ancestor is matching with our version. This most
// likely should never happen however it could happen if we initiate an
// ancestor_hashes request immediately after startup from snapshot, we will
// have a match for the snapshot bank, however we don't have any of the
// ancestors frozen
let (mismatch_slot, mismatch_agreed_upon_hash) =
agreed_response[*mismatch_i];
info!(
"When processing the ancestor sample for {}, an earlier ancestor {mismatch_slot}
was agreed upon by the cluster with hash {mismatch_agreed_upon_hash} but not
frozen in our blockstore. However for a later ancestor {ancestor_slot} we have
agreement on {our_frozen_hash} as the bank hash. This should only be possible if
we have just started from snapshot and immediately encountered a duplicate block,
otherwise something is seriously wrong. Continuing with the repair",
self.requested_mismatched_slot
);
}
(Some(decision), true) => panic!(
"Programmer error, {:?} should not be set in decision loop",
decision
),
(Some(_), false) => { /* Already found a mismatch, descendants continue to mismatch as well */
}
(None, true) => { /* Mismatch hasn't been found yet */ }
(None, false) => {
// A mismatch has been found
earliest_erroring_ancestor = Some((
agreed_response.len() - i - 1,
DuplicateAncestorDecision::EarliestMismatchFound(
DuplicateSlotRepairStatus::default(),
),
));
}
}
} else if earliest_erroring_ancestor.is_none() && self.request_type.is_pruned() {
// If the slot we are requesting for is pruned, then the slot and many of its
@ -779,7 +828,7 @@ pub mod tests {
}
#[test]
fn test_add_multiple_responses_invalid_sample_matches_then_mismatches() {
fn test_add_multiple_responses_invalid_sample_mismatches_then_matches() {
let request_slot = 100;
let mut test_setup = setup_add_response_test(request_slot, 10);
@ -808,6 +857,33 @@ pub mod tests {
);
}
#[test]
fn test_add_multiple_responses_start_from_snapshot_missing_then_matches() {
let request_slot = 100;
let mut test_setup = setup_add_response_test(request_slot, 10);
// Only insert the later half in our blockstore
for &(slot, correct_hash) in test_setup.correct_ancestors_response.iter().take(5) {
test_setup
.blockstore
.insert_bank_hash(slot, correct_hash, false);
}
// We don't have the earlier ancestors because we just started up, however sample should
// not be rejected as invalid.
let repair_status =
match run_add_multiple_correct_and_incorrect_responses(vec![], &mut test_setup) {
DuplicateAncestorDecision::ContinueSearch(repair_status) => repair_status,
x => panic!("Incorrect decision {x:?}"),
};
// Expect to find everything in the `correct_ancestors_to_repair`.
assert_eq!(
repair_status.correct_ancestors_to_repair,
test_setup.correct_ancestors_response
);
}
#[test]
fn test_add_multiple_responses_ancestors_all_not_frozen() {
let request_slot = 100;