Output BankHashDetails file when leader drops its own block (#34256)

Currently, the file is generated when a node drops a block that was
produced by another node. However, it would also be beneficial to see
the account state when a node drops its own block.

Output the file in this additional failure codepath.
This commit is contained in:
steviez 2023-11-29 17:20:27 -06:00 committed by GitHub
parent e949ef9daa
commit 935e06f8f1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 27 additions and 15 deletions

View File

@ -1349,14 +1349,22 @@ impl ReplayStage {
);
}
// Should not dump slots for which we were the leader
if Some(*my_pubkey) == leader_schedule_cache.slot_leader_at(*duplicate_slot, None) {
panic!("We are attempting to dump a block that we produced. \
This indicates that we are producing duplicate blocks, \
or that there is a bug in our runtime/replay code which \
causes us to compute different bank hashes than the rest of the cluster. \
We froze slot {duplicate_slot} with hash {frozen_hash:?} while the cluster hash is {correct_hash}");
if let Some(bank) = bank_forks.read().unwrap().get(*duplicate_slot) {
bank_hash_details::write_bank_hash_details_file(&bank)
.map_err(|err| {
warn!("Unable to write bank hash details file: {err}");
})
.ok();
} else {
warn!("Unable to get bank for slot {duplicate_slot} from bank forks");
}
panic!("We are attempting to dump a block that we produced. \
This indicates that we are producing duplicate blocks, \
or that there is a bug in our runtime/replay code which \
causes us to compute different bank hashes than the rest of the cluster. \
We froze slot {duplicate_slot} with hash {frozen_hash:?} while the cluster hash is {correct_hash}");
}
let attempt_no = purge_repair_slot_counter
@ -1507,7 +1515,11 @@ impl ReplayStage {
let bank = w_bank_forks
.remove(*slot)
.expect("BankForks should not have been purged yet");
let _ = bank_hash_details::write_bank_hash_details_file(&bank);
bank_hash_details::write_bank_hash_details_file(&bank)
.map_err(|err| {
warn!("Unable to write bank hash details file: {err}");
})
.ok();
((*slot, bank.bank_id()), bank)
})
.unzip()

View File

@ -2695,7 +2695,11 @@ fn main() {
}
if write_bank_file {
let working_bank = bank_forks.read().unwrap().working_bank();
let _ = bank_hash_details::write_bank_hash_details_file(&working_bank);
bank_hash_details::write_bank_hash_details_file(&working_bank)
.map_err(|err| {
warn!("Unable to write bank hash_details file: {err}");
})
.ok();
}
exit_signal.store(true, Ordering::Relaxed);
system_monitor_service.join().unwrap();

View File

@ -216,14 +216,10 @@ pub fn write_bank_hash_details_file(bank: &Bank) -> std::result::Result<(), Stri
// path does not exist. So, call std::fs::create_dir_all first.
// https://doc.rust-lang.org/std/fs/fn.write.html
_ = std::fs::create_dir_all(parent_dir);
let file = std::fs::File::create(&path).map_err(|err| {
format!(
"Unable to create bank hash file at {}: {err}",
path.display()
)
})?;
let file = std::fs::File::create(&path)
.map_err(|err| format!("Unable to create file at {}: {err}", path.display()))?;
serde_json::to_writer_pretty(file, &details)
.map_err(|err| format!("Unable to write bank hash file contents: {err}"))?;
.map_err(|err| format!("Unable to write file at {}: {err}", path.display()))?;
}
Ok(())
}