Output BankHashDetails file when leader drops its own block (#34256)

Currently, the file is generated when a node drops a block that was
produced by another node. However, it would also be beneficial to see
the account state when a node drops its own block.

Output the file in this additional failure codepath.
Author: steviez, 2023-11-29 17:20:27 -06:00 (committed by GitHub)
parent e949ef9daa
commit 935e06f8f1
3 changed files with 27 additions and 15 deletions
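
The hunks below all apply the same log-and-continue idiom: rather than discarding the write's Result with `let _ =`, the error is logged via `map_err` and then dropped with `ok()`. A minimal standalone sketch of that idiom follows; `fallible_write` is a hypothetical stand-in for `write_bank_hash_details_file`, and `eprintln!` stands in for the validator's `warn!` macro from the log crate.

    // Sketch of the log-and-continue idiom used throughout this commit.
    fn fallible_write() -> Result<(), String> {
        Err("disk full".to_string())
    }

    fn main() {
        fallible_write()
            // Log the failure; the closure maps the error to ().
            .map_err(|err| {
                eprintln!("Unable to write bank hash details file: {err}");
            })
            // Convert Result<(), ()> into Option<()> and drop it, so execution
            // continues (e.g. toward a subsequent panic or cleanup) whether or
            // not the write succeeded.
            .ok();
        println!("execution continues after the failed write");
    }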


@@ -1349,14 +1349,22 @@ impl ReplayStage {
                );
            }
            // Should not dump slots for which we were the leader
            if Some(*my_pubkey) == leader_schedule_cache.slot_leader_at(*duplicate_slot, None) {
-                panic!("We are attempting to dump a block that we produced. \
-                    This indicates that we are producing duplicate blocks, \
-                    or that there is a bug in our runtime/replay code which \
-                    causes us to compute different bank hashes than the rest of the cluster. \
-                    We froze slot {duplicate_slot} with hash {frozen_hash:?} while the cluster hash is {correct_hash}");
+                if let Some(bank) = bank_forks.read().unwrap().get(*duplicate_slot) {
+                    bank_hash_details::write_bank_hash_details_file(&bank)
+                        .map_err(|err| {
+                            warn!("Unable to write bank hash details file: {err}");
+                        })
+                        .ok();
+                } else {
+                    warn!("Unable to get bank for slot {duplicate_slot} from bank forks");
+                }
+                panic!("We are attempting to dump a block that we produced. \
+                    This indicates that we are producing duplicate blocks, \
+                    or that there is a bug in our runtime/replay code which \
+                    causes us to compute different bank hashes than the rest of the cluster. \
+                    We froze slot {duplicate_slot} with hash {frozen_hash:?} while the cluster hash is {correct_hash}");
            }
            let attempt_no = purge_repair_slot_counter
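
A side note on the bank lookup added above: `bank_forks` sits behind an `RwLock`, so the read guard is taken just for the lookup, and the "slot not found" case is handled explicitly. A rough self-contained sketch of that lock-then-optional-lookup shape; the `HashMap<u64, String>` is only an illustrative stand-in for the real `BankForks`/`Bank` types.

    use std::collections::HashMap;
    use std::sync::RwLock;

    // Hypothetical stand-in for BankForks: slots mapped to some bank summary.
    fn dump_details_if_present(forks: &RwLock<HashMap<u64, String>>, slot: u64) {
        // Take the read lock, look the slot up, and handle the missing case,
        // mirroring bank_forks.read().unwrap().get(*duplicate_slot) in the diff.
        if let Some(bank) = forks.read().unwrap().get(&slot) {
            println!("would write bank hash details for slot {slot}: {bank}");
        } else {
            eprintln!("Unable to get bank for slot {slot} from bank forks");
        }
    }

    fn main() {
        let forks = RwLock::new(HashMap::from([(1349_u64, "frozen bank".to_string())]));
        dump_details_if_present(&forks, 1349);
        dump_details_if_present(&forks, 1350);
    }
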
@@ -1507,7 +1515,11 @@ impl ReplayStage {
                    let bank = w_bank_forks
                        .remove(*slot)
                        .expect("BankForks should not have been purged yet");
-                    let _ = bank_hash_details::write_bank_hash_details_file(&bank);
+                    bank_hash_details::write_bank_hash_details_file(&bank)
+                        .map_err(|err| {
+                            warn!("Unable to write bank hash details file: {err}");
+                        })
+                        .ok();
                    ((*slot, bank.bank_id()), bank)
                })
                .unzip()


@@ -2695,7 +2695,11 @@ fn main() {
            }
            if write_bank_file {
                let working_bank = bank_forks.read().unwrap().working_bank();
-                let _ = bank_hash_details::write_bank_hash_details_file(&working_bank);
+                bank_hash_details::write_bank_hash_details_file(&working_bank)
+                    .map_err(|err| {
+                        warn!("Unable to write bank hash_details file: {err}");
+                    })
+                    .ok();
            }
            exit_signal.store(true, Ordering::Relaxed);
            system_monitor_service.join().unwrap();


@@ -216,14 +216,10 @@ pub fn write_bank_hash_details_file(bank: &Bank) -> std::result::Result<(), Stri
        // path does not exist. So, call std::fs_create_dir_all first.
        // https://doc.rust-lang.org/std/fs/fn.write.html
        _ = std::fs::create_dir_all(parent_dir);
-        let file = std::fs::File::create(&path).map_err(|err| {
-            format!(
-                "Unable to create bank hash file at {}: {err}",
-                path.display()
-            )
-        })?;
+        let file = std::fs::File::create(&path)
+            .map_err(|err| format!("Unable to create file at {}: {err}", path.display()))?;
        serde_json::to_writer_pretty(file, &details)
-            .map_err(|err| format!("Unable to write bank hash file contents: {err}"))?;
+            .map_err(|err| format!("Unable to write file at {}: {err}", path.display()))?;
    }
    Ok(())
}
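
For reference, the revised error messages above now include the target path in both failure cases. A rough standalone sketch of the same create-dir / create-file / serialize sequence; the `Details` struct, the `write_details_file` helper, and the `bank_hash_details/` output path are illustrative only, and the sketch assumes serde (with the derive feature) and serde_json as dependencies.

    use std::path::Path;

    use serde::Serialize;

    // Illustrative payload; the real BankHashDetails content is richer.
    #[derive(Serialize)]
    struct Details {
        slot: u64,
        bank_hash: String,
    }

    fn write_details_file(path: &Path, details: &Details) -> Result<(), String> {
        // File::create may fail if the parent directory does not exist, so
        // create it first; errors here (e.g. it already exists) are ignored.
        if let Some(parent_dir) = path.parent() {
            let _ = std::fs::create_dir_all(parent_dir);
        }
        let file = std::fs::File::create(path)
            .map_err(|err| format!("Unable to create file at {}: {err}", path.display()))?;
        serde_json::to_writer_pretty(file, details)
            .map_err(|err| format!("Unable to write file at {}: {err}", path.display()))?;
        Ok(())
    }

    fn main() -> Result<(), String> {
        let details = Details { slot: 1349, bank_hash: "example".to_string() };
        write_details_file(Path::new("bank_hash_details/1349.json"), &details)
    }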