From 935e06f8f1c0ecb638cf88e1f6285a1bc6055343 Mon Sep 17 00:00:00 2001 From: steviez Date: Wed, 29 Nov 2023 17:20:27 -0600 Subject: [PATCH] Output BankHashDetails file when leader drops its own block (#34256) Currently, the file is generated when a node drops a block that was produced by another node. However, it would also be beneficial to see the account state when a node drops its own block. Output the file in this additional failure codepath. --- core/src/replay_stage.rs | 26 +++++++++++++++++++------- ledger-tool/src/main.rs | 6 +++++- runtime/src/bank/bank_hash_details.rs | 10 +++------- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/core/src/replay_stage.rs b/core/src/replay_stage.rs index 4c546307e..fbf97d13e 100644 --- a/core/src/replay_stage.rs +++ b/core/src/replay_stage.rs @@ -1349,14 +1349,22 @@ impl ReplayStage { ); } - // Should not dump slots for which we were the leader if Some(*my_pubkey) == leader_schedule_cache.slot_leader_at(*duplicate_slot, None) { - panic!("We are attempting to dump a block that we produced. \ - This indicates that we are producing duplicate blocks, \ - or that there is a bug in our runtime/replay code which \ - causes us to compute different bank hashes than the rest of the cluster. \ - We froze slot {duplicate_slot} with hash {frozen_hash:?} while the cluster hash is {correct_hash}"); + if let Some(bank) = bank_forks.read().unwrap().get(*duplicate_slot) { + bank_hash_details::write_bank_hash_details_file(&bank) + .map_err(|err| { + warn!("Unable to write bank hash details file: {err}"); + }) + .ok(); + } else { + warn!("Unable to get bank for slot {duplicate_slot} from bank forks"); + } + panic!("We are attempting to dump a block that we produced. \ + This indicates that we are producing duplicate blocks, \ + or that there is a bug in our runtime/replay code which \ + causes us to compute different bank hashes than the rest of the cluster. 
\ + We froze slot {duplicate_slot} with hash {frozen_hash:?} while the cluster hash is {correct_hash}"); } let attempt_no = purge_repair_slot_counter @@ -1507,7 +1515,11 @@ impl ReplayStage { let bank = w_bank_forks .remove(*slot) .expect("BankForks should not have been purged yet"); - let _ = bank_hash_details::write_bank_hash_details_file(&bank); + bank_hash_details::write_bank_hash_details_file(&bank) + .map_err(|err| { + warn!("Unable to write bank hash details file: {err}"); + }) + .ok(); ((*slot, bank.bank_id()), bank) }) .unzip() diff --git a/ledger-tool/src/main.rs b/ledger-tool/src/main.rs index 3cd4153bd..053656990 100644 --- a/ledger-tool/src/main.rs +++ b/ledger-tool/src/main.rs @@ -2695,7 +2695,11 @@ fn main() { } if write_bank_file { let working_bank = bank_forks.read().unwrap().working_bank(); - let _ = bank_hash_details::write_bank_hash_details_file(&working_bank); + bank_hash_details::write_bank_hash_details_file(&working_bank) + .map_err(|err| { + warn!("Unable to write bank hash_details file: {err}"); + }) + .ok(); } exit_signal.store(true, Ordering::Relaxed); system_monitor_service.join().unwrap(); diff --git a/runtime/src/bank/bank_hash_details.rs b/runtime/src/bank/bank_hash_details.rs index a1b4fa74f..6b40e7aef 100644 --- a/runtime/src/bank/bank_hash_details.rs +++ b/runtime/src/bank/bank_hash_details.rs @@ -216,14 +216,10 @@ pub fn write_bank_hash_details_file(bank: &Bank) -> std::result::Result<(), Stri // path does not exist. So, call std::fs_create_dir_all first. 
// https://doc.rust-lang.org/std/fs/fn.write.html _ = std::fs::create_dir_all(parent_dir); - let file = std::fs::File::create(&path).map_err(|err| { - format!( - "Unable to create bank hash file at {}: {err}", - path.display() - ) - })?; + let file = std::fs::File::create(&path) + .map_err(|err| format!("Unable to create file at {}: {err}", path.display()))?; serde_json::to_writer_pretty(file, &details) - .map_err(|err| format!("Unable to write bank hash file contents: {err}"))?; + .map_err(|err| format!("Unable to write file at {}: {err}", path.display()))?; } Ok(()) }