From e24cc537a4b577dcd001f008e3795c1040a7d8dd Mon Sep 17 00:00:00 2001 From: "Jeff Washington (jwash)" Date: Tue, 14 Jun 2022 10:10:44 -0500 Subject: [PATCH] avoid adding to 'uncleaned_roots' when generating index and caller passes accounts-db-skip-shrink (#25936) --- runtime/src/accounts_db.rs | 8 +++++++- runtime/src/bank.rs | 18 +++++++++++++++--- runtime/src/serde_snapshot.rs | 6 ++++++ runtime/src/serde_snapshot/tests.rs | 4 ++++ runtime/src/snapshot_utils.rs | 8 +++++++- 5 files changed, 39 insertions(+), 5 deletions(-) diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs index c35899182b..b69f3c90ad 100644 --- a/runtime/src/accounts_db.rs +++ b/runtime/src/accounts_db.rs @@ -8084,6 +8084,7 @@ impl AccountsDb { limit_load_slot_count_from_snapshot: Option, verify: bool, genesis_config: &GenesisConfig, + accounts_db_skip_shrink: bool, ) -> IndexGenerationInfo { let mut slots = self.storage.all_slots(); #[allow(clippy::stable_sort_primitive)] @@ -8271,7 +8272,12 @@ impl AccountsDb { if pass == 0 { // Need to add these last, otherwise older updates will be cleaned for slot in &slots { - self.accounts_index.add_root(*slot, false); + // passing 'false' to 'add_root' causes all slots to be added to 'uncleaned_slots' + // passing 'true' to 'add_root' does NOT add all slots to 'uncleaned_slots' + // if we are skipping shrink, this potentially massive amount of work is never processed at startup, when all threads can be used. + // This causes failures such as oom during the first bg clean, which is expecting to work in 'normal' operating circumstances. + // So, don't add all slots to 'uncleaned_slots' here since by requesting to skip clean and shrink, caller is expecting the starting snapshot to be reasonable. + self.accounts_index.add_root(*slot, accounts_db_skip_shrink); } self.set_storage_count_and_alive_bytes(storage_info, &mut timings); diff --git a/runtime/src/bank.rs b/runtime/src/bank.rs index b2b58ae2a3..c87ca93b41 100644 --- a/runtime/src/bank.rs +++ b/runtime/src/bank.rs @@ -6820,9 +6820,21 @@ impl Bank { last_full_snapshot_slot: Option, ) -> bool { let mut clean_time = Measure::start("clean"); - if !accounts_db_skip_shrink && self.slot() > 0 { - info!("cleaning.."); - self.clean_accounts(true, true, last_full_snapshot_slot); + if !accounts_db_skip_shrink { + if self.slot() > 0 { + info!("cleaning.."); + self.clean_accounts(true, true, last_full_snapshot_slot); + } + } else { + // if we are skipping shrink, there should be no uncleaned_roots deferred to later + assert_eq!( + self.rc + .accounts + .accounts_db + .accounts_index + .uncleaned_roots_len(), + 0 + ); } clean_time.stop(); diff --git a/runtime/src/serde_snapshot.rs b/runtime/src/serde_snapshot.rs index 27953bc3a4..acd724b708 100644 --- a/runtime/src/serde_snapshot.rs +++ b/runtime/src/serde_snapshot.rs @@ -242,6 +242,7 @@ pub(crate) fn bank_from_streams( verify_index: bool, accounts_db_config: Option, accounts_update_notifier: Option, + accounts_db_skip_shrink: bool, ) -> std::result::Result where R: Read, @@ -280,6 +281,7 @@ where verify_index, accounts_db_config, accounts_update_notifier, + accounts_db_skip_shrink, )?; Ok(bank) }}; @@ -488,6 +490,7 @@ fn reconstruct_bank_from_fields( verify_index: bool, accounts_db_config: Option, accounts_update_notifier: Option, + accounts_db_skip_shrink: bool, ) -> Result where E: SerializableStorage + std::marker::Sync, @@ -504,6 +507,7 @@ where verify_index, accounts_db_config, accounts_update_notifier, + accounts_db_skip_shrink, )?; let bank_rc = BankRc::new(Accounts::new_empty(accounts_db), bank_fields.slot); @@ -563,6 +567,7 @@ fn reconstruct_accountsdb_from_fields( verify_index: bool, accounts_db_config: Option, accounts_update_notifier: Option, + accounts_db_skip_shrink: bool, ) -> Result<(AccountsDb, ReconstructedAccountsDbInfo), Error> where E: SerializableStorage + std::marker::Sync, @@ -712,6 +717,7 @@ where limit_load_slot_count_from_snapshot, verify_index, genesis_config, + accounts_db_skip_shrink, ); accounts_db.maybe_add_filler_accounts( diff --git a/runtime/src/serde_snapshot/tests.rs b/runtime/src/serde_snapshot/tests.rs index 2d4fca051a..d638185fc1 100644 --- a/runtime/src/serde_snapshot/tests.rs +++ b/runtime/src/serde_snapshot/tests.rs @@ -88,6 +88,7 @@ where false, Some(crate::accounts_db::ACCOUNTS_DB_CONFIG_FOR_TESTING), None, + false, ) .map(|(accounts_db, _)| accounts_db) } @@ -300,6 +301,7 @@ fn test_bank_serialize_style( false, Some(crate::accounts_db::ACCOUNTS_DB_CONFIG_FOR_TESTING), None, + false, ) .unwrap(); dbank.src = ref_sc; @@ -415,6 +417,7 @@ fn test_extra_fields_eof() { false, Some(crate::accounts_db::ACCOUNTS_DB_CONFIG_FOR_TESTING), None, + false, ) .unwrap(); @@ -536,6 +539,7 @@ fn test_blank_extra_fields() { false, Some(crate::accounts_db::ACCOUNTS_DB_CONFIG_FOR_TESTING), None, + false, ) .unwrap(); diff --git a/runtime/src/snapshot_utils.rs b/runtime/src/snapshot_utils.rs index ac08be28d9..2b52a8321c 100644 --- a/runtime/src/snapshot_utils.rs +++ b/runtime/src/snapshot_utils.rs @@ -821,6 +821,9 @@ pub fn bank_from_snapshot_archives( incremental_snapshot_archive_info, )?; + let accounts_db_skip_shrink = + accounts_db_skip_shrink || !full_snapshot_archive_info.is_remote(); + let parallel_divisions = std::cmp::min( PARALLEL_UNTAR_READERS_DEFAULT, std::cmp::max(1, num_cpus::get() / 4), @@ -879,6 +882,7 @@ pub fn bank_from_snapshot_archives( verify_index, accounts_db_config, accounts_update_notifier, + accounts_db_skip_shrink, )?; measure_rebuild.stop(); info!("{}", measure_rebuild); @@ -886,7 +890,7 @@ pub fn bank_from_snapshot_archives( let mut measure_verify = Measure::start("verify"); if !bank.verify_snapshot_bank( test_hash_calculation, - accounts_db_skip_shrink || !full_snapshot_archive_info.is_remote(), + accounts_db_skip_shrink, Some(full_snapshot_archive_info.slot()), ) && limit_load_slot_count_from_snapshot.is_none() { @@ -1575,6 +1579,7 @@ fn rebuild_bank_from_snapshots( verify_index: bool, accounts_db_config: Option, accounts_update_notifier: Option, + accounts_db_skip_shrink: bool, ) -> Result { let (full_snapshot_version, full_snapshot_root_paths) = verify_unpacked_snapshots_dir_and_version( @@ -1623,6 +1628,7 @@ fn rebuild_bank_from_snapshots( verify_index, accounts_db_config, accounts_update_notifier, + accounts_db_skip_shrink, ), }?, )