From 4bba59f0638f6b303526dd3b6261862a13eeb28e Mon Sep 17 00:00:00 2001 From: Brooks Date: Mon, 4 Dec 2023 14:23:15 -0500 Subject: [PATCH] Rebuilds skipped rewrites when loading from a snapshot (#34280) --- runtime/src/bank.rs | 86 +++++++++++++++++++++++------ runtime/src/bank/tests.rs | 113 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 181 insertions(+), 18 deletions(-) diff --git a/runtime/src/bank.rs b/runtime/src/bank.rs index ff935e5f3..e21caddcd 100644 --- a/runtime/src/bank.rs +++ b/runtime/src/bank.rs @@ -1903,6 +1903,7 @@ impl Bank { debug_do_not_add_builtins, ); bank.fill_missing_sysvar_cache_entries(); + bank.rebuild_skipped_rewrites(); // Sanity assertions between bank snapshot and genesis config // Consider removing from serializable bank state @@ -5712,6 +5713,70 @@ impl Bank { }); } + /// After deserialize, populate skipped rewrites with accounts that would normally + /// have had their data rewritten in this slot due to rent collection (but didn't). + /// + /// This is required when starting up from a snapshot to verify the bank hash. + /// + /// A second usage is from the `bank_to_xxx_snapshot_archive()` functions. These fns call + /// `Bank::rehash()` to handle if the user manually modified any accounts and thus requires + /// calculating the bank hash again. Since calculating the bank hash *takes* the skipped + /// rewrites, this second time will not have any skipped rewrites, and thus the hash would be + /// updated to the wrong value. So, rebuild the skipped rewrites before rehashing. + fn rebuild_skipped_rewrites(&self) { + // If the feature gate to *not* add rent collection rewrites to the bank hash is enabled, + // then do *not* add anything to our skipped_rewrites. + if self.bank_hash_skips_rent_rewrites() { + return; + } + + let (skipped_rewrites, measure_skipped_rewrites) = + measure!(self.calculate_skipped_rewrites()); + info!( + "Rebuilding skipped rewrites of {} accounts{measure_skipped_rewrites}", + skipped_rewrites.len() + ); + + *self.skipped_rewrites.lock().unwrap() = skipped_rewrites; + } + + /// Calculates (and returns) skipped rewrites for this bank + /// + /// Refer to `rebuild_skipped_rewrites()` for more documentation. + /// This implementaion is purposely separate to facilitate testing. + /// + /// The key observation is that accounts in Bank::skipped_rewrites are only used IFF the + /// specific account is *not* already in the accounts delta hash. If an account is not in + /// the accounts delta hash, then it means the account was not modified. Since (basically) + /// all accounts are rent exempt, this means (basically) all accounts are unmodified by rent + /// collection. So we just need to load the accounts that would've been checked for rent + /// collection, hash them, and add them to Bank::skipped_rewrites. + /// + /// As of this writing, there are ~350 million acounts on mainnet-beta. + /// Rent collection almost always collects a single slot at a time. + /// So 1 slot of 432,000, of 350 million accounts, is ~800 accounts per slot. + /// Since we haven't started processing anything yet, it should be fast enough to simply + /// load the accounts directly. + /// Empirically, this takes about 3-4 milliseconds. + fn calculate_skipped_rewrites(&self) -> HashMap { + // The returned skipped rewrites may include accounts that were actually *not* skipped! + // (This is safe, as per the fn's documentation above.) + HashMap::from_iter( + self.rent_collection_partitions() + .into_iter() + .map(accounts_partition::pubkey_range_from_partition) + .flat_map(|pubkey_range| { + self.rc + .accounts + .load_to_collect_rent_eagerly(&self.ancestors, pubkey_range) + }) + .map(|(pubkey, account, _slot)| { + let account_hash = AccountsDb::hash_account(&account, &pubkey); + (pubkey, account_hash) + }), + ) + } + fn collect_rent_eagerly(&self) { if self.lazy_rent_collection.load(Relaxed) { return; @@ -5971,8 +6036,6 @@ impl Bank { /// collect rent and update 'account.rent_epoch' as necessary /// store accounts, whether rent was collected or not (depending on whether we skipping rewrites is enabled) /// update bank's rewrites set for all rewrites that were skipped - /// if 'just_rewrites', function will only update bank's rewrites set and not actually store any accounts. - /// This flag is used when restoring from a snapshot to calculate and verify the initial bank's delta hash. fn collect_rent_in_range( &self, partition: Partition, @@ -7464,22 +7527,9 @@ impl Bank { } }); - let (verified_bank, verify_bank_time_us) = measure_us!({ - let should_verify_bank = !self - .rc - .accounts - .accounts_db - .test_skip_rewrites_but_include_in_bank_hash; - if should_verify_bank { - info!("Verifying bank..."); - let verified = self.verify_hash(); - info!("Verifying bank... Done."); - verified - } else { - info!("Verifying bank... Skipped."); - true - } - }); + info!("Verifying bank..."); + let (verified_bank, verify_bank_time_us) = measure_us!(self.verify_hash()); + info!("Verifying bank... Done."); datapoint_info!( "verify_snapshot_bank", diff --git a/runtime/src/bank/tests.rs b/runtime/src/bank/tests.rs index d8c7679df..a183798ea 100644 --- a/runtime/src/bank/tests.rs +++ b/runtime/src/bank/tests.rs @@ -16,6 +16,7 @@ use { create_genesis_config_with_leader, create_genesis_config_with_vote_accounts, genesis_sysvar_and_builtin_program_lamports, GenesisConfigInfo, ValidatorVoteKeypairs, }, + snapshot_bank_utils, snapshot_utils, status_cache::MAX_CACHE_ENTRIES, }, assert_matches::assert_matches, @@ -127,6 +128,7 @@ use { thread::Builder, time::{Duration, Instant}, }, + tempfile::TempDir, test_case::test_case, }; @@ -13966,3 +13968,114 @@ fn test_rehash_with_skipped_rewrites() { let bank_rehash = bank.hash(); assert_eq!(bank_rehash, bank_hash); } + +/// Test that skipped_rewrites are properly rebuilt when booting from a snapshot +/// that was generated by a node skipping rewrites. +#[test] +fn test_rebuild_skipped_rewrites() { + let genesis_config = GenesisConfig::default(); + let accounts_db_config = AccountsDbConfig { + test_skip_rewrites_but_include_in_bank_hash: true, + ..ACCOUNTS_DB_CONFIG_FOR_TESTING + }; + let bank = Arc::new(Bank::new_with_paths( + &genesis_config, + Arc::new(RuntimeConfig::default()), + Vec::default(), + None, + None, + AccountSecondaryIndexes::default(), + AccountShrinkThreshold::default(), + false, + Some(accounts_db_config.clone()), + None, + Arc::new(AtomicBool::new(false)), + )); + // This test is only meaningful while the bank hash contains rewrites. + // Once this feature is enabled, it may be possible to remove this test entirely. + assert!(!bank.bank_hash_skips_rent_rewrites()); + + // Store an account *in this bank* that will be checked for rent collection *in the next bank* + let pubkey = { + let rent_collection_partition = bank + .variable_cycle_partitions_between_slots(bank.slot(), bank.slot() + 1) + .last() + .copied() + .unwrap(); + let pubkey_range = + accounts_partition::pubkey_range_from_partition(rent_collection_partition); + *pubkey_range.end() + }; + let mut account = AccountSharedData::new(123_456_789, 0, &Pubkey::default()); + // The account's rent epoch must be set to EXEMPT + // in order for its rewrite to be skipped by rent collection. + account.set_rent_epoch(RENT_EXEMPT_RENT_EPOCH); + bank.store_account_and_update_capitalization(&pubkey, &account); + + // Create a new bank that will do rent collection on the account stored in the previous slot + let bank = Arc::new(Bank::new_from_parent( + bank.clone(), + &Pubkey::new_unique(), + bank.slot() + 1, + )); + + // This fn is called within freeze(), but freeze() *consumes* Self::skipped_rewrites! + // For testing, we want to know what's in the skipped rewrites, so we perform + // rent collection manually. + bank.collect_rent_eagerly(); + let actual_skipped_rewrites = bank.skipped_rewrites.lock().unwrap().clone(); + // Ensure skipped rewrites now includes the account we stored above + assert!(actual_skipped_rewrites.contains_key(&pubkey)); + // Ensure the calculated skipped rewrites match the actual ones + let calculated_skipped_rewrites = bank.calculate_skipped_rewrites(); + assert_eq!(calculated_skipped_rewrites, actual_skipped_rewrites); + + // required in order to snapshot the bank + bank.fill_bank_with_ticks_for_tests(); + + // Now take a snapshot! + let (_tmp_dir, accounts_dir) = snapshot_utils::create_tmp_accounts_dir_for_tests(); + let bank_snapshots_dir = TempDir::new().unwrap(); + let full_snapshot_archives_dir = TempDir::new().unwrap(); + let incremental_snapshot_archives_dir = TempDir::new().unwrap(); + let full_snapshot_archive = snapshot_bank_utils::bank_to_full_snapshot_archive( + bank_snapshots_dir.path(), + &bank, + None, + full_snapshot_archives_dir.path(), + incremental_snapshot_archives_dir.path(), + snapshot_utils::ArchiveFormat::Tar, + snapshot_utils::DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN, + snapshot_utils::DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN, + ) + .unwrap(); + + // Rebuild the bank and ensure it passes verification + let (snapshot_bank, _) = snapshot_bank_utils::bank_from_snapshot_archives( + &[accounts_dir], + bank_snapshots_dir.path(), + &full_snapshot_archive, + None, + &genesis_config, + &RuntimeConfig::default(), + None, + None, + AccountSecondaryIndexes::default(), + None, + AccountShrinkThreshold::default(), + false, + false, + false, + false, + Some(ACCOUNTS_DB_CONFIG_FOR_TESTING), + None, + Arc::new(AtomicBool::new(false)), + ) + .unwrap(); + snapshot_bank.wait_for_initial_accounts_hash_verification_completed_for_tests(); + assert_eq!(bank.as_ref(), &snapshot_bank); + + // Ensure the snapshot bank's skipped rewrites match the original bank's + let snapshot_skipped_rewrites = snapshot_bank.calculate_skipped_rewrites(); + assert_eq!(snapshot_skipped_rewrites, actual_skipped_rewrites); +}