Rebuilds skipped rewrites when loading from a snapshot (#34280)

This commit is contained in:
Brooks 2023-12-04 14:23:15 -05:00 committed by GitHub
parent efaec08f88
commit 4bba59f063
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 181 additions and 18 deletions

View File

@ -1903,6 +1903,7 @@ impl Bank {
debug_do_not_add_builtins,
);
bank.fill_missing_sysvar_cache_entries();
bank.rebuild_skipped_rewrites();
// Sanity assertions between bank snapshot and genesis config
// Consider removing from serializable bank state
@ -5712,6 +5713,70 @@ impl Bank {
});
}
/// After deserialize, populate skipped rewrites with accounts that would normally
/// have had their data rewritten in this slot due to rent collection (but didn't).
///
/// This is required when starting up from a snapshot to verify the bank hash.
///
/// A second usage is from the `bank_to_xxx_snapshot_archive()` functions. These fns call
/// `Bank::rehash()` to handle if the user manually modified any accounts and thus requires
/// calculating the bank hash again. Since calculating the bank hash *takes* the skipped
/// rewrites, this second time will not have any skipped rewrites, and thus the hash would be
/// updated to the wrong value. So, rebuild the skipped rewrites before rehashing.
fn rebuild_skipped_rewrites(&self) {
// If the feature gate to *not* add rent collection rewrites to the bank hash is enabled,
// then do *not* add anything to our skipped_rewrites.
if self.bank_hash_skips_rent_rewrites() {
return;
}
let (skipped_rewrites, measure_skipped_rewrites) =
measure!(self.calculate_skipped_rewrites());
info!(
"Rebuilding skipped rewrites of {} accounts{measure_skipped_rewrites}",
skipped_rewrites.len()
);
*self.skipped_rewrites.lock().unwrap() = skipped_rewrites;
}
/// Calculates (and returns) skipped rewrites for this bank
///
/// Refer to `rebuild_skipped_rewrites()` for more documentation.
/// This implementaion is purposely separate to facilitate testing.
///
/// The key observation is that accounts in Bank::skipped_rewrites are only used IFF the
/// specific account is *not* already in the accounts delta hash. If an account is not in
/// the accounts delta hash, then it means the account was not modified. Since (basically)
/// all accounts are rent exempt, this means (basically) all accounts are unmodified by rent
/// collection. So we just need to load the accounts that would've been checked for rent
/// collection, hash them, and add them to Bank::skipped_rewrites.
///
/// As of this writing, there are ~350 million acounts on mainnet-beta.
/// Rent collection almost always collects a single slot at a time.
/// So 1 slot of 432,000, of 350 million accounts, is ~800 accounts per slot.
/// Since we haven't started processing anything yet, it should be fast enough to simply
/// load the accounts directly.
/// Empirically, this takes about 3-4 milliseconds.
fn calculate_skipped_rewrites(&self) -> HashMap<Pubkey, AccountHash> {
// The returned skipped rewrites may include accounts that were actually *not* skipped!
// (This is safe, as per the fn's documentation above.)
HashMap::from_iter(
self.rent_collection_partitions()
.into_iter()
.map(accounts_partition::pubkey_range_from_partition)
.flat_map(|pubkey_range| {
self.rc
.accounts
.load_to_collect_rent_eagerly(&self.ancestors, pubkey_range)
})
.map(|(pubkey, account, _slot)| {
let account_hash = AccountsDb::hash_account(&account, &pubkey);
(pubkey, account_hash)
}),
)
}
fn collect_rent_eagerly(&self) {
if self.lazy_rent_collection.load(Relaxed) {
return;
@ -5971,8 +6036,6 @@ impl Bank {
/// collect rent and update 'account.rent_epoch' as necessary
/// store accounts, whether rent was collected or not (depending on whether we skipping rewrites is enabled)
/// update bank's rewrites set for all rewrites that were skipped
/// if 'just_rewrites', function will only update bank's rewrites set and not actually store any accounts.
/// This flag is used when restoring from a snapshot to calculate and verify the initial bank's delta hash.
fn collect_rent_in_range(
&self,
partition: Partition,
@ -7464,22 +7527,9 @@ impl Bank {
}
});
let (verified_bank, verify_bank_time_us) = measure_us!({
let should_verify_bank = !self
.rc
.accounts
.accounts_db
.test_skip_rewrites_but_include_in_bank_hash;
if should_verify_bank {
info!("Verifying bank...");
let verified = self.verify_hash();
info!("Verifying bank... Done.");
verified
} else {
info!("Verifying bank... Skipped.");
true
}
});
info!("Verifying bank...");
let (verified_bank, verify_bank_time_us) = measure_us!(self.verify_hash());
info!("Verifying bank... Done.");
datapoint_info!(
"verify_snapshot_bank",

View File

@ -16,6 +16,7 @@ use {
create_genesis_config_with_leader, create_genesis_config_with_vote_accounts,
genesis_sysvar_and_builtin_program_lamports, GenesisConfigInfo, ValidatorVoteKeypairs,
},
snapshot_bank_utils, snapshot_utils,
status_cache::MAX_CACHE_ENTRIES,
},
assert_matches::assert_matches,
@ -127,6 +128,7 @@ use {
thread::Builder,
time::{Duration, Instant},
},
tempfile::TempDir,
test_case::test_case,
};
@ -13966,3 +13968,114 @@ fn test_rehash_with_skipped_rewrites() {
let bank_rehash = bank.hash();
assert_eq!(bank_rehash, bank_hash);
}
/// Test that skipped_rewrites are properly rebuilt when booting from a snapshot
/// that was generated by a node skipping rewrites.
#[test]
fn test_rebuild_skipped_rewrites() {
let genesis_config = GenesisConfig::default();
let accounts_db_config = AccountsDbConfig {
test_skip_rewrites_but_include_in_bank_hash: true,
..ACCOUNTS_DB_CONFIG_FOR_TESTING
};
let bank = Arc::new(Bank::new_with_paths(
&genesis_config,
Arc::new(RuntimeConfig::default()),
Vec::default(),
None,
None,
AccountSecondaryIndexes::default(),
AccountShrinkThreshold::default(),
false,
Some(accounts_db_config.clone()),
None,
Arc::new(AtomicBool::new(false)),
));
// This test is only meaningful while the bank hash contains rewrites.
// Once this feature is enabled, it may be possible to remove this test entirely.
assert!(!bank.bank_hash_skips_rent_rewrites());
// Store an account *in this bank* that will be checked for rent collection *in the next bank*
let pubkey = {
let rent_collection_partition = bank
.variable_cycle_partitions_between_slots(bank.slot(), bank.slot() + 1)
.last()
.copied()
.unwrap();
let pubkey_range =
accounts_partition::pubkey_range_from_partition(rent_collection_partition);
*pubkey_range.end()
};
let mut account = AccountSharedData::new(123_456_789, 0, &Pubkey::default());
// The account's rent epoch must be set to EXEMPT
// in order for its rewrite to be skipped by rent collection.
account.set_rent_epoch(RENT_EXEMPT_RENT_EPOCH);
bank.store_account_and_update_capitalization(&pubkey, &account);
// Create a new bank that will do rent collection on the account stored in the previous slot
let bank = Arc::new(Bank::new_from_parent(
bank.clone(),
&Pubkey::new_unique(),
bank.slot() + 1,
));
// This fn is called within freeze(), but freeze() *consumes* Self::skipped_rewrites!
// For testing, we want to know what's in the skipped rewrites, so we perform
// rent collection manually.
bank.collect_rent_eagerly();
let actual_skipped_rewrites = bank.skipped_rewrites.lock().unwrap().clone();
// Ensure skipped rewrites now includes the account we stored above
assert!(actual_skipped_rewrites.contains_key(&pubkey));
// Ensure the calculated skipped rewrites match the actual ones
let calculated_skipped_rewrites = bank.calculate_skipped_rewrites();
assert_eq!(calculated_skipped_rewrites, actual_skipped_rewrites);
// required in order to snapshot the bank
bank.fill_bank_with_ticks_for_tests();
// Now take a snapshot!
let (_tmp_dir, accounts_dir) = snapshot_utils::create_tmp_accounts_dir_for_tests();
let bank_snapshots_dir = TempDir::new().unwrap();
let full_snapshot_archives_dir = TempDir::new().unwrap();
let incremental_snapshot_archives_dir = TempDir::new().unwrap();
let full_snapshot_archive = snapshot_bank_utils::bank_to_full_snapshot_archive(
bank_snapshots_dir.path(),
&bank,
None,
full_snapshot_archives_dir.path(),
incremental_snapshot_archives_dir.path(),
snapshot_utils::ArchiveFormat::Tar,
snapshot_utils::DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
snapshot_utils::DEFAULT_MAX_INCREMENTAL_SNAPSHOT_ARCHIVES_TO_RETAIN,
)
.unwrap();
// Rebuild the bank and ensure it passes verification
let (snapshot_bank, _) = snapshot_bank_utils::bank_from_snapshot_archives(
&[accounts_dir],
bank_snapshots_dir.path(),
&full_snapshot_archive,
None,
&genesis_config,
&RuntimeConfig::default(),
None,
None,
AccountSecondaryIndexes::default(),
None,
AccountShrinkThreshold::default(),
false,
false,
false,
false,
Some(ACCOUNTS_DB_CONFIG_FOR_TESTING),
None,
Arc::new(AtomicBool::new(false)),
)
.unwrap();
snapshot_bank.wait_for_initial_accounts_hash_verification_completed_for_tests();
assert_eq!(bank.as_ref(), &snapshot_bank);
// Ensure the snapshot bank's skipped rewrites match the original bank's
let snapshot_skipped_rewrites = snapshot_bank.calculate_skipped_rewrites();
assert_eq!(snapshot_skipped_rewrites, actual_skipped_rewrites);
}