ledger-tool minimized snapshots (#25334)

* working on local snapshot

* Parallelization for slot storage minimization

* Additional clean-up and fixes

* make --minimize an option of create-snapshot

* remove now unnecessary function

* Parallelize parts of minimized account set generation

* clippy fixes

* Add rent collection accounts and voting node_pubkeys

* Simplify programdata_accounts generation

* Loop over storages to get slot set

* Parallelize minimized slot set generation

* Parallelize adding owners and programdata_accounts

* Remove some now unnecessary checks on the blockstore

* Add a warning for minimized snapshots across epoch boundary

* Simplify ledger-tool minimize

* Clarify names of bank's minimization helper functions

* Remove unnecessary function, fix line spacing

* Use DashSets instead of HashSets for minimized account and slot sets

* Filter storages uses all threads instead of thread_pool

* Add some additional comments on functions for minimization

* Moved more into bank and parallelized

* Update programs/bpf/Cargo.lock for dashmap in ledger

* Clippy fix

* ledger-tool: convert minimize_bank_for_snapshot Measure into measure!

* bank.rs: convert minimize_bank_for_snapshot Measure into measure!

* accounts_db.rs: convert minimize_accounts_db Measure into measure!

* accounts_db.rs: add comment about use of minimize_accounts_db

* ledger-tool: CLI argument clarification

* minimization functions: make infos unique

* bank.rs: Add test_get_rent_collection_accounts_between_slots

* bank.rs: Add test_minimization_add_vote_accounts

* bank.rs: Add test_minimization_add_stake_accounts

* bank.rs: Add test_minimization_add_owner_accounts

* bank.rs: Add test_minimization_add_programdata_accounts

* accounts_db.rs: Add test_minimize_accounts_db

* bank.rs: Add negative case and comments in test_get_rent_collection_accounts_between_slots

* bank.rs: Negative test in test_minimization_add_programdata_accounts

* use new static runtime and sdk ids

* bank comments to doc comments

* Only need to insert the maximum slot a key is found in

* rename remove_pubkeys to purge_pubkeys

* add comment on builtins::get_pubkeys

* prevent excessive logging of removed dead slots

* don't need to remove slot from shrink slot candidates

* blockstore.rs: get_accounts_used_in_range shouldn't return Result

* blockstore.rs: get_accounts_used_in_range: parallelize slot loop

* report filtering progress on time instead of count

* parallelize loop over snapshot storages

* WIP: move some bank minimization functionality into a new class

* WIP: move some accounts_db minimization functionality into SnapshotMinimizer

* WIP: Use new SnapshotMinimizer

* SnapshotMinimizer: fix use statements

* remove bank and accounts_db minimization code, where possible

* measure! doesn't take a closure

* fix use statement in blockstore

* log_dead_slots does not need pub(crate)

* get_unique_accounts_from_storages does not need pub(crate)

* different way to get stake accounts/nodes

* fix tests

* move rent collection account functionality to snapshot minimizer

* move accounts_db minimize behavior to snapshot minimizer

* clean up

* Use bank reference instead of Arc. Additional comments

* Add a comment to blockstore function

* Additional clarifying comments

* Moved all non-transaction account accumulation into the SnapshotMinimizer.

* transaction_account_set does not need to be mutable now

* Add comment about load_to_collect_rent_eagerly

* Update log_dead_slots comment

* remove duplicate measure/print of get_minimized_slot_set
This commit is contained in:
apfitzge 2022-06-22 13:17:43 -04:00 committed by GitHub
parent cf5ec13f59
commit f4189c0305
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 871 additions and 19 deletions

1
Cargo.lock generated
View File

@ -5218,6 +5218,7 @@ dependencies = [
"chrono",
"chrono-humanize",
"crossbeam-channel",
"dashmap",
"fs_extra",
"futures 0.3.21",
"itertools",

View File

@ -31,7 +31,7 @@ use {
blockstore_processor::{BlockstoreProcessorError, ProcessOptions},
shred::Shred,
},
solana_measure::measure::Measure,
solana_measure::{measure, measure::Measure},
solana_runtime::{
accounts_db::{AccountsDbConfig, FillerAccountsConfig},
accounts_index::{AccountsIndexConfig, IndexLimitMb, ScanConfig},
@ -44,6 +44,7 @@ use {
snapshot_archive_info::SnapshotArchiveInfoGetter,
snapshot_config::SnapshotConfig,
snapshot_hash::StartingSnapshotHashes,
snapshot_minimizer::SnapshotMinimizer,
snapshot_utils::{
self, ArchiveFormat, SnapshotVersion, DEFAULT_ARCHIVE_COMPRESSION,
DEFAULT_MAX_FULL_SNAPSHOT_ARCHIVES_TO_RETAIN,
@ -920,6 +921,25 @@ fn open_genesis_config_by(ledger_path: &Path, matches: &ArgMatches<'_>) -> Genes
open_genesis_config(ledger_path, max_genesis_archive_unpacked_size)
}
/// Finds the accounts needed to replay slots `snapshot_slot` to `ending_slot`.
/// Removes all other accounts from accounts_db, and updates the accounts hash
/// and capitalization. This is used by the --minimize option in create-snapshot
fn minimize_bank_for_snapshot(
    blockstore: &Blockstore,
    bank: &Bank,
    snapshot_slot: Slot,
    ending_slot: Slot,
) {
    // First pass: collect every account referenced by a transaction in the
    // slot range, scanning the blockstore entries.
    let (transaction_account_set, transaction_accounts_measure) = measure!(
        blockstore.get_accounts_used_in_range(snapshot_slot, ending_slot),
        "get transaction accounts"
    );
    let total_accounts_len = transaction_account_set.len();
    info!("Added {total_accounts_len} accounts from transactions. {transaction_accounts_measure}");

    // Second pass: the minimizer augments the set with non-transaction
    // accounts (features, builtins, rent-collection, vote/stake, owners,
    // programdata) and then removes everything else from accounts_db.
    SnapshotMinimizer::minimize(bank, snapshot_slot, ending_slot, transaction_account_set);
}
fn assert_capitalization(bank: &Bank) {
let debug_verify = true;
assert!(bank.calculate_and_verify_capitalization(debug_verify));
@ -1592,6 +1612,23 @@ fn main() {
base for the incremental snapshot.")
.conflicts_with("no_snapshot")
)
.arg(
Arg::with_name("minimized")
.long("minimized")
.takes_value(false)
.help("Create a minimized snapshot instead of a full snapshot. This snapshot \
will only include information needed to replay the ledger from the \
snapshot slot to the ending slot.")
.conflicts_with("incremental")
.requires("ending_slot")
)
.arg(
Arg::with_name("ending_slot")
.long("ending-slot")
.takes_value(true)
.value_name("ENDING_SLOT")
.help("Ending slot for minimized snapshot creation")
)
.arg(
Arg::with_name("snapshot_archive_format")
.long("snapshot-archive-format")
@ -1600,8 +1637,9 @@ fn main() {
.value_name("ARCHIVE_TYPE")
.takes_value(true)
.help("Snapshot archive format to use.")
.conflicts_with("no_snapshot")
)
).subcommand(
).subcommand(
SubCommand::with_name("accounts")
.about("Print account stats and contents after processing the ledger")
.arg(&no_snapshot_arg)
@ -2334,6 +2372,7 @@ fn main() {
}
("create-snapshot", Some(arg_matches)) => {
let is_incremental = arg_matches.is_present("incremental");
let is_minimized = arg_matches.is_present("minimized");
let output_directory = value_t!(arg_matches, "output_directory", PathBuf)
.unwrap_or_else(|_| {
match (
@ -2420,9 +2459,32 @@ fn main() {
value_t_or_exit!(arg_matches, "snapshot_slot", Slot)
};
let ending_slot = if is_minimized {
let ending_slot = value_t_or_exit!(arg_matches, "ending_slot", Slot);
if ending_slot <= snapshot_slot {
eprintln!(
"ending_slot ({}) must be greater than snapshot_slot ({})",
ending_slot, snapshot_slot
);
exit(1);
}
Some(ending_slot)
} else {
None
};
let snapshot_type_str = if is_incremental {
"incremental "
} else if is_minimized {
"minimized "
} else {
""
};
info!(
"Creating {}snapshot of slot {} in {}",
if is_incremental { "incremental " } else { "" },
snapshot_type_str,
snapshot_slot,
output_directory.display()
);
@ -2654,10 +2716,19 @@ fn main() {
bank
};
if is_minimized {
minimize_bank_for_snapshot(
&blockstore,
&bank,
snapshot_slot,
ending_slot.unwrap(),
);
}
println!(
"Creating a version {} {}snapshot of slot {}",
snapshot_version,
if is_incremental { "incremental " } else { "" },
snapshot_type_str,
bank.slot(),
);
@ -2723,6 +2794,16 @@ fn main() {
bank.hash(),
full_snapshot_archive_info.path().display(),
);
if is_minimized {
let starting_epoch = bank.epoch_schedule().get_epoch(snapshot_slot);
let ending_epoch =
bank.epoch_schedule().get_epoch(ending_slot.unwrap());
if starting_epoch != ending_epoch {
warn!("Minimized snapshot range crosses epoch boundary ({} to {}). Bank hashes after {} will not match replays from a full snapshot",
starting_epoch, ending_epoch, bank.epoch_schedule().get_last_slot_in_epoch(starting_epoch));
}
}
}
println!(

View File

@ -16,6 +16,7 @@ byteorder = "1.4.3"
chrono = { version = "0.4.11", features = ["serde"] }
chrono-humanize = "0.2.1"
crossbeam-channel = "0.5"
dashmap = { version = "4.0.2", features = ["rayon", "raw-api"] }
fs_extra = "1.2.0"
futures = "0.3.21"
itertools = "0.10.3"

View File

@ -1,6 +1,7 @@
//! The `blockstore` module provides functions for parallel verification of the
//! Proof of History ledger as well as iterative read, append write, and random
//! access read to a persistent file-based ledger.
use {
crate::{
ancestor_iterator::AncestorIterator,
@ -22,9 +23,10 @@ use {
},
bincode::deserialize,
crossbeam_channel::{bounded, Receiver, Sender, TrySendError},
dashmap::DashSet,
log::*,
rayon::{
iter::{IntoParallelRefIterator, ParallelIterator},
iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator},
ThreadPool,
},
rocksdb::DBRawIterator,
@ -2749,6 +2751,32 @@ impl Blockstore {
Ok((entries, num_shreds, slot_meta.is_full()))
}
/// Gets accounts used in transactions in the slot range [starting_slot, ending_slot].
/// Used by ledger-tool to create a minimized snapshot
pub fn get_accounts_used_in_range(
    &self,
    starting_slot: Slot,
    ending_slot: Slot,
) -> DashSet<Pubkey> {
    let accounts = DashSet::new();

    // Scan slots in parallel; within each slot, scan entries in parallel.
    (starting_slot..=ending_slot)
        .into_par_iter()
        .for_each(|slot| {
            // Slots whose entries are unavailable contribute nothing.
            if let Ok(entries) = self.get_slot_entries(slot, 0) {
                entries.par_iter().for_each(|entry| {
                    for tx in &entry.transactions {
                        for pubkey in tx.message.static_account_keys() {
                            accounts.insert(*pubkey);
                        }
                    }
                });
            }
        });

    accounts
}
fn get_completed_ranges(
&self,
slot: Slot,

View File

@ -4731,6 +4731,7 @@ dependencies = [
"chrono",
"chrono-humanize",
"crossbeam-channel",
"dashmap",
"fs_extra",
"futures 0.3.21",
"itertools",

View File

@ -1144,6 +1144,10 @@ pub struct AccountsDb {
// lower passes = faster total time, higher dynamic memory usage
// passes=2 cuts dynamic memory usage in approximately half.
pub num_hash_scan_passes: Option<usize>,
/// Used to disable logging dead slots during removal.
/// allow disabling noisy log
pub(crate) log_dead_slots: AtomicBool,
}
#[derive(Debug, Default)]
@ -1172,7 +1176,7 @@ pub struct AccountsStats {
}
#[derive(Debug, Default)]
struct PurgeStats {
pub(crate) struct PurgeStats {
last_report: AtomicInterval,
safety_checks_elapsed: AtomicU64,
remove_cache_elapsed: AtomicU64,
@ -1948,6 +1952,7 @@ impl AccountsDb {
filler_accounts_config: FillerAccountsConfig::default(),
filler_account_suffix: None,
num_hash_scan_passes,
log_dead_slots: AtomicBool::new(true),
}
}
@ -2270,7 +2275,7 @@ impl AccountsDb {
self.sender_bg_hasher = Some(sender);
}
fn purge_keys_exact<'a, C: 'a>(
pub(crate) fn purge_keys_exact<'a, C: 'a>(
&'a self,
pubkey_to_slot_set: impl Iterator<Item = &'a (Pubkey, C)>,
) -> Vec<(u64, AccountInfo)>
@ -3032,7 +3037,7 @@ impl AccountsDb {
/// get all accounts in all the storages passed in
/// for duplicate pubkeys, the account with the highest write_value is returned
fn get_unique_accounts_from_storages<'a, I>(
pub(crate) fn get_unique_accounts_from_storages<'a, I>(
&'a self,
stores: I,
) -> (HashMap<Pubkey, FoundStoredAccount>, usize, u64)
@ -3252,7 +3257,7 @@ impl AccountsDb {
/// get stores for 'slot'
/// retain only the stores where 'should_retain(store)' == true
/// for stores not retained, insert in 'dirty_stores' and 'dead_storages'
fn mark_dirty_dead_stores(
pub(crate) fn mark_dirty_dead_stores(
&self,
slot: Slot,
dead_storages: &mut Vec<Arc<AccountStorageEntry>>,
@ -3272,7 +3277,7 @@ impl AccountsDb {
}
}
fn drop_or_recycle_stores(&self, dead_storages: Vec<Arc<AccountStorageEntry>>) {
pub(crate) fn drop_or_recycle_stores(&self, dead_storages: Vec<Arc<AccountStorageEntry>>) {
let mut recycle_stores_write_elapsed = Measure::start("recycle_stores_write_time");
let mut recycle_stores = self.recycle_stores.write().unwrap();
recycle_stores_write_elapsed.stop();
@ -3298,7 +3303,7 @@ impl AccountsDb {
}
/// return a store that can contain 'aligned_total' bytes and the time it took to execute
fn get_store_for_shrink(
pub(crate) fn get_store_for_shrink(
&self,
slot: Slot,
aligned_total: u64,
@ -4742,7 +4747,7 @@ impl AccountsDb {
store
}
fn page_align(size: u64) -> u64 {
pub(crate) fn page_align(size: u64) -> u64 {
(size + (PAGE_SIZE - 1)) & !(PAGE_SIZE - 1)
}
@ -4898,7 +4903,7 @@ impl AccountsDb {
/// Purges every slot in `removed_slots` from both the cache and storage. This includes
/// entries in the accounts index, cache entries, and any backing storage entries.
fn purge_slots_from_cache_and_store<'a>(
pub(crate) fn purge_slots_from_cache_and_store<'a>(
&self,
removed_slots: impl Iterator<Item = &'a Slot> + Clone,
purge_stats: &PurgeStats,
@ -7320,7 +7325,9 @@ impl AccountsDb {
.collect();
measure.stop();
accounts_index_root_stats.clean_dead_slot_us += measure.as_us();
info!("remove_dead_slots_metadata: slots {:?}", dead_slots);
if self.log_dead_slots.load(Ordering::Relaxed) {
info!("remove_dead_slots_metadata: slots {:?}", dead_slots);
}
accounts_index_root_stats.rooted_cleaned_count += rooted_cleaned_count;
accounts_index_root_stats.unrooted_cleaned_count += unrooted_cleaned_count;
@ -7570,7 +7577,7 @@ impl AccountsDb {
);
}
fn store_accounts_frozen<'a, T: ReadableAccount + Sync + ZeroLamport>(
pub(crate) fn store_accounts_frozen<'a, T: ReadableAccount + Sync + ZeroLamport>(
&'a self,
accounts: impl StorableAccounts<'a, T>,
hashes: Option<&[impl Borrow<Hash>]>,

View File

@ -71,7 +71,7 @@ use {
vote_parser,
},
byteorder::{ByteOrder, LittleEndian},
dashmap::DashMap,
dashmap::{DashMap, DashSet},
itertools::Itertools,
log::*,
rand::Rng,
@ -5169,6 +5169,25 @@ impl Bank {
self.collect_rent_eagerly(true);
}
/// Get stake and stake node accounts
pub(crate) fn get_stake_accounts(&self, minimized_account_set: &DashSet<Pubkey>) {
    // Every stake account pubkey from the current delegations.
    for (pubkey, _) in self.stakes_cache.stakes().stake_delegations().iter() {
        minimized_account_set.insert(*pubkey);
    }

    // Every node pubkey that stake is delegated to.
    self.stakes_cache
        .stakes()
        .staked_nodes()
        .par_iter()
        .for_each(|(node_pubkey, _)| {
            minimized_account_set.insert(*node_pubkey);
        });
}
fn collect_rent_eagerly(&self, just_rewrites: bool) {
if self.lazy_rent_collection.load(Relaxed) {
return;
@ -5707,7 +5726,7 @@ impl Bank {
partitions
}
fn fixed_cycle_partitions_between_slots(
pub(crate) fn fixed_cycle_partitions_between_slots(
&self,
starting_slot: Slot,
ending_slot: Slot,
@ -5748,7 +5767,7 @@ impl Bank {
)
}
fn variable_cycle_partitions_between_slots(
pub(crate) fn variable_cycle_partitions_between_slots(
&self,
starting_slot: Slot,
ending_slot: Slot,
@ -5957,7 +5976,7 @@ impl Bank {
&& self.slot_count_per_normal_epoch() < self.slot_count_in_two_day()
}
fn use_fixed_collection_cycle(&self) -> bool {
pub(crate) fn use_fixed_collection_cycle(&self) -> bool {
// Force normal behavior, disabling fixed collection cycle for manual local testing
#[cfg(not(test))]
if self.slot_count_per_normal_epoch() == solana_sdk::epoch_schedule::MINIMUM_SLOTS_PER_EPOCH

View File

@ -231,3 +231,28 @@ pub(crate) fn get() -> Builtins {
feature_transitions: builtin_feature_transitions(),
}
}
/// Returns the addresses of all builtin programs.
pub fn get_pubkeys() -> Vec<Pubkey> {
    let builtins = get();

    // Builtins present at genesis...
    let genesis_ids = builtins.genesis_builtins.iter().map(|b| b.id);

    // ...plus builtins introduced by feature activation. Transitions that only
    // remove or retain an existing builtin contribute no new address.
    let transition_ids = builtins
        .feature_transitions
        .iter()
        .filter_map(|f| match &f.0 {
            InnerBuiltinFeatureTransition::Add {
                builtin,
                feature_id: _,
            } => Some(builtin.id),
            InnerBuiltinFeatureTransition::RemoveOrRetain {
                previously_added_builtin: _,
                addition_feature_id: _,
                removal_feature_id: _,
            } => None,
        });

    genesis_ids.chain(transition_ids).collect()
}

View File

@ -58,6 +58,7 @@ mod shared_buffer_reader;
pub mod snapshot_archive_info;
pub mod snapshot_config;
pub mod snapshot_hash;
pub mod snapshot_minimizer;
pub mod snapshot_package;
pub mod snapshot_utils;
pub mod sorted_storages;

View File

@ -0,0 +1,688 @@
//! Used to create minimal snapshots - separated here to keep accounts_db simpler
use {
crate::{
accounts_db::{AccountStorageEntry, AccountsDb, PurgeStats},
bank::Bank,
builtins, static_ids,
},
dashmap::DashSet,
log::info,
rayon::{
iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator},
prelude::ParallelSlice,
},
solana_measure::measure,
solana_sdk::{
account::ReadableAccount,
account_utils::StateMut,
bpf_loader_upgradeable::{self, UpgradeableLoaderState},
clock::Slot,
pubkey::Pubkey,
sdk_ids,
},
std::{
collections::HashSet,
sync::{
atomic::{AtomicUsize, Ordering},
Arc, Mutex,
},
},
};
/// Used to modify bank and accounts_db to create a minimized snapshot
pub struct SnapshotMinimizer<'a> {
    // Bank whose backing accounts_db is minimized in place.
    bank: &'a Bank,
    // First slot of the replayable range (the snapshot slot).
    starting_slot: Slot,
    // Last slot of the replayable range.
    ending_slot: Slot,
    // Accumulates every pubkey that must survive minimization; seeded with
    // the transaction account set and grown by the `get_*` passes.
    minimized_account_set: DashSet<Pubkey>,
}
impl<'a> SnapshotMinimizer<'a> {
    /// Removes all accounts not necessary for replaying slots in the range [starting_slot, ending_slot].
    /// `transaction_account_set` should contain accounts used in transactions in the slot range [starting_slot, ending_slot].
    /// This function will accumulate other accounts (rent collection, builtins, etc) necessary to replay transactions.
    ///
    /// This function will modify accounts_db by removing accounts not needed to replay [starting_slot, ending_slot],
    /// and update the bank's capitalization.
    pub fn minimize(
        bank: &'a Bank,
        starting_slot: Slot,
        ending_slot: Slot,
        transaction_account_set: DashSet<Pubkey>,
    ) {
        let minimizer = SnapshotMinimizer {
            bank,
            starting_slot,
            ending_slot,
            minimized_account_set: transaction_account_set,
        };

        // Accumulate every non-transaction account needed for replay.
        minimizer.add_accounts(Self::get_active_bank_features, "active bank features");
        minimizer.add_accounts(Self::get_inactive_bank_features, "inactive bank features");
        minimizer.add_accounts(Self::get_builtins, "builtin accounts");
        minimizer.add_accounts(Self::get_static_runtime_accounts, "static runtime accounts");
        minimizer.add_accounts(Self::get_sdk_accounts, "sdk accounts");

        minimizer.add_accounts(
            Self::get_rent_collection_accounts,
            "rent collection accounts",
        );
        minimizer.add_accounts(Self::get_vote_accounts, "vote accounts");
        minimizer.add_accounts(Self::get_stake_accounts, "stake accounts");

        // NOTE: the owner and programdata passes read the set accumulated so
        // far, so they must run after all of the passes above.
        minimizer.add_accounts(Self::get_owner_accounts, "owner accounts");
        minimizer.add_accounts(Self::get_programdata_accounts, "programdata accounts");

        minimizer.minimize_accounts_db();

        // Update accounts_cache and capitalization
        minimizer.bank.force_flush_accounts_cache();
        minimizer.bank.set_capitalization();
    }

    /// Helper function to measure time and number of accounts added
    /// by one `get_*` pass, then log both.
    fn add_accounts<F>(&self, add_accounts_fn: F, name: &'static str)
    where
        F: Fn(&SnapshotMinimizer<'a>),
    {
        let initial_accounts_len = self.minimized_account_set.len();
        let (_, measure) = measure!(add_accounts_fn(self), name);
        let total_accounts_len = self.minimized_account_set.len();
        // The set only ever grows, so this difference cannot underflow.
        let added_accounts = total_accounts_len - initial_accounts_len;

        info!(
            "Added {added_accounts} {name} for total of {total_accounts_len} accounts. get {measure}"
        );
    }

    /// Used to get active bank feature accounts in `minimize`.
    fn get_active_bank_features(&self) {
        self.bank.feature_set.active.iter().for_each(|(pubkey, _)| {
            self.minimized_account_set.insert(*pubkey);
        });
    }

    /// Used to get inactive bank feature accounts in `minimize`
    fn get_inactive_bank_features(&self) {
        self.bank.feature_set.inactive.iter().for_each(|pubkey| {
            self.minimized_account_set.insert(*pubkey);
        });
    }

    /// Used to get builtin accounts in `minimize`
    fn get_builtins(&self) {
        builtins::get_pubkeys().iter().for_each(|pubkey| {
            self.minimized_account_set.insert(*pubkey);
        });
    }

    /// Used to get static runtime accounts in `minimize`
    fn get_static_runtime_accounts(&self) {
        static_ids::STATIC_IDS.iter().for_each(|pubkey| {
            self.minimized_account_set.insert(*pubkey);
        });
    }

    /// Used to get sdk accounts in `minimize`
    fn get_sdk_accounts(&self) {
        sdk_ids::SDK_IDS.iter().for_each(|pubkey| {
            self.minimized_account_set.insert(*pubkey);
        });
    }

    /// Used to get rent collection accounts in `minimize`
    /// Add all pubkeys we would collect rent from or rewrite to `minimized_account_set`.
    /// related to Bank::rent_collection_partitions
    fn get_rent_collection_accounts(&self) {
        // Mirror the bank's choice of rent-collection cycle when computing
        // the partitions covering [starting_slot, ending_slot].
        let partitions = if !self.bank.use_fixed_collection_cycle() {
            self.bank
                .variable_cycle_partitions_between_slots(self.starting_slot, self.ending_slot)
        } else {
            self.bank
                .fixed_cycle_partitions_between_slots(self.starting_slot, self.ending_slot)
        };

        partitions.into_iter().for_each(|partition| {
            let subrange = Bank::pubkey_range_from_partition(partition);
            // This may be overkill since we just need the pubkeys and don't need to actually load the accounts.
            // Leaving it for now as this is only used by ledger-tool. If used in runtime, we will need to instead use
            // some of the guts of `load_to_collect_rent_eagerly`.
            self.bank
                .accounts()
                .load_to_collect_rent_eagerly(&self.bank.ancestors, subrange)
                .into_par_iter()
                .for_each(|(pubkey, ..)| {
                    self.minimized_account_set.insert(pubkey);
                })
        });
    }

    /// Used to get vote and node pubkeys in `minimize`
    /// Add all pubkeys from vote accounts and nodes to `minimized_account_set`
    fn get_vote_accounts(&self) {
        self.bank
            .vote_accounts()
            .par_iter()
            .for_each(|(pubkey, (_stake, vote_account))| {
                self.minimized_account_set.insert(*pubkey);
                // Also keep the validator identity referenced by the vote state.
                if let Ok(vote_state) = vote_account.vote_state().as_ref() {
                    self.minimized_account_set.insert(vote_state.node_pubkey);
                }
            });
    }

    /// Used to get stake accounts in `minimize`
    /// Add all pubkeys from stake accounts to `minimized_account_set`
    fn get_stake_accounts(&self) {
        self.bank.get_stake_accounts(&self.minimized_account_set);
    }

    /// Used to get owner accounts in `minimize`
    /// For each account in `minimized_account_set` adds the owner account's pubkey to `minimized_account_set`.
    fn get_owner_accounts(&self) {
        // Collect owners into a separate set first: inserting into
        // `minimized_account_set` while iterating it is avoided.
        let owner_accounts: HashSet<_> = self
            .minimized_account_set
            .par_iter()
            .filter_map(|pubkey| self.bank.get_account(&pubkey))
            .map(|account| *account.owner())
            .collect();
        owner_accounts.into_par_iter().for_each(|pubkey| {
            self.minimized_account_set.insert(pubkey);
        });
    }

    /// Used to get program data accounts in `minimize`
    /// For each upgradable bpf program, adds the programdata account pubkey to `minimized_account_set`
    fn get_programdata_accounts(&self) {
        // Same two-phase pattern as `get_owner_accounts`: gather first,
        // then insert back into the set.
        let programdata_accounts: HashSet<_> = self
            .minimized_account_set
            .par_iter()
            .filter_map(|pubkey| self.bank.get_account(&pubkey))
            .filter(|account| account.executable())
            .filter(|account| bpf_loader_upgradeable::check_id(account.owner()))
            .filter_map(|account| {
                if let Ok(UpgradeableLoaderState::Program {
                    programdata_address,
                }) = account.state()
                {
                    Some(programdata_address)
                } else {
                    None
                }
            })
            .collect();
        programdata_accounts.into_par_iter().for_each(|pubkey| {
            self.minimized_account_set.insert(pubkey);
        });
    }

    /// Remove accounts not in `minimized_account_set` from accounts_db
    fn minimize_accounts_db(&self) {
        let (minimized_slot_set, minimized_slot_set_measure) =
            measure!(self.get_minimized_slot_set(), "generate minimized slot set");
        info!("{minimized_slot_set_measure}");

        let ((dead_slots, dead_storages), process_snapshot_storages_measure) = measure!(
            self.process_snapshot_storages(minimized_slot_set),
            "process snapshot storages"
        );
        info!("{process_snapshot_storages_measure}");

        // Avoid excessive logging
        self.accounts_db()
            .log_dead_slots
            .store(false, Ordering::Relaxed);

        let (_, purge_dead_slots_measure) =
            measure!(self.purge_dead_slots(dead_slots), "purge dead slots");
        info!("{purge_dead_slots_measure}");

        let (_, drop_or_recycle_stores_measure) = measure!(
            self.accounts_db().drop_or_recycle_stores(dead_storages),
            "drop or recycle stores"
        );
        info!("{drop_or_recycle_stores_measure}");

        // Turn logging back on after minimization
        self.accounts_db()
            .log_dead_slots
            .store(true, Ordering::Relaxed);
    }

    /// Determines minimum set of slots that accounts in `minimized_account_set` are in
    fn get_minimized_slot_set(&self) -> DashSet<Slot> {
        let minimized_slot_set = DashSet::new();
        self.minimized_account_set.par_iter().for_each(|pubkey| {
            if let Some(read_entry) = self
                .accounts_db()
                .accounts_index
                .get_account_read_entry(&pubkey)
            {
                // Only the newest slot holding the account needs to survive.
                if let Some(max_slot) = read_entry.slot_list().iter().map(|(slot, _)| *slot).max() {
                    minimized_slot_set.insert(max_slot);
                }
            }
        });
        minimized_slot_set
    }

    /// Process all snapshot storages during `minimize`.
    /// Returns the slots that can be purged entirely and the storages
    /// superseded by the filtered replacements.
    fn process_snapshot_storages(
        &self,
        minimized_slot_set: DashSet<Slot>,
    ) -> (Vec<Slot>, Vec<Arc<AccountStorageEntry>>) {
        let snapshot_storages = self
            .accounts_db()
            .get_snapshot_storages(self.starting_slot, None, None)
            .0;

        let dead_slots = Mutex::new(Vec::new());
        let dead_storages = Mutex::new(Vec::new());

        snapshot_storages.into_par_iter().for_each(|storages| {
            let slot = storages.first().unwrap().slot();
            // The starting slot's storages are left untouched.
            if slot != self.starting_slot {
                if minimized_slot_set.contains(&slot) {
                    self.filter_storages(storages, &dead_storages);
                } else {
                    dead_slots.lock().unwrap().push(slot);
                }
            }
        });

        let dead_slots = dead_slots.into_inner().unwrap();
        let dead_storages = dead_storages.into_inner().unwrap();
        (dead_slots, dead_storages)
    }

    /// Creates new storage replacing `storages` that contains only accounts in `minimized_account_set`.
    fn filter_storages(
        &self,
        storages: Vec<Arc<AccountStorageEntry>>,
        dead_storages: &Mutex<Vec<Arc<AccountStorageEntry>>>,
    ) {
        let slot = storages.first().unwrap().slot();
        let (stored_accounts, _, _) = self
            .accounts_db()
            .get_unique_accounts_from_storages(storages.iter());
        // Sort by pubkey for a deterministic storage layout.
        let mut stored_accounts = stored_accounts.into_iter().collect::<Vec<_>>();
        stored_accounts.sort_unstable_by(|a, b| a.0.cmp(&b.0));

        // Partition accounts in parallel chunks into keep/purge, summing the
        // bytes needed for the kept accounts.
        let keep_accounts_collect = Mutex::new(Vec::with_capacity(stored_accounts.len()));
        let purge_pubkeys_collect = Mutex::new(Vec::with_capacity(stored_accounts.len()));
        let total_bytes_collect = AtomicUsize::new(0);
        const CHUNK_SIZE: usize = 50;
        stored_accounts.par_chunks(CHUNK_SIZE).for_each(|chunk| {
            let mut chunk_bytes = 0;
            let mut keep_accounts = Vec::with_capacity(CHUNK_SIZE);
            let mut purge_pubkeys = Vec::with_capacity(CHUNK_SIZE);
            chunk.iter().for_each(|(pubkey, account)| {
                if self.minimized_account_set.contains(pubkey) {
                    chunk_bytes += account.account_size;
                    keep_accounts.push((pubkey, account));
                } else if self
                    .accounts_db()
                    .accounts_index
                    .get_account_read_entry(pubkey)
                    .is_some()
                {
                    // Only accounts still present in the index need purging.
                    purge_pubkeys.push(pubkey);
                }
            });

            keep_accounts_collect
                .lock()
                .unwrap()
                .append(&mut keep_accounts);
            purge_pubkeys_collect
                .lock()
                .unwrap()
                .append(&mut purge_pubkeys);
            total_bytes_collect.fetch_add(chunk_bytes, Ordering::Relaxed);
        });

        let keep_accounts = keep_accounts_collect.into_inner().unwrap();
        let remove_pubkeys = purge_pubkeys_collect.into_inner().unwrap();
        let total_bytes = total_bytes_collect.load(Ordering::Relaxed);

        // Drop the purged accounts' index entries for this slot.
        let purge_pubkeys: Vec<_> = remove_pubkeys
            .into_iter()
            .map(|pubkey| (*pubkey, slot))
            .collect();
        self.accounts_db().purge_keys_exact(purge_pubkeys.iter());

        // Write the kept accounts into a fresh, right-sized store.
        let aligned_total: u64 = AccountsDb::page_align(total_bytes as u64);
        if aligned_total > 0 {
            let mut accounts = Vec::with_capacity(keep_accounts.len());
            let mut hashes = Vec::with_capacity(keep_accounts.len());
            let mut write_versions = Vec::with_capacity(keep_accounts.len());

            for (pubkey, alive_account) in keep_accounts {
                accounts.push((pubkey, &alive_account.account));
                hashes.push(alive_account.account.hash);
                write_versions.push(alive_account.account.meta.write_version);
            }

            let (new_storage, _time) = self.accounts_db().get_store_for_shrink(slot, aligned_total);
            self.accounts_db().store_accounts_frozen(
                (slot, &accounts[..]),
                Some(&hashes),
                Some(&new_storage),
                Some(Box::new(write_versions.into_iter())),
            );

            new_storage.flush().unwrap();
        }

        // Mark the original storages dead; the new store (if any) replaces them.
        let append_vec_set: HashSet<_> = storages
            .iter()
            .map(|storage| storage.append_vec_id())
            .collect();
        self.accounts_db().mark_dirty_dead_stores(
            slot,
            &mut dead_storages.lock().unwrap(),
            |store| !append_vec_set.contains(&store.append_vec_id()),
        );
    }

    /// Purge dead slots from storage and cache
    fn purge_dead_slots(&self, dead_slots: Vec<Slot>) {
        let stats = PurgeStats::default();
        self.accounts_db()
            .purge_slots_from_cache_and_store(dead_slots.iter(), &stats, false);
    }

    /// Convenience function for getting accounts_db
    fn accounts_db(&self) -> &AccountsDb {
        &self.bank.rc.accounts.accounts_db
    }
}
#[cfg(test)]
mod tests {
use {
crate::{
append_vec::AppendVecAccountsIter, bank::Bank,
genesis_utils::create_genesis_config_with_leader,
snapshot_minimizer::SnapshotMinimizer,
},
dashmap::DashSet,
solana_sdk::{
account::{AccountSharedData, ReadableAccount, WritableAccount},
bpf_loader_upgradeable::{self, UpgradeableLoaderState},
genesis_config::create_genesis_config,
pubkey::Pubkey,
signer::Signer,
stake,
},
std::sync::Arc,
};
/// Verifies `get_rent_collection_accounts` collects exactly the existing
/// pubkeys inside the rent-collection partitions for the slot range:
/// none initially, one after storing a key in range, and none for a
/// disjoint range.
#[test]
fn test_get_rent_collection_accounts() {
    solana_logger::setup();

    let (genesis_config, _) = create_genesis_config(1_000_000);
    let bank = Arc::new(Bank::new_for_tests(&genesis_config));

    // Slots correspond to subrange: A52Kf8KJNVhs1y61uhkzkSF82TXCLxZekqmFwiFXLnHu..=ChWNbfHUHLvFY3uhXj6kQhJ7a9iZB4ykh34WRGS5w9NE
    // Initially, there are no existing keys in this range
    {
        let minimizer = SnapshotMinimizer {
            bank: &bank,
            starting_slot: 100_000,
            ending_slot: 110_000,
            minimized_account_set: DashSet::new(),
        };
        minimizer.get_rent_collection_accounts();
        assert!(
            minimizer.minimized_account_set.is_empty(),
            "rent collection accounts should be empty: len={}",
            minimizer.minimized_account_set.len()
        );
    }

    // Add a key in the subrange
    let pubkey: Pubkey = "ChWNbfHUHLvFY3uhXj6kQhJ7a9iZB4ykh34WRGS5w9ND"
        .parse()
        .unwrap();
    bank.store_account(&pubkey, &AccountSharedData::new(1, 0, &Pubkey::default()));

    {
        let minimizer = SnapshotMinimizer {
            bank: &bank,
            starting_slot: 100_000,
            ending_slot: 110_000,
            minimized_account_set: DashSet::new(),
        };
        minimizer.get_rent_collection_accounts();
        assert_eq!(
            1,
            minimizer.minimized_account_set.len(),
            "rent collection accounts should have len=1: len={}",
            minimizer.minimized_account_set.len()
        );
        assert!(minimizer.minimized_account_set.contains(&pubkey));
    }

    // Slots correspond to subrange: ChXFtoKuDvQum4HvtgiqGWrgUYbtP1ZzGFGMnT8FuGaB..=FKzRYCFeCC8e48jP9kSW4xM77quv1BPrdEMktpceXWSa
    // The previous key is not contained in this range, so is not added
    {
        let minimizer = SnapshotMinimizer {
            bank: &bank,
            starting_slot: 110_001,
            ending_slot: 120_000,
            minimized_account_set: DashSet::new(),
        };
        // Fix: the collection pass was never invoked in this case, so the
        // assertion below was vacuously true (the set starts empty). Run it
        // so the negative case actually exercises the subrange logic.
        minimizer.get_rent_collection_accounts();
        assert!(
            minimizer.minimized_account_set.is_empty(),
            "rent collection accounts should be empty: len={}",
            minimizer.minimized_account_set.len()
        );
    }
}
/// `get_vote_accounts` must pick up both the vote account pubkey and the
/// node (validator identity) pubkey from the vote state.
#[test]
fn test_minimization_get_vote_accounts() {
    solana_logger::setup();

    let leader_pubkey = solana_sdk::pubkey::new_rand();
    let leader_stake_lamports = 30;
    let genesis_config_info =
        create_genesis_config_with_leader(10, &leader_pubkey, leader_stake_lamports);
    let bank = Arc::new(Bank::new_for_tests(&genesis_config_info.genesis_config));

    let minimizer = SnapshotMinimizer {
        bank: &bank,
        starting_slot: 0,
        ending_slot: 0,
        minimized_account_set: DashSet::new(),
    };
    minimizer.get_vote_accounts();

    let collected = &minimizer.minimized_account_set;
    assert!(collected.contains(&genesis_config_info.voting_keypair.pubkey()));
    assert!(collected.contains(&genesis_config_info.validator_pubkey));
}
/// `get_stake_accounts` must collect every genesis stake account plus the
/// bootstrap validator's node pubkey, and nothing else.
#[test]
fn test_minimization_get_stake_accounts() {
    solana_logger::setup();

    let bootstrap_validator_pubkey = solana_sdk::pubkey::new_rand();
    let bootstrap_validator_stake_lamports = 30;
    let genesis_config_info = create_genesis_config_with_leader(
        10,
        &bootstrap_validator_pubkey,
        bootstrap_validator_stake_lamports,
    );
    let bank = Arc::new(Bank::new_for_tests(&genesis_config_info.genesis_config));

    let minimizer = SnapshotMinimizer {
        bank: &bank,
        starting_slot: 0,
        ending_slot: 0,
        minimized_account_set: DashSet::new(),
    };
    minimizer.get_stake_accounts();

    // Expected set: every stake-program-owned account in genesis, plus the
    // node the bootstrap stake is delegated to.
    let mut expected_stake_accounts = Vec::new();
    for (pubkey, account) in genesis_config_info.genesis_config.accounts.iter() {
        if stake::program::check_id(account.owner()) {
            expected_stake_accounts.push(*pubkey);
        }
    }
    expected_stake_accounts.push(bootstrap_validator_pubkey);

    assert_eq!(
        minimizer.minimized_account_set.len(),
        expected_stake_accounts.len()
    );
    for stake_pubkey in expected_stake_accounts {
        assert!(minimizer.minimized_account_set.contains(&stake_pubkey));
    }
}
/// `get_owner_accounts()` should add, for each account already in the
/// minimized set, that account's owner pubkey — without dropping the
/// original entry.
#[test]
fn test_minimization_get_owner_accounts() {
    solana_logger::setup();
    let (genesis_config, _) = create_genesis_config(1_000_000);
    let bank = Arc::new(Bank::new_for_tests(&genesis_config));

    // Store one account whose owner is a fresh, otherwise-unknown pubkey.
    let account_pubkey = solana_sdk::pubkey::new_rand();
    let owner_pubkey = solana_sdk::pubkey::new_rand();
    bank.store_account(&account_pubkey, &AccountSharedData::new(1, 0, &owner_pubkey));

    let seed_set = DashSet::new();
    seed_set.insert(account_pubkey);
    let minimizer = SnapshotMinimizer {
        bank: &bank,
        starting_slot: 0,
        ending_slot: 0,
        minimized_account_set: seed_set,
    };
    minimizer.get_owner_accounts();

    // The seeded account remains, and its owner has been pulled in.
    assert!(minimizer.minimized_account_set.contains(&account_pubkey));
    assert!(minimizer.minimized_account_set.contains(&owner_pubkey));
}
/// `get_programdata_accounts()` should add the programdata address of each
/// upgradeable program account found in the minimized set; accounts that are
/// not upgradeable programs must not pull in any extra keys.
///
/// Also fixes the misspelled local `non_program_acount` -> `non_program_account`.
#[test]
fn test_minimization_add_programdata_accounts() {
    solana_logger::setup();
    let (genesis_config, _) = create_genesis_config(1_000_000);
    let bank = Arc::new(Bank::new_for_tests(&genesis_config));

    let non_program_id = solana_sdk::pubkey::new_rand();
    let program_id = solana_sdk::pubkey::new_rand();
    let programdata_address = solana_sdk::pubkey::new_rand();

    // An upgradeable program account pointing at `programdata_address`.
    let program = UpgradeableLoaderState::Program {
        programdata_address,
    };
    let non_program_account = AccountSharedData::new(1, 0, &non_program_id);
    let mut program_account =
        AccountSharedData::new_data(40, &program, &bpf_loader_upgradeable::id()).unwrap();
    program_account.set_executable(true);
    bank.store_account(&non_program_id, &non_program_account);
    bank.store_account(&program_id, &program_account);

    // Negative case: a non-program account does not add any additional keys.
    let programdata_accounts = DashSet::new();
    programdata_accounts.insert(non_program_id);
    let minimizer = SnapshotMinimizer {
        bank: &bank,
        starting_slot: 0,
        ending_slot: 0,
        minimized_account_set: programdata_accounts,
    };
    minimizer.get_programdata_accounts();
    assert_eq!(minimizer.minimized_account_set.len(), 1);
    assert!(minimizer.minimized_account_set.contains(&non_program_id));

    // Positive case: a program account adds its programdata address to the set.
    minimizer.minimized_account_set.insert(program_id);
    minimizer.get_programdata_accounts();
    assert_eq!(minimizer.minimized_account_set.len(), 3);
    assert!(minimizer.minimized_account_set.contains(&non_program_id));
    assert!(minimizer.minimized_account_set.contains(&program_id));
    assert!(minimizer
        .minimized_account_set
        .contains(&programdata_address));
}
/// End-to-end check of `minimize_accounts_db()`: populate several rooted
/// slots, keep only a handful of accounts from the even slots, and verify
/// that after minimization only those accounts — plus the untouched snapshot
/// slot — survive in the snapshot storages.
#[test]
fn test_minimize_accounts_db() {
    solana_logger::setup();
    let (genesis_config, _) = create_genesis_config(1_000_000);
    let bank = Arc::new(Bank::new_for_tests(&genesis_config));
    let accounts = &bank.accounts().accounts_db;

    let num_slots = 5;
    let num_accounts_per_slot = 300;
    let minimized_account_set = DashSet::new();

    // All stored accounts share identical contents; only pubkeys differ.
    let some_lamport = 223;
    let no_data = 0;
    let owner = *AccountSharedData::default().owner();
    let account = AccountSharedData::new(some_lamport, no_data, &owner);

    let mut current_slot = 0;
    for _ in 0..num_slots {
        current_slot += 1;
        for index in 0..num_accounts_per_slot {
            let pubkey = solana_sdk::pubkey::new_rand();
            accounts.store_uncached(current_slot, &[(&pubkey, &account)]);
            // Mark three accounts (indices 0, 100, 200) from each even slot
            // to be kept by the minimizer.
            if current_slot % 2 == 0 && index % 100 == 0 {
                minimized_account_set.insert(pubkey);
            }
        }
        accounts.get_accounts_delta_hash(current_slot);
        accounts.add_root(current_slot);
    }
    // Two even slots (2 and 4) x three marked accounts each.
    assert_eq!(minimized_account_set.len(), 6);

    let minimizer = SnapshotMinimizer {
        bank: &bank,
        starting_slot: current_slot,
        ending_slot: current_slot,
        minimized_account_set,
    };
    minimizer.minimize_accounts_db();

    let snapshot_storages = accounts.get_snapshot_storages(current_slot, None, None).0;
    assert_eq!(snapshot_storages.len(), 3);

    let account_count: usize = snapshot_storages
        .into_iter()
        .flatten()
        .map(|storage| AppendVecAccountsIter::new(&storage.accounts).count())
        .sum();
    // The snapshot slot is untouched, so it still holds all 300 accounts;
    // every other surviving account came from the minimized set.
    assert_eq!(
        account_count,
        minimizer.minimized_account_set.len() + num_accounts_per_slot
    );
}
}