//! helpers for squashing append vecs into ancient append vecs
//! an ancient append vec is:
//! 1. a slot that is more than an epoch old
//! 2. multiple 'slots' squashed into a single older (i.e. ancient) slot for convenience and performance
//! Otherwise, an ancient append vec is the same as any other append vec
use {
    crate::{
        accounts_db::{AccountStorageEntry, AccountsDb},
        append_vec::{AppendVec, StoredAccountMeta},
    },
    rand::{thread_rng, Rng},
    solana_sdk::{clock::Slot, saturating_add_assign},
    std::sync::Arc,
};

/// info about a storage eligible to be combined into an ancient append vec.
/// Useful to help sort vecs of storages.
#[derive(Debug)]
#[allow(dead_code)]
struct SlotInfo {
    storage: Arc<AccountStorageEntry>,
    /// slot of storage
    slot: Slot,
    /// total capacity of storage
    capacity: u64,
    /// # alive bytes in storage
    alive_bytes: u64,
    /// true if this should be shrunk due to ratio
    should_shrink: bool,
}

/// info for all storages in ancient slots
/// 'all_infos' contains all slots and storages that are ancient
#[derive(Default, Debug)]
struct AncientSlotInfos {
    /// info on all ancient storages
    all_infos: Vec<SlotInfo>,
    /// indexes into 'all_infos' for storages that should be shrunk because alive ratio is too low.
    /// subset of all_infos
    shrink_indexes: Vec<usize>,
    /// total alive bytes across contents of 'shrink_indexes'
    total_alive_bytes_shrink: u64,
    /// total alive bytes across all slots
    total_alive_bytes: u64,
}

impl AncientSlotInfos {
    /// add info for 'storage'
    fn add(&mut self, slot: Slot, storage: Arc<AccountStorageEntry>, can_randomly_shrink: bool) {
        let alive_bytes = storage.alive_bytes() as u64;
        if alive_bytes > 0 {
            let capacity = storage.accounts.capacity();
            let should_shrink = if capacity > 0 {
                let alive_ratio = alive_bytes * 100 / capacity;
                (alive_ratio < 90) || (can_randomly_shrink && thread_rng().gen_range(0, 10000) == 0)
            } else {
                false
            };
            // two criteria we're shrinking by later:
            // 1. alive ratio, so that we don't consume too much disk space with dead accounts
            // 2. # of active ancient roots, so that we don't consume too many open file handles

            if should_shrink {
                // alive ratio is too low, so prioritize combining this slot with others
                // to reduce disk space used
                saturating_add_assign!(self.total_alive_bytes_shrink, alive_bytes);
                self.shrink_indexes.push(self.all_infos.len());
            }
            self.all_infos.push(SlotInfo {
                slot,
                capacity,
                storage,
                alive_bytes,
                should_shrink,
            });
            saturating_add_assign!(self.total_alive_bytes, alive_bytes);
        }
    }
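
    // Worked example of the 'should_shrink' arithmetic above (illustrative numbers only):
    //   capacity = 1_000_000, alive_bytes = 850_000:
    //     alive_ratio = 850_000 * 100 / 1_000_000 = 85; 85 < 90, so should_shrink = true
    //   capacity = 1_000_000, alive_bytes = 950_000:
    //     alive_ratio = 95, so should_shrink = false unless 'can_randomly_shrink'
    //     fires (a 1 in 10_000 chance per call)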

    /// sort 'shrink_indexes' by most bytes saved, highest to lowest
    #[allow(dead_code)]
    fn sort_shrink_indexes_by_bytes_saved(&mut self) {
        self.shrink_indexes.sort_unstable_by(|l, r| {
            let amount_shrunk = |index: &usize| {
                let item = &self.all_infos[*index];
                item.capacity - item.alive_bytes
            };
            amount_shrunk(r).cmp(&amount_shrunk(l))
        });
    }

    /// truncate 'all_infos' such that when the remaining entries in
    /// 'all_infos' are combined, the total number of storages <= 'max_storages'
    /// The idea is that 'all_infos' is sorted from smallest capacity to largest,
    /// but that isn't required for this function to be 'correct'.
    #[allow(dead_code)]
    fn truncate_to_max_storages(&mut self, max_storages: usize, ideal_storage_size: u64) {
        // these indexes into 'all_infos' are useless once we truncate 'all_infos', so make
        // sure they're cleared out to avoid any issues
        self.shrink_indexes.clear();
        let total_storages = self.all_infos.len();
        let mut cumulative_bytes = 0u64;
        for (i, info) in self.all_infos.iter().enumerate() {
            saturating_add_assign!(cumulative_bytes, info.alive_bytes);
            let ancient_storages_required = (cumulative_bytes / ideal_storage_size + 1) as usize;
            let storages_remaining = total_storages - i - 1;
            // if the remaining uncombined storages and the # of resulting
            // combined ancient storages is less than the threshold, then
            // we've gone too far, so get rid of this entry and all after it.
            // Every storage after this one is larger.
            if storages_remaining + ancient_storages_required < max_storages {
                self.all_infos.truncate(i);
                break;
            }
        }
    }
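
    // Worked example of the truncation condition above (illustrative numbers only):
    // 10 storages, each with alive_bytes = 100; ideal_storage_size = 1000; max_storages = 8.
    //   i = 0: cumulative_bytes = 100, ancient_storages_required = 1, storages_remaining = 9,
    //          9 + 1 = 10 >= 8, keep scanning
    //   i = 3: cumulative_bytes = 400, ancient_storages_required = 1, storages_remaining = 6,
    //          6 + 1 = 7 < 8, so truncate to the first 3 entries. Combining those into 1
    //          ancient storage and leaving the other 7 untouched yields exactly 8 storages.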

    /// remove entries from 'all_infos' such that combining
    /// the remaining entries into storages of 'ideal_storage_size'
    /// will get us below 'max_storages'
    /// The entries that are removed will be reconsidered the next time around.
    /// Combining too many storages costs i/o and cpu so the goal is to find the sweet spot so
    /// that we make progress in cleaning/shrinking/combining but that we don't cause unnecessary
    /// churn.
    #[allow(dead_code)]
    fn filter_by_smallest_capacity(&mut self, max_storages: usize, ideal_storage_size: u64) {
        let total_storages = self.all_infos.len();
        if total_storages <= max_storages {
            // already at or below the max # of storages, so nothing to combine this pass
            self.shrink_indexes.clear();
            self.all_infos.clear();
            return;
        }

        // sort by 'should_shrink', then smallest capacity to largest
        self.all_infos.sort_unstable_by(|l, r| {
            r.should_shrink
                .cmp(&l.should_shrink)
                .then_with(|| l.capacity.cmp(&r.capacity))
        });

        // remove any storages we don't need to combine this pass to achieve
        // # resulting storages <= 'max_storages'
        self.truncate_to_max_storages(max_storages, ideal_storage_size);
    }
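
    // Illustrative sort result (hypothetical entries): given capacities [5, 2, 9] and
    // should_shrink [false, true, false], the entry with should_shrink = true sorts first,
    // then the rest by ascending capacity: original indexes [1, 0, 2].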
}

impl AccountsDb {
    /// go through all slots and populate 'SlotInfo', per slot
    /// This provides the list of possible ancient slots to sort, filter, and then combine.
    #[allow(dead_code)]
    fn calc_ancient_slot_info(
        &self,
        slots: Vec<Slot>,
        can_randomly_shrink: bool,
    ) -> AncientSlotInfos {
        let len = slots.len();
        let mut infos = AncientSlotInfos {
            shrink_indexes: Vec::with_capacity(len),
            all_infos: Vec::with_capacity(len),
            ..AncientSlotInfos::default()
        };

        for slot in &slots {
            if let Some(storage) = self.storage.get_slot_storage_entry(*slot) {
                infos.add(*slot, storage, can_randomly_shrink);
            }
        }
        infos
    }
}
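
// Minimal usage sketch of the scan above (hypothetical bindings: 'db' is an AccountsDb
// and 'slots' is a Vec<Slot> of candidate ancient slots):
//     let infos = db.calc_ancient_slot_info(slots, /* can_randomly_shrink */ false);
//     // 'infos.all_infos' now holds one SlotInfo per slot in 'slots' that has an
//     // alive storage; dead slots are skipped entirely.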

/// a set of accounts that need to be stored.
/// If there are too many to fit in 'Primary', the rest are put in 'Overflow'
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum StorageSelector {
    Primary,
    Overflow,
}

/// reference a set of accounts to store
/// The accounts may have to be split between 2 storages (primary and overflow) if there is not enough room in the primary storage.
/// The 'store' functions need data stored in a slice of specific type.
/// We need 1-2 of these slices constructed based on available bytes and individual account sizes.
/// The slice arithmetic across both hashes and account data gets messy. So, this struct abstracts that.
pub struct AccountsToStore<'a> {
    accounts: &'a [&'a StoredAccountMeta<'a>],
    /// if 'accounts' contains more items than can be contained in the primary storage, then we have to split these accounts.
    /// 'index_first_item_overflow' specifies the index of the first item in 'accounts' that will go into the overflow storage
    index_first_item_overflow: usize,
    pub slot: Slot,
}

impl<'a> AccountsToStore<'a> {
    /// break 'accounts' into primary and overflow
    /// available_bytes: how many bytes remain in the primary storage. Excess accounts will be directed to an overflow storage
    pub fn new(
        mut available_bytes: u64,
        accounts: &'a [&'a StoredAccountMeta<'a>],
        alive_total_bytes: usize,
        slot: Slot,
    ) -> Self {
        let num_accounts = accounts.len();
        // index of the first account that doesn't fit in the current append vec
        let mut index_first_item_overflow = num_accounts; // assume all fit
        if alive_total_bytes > available_bytes as usize {
            // not all the alive bytes fit, so we have to find how many accounts fit within available_bytes
            for (i, account) in accounts.iter().enumerate() {
                let account_size = account.stored_size as u64;
                if available_bytes >= account_size {
                    available_bytes = available_bytes.saturating_sub(account_size);
                } else if index_first_item_overflow == num_accounts {
                    // the # of accounts we have seen so far is the most that will fit in the current ancient append vec
                    index_first_item_overflow = i;
                    break;
                }
            }
        }
        Self {
            accounts,
            index_first_item_overflow,
            slot,
        }
    }
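
    // Worked example of the split above (hypothetical sizes): available_bytes = 10 and
    // three accounts with stored_size = 4 each (alive_total_bytes = 12 > 10). Accounts 0
    // and 1 consume 8 bytes, account 2 no longer fits, so index_first_item_overflow = 2:
    // 'Primary' maps to accounts[0..2] and 'Overflow' to accounts[2..3].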

    /// true if a request to 'get' 'Overflow' would return accounts & hashes
    pub fn has_overflow(&self) -> bool {
        self.index_first_item_overflow < self.accounts.len()
    }

    /// get the accounts to store in the given 'storage'
    pub fn get(&self, storage: StorageSelector) -> &[&'a StoredAccountMeta<'a>] {
        let range = match storage {
            StorageSelector::Primary => 0..self.index_first_item_overflow,
            StorageSelector::Overflow => self.index_first_item_overflow..self.accounts.len(),
        };
        &self.accounts[range]
    }
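
    // Minimal usage sketch (a sketch only; 'available_bytes', 'accounts',
    // 'alive_total_bytes', and 'slot' are assumed from surrounding code):
    //     let to_store = AccountsToStore::new(available_bytes, &accounts, alive_total_bytes, slot);
    //     let primary = to_store.get(StorageSelector::Primary);
    //     if to_store.has_overflow() {
    //         // whatever didn't fit in the primary storage goes to a new storage
    //         let overflow = to_store.get(StorageSelector::Overflow);
    //     }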
}

/// capacity of an ancient append vec
pub fn get_ancient_append_vec_capacity() -> u64 {
    use crate::append_vec::MAXIMUM_APPEND_VEC_FILE_SIZE;
    // smaller than max by a bit just in case
    // some functions add slop on allocation
    // The bigger an append vec is, the more unwieldy it becomes to shrink, create, write.
    // 1/10 of max is a reasonable size in practice.
    MAXIMUM_APPEND_VEC_FILE_SIZE / 10 - 2048
}

/// is this a max-size append vec designed to be used as an ancient append vec?
pub fn is_ancient(storage: &AppendVec) -> bool {
    storage.capacity() >= get_ancient_append_vec_capacity()
}
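
// For scale: with MAXIMUM_APPEND_VEC_FILE_SIZE at 16 GiB (its value at the time of
// writing), this works out to 16 GiB / 10 - 2048 bytes, i.e. a bit under 1.72 GB per
// ancient append vec.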

#[cfg(test)]
pub mod tests {
    use {
        super::*,
        crate::{
            accounts_db::{
                get_temp_accounts_paths,
                tests::{
                    create_db_with_storages_and_index, create_storages_and_update_index,
                    remove_account_for_tests,
                },
            },
            append_vec::{AccountMeta, StoredAccountMeta, StoredMeta},
        },
        solana_sdk::{
            account::{AccountSharedData, ReadableAccount},
            hash::Hash,
            pubkey::Pubkey,
        },
    };

    #[test]
    fn test_accounts_to_store_simple() {
        let map = vec![];
        let slot = 1;
        let accounts_to_store = AccountsToStore::new(0, &map, 0, slot);
        for selector in [StorageSelector::Primary, StorageSelector::Overflow] {
            let accounts = accounts_to_store.get(selector);
            assert!(accounts.is_empty());
        }
        assert!(!accounts_to_store.has_overflow());
    }

    #[test]
    fn test_accounts_to_store_more() {
        let pubkey = Pubkey::from([1; 32]);
        let account_size = 3;

        let account = AccountSharedData::default();

        let account_meta = AccountMeta {
            lamports: 1,
            owner: Pubkey::from([2; 32]),
            executable: false,
            rent_epoch: 0,
        };
        let offset = 3;
        let hash = Hash::new(&[2; 32]);
        let stored_meta = StoredMeta {
            // global write version
            write_version_obsolete: 0,
            // key for the account
            pubkey,
            data_len: 43,
        };
        let account = StoredAccountMeta {
            meta: &stored_meta,
            // account data
            account_meta: &account_meta,
            data: account.data(),
            offset,
            stored_size: account_size,
            hash: &hash,
        };
        let map = vec![&account];
        for (selector, available_bytes) in [
            (StorageSelector::Primary, account_size),
            (StorageSelector::Overflow, account_size - 1),
        ] {
            let slot = 1;
            let alive_total_bytes = account_size;
            let accounts_to_store =
                AccountsToStore::new(available_bytes as u64, &map, alive_total_bytes, slot);
            let accounts = accounts_to_store.get(selector);
            assert_eq!(
                accounts.iter().collect::<Vec<_>>(),
                map.iter().collect::<Vec<_>>(),
                "mismatch"
            );
            let accounts = accounts_to_store.get(get_opposite(&selector));
            assert_eq!(
                selector == StorageSelector::Overflow,
                accounts_to_store.has_overflow()
            );
            assert!(accounts.is_empty());
        }
    }
    fn get_opposite(selector: &StorageSelector) -> StorageSelector {
        match selector {
            StorageSelector::Overflow => StorageSelector::Primary,
            StorageSelector::Primary => StorageSelector::Overflow,
        }
    }

    #[test]
    fn test_get_ancient_append_vec_capacity() {
        assert_eq!(
            get_ancient_append_vec_capacity(),
            crate::append_vec::MAXIMUM_APPEND_VEC_FILE_SIZE / 10 - 2048
        );
    }

    #[test]
    fn test_is_ancient() {
        for (size, expected_ancient) in [
            (get_ancient_append_vec_capacity() + 1, true),
            (get_ancient_append_vec_capacity(), true),
            (get_ancient_append_vec_capacity() - 1, false),
        ] {
            let tf = crate::append_vec::test_utils::get_append_vec_path("test_is_ancient");
            let (_temp_dirs, _paths) = get_temp_accounts_paths(1).unwrap();
            let av = AppendVec::new(&tf.path, true, size as usize);

            assert_eq!(expected_ancient, is_ancient(&av));
        }
    }

    fn assert_storage_info(info: &SlotInfo, storage: &AccountStorageEntry, should_shrink: bool) {
        assert_eq!(storage.append_vec_id(), info.storage.append_vec_id());
        assert_eq!(storage.slot(), info.slot);
        assert_eq!(storage.capacity(), info.capacity);
        assert_eq!(storage.alive_bytes(), info.alive_bytes as usize);
        assert_eq!(should_shrink, info.should_shrink);
    }

    #[test]
    fn test_calc_ancient_slot_info_one_alive() {
        let can_randomly_shrink = false;
        let alive = true;
        let slots = 1;
        for call_add in [false, true] {
            // 1_040_000 is big enough relative to page size that the alive ratio stays above the shrink threshold
            for data_size in [None, Some(1_040_000)] {
                let (db, slot1) = create_db_with_storages_and_index(alive, slots, data_size);
                let mut infos = AncientSlotInfos::default();
                let storage = db.storage.get_slot_storage_entry(slot1).unwrap();
                let alive_bytes_expected = storage.alive_bytes();
                if call_add {
                    // test lower level 'add'
                    infos.add(slot1, Arc::clone(&storage), can_randomly_shrink);
                } else {
                    infos = db.calc_ancient_slot_info(vec![slot1], can_randomly_shrink);
                }
                assert_eq!(infos.all_infos.len(), 1);
                let should_shrink = data_size.is_none();
                assert_storage_info(infos.all_infos.first().unwrap(), &storage, should_shrink);
                if should_shrink {
                    // data size is so small compared to the min aligned file size that the storage is marked as should_shrink
                    assert_eq!(infos.shrink_indexes, vec![0]);
                    assert_eq!(infos.total_alive_bytes, alive_bytes_expected as u64);
                    assert_eq!(infos.total_alive_bytes_shrink, alive_bytes_expected as u64);
                } else {
                    assert!(infos.shrink_indexes.is_empty());
                    assert_eq!(infos.total_alive_bytes, alive_bytes_expected as u64);
                    assert_eq!(infos.total_alive_bytes_shrink, 0);
                }
            }
        }
    }

    #[test]
    fn test_calc_ancient_slot_info_one_dead() {
        let can_randomly_shrink = false;
        let alive = false;
        let slots = 1;
        for call_add in [false, true] {
            let (db, slot1) = create_db_with_storages_and_index(alive, slots, None);
            let mut infos = AncientSlotInfos::default();
            let storage = db.storage.get_slot_storage_entry(slot1).unwrap();
            if call_add {
                infos.add(slot1, Arc::clone(&storage), can_randomly_shrink);
            } else {
                infos = db.calc_ancient_slot_info(vec![slot1], can_randomly_shrink);
            }
            assert!(infos.all_infos.is_empty());
            assert!(infos.shrink_indexes.is_empty());
            assert_eq!(infos.total_alive_bytes, 0);
            assert_eq!(infos.total_alive_bytes_shrink, 0);
        }
    }

    #[test]
    fn test_calc_ancient_slot_info_several() {
        let can_randomly_shrink = false;
        for alive in [true, false] {
            for slots in 2..4 {
                // 1_040_000 is big enough relative to page size that the alive ratio stays above the shrink threshold
                for data_size in [None, Some(1_040_000)] {
                    let (db, slot1) = create_db_with_storages_and_index(alive, slots, data_size);
                    let slot_vec = (slot1..(slot1 + slots as Slot)).collect::<Vec<_>>();
                    let storages = slot_vec
                        .iter()
                        .map(|slot| db.storage.get_slot_storage_entry(*slot).unwrap())
                        .collect::<Vec<_>>();
                    let alive_bytes_expected = storages
                        .iter()
                        .map(|storage| storage.alive_bytes() as u64)
                        .sum::<u64>();
                    let infos = db.calc_ancient_slot_info(slot_vec.clone(), can_randomly_shrink);
                    if !alive {
                        assert!(infos.all_infos.is_empty());
                        assert!(infos.shrink_indexes.is_empty());
                        assert_eq!(infos.total_alive_bytes, 0);
                        assert_eq!(infos.total_alive_bytes_shrink, 0);
                    } else {
                        assert_eq!(infos.all_infos.len(), slots);
                        let should_shrink = data_size.is_none();
                        storages
                            .iter()
                            .zip(infos.all_infos.iter())
                            .for_each(|(storage, info)| {
                                assert_storage_info(info, storage, should_shrink);
                            });
                        if should_shrink {
                            // data size is so small compared to the min aligned file size that the storage is marked as should_shrink
                            assert_eq!(
                                infos.shrink_indexes,
                                slot_vec
                                    .iter()
                                    .enumerate()
                                    .map(|(i, _)| i)
                                    .collect::<Vec<_>>()
                            );
                            assert_eq!(infos.total_alive_bytes, alive_bytes_expected);
                            assert_eq!(infos.total_alive_bytes_shrink, alive_bytes_expected);
                        } else {
                            assert!(infos.shrink_indexes.is_empty());
                            assert_eq!(infos.total_alive_bytes, alive_bytes_expected);
                            assert_eq!(infos.total_alive_bytes_shrink, 0);
                        }
                    }
                }
            }
        }
    }

    #[test]
    fn test_calc_ancient_slot_info_one_alive_one_dead() {
        let can_randomly_shrink = false;
        for slot1_is_alive in [false, true] {
            let alives = vec![false /*dummy*/, slot1_is_alive, !slot1_is_alive];
            let slots = 2;
            // 1_040_000 is big enough relative to page size that the alive ratio stays above the shrink threshold
            for data_size in [None, Some(1_040_000)] {
                let (db, slot1) =
                    create_db_with_storages_and_index(true /*alive*/, slots, data_size);
                assert_eq!(slot1, 1); // make sure index into alives will be correct
                assert_eq!(alives[slot1 as usize], slot1_is_alive);
                let slot_vec = (slot1..(slot1 + slots as Slot)).collect::<Vec<_>>();
                let storages = slot_vec
                    .iter()
                    .map(|slot| db.storage.get_slot_storage_entry(*slot).unwrap())
                    .collect::<Vec<_>>();
                storages.iter().for_each(|storage| {
                    let slot = storage.slot();
                    let alive = alives[slot as usize];
                    if !alive {
                        // make this storage not alive
                        remove_account_for_tests(storage, storage.written_bytes() as usize, false);
                    }
                });
                let alive_storages = storages
                    .iter()
                    .filter_map(|storage| alives[storage.slot() as usize].then_some(storage))
                    .collect::<Vec<_>>();
                let alive_bytes_expected = alive_storages
                    .iter()
                    .map(|storage| storage.alive_bytes() as u64)
                    .sum::<u64>();
                let infos = db.calc_ancient_slot_info(slot_vec.clone(), can_randomly_shrink);
                assert_eq!(infos.all_infos.len(), 1);
                let should_shrink = data_size.is_none();
                alive_storages
                    .iter()
                    .zip(infos.all_infos.iter())
                    .for_each(|(storage, info)| {
                        assert_storage_info(info, storage, should_shrink);
                    });
                if should_shrink {
                    // data size is so small compared to the min aligned file size that the storage is marked as should_shrink
                    assert_eq!(infos.shrink_indexes, vec![0]);
                    assert_eq!(infos.total_alive_bytes, alive_bytes_expected);
                    assert_eq!(infos.total_alive_bytes_shrink, alive_bytes_expected);
                } else {
                    assert!(infos.shrink_indexes.is_empty());
                    assert_eq!(infos.total_alive_bytes, alive_bytes_expected);
                    assert_eq!(infos.total_alive_bytes_shrink, 0);
                }
            }
        }
    }

    fn create_test_infos(count: usize) -> AncientSlotInfos {
        let (db, slot1) = create_db_with_storages_and_index(true /*alive*/, 1, None);
        let storage = db.storage.get_slot_storage_entry(slot1).unwrap();
        AncientSlotInfos {
            all_infos: (0..count)
                .map(|index| SlotInfo {
                    storage: Arc::clone(&storage),
                    slot: index as Slot,
                    capacity: 1,
                    alive_bytes: 1,
                    should_shrink: false,
                })
                .collect(),
            shrink_indexes: (0..count).collect(),
            ..AncientSlotInfos::default()
        }
    }

    #[test]
    fn test_filter_by_smallest_capacity_empty() {
        for max_storages in 1..3 {
            // requesting N max storages, we have 1 storage, N >= 1 so nothing to do
            let ideal_storage_size_large = get_ancient_append_vec_capacity();
            let mut infos = create_test_infos(1);
            infos.filter_by_smallest_capacity(max_storages, ideal_storage_size_large);
            assert!(infos.all_infos.is_empty());
        }
    }

    #[test]
    fn test_filter_by_smallest_capacity_sort() {
        // max is 3
        // 4 storages
        // storage[3] is big enough to cause us to need another storage
        // so, storage[0] and [1] can be combined into 1, resulting in 3 remaining storages, which is
        // the goal, so we only have to combine the first 2 to hit the goal
        let ideal_storage_size_large = get_ancient_append_vec_capacity();
        for reorder in [false, true] {
            let mut infos = create_test_infos(4);
            infos
                .all_infos
                .iter_mut()
                .enumerate()
                .for_each(|(i, info)| info.capacity = 1 + i as u64);
            if reorder {
                infos.all_infos[3].capacity = 0; // sort to beginning
            }
            infos.all_infos[3].alive_bytes = ideal_storage_size_large;
            let max_storages = 3;
            infos.filter_by_smallest_capacity(max_storages, ideal_storage_size_large);
            assert_eq!(
                infos
                    .all_infos
                    .iter()
                    .map(|info| info.slot)
                    .collect::<Vec<_>>(),
                if reorder { vec![3, 0, 1] } else { vec![0, 1] },
                "reorder: {reorder}"
            );
        }
    }

    #[test]
    fn test_truncate_to_max_storages() {
        for filter in [false, true] {
            let test = |infos: &mut AncientSlotInfos, max_storages, ideal_storage_size| {
                if filter {
                    infos.filter_by_smallest_capacity(max_storages, ideal_storage_size);
                } else {
                    infos.truncate_to_max_storages(max_storages, ideal_storage_size);
                }
            };
            let ideal_storage_size_large = get_ancient_append_vec_capacity();
            let mut infos = create_test_infos(1);
            let max_storages = 1;
            // 1 storage, 1 max, but 1 storage does not fill the entire new combined storage, so truncate nothing
            test(&mut infos, max_storages, ideal_storage_size_large);
            assert_eq!(infos.all_infos.len(), usize::from(!filter));

            let mut infos = create_test_infos(1);
            let max_storages = 1;
            infos.all_infos[0].alive_bytes = ideal_storage_size_large + 1; // too big for 1 ideal storage
            // 1 storage, 1 max, but 1 overflows the entire new combined storage, so truncate nothing
            test(&mut infos, max_storages, ideal_storage_size_large);
            assert_eq!(infos.all_infos.len(), usize::from(!filter));

            let mut infos = create_test_infos(1);
            let max_storages = 2;
            // all truncated because these infos will fit into the # of storages
            test(&mut infos, max_storages, ideal_storage_size_large);
            assert!(infos.all_infos.is_empty());

            let mut infos = create_test_infos(1);
            infos.all_infos[0].alive_bytes = ideal_storage_size_large + 1;
            let max_storages = 2;
            // none truncated because the one storage calculates to be larger than 1 ideal storage, so we need to
            // combine
            test(&mut infos, max_storages, ideal_storage_size_large);
            assert_eq!(
                infos
                    .all_infos
                    .iter()
                    .map(|info| info.slot)
                    .collect::<Vec<_>>(),
                if filter { Vec::default() } else { vec![0] }
            );

            // both need to be combined to reach '1'
            let max_storages = 1;
            for ideal_storage_size in [1, 2] {
                let mut infos = create_test_infos(2);
                test(&mut infos, max_storages, ideal_storage_size);
                assert_eq!(infos.all_infos.len(), 2);
            }

            // max is 3
            // 4 storages
            // storage[3] is big enough to cause us to need another storage
            // so, storage[0] and [1] can be combined into 1, resulting in 3 remaining storages, which is
            // the goal, so we only have to combine the first 2 to hit the goal
            let mut infos = create_test_infos(4);
            infos.all_infos[3].alive_bytes = ideal_storage_size_large;
            let max_storages = 3;
            test(&mut infos, max_storages, ideal_storage_size_large);
            assert_eq!(
                infos
                    .all_infos
                    .iter()
                    .map(|info| info.slot)
                    .collect::<Vec<_>>(),
                vec![0, 1]
            );
        }
    }

    #[test]
    fn test_calc_ancient_slot_info_one_shrink_one_not() {
        let can_randomly_shrink = false;
        for slot1_shrink in [false, true] {
            let shrinks = vec![false /*dummy*/, slot1_shrink, !slot1_shrink];
            let slots = 2;
            // 1_040_000 is big enough relative to page size that the alive ratio stays above the shrink threshold
            let data_sizes = shrinks
                .iter()
                .map(|shrink| (!shrink).then_some(1_040_000))
                .collect::<Vec<_>>();
            let (db, slot1) =
                create_db_with_storages_and_index(true /*alive*/, 1, data_sizes[1]);
            let dead_bytes = 184; // constant based on None data size
            create_storages_and_update_index(
                &db,
                None,
                slot1 + 1,
                1,
                true,
                data_sizes[(slot1 + 1) as usize],
            );

            assert_eq!(slot1, 1); // make sure index into shrinks will be correct
            assert_eq!(shrinks[slot1 as usize], slot1_shrink);
            let slot_vec = (slot1..(slot1 + slots as Slot)).collect::<Vec<_>>();
            let storages = slot_vec
                .iter()
                .map(|slot| {
                    let storage = db.storage.get_slot_storage_entry(*slot).unwrap();
                    assert_eq!(*slot, storage.slot());
                    storage
                })
                .collect::<Vec<_>>();
            let alive_bytes_expected = storages
                .iter()
                .map(|storage| storage.alive_bytes() as u64)
                .sum::<u64>();
            let infos = db.calc_ancient_slot_info(slot_vec.clone(), can_randomly_shrink);
            assert_eq!(infos.all_infos.len(), 2);
            storages
                .iter()
                .zip(infos.all_infos.iter())
                .for_each(|(storage, info)| {
                    assert_storage_info(info, storage, shrinks[storage.slot() as usize]);
                });
            // data size is so small compared to the min aligned file size that the storage is marked as should_shrink
            assert_eq!(
                infos.shrink_indexes,
                shrinks
                    .iter()
                    .skip(1)
                    .enumerate()
                    .filter_map(|(i, shrink)| shrink.then_some(i))
                    .collect::<Vec<_>>()
            );
            assert_eq!(infos.total_alive_bytes, alive_bytes_expected);
            assert_eq!(infos.total_alive_bytes_shrink, dead_bytes);
        }
    }

    #[test]
    fn test_sort_shrink_indexes_by_bytes_saved() {
        let (db, slot1) = create_db_with_storages_and_index(true /*alive*/, 1, None);
        let storage = db.storage.get_slot_storage_entry(slot1).unwrap();
        // slot is ignored by the sort
        let slot = 0;

        // across iterations, info1's bytes saved is below, equal to, then above info2's
        for info1_capacity in [0, 1, 2] {
            let info1 = SlotInfo {
                storage: storage.clone(),
                slot,
                capacity: info1_capacity,
                alive_bytes: 0,
                should_shrink: false,
            };
            let info2 = SlotInfo {
                storage: storage.clone(),
                slot,
                capacity: 2,
                alive_bytes: 1,
                should_shrink: false,
            };
            let mut infos = AncientSlotInfos {
                all_infos: vec![info1, info2],
                shrink_indexes: vec![0, 1],
                ..AncientSlotInfos::default()
            };
            infos.sort_shrink_indexes_by_bytes_saved();
            let first = &infos.all_infos[infos.shrink_indexes[0]];
            let second = &infos.all_infos[infos.shrink_indexes[1]];
            let first_capacity = first.capacity - first.alive_bytes;
            let second_capacity = second.capacity - second.alive_bytes;
            assert!(first_capacity >= second_capacity);
        }
    }
}