From 785a6e3a691971de0b56cff222968f75909e4475 Mon Sep 17 00:00:00 2001
From: "Jeff Washington (jwash)"
Date: Fri, 3 Feb 2023 12:53:49 -0600
Subject: [PATCH] AncientSlotInfos, piece of packed ancient storage (#30119)

---
 runtime/src/accounts_db.rs         |   8 +
 runtime/src/ancient_append_vecs.rs | 348 ++++++++++++++++++++++++++++-
 2 files changed, 353 insertions(+), 3 deletions(-)

diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs
index 7e431713c3..dd0527fb66 100644
--- a/runtime/src/accounts_db.rs
+++ b/runtime/src/accounts_db.rs
@@ -17430,6 +17430,14 @@ pub mod tests {
         })
     }
 
+    pub(crate) fn remove_account_for_tests(
+        storage: &Arc<AccountStorageEntry>,
+        num_bytes: usize,
+        reset_accounts: bool,
+    ) {
+        storage.remove_account(num_bytes, reset_accounts);
+    }
+
     pub(crate) fn create_storages_and_update_index(
         db: &AccountsDb,
         tf: Option<&TempFile>,
diff --git a/runtime/src/ancient_append_vecs.rs b/runtime/src/ancient_append_vecs.rs
index f747716229..ff4ddcdedd 100644
--- a/runtime/src/ancient_append_vecs.rs
+++ b/runtime/src/ancient_append_vecs.rs
@@ -4,10 +4,105 @@
 //! 2. multiple 'slots' squashed into a single older (ie. ancient) slot for convenience and performance
 //! Otherwise, an ancient append vec is the same as any other append vec
 use {
-    crate::append_vec::{AppendVec, StoredAccountMeta},
-    solana_sdk::clock::Slot,
+    crate::{
+        accounts_db::{AccountStorageEntry, AccountsDb},
+        append_vec::{AppendVec, StoredAccountMeta},
+    },
+    rand::{thread_rng, Rng},
+    solana_sdk::{clock::Slot, saturating_add_assign},
+    std::sync::Arc,
 };
 
+/// info about a storage eligible to be combined into an ancient append vec.
+/// Useful to help sort vecs of storages.
+#[derive(Debug)]
+#[allow(dead_code)]
+struct SlotInfo {
+    storage: Arc<AccountStorageEntry>,
+    /// slot of storage
+    slot: Slot,
+    /// total capacity of storage
+    capacity: u64,
+    /// # alive bytes in storage
+    alive_bytes: u64,
+    /// true if this should be shrunk due to ratio
+    should_shrink: bool,
+}
+
+/// info for all storages in ancient slots
+/// 'all_infos' contains all slots and storages that are ancient
+#[derive(Default, Debug)]
+struct AncientSlotInfos {
+    /// info on all ancient storages
+    all_infos: Vec<SlotInfo>,
+    /// indexes to 'all_info' for storages that should be shrunk because alive ratio is too low.
+    /// subset of all_infos
+    shrink_indexes: Vec<usize>,
+    /// total alive bytes across contents of 'shrink_indexes'
+    total_alive_bytes_shrink: u64,
+    /// total alive bytes across all slots
+    total_alive_bytes: u64,
+}
+
+impl AncientSlotInfos {
+    /// add info for 'storage'
+    fn add(&mut self, slot: Slot, storage: Arc<AccountStorageEntry>, can_randomly_shrink: bool) {
+        let alive_bytes = storage.alive_bytes() as u64;
+        if alive_bytes > 0 {
+            let capacity = storage.accounts.capacity();
+            let should_shrink = if capacity > 0 {
+                let alive_ratio = alive_bytes * 100 / capacity;
+                (alive_ratio < 90) || (can_randomly_shrink && thread_rng().gen_range(0, 10000) == 0)
+            } else {
+                false
+            };
+            // two criteria we're shrinking by later:
+            // 1. alive ratio so that we don't consume too much disk space with dead accounts
+            // 2. # of active ancient roots, so that we don't consume too many open file handles
+            if should_shrink {
+                // alive ratio is too low, so prioritize combining this slot with others
+                // to reduce disk space used
+                saturating_add_assign!(self.total_alive_bytes_shrink, alive_bytes);
+                self.shrink_indexes.push(self.all_infos.len());
+            }
+            self.all_infos.push(SlotInfo {
+                slot,
+                capacity,
+                storage,
+                alive_bytes,
+                should_shrink,
+            });
+            self.total_alive_bytes += alive_bytes;
+        }
+    }
+}
+
+impl AccountsDb {
+    /// go through all slots and populate 'SlotInfo', per slot
+    /// This provides the list of possible ancient slots to sort, filter, and then combine.
+    #[allow(dead_code)]
+    fn calc_ancient_slot_info(
+        &self,
+        slots: Vec<Slot>,
+        can_randomly_shrink: bool,
+    ) -> AncientSlotInfos {
+        let len = slots.len();
+        let mut infos = AncientSlotInfos {
+            shrink_indexes: Vec::with_capacity(len),
+            all_infos: Vec::with_capacity(len),
+            ..AncientSlotInfos::default()
+        };
+
+        for slot in &slots {
+            if let Some(storage) = self.storage.get_slot_storage_entry(*slot) {
+                infos.add(*slot, storage, can_randomly_shrink);
+            }
+        }
+        infos
+    }
+}
+
 /// a set of accounts need to be stored.
 /// If there are too many to fit in 'Primary', the rest are put in 'Overflow'
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
@@ -96,7 +191,13 @@ pub mod tests {
     use {
         super::*,
         crate::{
-            accounts_db::get_temp_accounts_paths,
+            accounts_db::{
+                get_temp_accounts_paths,
+                tests::{
+                    create_db_with_storages_and_index, create_storages_and_update_index,
+                    remove_account_for_tests,
+                },
+            },
             append_vec::{AccountMeta, StoredAccountMeta, StoredMeta},
         },
         solana_sdk::{
@@ -201,4 +302,245 @@ pub mod tests {
             assert_eq!(expected_ancient, is_ancient(&av));
         }
     }
+
+    fn assert_storage_info(
+        info: &SlotInfo,
+        storage: &Arc<AccountStorageEntry>,
+        should_shrink: bool,
+    ) {
+        assert_eq!(storage.append_vec_id(), info.storage.append_vec_id());
+        assert_eq!(storage.slot(), info.slot);
+        assert_eq!(storage.capacity(), info.capacity);
+        assert_eq!(storage.alive_bytes(), info.alive_bytes as usize);
+        assert_eq!(should_shrink, info.should_shrink);
+    }
+
+    #[test]
+    fn test_calc_ancient_slot_info_one_alive() {
+        let can_randomly_shrink = false;
+        let alive = true;
+        let slots = 1;
+        for call_add in [false, true] {
+            // 1_040_000 is big enough relative to page size to cause shrink ratio to be triggered
+            for data_size in [None, Some(1_040_000)] {
+                let (db, slot1) = create_db_with_storages_and_index(alive, slots, data_size);
+                let mut infos = AncientSlotInfos::default();
+                let storage = db.storage.get_slot_storage_entry(slot1).unwrap();
+                let alive_bytes_expected = storage.alive_bytes();
+                if call_add {
+                    // test lower level 'add'
+                    infos.add(slot1, Arc::clone(&storage), can_randomly_shrink);
+                } else {
+                    infos = db.calc_ancient_slot_info(vec![slot1], can_randomly_shrink);
+                }
+                assert_eq!(infos.all_infos.len(), 1);
+                let should_shrink = data_size.is_none();
+                assert_storage_info(infos.all_infos.first().unwrap(), &storage, should_shrink);
+                if should_shrink {
+                    // data size is so small compared to min aligned file size that the storage is marked as should_shrink
+                    assert_eq!(infos.shrink_indexes, vec![0]);
+                    assert_eq!(infos.total_alive_bytes, alive_bytes_expected as u64);
+                    assert_eq!(infos.total_alive_bytes_shrink, alive_bytes_expected as u64);
+                } else {
+                    assert!(infos.shrink_indexes.is_empty());
+                    assert_eq!(infos.total_alive_bytes, alive_bytes_expected as u64);
+                    assert_eq!(infos.total_alive_bytes_shrink, 0);
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn test_calc_ancient_slot_info_one_dead() {
+        let can_randomly_shrink = false;
+        let alive = false;
+        let slots = 1;
+        for call_add in [false, true] {
+            // 1_040_000 is big enough relative to page size to cause shrink ratio to be triggered
+            let (db, slot1) = create_db_with_storages_and_index(alive, slots, None);
+            let mut infos = AncientSlotInfos::default();
+            let storage = db.storage.get_slot_storage_entry(slot1).unwrap();
+            if call_add {
+                infos.add(slot1, Arc::clone(&storage), can_randomly_shrink);
+            } else {
+                infos = db.calc_ancient_slot_info(vec![slot1], can_randomly_shrink);
+            }
+            assert!(infos.all_infos.is_empty());
+            assert!(infos.shrink_indexes.is_empty());
+            assert_eq!(infos.total_alive_bytes, 0);
+            assert_eq!(infos.total_alive_bytes_shrink, 0);
+        }
+    }
+
+    #[test]
+    fn test_calc_ancient_slot_info_several() {
+        let can_randomly_shrink = false;
+        for alive in [true, false] {
+            for slots in 2..4 {
+                // 1_040_000 is big enough relative to page size to cause shrink ratio to be triggered
+                for data_size in [None, Some(1_040_000)] {
+                    let (db, slot1) = create_db_with_storages_and_index(alive, slots, data_size);
+                    let slot_vec = (slot1..(slot1 + slots as Slot)).collect::<Vec<_>>();
+                    let storages = slot_vec
+                        .iter()
+                        .map(|slot| db.storage.get_slot_storage_entry(*slot).unwrap())
+                        .collect::<Vec<_>>();
+                    let alive_bytes_expected = storages
+                        .iter()
+                        .map(|storage| storage.alive_bytes() as u64)
+                        .sum::<u64>();
+                    let infos = db.calc_ancient_slot_info(slot_vec.clone(), can_randomly_shrink);
+                    if !alive {
+                        assert!(infos.all_infos.is_empty());
+                        assert!(infos.shrink_indexes.is_empty());
+                        assert_eq!(infos.total_alive_bytes, 0);
+                        assert_eq!(infos.total_alive_bytes_shrink, 0);
+                    } else {
+                        assert_eq!(infos.all_infos.len(), slots);
+                        let should_shrink = data_size.is_none();
+                        storages
+                            .iter()
+                            .zip(infos.all_infos.iter())
+                            .for_each(|(storage, info)| {
+                                assert_storage_info(info, storage, should_shrink);
+                            });
+                        if should_shrink {
+                            // data size is so small compared to min aligned file size that the storage is marked as should_shrink
+                            assert_eq!(
+                                infos.shrink_indexes,
+                                slot_vec
+                                    .iter()
+                                    .enumerate()
+                                    .map(|(i, _)| i)
+                                    .collect::<Vec<_>>()
+                            );
+                            assert_eq!(infos.total_alive_bytes, alive_bytes_expected);
+                            assert_eq!(infos.total_alive_bytes_shrink, alive_bytes_expected);
+                        } else {
+                            assert!(infos.shrink_indexes.is_empty());
+                            assert_eq!(infos.total_alive_bytes, alive_bytes_expected);
+                            assert_eq!(infos.total_alive_bytes_shrink, 0);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn test_calc_ancient_slot_info_one_alive_one_dead() {
+        let can_randomly_shrink = false;
+        for slot1_is_alive in [false, true] {
+            let alives = vec![false /*dummy*/, slot1_is_alive, !slot1_is_alive];
+            let slots = 2;
+            // 1_040_000 is big enough relative to page size to cause shrink ratio to be triggered
+            for data_size in [None, Some(1_040_000)] {
+                let (db, slot1) =
+                    create_db_with_storages_and_index(true /*alive*/, slots, data_size);
+                assert_eq!(slot1, 1); // make sure index into alives will be correct
+                assert_eq!(alives[slot1 as usize], slot1_is_alive);
+                let slot_vec = (slot1..(slot1 + slots as Slot)).collect::<Vec<_>>();
+                let storages = slot_vec
+                    .iter()
+                    .map(|slot| db.storage.get_slot_storage_entry(*slot).unwrap())
+                    .collect::<Vec<_>>();
+                storages.iter().for_each(|storage| {
+                    let slot = storage.slot();
+                    let alive = alives[slot as usize];
+                    if !alive {
+                        // make this storage not alive
+                        remove_account_for_tests(storage, storage.written_bytes() as usize, false);
+                    }
+                });
+                let alive_storages = storages
+                    .iter()
+                    .filter_map(|storage| alives[storage.slot() as usize].then_some(storage))
+                    .collect::<Vec<_>>();
+                let alive_bytes_expected = alive_storages
+                    .iter()
+                    .map(|storage| storage.alive_bytes() as u64)
+                    .sum::<u64>();
+                let infos = db.calc_ancient_slot_info(slot_vec.clone(), can_randomly_shrink);
+                assert_eq!(infos.all_infos.len(), 1);
+                let should_shrink = data_size.is_none();
+                alive_storages
+                    .iter()
+                    .zip(infos.all_infos.iter())
+                    .for_each(|(storage, info)| {
+                        assert_storage_info(info, storage, should_shrink);
+                    });
+                if should_shrink {
+                    // data size is so small compared to min aligned file size that the storage is marked as should_shrink
+                    assert_eq!(infos.shrink_indexes, vec![0]);
+                    assert_eq!(infos.total_alive_bytes, alive_bytes_expected);
+                    assert_eq!(infos.total_alive_bytes_shrink, alive_bytes_expected);
+                } else {
+                    assert!(infos.shrink_indexes.is_empty());
+                    assert_eq!(infos.total_alive_bytes, alive_bytes_expected);
+                    assert_eq!(infos.total_alive_bytes_shrink, 0);
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn test_calc_ancient_slot_info_one_shrink_one_not() {
+        let can_randomly_shrink = false;
+        for slot1_shrink in [false, true] {
+            let shrinks = vec![false /*dummy*/, slot1_shrink, !slot1_shrink];
+            let slots = 2;
+            // 1_040_000 is big enough relative to page size to cause shrink ratio to be triggered
+            let data_sizes = shrinks
+                .iter()
+                .map(|shrink| (!shrink).then_some(1_040_000))
+                .collect::<Vec<_>>();
+            let (db, slot1) =
+                create_db_with_storages_and_index(true /*alive*/, 1, data_sizes[1]);
+            let dead_bytes = 184; // constant based on None data size
+            create_storages_and_update_index(
+                &db,
+                None,
+                slot1 + 1,
+                1,
+                true,
+                data_sizes[(slot1 + 1) as usize],
+            );
+
+            assert_eq!(slot1, 1); // make sure index into shrinks will be correct
+            assert_eq!(shrinks[slot1 as usize], slot1_shrink);
+            let slot_vec = (slot1..(slot1 + slots as Slot)).collect::<Vec<_>>();
+            let storages = slot_vec
+                .iter()
+                .map(|slot| {
+                    let storage = db.storage.get_slot_storage_entry(*slot).unwrap();
+                    assert_eq!(*slot, storage.slot());
+                    storage
+                })
+                .collect::<Vec<_>>();
+            let alive_bytes_expected = storages
+                .iter()
+                .map(|storage| storage.alive_bytes() as u64)
+                .sum::<u64>();
+            let infos = db.calc_ancient_slot_info(slot_vec.clone(), can_randomly_shrink);
+            assert_eq!(infos.all_infos.len(), 2);
+            storages
+                .iter()
+                .zip(infos.all_infos.iter())
+                .for_each(|(storage, info)| {
+                    assert_storage_info(info, storage, shrinks[storage.slot() as usize]);
+                });
+            // data size is so small compared to min aligned file size that the storage is marked as should_shrink
+            assert_eq!(
+                infos.shrink_indexes,
+                shrinks
+                    .iter()
+                    .skip(1)
+                    .enumerate()
+                    .filter_map(|(i, shrink)| shrink.then_some(i))
+                    .collect::<Vec<_>>()
+            );
+            assert_eq!(infos.total_alive_bytes, alive_bytes_expected);
+            assert_eq!(infos.total_alive_bytes_shrink, dead_bytes);
+        }
+    }
 }
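
The shrink-selection rule this patch adds in AncientSlotInfos::add can be read in isolation from the diff. The sketch below is illustrative only and is not part of the patch: StorageStats and the randomly_selected flag are simplified stand-ins for AccountStorageEntry and the thread_rng().gen_range(0, 10000) == 0 draw, while the 90% threshold mirrors the hard-coded alive_ratio < 90 check above.

// Minimal standalone sketch (assumption: no solana-runtime types, pure std Rust).

/// Simplified stand-in for AccountStorageEntry: just the two numbers the decision needs.
struct StorageStats {
    capacity: u64,
    alive_bytes: u64,
}

/// Mirrors the `should_shrink` computation in `AncientSlotInfos::add`: shrink when
/// less than 90% of the capacity is alive, or when random shrinking is allowed and
/// this storage was randomly selected (the patch uses a 1-in-10_000 draw; here the
/// draw result is passed in as a flag to keep the sketch deterministic).
fn should_shrink(stats: &StorageStats, can_randomly_shrink: bool, randomly_selected: bool) -> bool {
    if stats.capacity == 0 {
        return false;
    }
    let alive_ratio = stats.alive_bytes * 100 / stats.capacity;
    alive_ratio < 90 || (can_randomly_shrink && randomly_selected)
}

fn main() {
    // 50% alive: well below the threshold, so the storage is flagged for shrinking.
    assert!(should_shrink(
        &StorageStats { capacity: 1_000, alive_bytes: 500 },
        false,
        false
    ));
    // 95% alive and no random selection: the storage is left alone.
    assert!(!should_shrink(
        &StorageStats { capacity: 1_000, alive_bytes: 950 },
        false,
        false
    ));
}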