From e155d9c44580141d7cb8d9f8120d67254f3eb588 Mon Sep 17 00:00:00 2001 From: Brooks Date: Thu, 25 Jan 2024 16:58:56 -0500 Subject: [PATCH] Adds cache hash data deletion policy enum (#34956) --- accounts-db/src/accounts_db.rs | 19 +++++++---- accounts-db/src/cache_hash_data.rs | 54 +++++++++++++++++++----------- 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index 4c8d479cd..ded01efa8 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -58,7 +58,9 @@ use { append_vec::{ aligned_stored_size, AppendVec, APPEND_VEC_MMAPPED_FILES_OPEN, STORE_META_OVERHEAD, }, - cache_hash_data::{CacheHashData, CacheHashDataFileReference}, + cache_hash_data::{ + CacheHashData, CacheHashDataFileReference, DeletionPolicy as CacheHashDeletionPolicy, + }, contains::Contains, epoch_accounts_hash::EpochAccountsHashManager, in_mem_accounts_index::StartupStats, @@ -7549,10 +7551,13 @@ impl AccountsDb { _ = std::fs::remove_dir_all(&failed_dir); failed_dir }; - CacheHashData::new( - accounts_hash_cache_path, - (kind == CalcAccountsHashKind::Incremental).then_some(storages_start_slot), - ) + let deletion_policy = match kind { + CalcAccountsHashKind::Full => CacheHashDeletionPolicy::AllUnused, + CalcAccountsHashKind::Incremental => { + CacheHashDeletionPolicy::UnusedAtLeast(storages_start_slot) + } + }; + CacheHashData::new(accounts_hash_cache_path, deletion_policy) } // modeled after calculate_accounts_delta_hash @@ -9775,7 +9780,7 @@ pub mod tests { let temp_dir = TempDir::new().unwrap(); let accounts_hash_cache_path = temp_dir.path().to_path_buf(); self.scan_snapshot_stores_with_cache( - &CacheHashData::new(accounts_hash_cache_path, None), + &CacheHashData::new(accounts_hash_cache_path, CacheHashDeletionPolicy::AllUnused), storage, stats, bins, @@ -10843,7 +10848,7 @@ pub mod tests { }; let result = accounts_db.scan_account_storage_no_bank( - &CacheHashData::new(accounts_hash_cache_path, None), + &CacheHashData::new(accounts_hash_cache_path, CacheHashDeletionPolicy::AllUnused), &CalcAccountsHashConfig::default(), &get_storage_refs(&[storage]), test_scan, diff --git a/accounts-db/src/cache_hash_data.rs b/accounts-db/src/cache_hash_data.rs index fbd24e19f..e9675b9fd 100644 --- a/accounts-db/src/cache_hash_data.rs +++ b/accounts-db/src/cache_hash_data.rs @@ -193,8 +193,7 @@ impl CacheHashDataFile { pub(crate) struct CacheHashData { cache_dir: PathBuf, pre_existing_cache_files: Arc>>, - /// Decides which old cache files to delete. See `delete_old_cache_files()` for more info. - storages_start_slot: Option, + deletion_policy: DeletionPolicy, pub stats: Arc, } @@ -206,7 +205,7 @@ impl Drop for CacheHashData { } impl CacheHashData { - pub(crate) fn new(cache_dir: PathBuf, storages_start_slot: Option) -> CacheHashData { + pub(crate) fn new(cache_dir: PathBuf, deletion_policy: DeletionPolicy) -> CacheHashData { std::fs::create_dir_all(&cache_dir).unwrap_or_else(|err| { panic!("error creating cache dir {}: {err}", cache_dir.display()) }); @@ -214,7 +213,7 @@ impl CacheHashData { let result = CacheHashData { cache_dir, pre_existing_cache_files: Arc::new(Mutex::new(HashSet::default())), - storages_start_slot, + deletion_policy, stats: Arc::default(), }; @@ -229,21 +228,24 @@ impl CacheHashData { let mut old_cache_files = std::mem::take(&mut *self.pre_existing_cache_files.lock().unwrap()); - // If `storages_start_slot` is None, we're doing a full accounts hash calculation, and thus - // all unused cache files can be deleted. - // If `storages_start_slot` is Some, we're doing an incremental accounts hash calculation, - // and we only want to delete the unused cache files *that IAH considered*. - if let Some(storages_start_slot) = self.storages_start_slot { - old_cache_files.retain(|old_cache_file| { - let Some(parsed_filename) = parse_filename(old_cache_file) else { - // if parsing the cache filename fails, we *do* want to delete it - return true; - }; + match self.deletion_policy { + DeletionPolicy::AllUnused => { + // no additional work to do here; we will delete everything in `old_cache_files` + } + DeletionPolicy::UnusedAtLeast(storages_start_slot) => { + // when calculating an incremental accounts hash, we only want to delete the unused + // cache files *that IAH considered* + old_cache_files.retain(|old_cache_file| { + let Some(parsed_filename) = parse_filename(old_cache_file) else { + // if parsing the cache filename fails, we *do* want to delete it + return true; + }; - // if the old cache file is in the incremental accounts hash calculation range, - // then delete it - parsed_filename.slot_range_start >= storages_start_slot - }); + // if the old cache file is in the incremental accounts hash calculation range, + // then delete it + parsed_filename.slot_range_start >= storages_start_slot + }); + } } if !old_cache_files.is_empty() { @@ -410,6 +412,19 @@ fn parse_filename(cache_filename: impl AsRef) -> Option { }) } +/// Decides which old cache files to delete +/// +/// See `delete_old_cache_files()` for more info. +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum DeletionPolicy { + /// Delete *all* the unused cache files + /// Should be used when calculating full accounts hash + AllUnused, + /// Delete *only* the unused cache files with starting slot range *at least* this slot + /// Should be used when calculating incremental accounts hash + UnusedAtLeast(Slot), +} + #[cfg(test)] mod tests { use {super::*, crate::accounts_hash::AccountHash, rand::Rng}; @@ -477,7 +492,8 @@ mod tests { data_this_pass.push(this_bin_data); } } - let cache = CacheHashData::new(cache_dir.clone(), None); + let cache = + CacheHashData::new(cache_dir.clone(), DeletionPolicy::AllUnused); let file_name = PathBuf::from("test"); cache.save(&file_name, &data_this_pass).unwrap(); cache.get_cache_files();