Adds cache hash data deletion policy enum (#34956)

This commit is contained in:
Brooks 2024-01-25 16:58:56 -05:00 committed by GitHub
parent 1e68ba5f8a
commit e155d9c445
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 47 additions and 26 deletions

View File

@ -58,7 +58,9 @@ use {
append_vec::{ append_vec::{
aligned_stored_size, AppendVec, APPEND_VEC_MMAPPED_FILES_OPEN, STORE_META_OVERHEAD, aligned_stored_size, AppendVec, APPEND_VEC_MMAPPED_FILES_OPEN, STORE_META_OVERHEAD,
}, },
cache_hash_data::{CacheHashData, CacheHashDataFileReference}, cache_hash_data::{
CacheHashData, CacheHashDataFileReference, DeletionPolicy as CacheHashDeletionPolicy,
},
contains::Contains, contains::Contains,
epoch_accounts_hash::EpochAccountsHashManager, epoch_accounts_hash::EpochAccountsHashManager,
in_mem_accounts_index::StartupStats, in_mem_accounts_index::StartupStats,
@ -7549,10 +7551,13 @@ impl AccountsDb {
_ = std::fs::remove_dir_all(&failed_dir); _ = std::fs::remove_dir_all(&failed_dir);
failed_dir failed_dir
}; };
CacheHashData::new( let deletion_policy = match kind {
accounts_hash_cache_path, CalcAccountsHashKind::Full => CacheHashDeletionPolicy::AllUnused,
(kind == CalcAccountsHashKind::Incremental).then_some(storages_start_slot), CalcAccountsHashKind::Incremental => {
) CacheHashDeletionPolicy::UnusedAtLeast(storages_start_slot)
}
};
CacheHashData::new(accounts_hash_cache_path, deletion_policy)
} }
// modeled after calculate_accounts_delta_hash // modeled after calculate_accounts_delta_hash
@ -9775,7 +9780,7 @@ pub mod tests {
let temp_dir = TempDir::new().unwrap(); let temp_dir = TempDir::new().unwrap();
let accounts_hash_cache_path = temp_dir.path().to_path_buf(); let accounts_hash_cache_path = temp_dir.path().to_path_buf();
self.scan_snapshot_stores_with_cache( self.scan_snapshot_stores_with_cache(
&CacheHashData::new(accounts_hash_cache_path, None), &CacheHashData::new(accounts_hash_cache_path, CacheHashDeletionPolicy::AllUnused),
storage, storage,
stats, stats,
bins, bins,
@ -10843,7 +10848,7 @@ pub mod tests {
}; };
let result = accounts_db.scan_account_storage_no_bank( let result = accounts_db.scan_account_storage_no_bank(
&CacheHashData::new(accounts_hash_cache_path, None), &CacheHashData::new(accounts_hash_cache_path, CacheHashDeletionPolicy::AllUnused),
&CalcAccountsHashConfig::default(), &CalcAccountsHashConfig::default(),
&get_storage_refs(&[storage]), &get_storage_refs(&[storage]),
test_scan, test_scan,

View File

@ -193,8 +193,7 @@ impl CacheHashDataFile {
pub(crate) struct CacheHashData { pub(crate) struct CacheHashData {
cache_dir: PathBuf, cache_dir: PathBuf,
pre_existing_cache_files: Arc<Mutex<HashSet<PathBuf>>>, pre_existing_cache_files: Arc<Mutex<HashSet<PathBuf>>>,
/// Decides which old cache files to delete. See `delete_old_cache_files()` for more info. deletion_policy: DeletionPolicy,
storages_start_slot: Option<Slot>,
pub stats: Arc<CacheHashDataStats>, pub stats: Arc<CacheHashDataStats>,
} }
@ -206,7 +205,7 @@ impl Drop for CacheHashData {
} }
impl CacheHashData { impl CacheHashData {
pub(crate) fn new(cache_dir: PathBuf, storages_start_slot: Option<Slot>) -> CacheHashData { pub(crate) fn new(cache_dir: PathBuf, deletion_policy: DeletionPolicy) -> CacheHashData {
std::fs::create_dir_all(&cache_dir).unwrap_or_else(|err| { std::fs::create_dir_all(&cache_dir).unwrap_or_else(|err| {
panic!("error creating cache dir {}: {err}", cache_dir.display()) panic!("error creating cache dir {}: {err}", cache_dir.display())
}); });
@ -214,7 +213,7 @@ impl CacheHashData {
let result = CacheHashData { let result = CacheHashData {
cache_dir, cache_dir,
pre_existing_cache_files: Arc::new(Mutex::new(HashSet::default())), pre_existing_cache_files: Arc::new(Mutex::new(HashSet::default())),
storages_start_slot, deletion_policy,
stats: Arc::default(), stats: Arc::default(),
}; };
@ -229,21 +228,24 @@ impl CacheHashData {
let mut old_cache_files = let mut old_cache_files =
std::mem::take(&mut *self.pre_existing_cache_files.lock().unwrap()); std::mem::take(&mut *self.pre_existing_cache_files.lock().unwrap());
// If `storages_start_slot` is None, we're doing a full accounts hash calculation, and thus match self.deletion_policy {
// all unused cache files can be deleted. DeletionPolicy::AllUnused => {
// If `storages_start_slot` is Some, we're doing an incremental accounts hash calculation, // no additional work to do here; we will delete everything in `old_cache_files`
// and we only want to delete the unused cache files *that IAH considered*. }
if let Some(storages_start_slot) = self.storages_start_slot { DeletionPolicy::UnusedAtLeast(storages_start_slot) => {
old_cache_files.retain(|old_cache_file| { // when calculating an incremental accounts hash, we only want to delete the unused
let Some(parsed_filename) = parse_filename(old_cache_file) else { // cache files *that IAH considered*
// if parsing the cache filename fails, we *do* want to delete it old_cache_files.retain(|old_cache_file| {
return true; let Some(parsed_filename) = parse_filename(old_cache_file) else {
}; // if parsing the cache filename fails, we *do* want to delete it
return true;
};
// if the old cache file is in the incremental accounts hash calculation range, // if the old cache file is in the incremental accounts hash calculation range,
// then delete it // then delete it
parsed_filename.slot_range_start >= storages_start_slot parsed_filename.slot_range_start >= storages_start_slot
}); });
}
} }
if !old_cache_files.is_empty() { if !old_cache_files.is_empty() {
@ -410,6 +412,19 @@ fn parse_filename(cache_filename: impl AsRef<Path>) -> Option<ParsedFilename> {
}) })
} }
/// Policy that selects which pre-existing cache files are removed
///
/// The policy is applied by `delete_old_cache_files()`; see that method for more info.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DeletionPolicy {
    /// Remove *every* unused cache file
    ///
    /// Use this when calculating a full accounts hash.
    AllUnused,
    /// Remove *only* those unused cache files whose slot range starts *at or after* this slot
    ///
    /// Use this when calculating an incremental accounts hash, so that unused cache files
    /// outside the range the incremental calculation considered are left in place.
    UnusedAtLeast(Slot),
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use {super::*, crate::accounts_hash::AccountHash, rand::Rng}; use {super::*, crate::accounts_hash::AccountHash, rand::Rng};
@ -477,7 +492,8 @@ mod tests {
data_this_pass.push(this_bin_data); data_this_pass.push(this_bin_data);
} }
} }
let cache = CacheHashData::new(cache_dir.clone(), None); let cache =
CacheHashData::new(cache_dir.clone(), DeletionPolicy::AllUnused);
let file_name = PathBuf::from("test"); let file_name = PathBuf::from("test");
cache.save(&file_name, &data_this_pass).unwrap(); cache.save(&file_name, &data_this_pass).unwrap();
cache.get_cache_files(); cache.get_cache_files();