in hash calc, delete old cache files that will not be used, earlier (#33432)

* in hash calc, delete old cache files that will not be used, earlier

* only delete if supposed to

* fmt
This commit is contained in:
Jeff Washington (jwash) 2023-10-09 11:47:39 -07:00 committed by GitHub
parent c924719040
commit 052677595c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 124 additions and 89 deletions

View File

@ -181,6 +181,13 @@ impl<'a> StoreTo<'a> {
}
}
enum ScanAccountStorageResult {
/// this data has already been scanned and cached
CacheFileAlreadyExists(CacheHashDataFileReference),
/// this data needs to be scanned and cached
CacheFileNeedsToBeCreated((String, Range<Slot>)),
}
#[derive(Default, Debug)]
/// hold alive accounts
/// alive means in the accounts index
@ -7222,14 +7229,12 @@ impl AccountsDb {
.saturating_sub(slots_per_epoch);
stats.scan_chunks = splitter.chunk_count;
(0..splitter.chunk_count)
.into_par_iter()
.map(|chunk| {
let mut scanner = scanner.clone();
let cache_files = (0..splitter.chunk_count)
.into_par_iter()
.filter_map(|chunk| {
let range_this_chunk = splitter.get_slot_range(chunk)?;
let file_name = {
let mut load_from_cache = true;
let mut hasher = hash_map::DefaultHasher::new();
bin_range.start.hash(&mut hasher);
@ -7237,7 +7242,9 @@ impl AccountsDb {
let is_first_scan_pass = bin_range.start == 0;
// calculate hash representing all storages in this chunk
let mut empty = true;
for (slot, storage) in snapshot_storages.iter_range(&range_this_chunk) {
empty = false;
if is_first_scan_pass && slot < one_epoch_old {
self.update_old_slot_stats(stats, storage);
}
@ -7246,6 +7253,9 @@ impl AccountsDb {
break;
}
}
if empty {
return None;
}
// we have a hash value for the storages in this chunk
// so, build a file name:
let hash = hasher.finish();
@ -7261,14 +7271,33 @@ impl AccountsDb {
if let Ok(mapped_file) =
cache_hash_data.get_file_reference_to_map_later(&file_name)
{
return Some(mapped_file);
return Some(ScanAccountStorageResult::CacheFileAlreadyExists(
mapped_file,
));
}
}
// fall through and load normally - we failed to load from a cache file
file_name
};
// fall through and load normally - we failed to load from a cache file but there are storages present
Some(ScanAccountStorageResult::CacheFileNeedsToBeCreated((
file_name,
range_this_chunk,
)))
})
.collect::<Vec<_>>();
// deletes the old files that will not be used before creating new ones
cache_hash_data.delete_old_cache_files();
cache_files
.into_par_iter()
.map(|chunk| {
match chunk {
ScanAccountStorageResult::CacheFileAlreadyExists(file) => Some(file),
ScanAccountStorageResult::CacheFileNeedsToBeCreated((
file_name,
range_this_chunk,
)) => {
let mut scanner = scanner.clone();
let mut init_accum = true;
// load from cache failed, so create the cache file for this chunk
for (slot, storage) in snapshot_storages.iter_range(&range_this_chunk) {
@ -7306,6 +7335,8 @@ impl AccountsDb {
})
})
.flatten()
}
}
})
.filter_map(|x| x)
.collect()

View File

@ -198,9 +198,7 @@ pub(crate) struct CacheHashData {
impl Drop for CacheHashData {
fn drop(&mut self) {
if self.should_delete_old_cache_files_on_drop {
self.delete_old_cache_files();
}
self.stats.report();
}
}
@ -224,8 +222,12 @@ impl CacheHashData {
result.get_cache_files();
result
}
fn delete_old_cache_files(&self) {
let old_cache_files = std::mem::take(&mut *self.pre_existing_cache_files.lock().unwrap());
/// delete all pre-existing files that will not be used
pub(crate) fn delete_old_cache_files(&self) {
if self.should_delete_old_cache_files_on_drop {
let old_cache_files =
std::mem::take(&mut *self.pre_existing_cache_files.lock().unwrap());
if !old_cache_files.is_empty() {
self.stats
.unused_cache_files
@ -236,6 +238,8 @@ impl CacheHashData {
}
}
}
}
fn get_cache_files(&self) {
if self.cache_dir.is_dir() {
let dir = fs::read_dir(&self.cache_dir);