Cleans up CacheHashData (#29267)

This commit is contained in:
Brooks Prumo 2022-12-15 14:16:27 -05:00 committed by GitHub
parent 78a04ed432
commit 1e0a0e0ced
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 35 additions and 36 deletions

View File

@@ -7169,7 +7169,7 @@ impl AccountsDb {
hash hash
); );
if load_from_cache { if load_from_cache {
if let Ok(mapped_file) = cache_hash_data.load_map(&Path::new(&file_name)) { if let Ok(mapped_file) = cache_hash_data.load_map(&file_name) {
return Some(mapped_file); return Some(mapped_file);
} }
} }
@@ -7211,8 +7211,7 @@ impl AccountsDb {
assert!(!file_name.is_empty()); assert!(!file_name.is_empty());
(!r.is_empty() && r.iter().any(|b| !b.is_empty())).then(|| { (!r.is_empty() && r.iter().any(|b| !b.is_empty())).then(|| {
// error if we can't write this // error if we can't write this
let file_name = Path::new(&file_name); cache_hash_data.save(&file_name, &r).unwrap();
cache_hash_data.save(Path::new(&file_name), &r).unwrap();
cache_hash_data.load_map(&file_name).unwrap() cache_hash_data.load_map(&file_name).unwrap()
}) })
}) })
@@ -9500,7 +9499,7 @@ pub mod tests {
let temp_dir = TempDir::new().unwrap(); let temp_dir = TempDir::new().unwrap();
let accounts_hash_cache_path = temp_dir.path(); let accounts_hash_cache_path = temp_dir.path();
self.scan_snapshot_stores_with_cache( self.scan_snapshot_stores_with_cache(
&CacheHashData::new(&accounts_hash_cache_path), &CacheHashData::new(accounts_hash_cache_path),
storage, storage,
stats, stats,
bins, bins,
@@ -10389,7 +10388,7 @@ pub mod tests {
}; };
let result = accounts_db.scan_account_storage_no_bank( let result = accounts_db.scan_account_storage_no_bank(
&CacheHashData::new(&accounts_hash_cache_path), &CacheHashData::new(accounts_hash_cache_path),
&CalcAccountsHashConfig::default(), &CalcAccountsHashConfig::default(),
&get_storage_refs(&storages), &get_storage_refs(&storages),
test_scan, test_scan,

View File

@@ -112,7 +112,7 @@ impl CacheHashDataFile {
} }
} }
fn new_map(file: &Path, capacity: u64) -> Result<MmapMut, std::io::Error> { fn new_map(file: impl AsRef<Path>, capacity: u64) -> Result<MmapMut, std::io::Error> {
let mut data = OpenOptions::new() let mut data = OpenOptions::new()
.read(true) .read(true)
.write(true) .write(true)
@@ -129,7 +129,7 @@ impl CacheHashDataFile {
Ok(unsafe { MmapMut::map_mut(&data).unwrap() }) Ok(unsafe { MmapMut::map_mut(&data).unwrap() })
} }
fn load_map(file: &Path) -> Result<MmapMut, std::io::Error> { fn load_map(file: impl AsRef<Path>) -> Result<MmapMut, std::io::Error> {
let data = OpenOptions::new() let data = OpenOptions::new()
.read(true) .read(true)
.write(true) .write(true)
@@ -140,7 +140,7 @@ impl CacheHashDataFile {
} }
} }
pub type PreExistingCacheFiles = HashSet<String>; pub type PreExistingCacheFiles = HashSet<PathBuf>;
pub struct CacheHashData { pub struct CacheHashData {
cache_folder: PathBuf, cache_folder: PathBuf,
pre_existing_cache_files: Arc<Mutex<PreExistingCacheFiles>>, pre_existing_cache_files: Arc<Mutex<PreExistingCacheFiles>>,
@@ -155,11 +155,11 @@ impl Drop for CacheHashData {
} }
impl CacheHashData { impl CacheHashData {
pub fn new<P: AsRef<Path> + std::fmt::Debug>(parent_folder: &P) -> CacheHashData { pub fn new(parent_folder: impl AsRef<Path>) -> CacheHashData {
let cache_folder = Self::get_cache_root_path(parent_folder); let cache_folder = Self::get_cache_root_path(parent_folder);
std::fs::create_dir_all(cache_folder.clone()) std::fs::create_dir_all(&cache_folder)
.unwrap_or_else(|_| panic!("error creating cache dir: {cache_folder:?}")); .unwrap_or_else(|_| panic!("error creating cache dir: {}", cache_folder.display()));
let result = CacheHashData { let result = CacheHashData {
cache_folder, cache_folder,
@@ -182,12 +182,12 @@ impl CacheHashData {
} }
fn get_cache_files(&self) { fn get_cache_files(&self) {
if self.cache_folder.is_dir() { if self.cache_folder.is_dir() {
let dir = fs::read_dir(self.cache_folder.clone()); let dir = fs::read_dir(&self.cache_folder);
if let Ok(dir) = dir { if let Ok(dir) = dir {
let mut pre_existing = self.pre_existing_cache_files.lock().unwrap(); let mut pre_existing = self.pre_existing_cache_files.lock().unwrap();
for entry in dir.flatten() { for entry in dir.flatten() {
if let Some(name) = entry.path().file_name() { if let Some(name) = entry.path().file_name() {
pre_existing.insert(name.to_str().unwrap().to_string()); pre_existing.insert(PathBuf::from(name));
} }
} }
self.stats.lock().unwrap().cache_file_count += pre_existing.len(); self.stats.lock().unwrap().cache_file_count += pre_existing.len();
@@ -195,15 +195,15 @@ impl CacheHashData {
} }
} }
fn get_cache_root_path<P: AsRef<Path>>(parent_folder: &P) -> PathBuf { fn get_cache_root_path(parent_folder: impl AsRef<Path>) -> PathBuf {
parent_folder.as_ref().join("calculate_accounts_hash_cache") parent_folder.as_ref().join("calculate_accounts_hash_cache")
} }
#[cfg(test)] #[cfg(test)]
/// load from 'file_name' into 'accumulator' /// load from 'file_name' into 'accumulator'
pub(crate) fn load<P: AsRef<Path> + std::fmt::Debug>( pub(crate) fn load(
&self, &self,
file_name: &P, file_name: impl AsRef<Path>,
accumulator: &mut SavedType, accumulator: &mut SavedType,
start_bin_index: usize, start_bin_index: usize,
bin_calculator: &PubkeyBinCalculator24, bin_calculator: &PubkeyBinCalculator24,
@@ -218,9 +218,9 @@ impl CacheHashData {
} }
/// map 'file_name' into memory /// map 'file_name' into memory
pub(crate) fn load_map<P: AsRef<Path> + std::fmt::Debug>( pub(crate) fn load_map(
&self, &self,
file_name: &P, file_name: impl AsRef<Path>,
) -> Result<CacheHashDataFile, std::io::Error> { ) -> Result<CacheHashDataFile, std::io::Error> {
let mut stats = CacheHashDataStats::default(); let mut stats = CacheHashDataStats::default();
let result = self.map(file_name, &mut stats); let result = self.map(file_name, &mut stats);
@@ -229,13 +229,13 @@ impl CacheHashData {
} }
/// create and return a MappedCacheFile for a cache file path /// create and return a MappedCacheFile for a cache file path
fn map<P: AsRef<Path> + std::fmt::Debug>( fn map(
&self, &self,
file_name: &P, file_name: impl AsRef<Path>,
stats: &mut CacheHashDataStats, stats: &mut CacheHashDataStats,
) -> Result<CacheHashDataFile, std::io::Error> { ) -> Result<CacheHashDataFile, std::io::Error> {
let path = self.cache_folder.join(file_name); let path = self.cache_folder.join(&file_name);
let file_len = std::fs::metadata(path.clone())?.len(); let file_len = std::fs::metadata(&path)?.len();
let mut m1 = Measure::start("read_file"); let mut m1 = Measure::start("read_file");
let mmap = CacheHashDataFile::load_map(&path)?; let mmap = CacheHashDataFile::load_map(&path)?;
m1.stop(); m1.stop();
@@ -269,17 +269,16 @@ impl CacheHashData {
cache_file.capacity = capacity; cache_file.capacity = capacity;
assert_eq!( assert_eq!(
capacity, file_len, capacity, file_len,
"expected: {capacity}, len on disk: {file_len} {path:?}, entries: {entries}, cell_size: {cell_size}" "expected: {capacity}, len on disk: {file_len} {}, entries: {entries}, cell_size: {cell_size}", path.display(),
); );
stats.total_entries = entries; stats.total_entries = entries;
stats.cache_file_size += capacity as usize; stats.cache_file_size += capacity as usize;
let file_name_lookup = file_name.as_ref().to_str().unwrap().to_string();
self.pre_existing_cache_files self.pre_existing_cache_files
.lock() .lock()
.unwrap() .unwrap()
.remove(&file_name_lookup); .remove(file_name.as_ref());
stats.loaded_from_cache += 1; stats.loaded_from_cache += 1;
stats.entries_loaded_from_cache += entries; stats.entries_loaded_from_cache += entries;
@@ -288,7 +287,11 @@ impl CacheHashData {
} }
/// save 'data' to 'file_name' /// save 'data' to 'file_name'
pub fn save(&self, file_name: &Path, data: &SavedTypeSlice) -> Result<(), std::io::Error> { pub fn save(
&self,
file_name: impl AsRef<Path>,
data: &SavedTypeSlice,
) -> Result<(), std::io::Error> {
let mut stats = CacheHashDataStats::default(); let mut stats = CacheHashDataStats::default();
let result = self.save_internal(file_name, data, &mut stats); let result = self.save_internal(file_name, data, &mut stats);
self.stats.lock().unwrap().accumulate(&stats); self.stats.lock().unwrap().accumulate(&stats);
@@ -297,16 +300,14 @@ impl CacheHashData {
fn save_internal( fn save_internal(
&self, &self,
file_name: &Path, file_name: impl AsRef<Path>,
data: &SavedTypeSlice, data: &SavedTypeSlice,
stats: &mut CacheHashDataStats, stats: &mut CacheHashDataStats,
) -> Result<(), std::io::Error> { ) -> Result<(), std::io::Error> {
let mut m = Measure::start("save"); let mut m = Measure::start("save");
let cache_path = self.cache_folder.join(file_name); let cache_path = self.cache_folder.join(file_name);
let create = true; // overwrite any existing file at this path
if create { let _ignored = remove_file(&cache_path);
let _ignored = remove_file(&cache_path);
}
let cell_size = std::mem::size_of::<EntryType>() as u64; let cell_size = std::mem::size_of::<EntryType>() as u64;
let mut m1 = Measure::start("create save"); let mut m1 = Measure::start("create save");
let entries = data let entries = data
@@ -390,9 +391,8 @@ pub mod tests {
} }
} }
let cache = CacheHashData::new(&tmpdir); let cache = CacheHashData::new(&tmpdir);
let file_name = "test"; let file_name = PathBuf::from("test");
let file = Path::new(file_name).to_path_buf(); cache.save(&file_name, &data_this_pass).unwrap();
cache.save(&file, &data_this_pass).unwrap();
cache.get_cache_files(); cache.get_cache_files();
assert_eq!( assert_eq!(
cache cache
@@ -401,11 +401,11 @@ pub mod tests {
.unwrap() .unwrap()
.iter() .iter()
.collect::<Vec<_>>(), .collect::<Vec<_>>(),
vec![file_name] vec![&file_name],
); );
let mut accum = (0..bins_per_pass).into_iter().map(|_| vec![]).collect(); let mut accum = (0..bins_per_pass).into_iter().map(|_| vec![]).collect();
cache cache
.load(&file, &mut accum, start_bin_this_pass, &bin_calculator) .load(&file_name, &mut accum, start_bin_this_pass, &bin_calculator)
.unwrap(); .unwrap();
if flatten_data { if flatten_data {
bin_data( bin_data(