Cleans up CacheHashData (#29267)

This commit is contained in:
Brooks Prumo 2022-12-15 14:16:27 -05:00 committed by GitHub
parent 78a04ed432
commit 1e0a0e0ced
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 35 additions and 36 deletions

View File

@ -7169,7 +7169,7 @@ impl AccountsDb {
hash
);
if load_from_cache {
if let Ok(mapped_file) = cache_hash_data.load_map(&Path::new(&file_name)) {
if let Ok(mapped_file) = cache_hash_data.load_map(&file_name) {
return Some(mapped_file);
}
}
@ -7211,8 +7211,7 @@ impl AccountsDb {
assert!(!file_name.is_empty());
(!r.is_empty() && r.iter().any(|b| !b.is_empty())).then(|| {
// error if we can't write this
let file_name = Path::new(&file_name);
cache_hash_data.save(Path::new(&file_name), &r).unwrap();
cache_hash_data.save(&file_name, &r).unwrap();
cache_hash_data.load_map(&file_name).unwrap()
})
})
@ -9500,7 +9499,7 @@ pub mod tests {
let temp_dir = TempDir::new().unwrap();
let accounts_hash_cache_path = temp_dir.path();
self.scan_snapshot_stores_with_cache(
&CacheHashData::new(&accounts_hash_cache_path),
&CacheHashData::new(accounts_hash_cache_path),
storage,
stats,
bins,
@ -10389,7 +10388,7 @@ pub mod tests {
};
let result = accounts_db.scan_account_storage_no_bank(
&CacheHashData::new(&accounts_hash_cache_path),
&CacheHashData::new(accounts_hash_cache_path),
&CalcAccountsHashConfig::default(),
&get_storage_refs(&storages),
test_scan,

View File

@ -112,7 +112,7 @@ impl CacheHashDataFile {
}
}
fn new_map(file: &Path, capacity: u64) -> Result<MmapMut, std::io::Error> {
fn new_map(file: impl AsRef<Path>, capacity: u64) -> Result<MmapMut, std::io::Error> {
let mut data = OpenOptions::new()
.read(true)
.write(true)
@ -129,7 +129,7 @@ impl CacheHashDataFile {
Ok(unsafe { MmapMut::map_mut(&data).unwrap() })
}
fn load_map(file: &Path) -> Result<MmapMut, std::io::Error> {
fn load_map(file: impl AsRef<Path>) -> Result<MmapMut, std::io::Error> {
let data = OpenOptions::new()
.read(true)
.write(true)
@ -140,7 +140,7 @@ impl CacheHashDataFile {
}
}
pub type PreExistingCacheFiles = HashSet<String>;
pub type PreExistingCacheFiles = HashSet<PathBuf>;
pub struct CacheHashData {
cache_folder: PathBuf,
pre_existing_cache_files: Arc<Mutex<PreExistingCacheFiles>>,
@ -155,11 +155,11 @@ impl Drop for CacheHashData {
}
impl CacheHashData {
pub fn new<P: AsRef<Path> + std::fmt::Debug>(parent_folder: &P) -> CacheHashData {
pub fn new(parent_folder: impl AsRef<Path>) -> CacheHashData {
let cache_folder = Self::get_cache_root_path(parent_folder);
std::fs::create_dir_all(cache_folder.clone())
.unwrap_or_else(|_| panic!("error creating cache dir: {cache_folder:?}"));
std::fs::create_dir_all(&cache_folder)
.unwrap_or_else(|_| panic!("error creating cache dir: {}", cache_folder.display()));
let result = CacheHashData {
cache_folder,
@ -182,12 +182,12 @@ impl CacheHashData {
}
fn get_cache_files(&self) {
if self.cache_folder.is_dir() {
let dir = fs::read_dir(self.cache_folder.clone());
let dir = fs::read_dir(&self.cache_folder);
if let Ok(dir) = dir {
let mut pre_existing = self.pre_existing_cache_files.lock().unwrap();
for entry in dir.flatten() {
if let Some(name) = entry.path().file_name() {
pre_existing.insert(name.to_str().unwrap().to_string());
pre_existing.insert(PathBuf::from(name));
}
}
self.stats.lock().unwrap().cache_file_count += pre_existing.len();
@ -195,15 +195,15 @@ impl CacheHashData {
}
}
fn get_cache_root_path<P: AsRef<Path>>(parent_folder: &P) -> PathBuf {
fn get_cache_root_path(parent_folder: impl AsRef<Path>) -> PathBuf {
parent_folder.as_ref().join("calculate_accounts_hash_cache")
}
#[cfg(test)]
/// load from 'file_name' into 'accumulator'
pub(crate) fn load<P: AsRef<Path> + std::fmt::Debug>(
pub(crate) fn load(
&self,
file_name: &P,
file_name: impl AsRef<Path>,
accumulator: &mut SavedType,
start_bin_index: usize,
bin_calculator: &PubkeyBinCalculator24,
@ -218,9 +218,9 @@ impl CacheHashData {
}
/// map 'file_name' into memory
pub(crate) fn load_map<P: AsRef<Path> + std::fmt::Debug>(
pub(crate) fn load_map(
&self,
file_name: &P,
file_name: impl AsRef<Path>,
) -> Result<CacheHashDataFile, std::io::Error> {
let mut stats = CacheHashDataStats::default();
let result = self.map(file_name, &mut stats);
@ -229,13 +229,13 @@ impl CacheHashData {
}
/// create and return a MappedCacheFile for a cache file path
fn map<P: AsRef<Path> + std::fmt::Debug>(
fn map(
&self,
file_name: &P,
file_name: impl AsRef<Path>,
stats: &mut CacheHashDataStats,
) -> Result<CacheHashDataFile, std::io::Error> {
let path = self.cache_folder.join(file_name);
let file_len = std::fs::metadata(path.clone())?.len();
let path = self.cache_folder.join(&file_name);
let file_len = std::fs::metadata(&path)?.len();
let mut m1 = Measure::start("read_file");
let mmap = CacheHashDataFile::load_map(&path)?;
m1.stop();
@ -269,17 +269,16 @@ impl CacheHashData {
cache_file.capacity = capacity;
assert_eq!(
capacity, file_len,
"expected: {capacity}, len on disk: {file_len} {path:?}, entries: {entries}, cell_size: {cell_size}"
"expected: {capacity}, len on disk: {file_len} {}, entries: {entries}, cell_size: {cell_size}", path.display(),
);
stats.total_entries = entries;
stats.cache_file_size += capacity as usize;
let file_name_lookup = file_name.as_ref().to_str().unwrap().to_string();
self.pre_existing_cache_files
.lock()
.unwrap()
.remove(&file_name_lookup);
.remove(file_name.as_ref());
stats.loaded_from_cache += 1;
stats.entries_loaded_from_cache += entries;
@ -288,7 +287,11 @@ impl CacheHashData {
}
/// save 'data' to 'file_name'
pub fn save(&self, file_name: &Path, data: &SavedTypeSlice) -> Result<(), std::io::Error> {
pub fn save(
&self,
file_name: impl AsRef<Path>,
data: &SavedTypeSlice,
) -> Result<(), std::io::Error> {
let mut stats = CacheHashDataStats::default();
let result = self.save_internal(file_name, data, &mut stats);
self.stats.lock().unwrap().accumulate(&stats);
@ -297,16 +300,14 @@ impl CacheHashData {
fn save_internal(
&self,
file_name: &Path,
file_name: impl AsRef<Path>,
data: &SavedTypeSlice,
stats: &mut CacheHashDataStats,
) -> Result<(), std::io::Error> {
let mut m = Measure::start("save");
let cache_path = self.cache_folder.join(file_name);
let create = true;
if create {
let _ignored = remove_file(&cache_path);
}
// overwrite any existing file at this path
let _ignored = remove_file(&cache_path);
let cell_size = std::mem::size_of::<EntryType>() as u64;
let mut m1 = Measure::start("create save");
let entries = data
@ -390,9 +391,8 @@ pub mod tests {
}
}
let cache = CacheHashData::new(&tmpdir);
let file_name = "test";
let file = Path::new(file_name).to_path_buf();
cache.save(&file, &data_this_pass).unwrap();
let file_name = PathBuf::from("test");
cache.save(&file_name, &data_this_pass).unwrap();
cache.get_cache_files();
assert_eq!(
cache
@ -401,11 +401,11 @@ pub mod tests {
.unwrap()
.iter()
.collect::<Vec<_>>(),
vec![file_name]
vec![&file_name],
);
let mut accum = (0..bins_per_pass).into_iter().map(|_| vec![]).collect();
cache
.load(&file, &mut accum, start_bin_this_pass, &bin_calculator)
.load(&file_name, &mut accum, start_bin_this_pass, &bin_calculator)
.unwrap();
if flatten_data {
bin_data(