Refactors accounts hash cache (#29625)

This commit is contained in:
Brooks 2023-01-12 10:43:50 -05:00 committed by GitHub
parent 3412928cad
commit d0aa93de21
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 42 additions and 32 deletions

View File

@ -46,7 +46,9 @@ use {
AbsRequestHandlers, AbsRequestSender, AccountsBackgroundService, AbsRequestHandlers, AbsRequestSender, AccountsBackgroundService,
PrunedBanksRequestHandler, SnapshotRequestHandler, PrunedBanksRequestHandler, SnapshotRequestHandler,
}, },
accounts_db::{AccountsDbConfig, CalcAccountsHashDataSource, FillerAccountsConfig}, accounts_db::{
AccountsDb, AccountsDbConfig, CalcAccountsHashDataSource, FillerAccountsConfig,
},
accounts_index::{AccountsIndexConfig, IndexLimitMb, ScanConfig}, accounts_index::{AccountsIndexConfig, IndexLimitMb, ScanConfig},
accounts_update_notifier_interface::AccountsUpdateNotifier, accounts_update_notifier_interface::AccountsUpdateNotifier,
bank::{Bank, RewardCalculationEvent, TotalAccountsStats}, bank::{Bank, RewardCalculationEvent, TotalAccountsStats},
@ -2719,7 +2721,9 @@ fn main() {
let accounts_db_config = Some(AccountsDbConfig { let accounts_db_config = Some(AccountsDbConfig {
index: Some(accounts_index_config), index: Some(accounts_index_config),
accounts_hash_cache_path: Some(ledger_path.clone()), accounts_hash_cache_path: Some(
ledger_path.join(AccountsDb::ACCOUNTS_HASH_CACHE_DIR),
),
filler_accounts_config, filler_accounts_config,
ancient_append_vec_offset: value_t!( ancient_append_vec_offset: value_t!(
matches, matches,

View File

@ -2205,6 +2205,8 @@ impl<'a> AppendVecScan for ScanState<'a> {
} }
impl AccountsDb { impl AccountsDb {
pub const ACCOUNTS_HASH_CACHE_DIR: &str = "calculate_accounts_hash_cache";
pub fn default_for_tests() -> Self { pub fn default_for_tests() -> Self {
Self::default_with_accounts_index(AccountInfoAccountsIndex::default_for_tests(), None) Self::default_with_accounts_index(AccountInfoAccountsIndex::default_for_tests(), None)
} }
@ -7256,22 +7258,30 @@ impl AccountsDb {
); );
} }
/// normal code path returns the common cache path
/// when called after a failure has been detected, redirect the cache storage to a separate folder for debugging later
fn get_cache_hash_data( fn get_cache_hash_data(
&self, &self,
config: &CalcAccountsHashConfig<'_>, config: &CalcAccountsHashConfig<'_>,
slot: Slot, slot: Slot,
) -> CacheHashData {
Self::_get_cache_hash_data(self.accounts_hash_cache_path.clone(), config, slot)
}
/// normal code path returns the common cache path
/// when called after a failure has been detected, redirect the cache storage to a separate folder for debugging later
fn _get_cache_hash_data(
accounts_hash_cache_path: PathBuf,
config: &CalcAccountsHashConfig<'_>,
slot: Slot,
) -> CacheHashData { ) -> CacheHashData {
if !config.store_detailed_debug_info_on_failure { if !config.store_detailed_debug_info_on_failure {
CacheHashData::new(&self.accounts_hash_cache_path) CacheHashData::new(accounts_hash_cache_path)
} else { } else {
// this path executes when we are failing with a hash mismatch // this path executes when we are failing with a hash mismatch
let mut new = self.accounts_hash_cache_path.clone(); let failed_dir = accounts_hash_cache_path
new.push("failed_calculate_accounts_hash_cache"); .join("failed_calculate_accounts_hash_cache")
new.push(slot.to_string()); .join(slot.to_string());
let _ = std::fs::remove_dir_all(&new); let _ = std::fs::remove_dir_all(&failed_dir);
CacheHashData::new(&new) CacheHashData::new(failed_dir)
} }
} }
@ -9226,7 +9236,7 @@ pub mod tests {
check_hash: bool, check_hash: bool,
) -> Result<Vec<CacheHashDataFile>, BankHashVerificationError> { ) -> Result<Vec<CacheHashDataFile>, BankHashVerificationError> {
let temp_dir = TempDir::new().unwrap(); let temp_dir = TempDir::new().unwrap();
let accounts_hash_cache_path = temp_dir.path(); let accounts_hash_cache_path = temp_dir.path().to_path_buf();
self.scan_snapshot_stores_with_cache( self.scan_snapshot_stores_with_cache(
&CacheHashData::new(accounts_hash_cache_path), &CacheHashData::new(accounts_hash_cache_path),
storage, storage,
@ -10085,7 +10095,7 @@ pub mod tests {
let calls = Arc::new(AtomicU64::new(0)); let calls = Arc::new(AtomicU64::new(0));
let temp_dir = TempDir::new().unwrap(); let temp_dir = TempDir::new().unwrap();
let accounts_hash_cache_path = temp_dir.path(); let accounts_hash_cache_path = temp_dir.path().to_path_buf();
let accounts_db = AccountsDb::new_single_for_tests(); let accounts_db = AccountsDb::new_single_for_tests();
let test_scan = TestScan { let test_scan = TestScan {

View File

@ -142,7 +142,7 @@ impl CacheHashDataFile {
pub type PreExistingCacheFiles = HashSet<PathBuf>; pub type PreExistingCacheFiles = HashSet<PathBuf>;
pub struct CacheHashData { pub struct CacheHashData {
cache_folder: PathBuf, cache_dir: PathBuf,
pre_existing_cache_files: Arc<Mutex<PreExistingCacheFiles>>, pre_existing_cache_files: Arc<Mutex<PreExistingCacheFiles>>,
pub stats: Arc<Mutex<CacheHashDataStats>>, pub stats: Arc<Mutex<CacheHashDataStats>>,
} }
@ -155,14 +155,13 @@ impl Drop for CacheHashData {
} }
impl CacheHashData { impl CacheHashData {
pub fn new(parent_folder: impl AsRef<Path>) -> CacheHashData { pub fn new(cache_dir: PathBuf) -> CacheHashData {
let cache_folder = Self::get_cache_root_path(parent_folder); std::fs::create_dir_all(&cache_dir).unwrap_or_else(|err| {
panic!("error creating cache dir {}: {err}", cache_dir.display())
std::fs::create_dir_all(&cache_folder) });
.unwrap_or_else(|_| panic!("error creating cache dir: {}", cache_folder.display()));
let result = CacheHashData { let result = CacheHashData {
cache_folder, cache_dir,
pre_existing_cache_files: Arc::new(Mutex::new(PreExistingCacheFiles::default())), pre_existing_cache_files: Arc::new(Mutex::new(PreExistingCacheFiles::default())),
stats: Arc::new(Mutex::new(CacheHashDataStats::default())), stats: Arc::new(Mutex::new(CacheHashDataStats::default())),
}; };
@ -175,14 +174,14 @@ impl CacheHashData {
if !pre_existing_cache_files.is_empty() { if !pre_existing_cache_files.is_empty() {
self.stats.lock().unwrap().unused_cache_files += pre_existing_cache_files.len(); self.stats.lock().unwrap().unused_cache_files += pre_existing_cache_files.len();
for file_name in pre_existing_cache_files.iter() { for file_name in pre_existing_cache_files.iter() {
let result = self.cache_folder.join(file_name); let result = self.cache_dir.join(file_name);
let _ = fs::remove_file(result); let _ = fs::remove_file(result);
} }
} }
} }
fn get_cache_files(&self) { fn get_cache_files(&self) {
if self.cache_folder.is_dir() { if self.cache_dir.is_dir() {
let dir = fs::read_dir(&self.cache_folder); let dir = fs::read_dir(&self.cache_dir);
if let Ok(dir) = dir { if let Ok(dir) = dir {
let mut pre_existing = self.pre_existing_cache_files.lock().unwrap(); let mut pre_existing = self.pre_existing_cache_files.lock().unwrap();
for entry in dir.flatten() { for entry in dir.flatten() {
@ -195,10 +194,6 @@ impl CacheHashData {
} }
} }
fn get_cache_root_path(parent_folder: impl AsRef<Path>) -> PathBuf {
parent_folder.as_ref().join("calculate_accounts_hash_cache")
}
#[cfg(test)] #[cfg(test)]
/// load from 'file_name' into 'accumulator' /// load from 'file_name' into 'accumulator'
pub(crate) fn load( pub(crate) fn load(
@ -234,7 +229,7 @@ impl CacheHashData {
file_name: impl AsRef<Path>, file_name: impl AsRef<Path>,
stats: &mut CacheHashDataStats, stats: &mut CacheHashDataStats,
) -> Result<CacheHashDataFile, std::io::Error> { ) -> Result<CacheHashDataFile, std::io::Error> {
let path = self.cache_folder.join(&file_name); let path = self.cache_dir.join(&file_name);
let file_len = std::fs::metadata(&path)?.len(); let file_len = std::fs::metadata(&path)?.len();
let mut m1 = Measure::start("read_file"); let mut m1 = Measure::start("read_file");
let mmap = CacheHashDataFile::load_map(&path)?; let mmap = CacheHashDataFile::load_map(&path)?;
@ -305,7 +300,7 @@ impl CacheHashData {
stats: &mut CacheHashDataStats, stats: &mut CacheHashDataStats,
) -> Result<(), std::io::Error> { ) -> Result<(), std::io::Error> {
let mut m = Measure::start("save"); let mut m = Measure::start("save");
let cache_path = self.cache_folder.join(file_name); let cache_path = self.cache_dir.join(file_name);
// overwrite any existing file at this path // overwrite any existing file at this path
let _ignored = remove_file(&cache_path); let _ignored = remove_file(&cache_path);
let cell_size = std::mem::size_of::<EntryType>() as u64; let cell_size = std::mem::size_of::<EntryType>() as u64;
@ -363,7 +358,8 @@ pub mod tests {
// compare // compare
use tempfile::TempDir; use tempfile::TempDir;
let tmpdir = TempDir::new().unwrap(); let tmpdir = TempDir::new().unwrap();
std::fs::create_dir_all(&tmpdir).unwrap(); let cache_dir = tmpdir.path().to_path_buf();
std::fs::create_dir_all(&cache_dir).unwrap();
for bins in [1, 2, 4] { for bins in [1, 2, 4] {
let bin_calculator = PubkeyBinCalculator24::new(bins); let bin_calculator = PubkeyBinCalculator24::new(bins);
@ -390,7 +386,7 @@ pub mod tests {
data_this_pass.push(this_bin_data); data_this_pass.push(this_bin_data);
} }
} }
let cache = CacheHashData::new(&tmpdir); let cache = CacheHashData::new(cache_dir.clone());
let file_name = PathBuf::from("test"); let file_name = PathBuf::from("test");
cache.save(&file_name, &data_this_pass).unwrap(); cache.save(&file_name, &data_this_pass).unwrap();
cache.get_cache_files(); cache.get_cache_files();

View File

@ -27,7 +27,7 @@ use {
solana_rpc_client::rpc_client::RpcClient, solana_rpc_client::rpc_client::RpcClient,
solana_rpc_client_api::config::RpcLeaderScheduleConfig, solana_rpc_client_api::config::RpcLeaderScheduleConfig,
solana_runtime::{ solana_runtime::{
accounts_db::{AccountShrinkThreshold, AccountsDbConfig, FillerAccountsConfig}, accounts_db::{AccountShrinkThreshold, AccountsDb, AccountsDbConfig, FillerAccountsConfig},
accounts_index::{ accounts_index::{
AccountIndex, AccountSecondaryIndexes, AccountSecondaryIndexesIncludeExclude, AccountIndex, AccountSecondaryIndexes, AccountSecondaryIndexesIncludeExclude,
AccountsIndexConfig, IndexLimitMb, AccountsIndexConfig, IndexLimitMb,
@ -1021,7 +1021,7 @@ pub fn main() {
let accounts_db_config = AccountsDbConfig { let accounts_db_config = AccountsDbConfig {
index: Some(accounts_index_config), index: Some(accounts_index_config),
accounts_hash_cache_path: Some(ledger_path.clone()), accounts_hash_cache_path: Some(ledger_path.join(AccountsDb::ACCOUNTS_HASH_CACHE_DIR)),
filler_accounts_config, filler_accounts_config,
write_cache_limit_bytes: value_t!(matches, "accounts_db_cache_limit_mb", u64) write_cache_limit_bytes: value_t!(matches, "accounts_db_cache_limit_mb", u64)
.ok() .ok()