AcctIdx: resize in-mem after startup for disk index (#21676)

Jeff Washington (jwash) 2021-12-08 16:52:22 -06:00 committed by GitHub
parent 824994db69
commit 181c0092d6
4 changed files with 46 additions and 1 deletion
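
For context, the change plumbs an is_disk_index_enabled() query from the accounts index down to the bucket-map holder and uses it to shrink each bin's in-mem hashmap once startup ends (and again whenever a flush empties a bin). Below is a minimal sketch of that pattern; the Bin struct and its field names are hypothetical stand-ins, not Solana's actual types.

use std::collections::HashMap;

// Hypothetical stand-in for one bin of a disk-backed index: an in-mem
// HashMap cache in front of an (elided) disk store. Names are illustrative.
struct Bin {
    in_mem: HashMap<u64, u64>,
    disk_enabled: bool,
}

impl Bin {
    // Mirrors the idea in this commit: only shrink when disk buckets are
    // enabled; without disk the map stays large anyway, and shrinking a huge
    // map is expensive (it re-allocates and re-hashes every entry).
    fn shrink_to_fit(&mut self) {
        if self.disk_enabled {
            self.in_mem.shrink_to_fit();
        }
    }
}

fn main() {
    let mut bin = Bin {
        in_mem: HashMap::with_capacity(1_000_000), // sized for heavy startup load
        disk_enabled: true,
    };
    bin.in_mem.insert(1, 1);

    // startup is over: give the excess capacity back to the allocator
    bin.shrink_to_fit();
    assert!(bin.in_mem.capacity() < 1_000_000);
    println!("capacity after shrink: {}", bin.in_mem.capacity());
}

The guard on disk_enabled matters because without disk buckets the in-mem map is the only copy of the index, so it never gets small and shrink_to_fit would just spend time re-hashing.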


@@ -905,6 +905,11 @@ impl<T: IndexValue> AccountsIndex<T> {
        AccountsIndexIterator::new(self, range, collect_all_unsorted)
    }

    /// is the accounts index using disk as a backing store
    pub fn is_disk_index_enabled(&self) -> bool {
        self.storage.storage.is_disk_index_enabled()
    }

    fn do_checked_scan_accounts<F, R>(
        &self,
        metric_name: &'static str,


@@ -102,11 +102,18 @@ impl<T: IndexValue> AccountsIndexStorage<T> {
        }
        self.storage.set_startup(value);
        if !value {
            // transitioning from startup to !startup (i.e. steady state)
            // shut down the bg threads
            *self.startup_worker_threads.lock().unwrap() = None;
            // maybe shrink hashmaps
            self.shrink_to_fit();
        }
    }

    fn shrink_to_fit(&self) {
        self.in_mem.iter().for_each(|mem| mem.shrink_to_fit())
    }

    fn num_threads() -> usize {
        std::cmp::max(2, num_cpus::get() / 4)
    }


@@ -57,6 +57,11 @@ impl<T: IndexValue> Debug for BucketMapHolder<T> {
#[allow(clippy::mutex_atomic)]
impl<T: IndexValue> BucketMapHolder<T> {
    /// is the accounts index using disk as a backing store
    pub fn is_disk_index_enabled(&self) -> bool {
        self.disk.is_some()
    }

    pub fn increment_age(&self) {
        // since we are about to change age, there are now 0 buckets that have been flushed at this age
        // this should happen before the age.fetch_add

@@ -352,6 +357,7 @@ pub mod tests {
        solana_logger::setup();
        let bins = 100;
        let test = BucketMapHolder::<u64>::new(bins, &Some(AccountsIndexConfig::default()), 1);
        assert!(!test.is_disk_index_enabled());
        let bins = test.bins as u64;
        let interval_ms = test.age_interval_ms();
        // 90% of time elapsed, all but 1 bin flushed, should not wait since we'll end up right on time

@@ -376,6 +382,17 @@ pub mod tests {
        assert_eq!(result, None);
    }

    #[test]
    fn test_disk_index_enabled() {
        let bins = 1;
        let config = AccountsIndexConfig {
            index_limit_mb: Some(0),
            ..AccountsIndexConfig::default()
        };
        let test = BucketMapHolder::<u64>::new(bins, &Some(config), 1);
        assert!(test.is_disk_index_enabled());
    }

    #[test]
    fn test_age_time() {
        solana_logger::setup();


@@ -28,13 +28,15 @@ type K = Pubkey;
type CacheRangesHeld = RwLock<Vec<Option<RangeInclusive<Pubkey>>>>;
pub type SlotT<T> = (Slot, T);
type InMemMap<T> = HashMap<Pubkey, AccountMapEntry<T>>;

#[allow(dead_code)] // temporary during staging
// one instance of this represents one bin of the accounts index.
pub struct InMemAccountsIndex<T: IndexValue> {
    last_age_flushed: AtomicU8,

    // backing store
-   map_internal: RwLock<HashMap<Pubkey, AccountMapEntry<T>>>,
+   map_internal: RwLock<InMemMap<T>>,
    storage: Arc<BucketMapHolder<T>>,
    bin: usize,
@@ -104,6 +106,16 @@ impl<T: IndexValue> InMemAccountsIndex<T> {
        &self.map_internal
    }

    /// Shrink the in-mem hashmap to fit its current contents, freeing the excess memory it holds.
    /// The idea is that during startup we need a larger map than we do during normal runtime.
    /// When using disk buckets, the in-mem index grows over time with dynamic use and then shrinks, in theory back to 0.
    pub fn shrink_to_fit(&self) {
        // shrink_to_fit can be quite expensive on large maps, which running without disk buckets can produce, so skip shrinking in that case
        if self.storage.is_disk_index_enabled() {
            self.map_internal.write().unwrap().shrink_to_fit();
        }
    }

    pub fn items<R>(&self, range: &Option<&R>) -> Vec<(K, AccountMapEntry<T>)>
    where
        R: RangeBounds<Pubkey> + std::fmt::Debug,
@@ -898,6 +910,10 @@ impl<T: IndexValue> InMemAccountsIndex<T> {
                occupied.remove();
            }
        }
        if map.is_empty() {
            map.shrink_to_fit();
        }
        drop(map);
        self.stats()
            .insert_or_delete_mem_count(false, self.bin, removed);
        Self::update_stat(&self.stats().flush_entries_removed_from_mem, removed as u64);
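
The flush path above only shrinks once the bin's map is empty: removing entries keeps a HashMap's allocation, while shrink_to_fit on an empty map releases it entirely, so an idle bin ends up holding essentially no memory. A small self-contained illustration of that behavior, using a plain std HashMap rather than the accounts-index types:

use std::collections::HashMap;

// Illustrates the `if map.is_empty() { map.shrink_to_fit(); }` step above.
fn main() {
    let mut map: HashMap<u64, u64> = HashMap::new();
    for k in 0..100_000u64 {
        map.insert(k, k);
    }
    let grown = map.capacity();

    map.clear(); // stand-in for the flush removing every entry
    assert_eq!(map.capacity(), grown); // the allocation is still held
    map.shrink_to_fit();
    assert_eq!(map.capacity(), 0); // the allocation is released

    println!("capacity grew to {grown}, now {}", map.capacity());
}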