Shrink mmap leakage (#19373)

* Put stores that are eligible for a future shrink back into the candidate list.

* Fixed unit tests for select_shrink_candidates

* Added metrics on stores re-added for the next round

* Enhance the metrics on accountsdb to report the total bytes, alive bytes, and alive ratio

* Enhance select_shrink_candidates metrics
This commit is contained in:
Lijun Wang 2021-08-31 00:55:16 -07:00 committed by GitHub
parent c21bf29ce7
commit 09458cc802
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 292 additions and 88 deletions

View File

@ -2483,12 +2483,13 @@ impl AccountsDb {
/// Given the input `ShrinkCandidates`, this function sorts the stores by their alive ratio /// Given the input `ShrinkCandidates`, this function sorts the stores by their alive ratio
/// in increasing order with the most sparse entries in the front. It will then simulate the /// in increasing order with the most sparse entries in the front. It will then simulate the
/// shrinking by working on the most sparse entries first and if the overall alive ratio is /// shrinking by working on the most sparse entries first and if the overall alive ratio is
/// achieved, it will stop and return the filtered-down candidates. /// achieved, it will stop and return the filtered-down candidates and the candidates which
/// are skipped in this round and might be eligible for the future shrink.
fn select_candidates_by_total_usage( fn select_candidates_by_total_usage(
&self, &self,
shrink_slots: &ShrinkCandidates, shrink_slots: &ShrinkCandidates,
shrink_ratio: f64, shrink_ratio: f64,
) -> ShrinkCandidates { ) -> (ShrinkCandidates, ShrinkCandidates) {
struct StoreUsageInfo { struct StoreUsageInfo {
slot: Slot, slot: Slot,
alive_ratio: f64, alive_ratio: f64,
@ -2499,6 +2500,7 @@ impl AccountsDb {
let mut total_alive_bytes: u64 = 0; let mut total_alive_bytes: u64 = 0;
let mut candidates_count: usize = 0; let mut candidates_count: usize = 0;
let mut total_bytes: u64 = 0; let mut total_bytes: u64 = 0;
let mut total_candidate_stores: usize = 0;
for (slot, slot_shrink_candidates) in shrink_slots { for (slot, slot_shrink_candidates) in shrink_slots {
candidates_count += slot_shrink_candidates.len(); candidates_count += slot_shrink_candidates.len();
for store in slot_shrink_candidates.values() { for store in slot_shrink_candidates.values() {
@ -2511,6 +2513,7 @@ impl AccountsDb {
alive_ratio, alive_ratio,
store: store.clone(), store: store.clone(),
}); });
total_candidate_stores += 1;
} }
} }
store_usage.sort_by(|a, b| { store_usage.sort_by(|a, b| {
@ -2522,8 +2525,12 @@ impl AccountsDb {
// Working from the beginning of store_usage which are the most sparse and see when we can stop // Working from the beginning of store_usage which are the most sparse and see when we can stop
// shrinking while still achieving the overall goals. // shrinking while still achieving the overall goals.
let mut shrink_slots: ShrinkCandidates = HashMap::new(); let mut shrink_slots: ShrinkCandidates = HashMap::new();
let mut shrink_slots_next_batch: ShrinkCandidates = HashMap::new();
for usage in &store_usage { for usage in &store_usage {
let store = &usage.store;
let alive_ratio = (total_alive_bytes as f64) / (total_bytes as f64); let alive_ratio = (total_alive_bytes as f64) / (total_bytes as f64);
debug!("alive_ratio: {:?} store_id: {:?}, store_ratio: {:?} requirment: {:?}, total_bytes: {:?} total_alive_bytes: {:?}",
alive_ratio, usage.store.append_vec_id(), usage.alive_ratio, shrink_ratio, total_bytes, total_alive_bytes);
if alive_ratio > shrink_ratio { if alive_ratio > shrink_ratio {
// we have reached our goal, stop // we have reached our goal, stop
debug!( debug!(
@ -2531,35 +2538,52 @@ impl AccountsDb {
total_bytes: {:?}, alive_ratio: {:}, shrink_ratio: {:?}", total_bytes: {:?}, alive_ratio: {:}, shrink_ratio: {:?}",
usage.slot, total_alive_bytes, total_bytes, alive_ratio, shrink_ratio usage.slot, total_alive_bytes, total_bytes, alive_ratio, shrink_ratio
); );
break; if usage.alive_ratio < shrink_ratio {
shrink_slots_next_batch
.entry(usage.slot)
.or_default()
.insert(store.append_vec_id(), store.clone());
} else {
break;
}
} else {
let current_store_size = store.total_bytes();
let after_shrink_size = Self::page_align(store.alive_bytes() as u64);
let bytes_saved = current_store_size.saturating_sub(after_shrink_size);
total_bytes -= bytes_saved;
shrink_slots
.entry(usage.slot)
.or_default()
.insert(store.append_vec_id(), store.clone());
} }
let store = &usage.store;
let current_store_size = store.total_bytes();
let after_shrink_size = Self::page_align(store.alive_bytes() as u64);
let bytes_saved = current_store_size.saturating_sub(after_shrink_size);
total_bytes -= bytes_saved;
shrink_slots
.entry(usage.slot)
.or_default()
.insert(store.append_vec_id(), store.clone());
} }
measure.stop(); measure.stop();
inc_new_counter_info!( inc_new_counter_info!(
"select_top_sparse_storage_entries-ms", "shrink_select_top_sparse_storage_entries-ms",
measure.as_ms() as usize measure.as_ms() as usize
); );
inc_new_counter_info!("select_top_sparse_storage_entries-seeds", candidates_count); inc_new_counter_info!(
shrink_slots "shrink_select_top_sparse_storage_entries-seeds",
candidates_count
);
inc_new_counter_info!(
"shrink_total_preliminary_candidate_stores",
total_candidate_stores
);
(shrink_slots, shrink_slots_next_batch)
} }
pub fn shrink_candidate_slots(&self) -> usize { pub fn shrink_candidate_slots(&self) -> usize {
let shrink_candidates_slots = let shrink_candidates_slots =
std::mem::take(&mut *self.shrink_candidate_slots.lock().unwrap()); std::mem::take(&mut *self.shrink_candidate_slots.lock().unwrap());
let shrink_slots = { let (shrink_slots, shrink_slots_next_batch) = {
if let AccountShrinkThreshold::TotalSpace { shrink_ratio } = self.shrink_ratio { if let AccountShrinkThreshold::TotalSpace { shrink_ratio } = self.shrink_ratio {
self.select_candidates_by_total_usage(&shrink_candidates_slots, shrink_ratio) let (shrink_slots, shrink_slots_next_batch) =
self.select_candidates_by_total_usage(&shrink_candidates_slots, shrink_ratio);
(shrink_slots, Some(shrink_slots_next_batch))
} else { } else {
shrink_candidates_slots (shrink_candidates_slots, None)
} }
}; };
@ -2579,6 +2603,16 @@ impl AccountsDb {
measure_shrink_all_candidates.as_ms() as usize measure_shrink_all_candidates.as_ms() as usize
); );
inc_new_counter_info!("shrink_all_candidate_slots-count", shrink_candidates_count); inc_new_counter_info!("shrink_all_candidate_slots-count", shrink_candidates_count);
let mut pended_counts: usize = 0;
if let Some(shrink_slots_next_batch) = shrink_slots_next_batch {
let mut shrink_slots = self.shrink_candidate_slots.lock().unwrap();
for (slot, stores) in shrink_slots_next_batch {
pended_counts += stores.len();
shrink_slots.entry(slot).or_default().extend(stores);
}
}
inc_new_counter_info!("shrink_pended_stores-count", pended_counts);
num_candidates num_candidates
} }
@ -4516,6 +4550,8 @@ impl AccountsDb {
let mut max_slot = 0; let mut max_slot = 0;
let mut newest_slot = 0; let mut newest_slot = 0;
let mut oldest_slot = std::u64::MAX; let mut oldest_slot = std::u64::MAX;
let mut total_bytes = 0;
let mut total_alive_bytes = 0;
for iter_item in self.storage.0.iter() { for iter_item in self.storage.0.iter() {
let slot = iter_item.key(); let slot = iter_item.key();
let slot_stores = iter_item.value().read().unwrap(); let slot_stores = iter_item.value().read().unwrap();
@ -4536,9 +4572,21 @@ impl AccountsDb {
if *slot < oldest_slot { if *slot < oldest_slot {
oldest_slot = *slot; oldest_slot = *slot;
} }
for store in slot_stores.values() {
total_alive_bytes += Self::page_align(store.alive_bytes() as u64);
total_bytes += store.total_bytes();
}
} }
info!("total_stores: {}, newest_slot: {}, oldest_slot: {}, max_slot: {} (num={}), min_slot: {} (num={})", info!("total_stores: {}, newest_slot: {}, oldest_slot: {}, max_slot: {} (num={}), min_slot: {} (num={})",
total_count, newest_slot, oldest_slot, max_slot, max, min_slot, min); total_count, newest_slot, oldest_slot, max_slot, max, min_slot, min);
let total_alive_ratio = if total_bytes > 0 {
total_alive_bytes as f64 / total_bytes as f64
} else {
0.
};
datapoint_info!( datapoint_info!(
"accounts_db-stores", "accounts_db-stores",
("total_count", total_count, i64), ("total_count", total_count, i64),
@ -4547,6 +4595,9 @@ impl AccountsDb {
self.recycle_stores.read().unwrap().entry_count() as u64, self.recycle_stores.read().unwrap().entry_count() as u64,
i64 i64
), ),
("total_bytes", total_bytes, i64),
("total_alive_bytes", total_alive_bytes, i64),
("total_alive_ratio", total_alive_ratio, f64),
); );
datapoint_info!( datapoint_info!(
"accounts_db-perf-stats", "accounts_db-perf-stats",
@ -9916,87 +9967,229 @@ pub mod tests {
} }
#[test] #[test]
fn test_select_candidates_by_total_usage() { fn test_select_candidates_by_total_usage_no_candidates() {
// no input candidates -- none should be selected
solana_logger::setup(); solana_logger::setup();
// case 1: no candidates
let accounts = AccountsDb::new_single_for_tests(); let accounts = AccountsDb::new_single_for_tests();
let candidates: ShrinkCandidates = HashMap::new();
let (selected_candidates, next_candidates) =
accounts.select_candidates_by_total_usage(&candidates, DEFAULT_ACCOUNTS_SHRINK_RATIO);
assert_eq!(0, selected_candidates.len());
assert_eq!(0, next_candidates.len());
}
#[test]
fn test_select_candidates_by_total_usage_3_way_split_condition() {
// three candidates, one selected for shrink, one is put back to the candidate list and one is ignored
solana_logger::setup();
let accounts = AccountsDb::new_single_for_tests();
let mut candidates: ShrinkCandidates = HashMap::new(); let mut candidates: ShrinkCandidates = HashMap::new();
let output_candidates =
accounts.select_candidates_by_total_usage(&candidates, DEFAULT_ACCOUNTS_SHRINK_RATIO);
assert_eq!(0, output_candidates.len()); let common_store_path = Path::new("");
let common_slot_id = 12;
let store_file_size = 2 * PAGE_SIZE;
// case 2: two candidates, only one selected let store1_id = 22;
let dummy_path = Path::new(""); let store1 = Arc::new(AccountStorageEntry::new(
let dummy_slot = 12; common_store_path,
let dummy_size = 2 * PAGE_SIZE; common_slot_id,
store1_id,
let dummy_id1 = 22; store_file_size,
let entry1 = Arc::new(AccountStorageEntry::new(
dummy_path, dummy_slot, dummy_id1, dummy_size,
)); ));
entry1.alive_bytes.store(8000, Ordering::Relaxed); store1.alive_bytes.store(0, Ordering::Relaxed);
candidates candidates
.entry(dummy_slot) .entry(common_slot_id)
.or_default() .or_default()
.insert(entry1.append_vec_id(), entry1.clone()); .insert(store1.append_vec_id(), store1.clone());
let dummy_id2 = 44; let store2_id = 44;
let entry2 = Arc::new(AccountStorageEntry::new( let store2 = Arc::new(AccountStorageEntry::new(
dummy_path, dummy_slot, dummy_id2, dummy_size, common_store_path,
common_slot_id,
store2_id,
store_file_size,
)); ));
entry2.alive_bytes.store(3000, Ordering::Relaxed);
// The store2's alive_ratio is 0.5: as its page aligned alive size is 1 page.
let store2_alive_bytes = (PAGE_SIZE - 1) as usize;
store2
.alive_bytes
.store(store2_alive_bytes, Ordering::Relaxed);
candidates candidates
.entry(dummy_slot) .entry(common_slot_id)
.or_default() .or_default()
.insert(entry2.append_vec_id(), entry2.clone()); .insert(store2.append_vec_id(), store2.clone());
let output_candidates = let store3_id = 55;
accounts.select_candidates_by_total_usage(&candidates, DEFAULT_ACCOUNTS_SHRINK_RATIO); let entry3 = Arc::new(AccountStorageEntry::new(
assert_eq!(1, output_candidates.len()); common_store_path,
assert_eq!(1, output_candidates[&dummy_slot].len()); common_slot_id,
assert!(output_candidates[&dummy_slot].contains(&entry2.append_vec_id())); store3_id,
store_file_size,
// case 3: two candidates, both are selected
candidates.clear();
let dummy_size = 4 * PAGE_SIZE;
let dummy_id1 = 22;
let entry1 = Arc::new(AccountStorageEntry::new(
dummy_path, dummy_slot, dummy_id1, dummy_size,
)); ));
entry1.alive_bytes.store(3500, Ordering::Relaxed);
// The store3's alive ratio is 1.0 as its page-aligned alive size is 2 pages
let store3_alive_bytes = (PAGE_SIZE + 1) as usize;
entry3
.alive_bytes
.store(store3_alive_bytes, Ordering::Relaxed);
candidates candidates
.entry(dummy_slot) .entry(common_slot_id)
.or_default() .or_default()
.insert(entry1.append_vec_id(), entry1.clone()); .insert(entry3.append_vec_id(), entry3.clone());
let dummy_id2 = 44; // Set the target alive ratio to 0.6 so that we can just get rid of store1, the remaining two stores
let dummy_slot2 = 44; // alive ratio can be > the target ratio: the actual ratio is 0.75 because of 3 alive pages / 4 total pages.
let entry2 = Arc::new(AccountStorageEntry::new( // The target ratio is also set to larger than store2's alive ratio: 0.5 so that it would be added
dummy_path, // to the candidates list for next round.
dummy_slot2, let target_alive_ratio = 0.6;
dummy_id2, let (selected_candidates, next_candidates) =
dummy_size, accounts.select_candidates_by_total_usage(&candidates, target_alive_ratio);
assert_eq!(1, selected_candidates.len());
assert_eq!(1, selected_candidates[&common_slot_id].len());
assert!(selected_candidates[&common_slot_id].contains(&store1.append_vec_id()));
assert_eq!(1, next_candidates.len());
assert!(next_candidates[&common_slot_id].contains(&store2.append_vec_id()));
}
#[test]
fn test_select_candidates_by_total_usage_2_way_split_condition() {
// three candidates, 2 are selected for shrink, one is ignored
solana_logger::setup();
let accounts = AccountsDb::new_single_for_tests();
let mut candidates: ShrinkCandidates = HashMap::new();
let common_store_path = Path::new("");
let common_slot_id = 12;
let store_file_size = 2 * PAGE_SIZE;
let store1_id = 22;
let store1 = Arc::new(AccountStorageEntry::new(
common_store_path,
common_slot_id,
store1_id,
store_file_size,
)); ));
entry2.alive_bytes.store(3000, Ordering::Relaxed); store1.alive_bytes.store(0, Ordering::Relaxed);
candidates candidates
.entry(dummy_slot2) .entry(common_slot_id)
.or_default() .or_default()
.insert(entry2.append_vec_id(), entry2.clone()); .insert(store1.append_vec_id(), store1.clone());
let output_candidates = let store2_id = 44;
accounts.select_candidates_by_total_usage(&candidates, DEFAULT_ACCOUNTS_SHRINK_RATIO); let store2 = Arc::new(AccountStorageEntry::new(
assert_eq!(2, output_candidates.len()); common_store_path,
assert_eq!(1, output_candidates[&dummy_slot].len()); common_slot_id,
assert_eq!(1, output_candidates[&dummy_slot2].len()); store2_id,
store_file_size,
));
assert!(output_candidates[&dummy_slot].contains(&entry1.append_vec_id())); // The store2's alive_ratio is 0.5: as its page aligned alive size is 1 page.
assert!(output_candidates[&dummy_slot2].contains(&entry2.append_vec_id())); let store2_alive_bytes = (PAGE_SIZE - 1) as usize;
store2
.alive_bytes
.store(store2_alive_bytes, Ordering::Relaxed);
candidates
.entry(common_slot_id)
.or_default()
.insert(store2.append_vec_id(), store2.clone());
let store3_id = 55;
let entry3 = Arc::new(AccountStorageEntry::new(
common_store_path,
common_slot_id,
store3_id,
store_file_size,
));
// The store3's alive ratio is 1.0 as its page-aligned alive size is 2 pages
let store3_alive_bytes = (PAGE_SIZE + 1) as usize;
entry3
.alive_bytes
.store(store3_alive_bytes, Ordering::Relaxed);
candidates
.entry(common_slot_id)
.or_default()
.insert(entry3.append_vec_id(), entry3.clone());
// Set the target ratio to default (0.8), both store1 and store2 must be selected and store3 is ignored.
let target_alive_ratio = DEFAULT_ACCOUNTS_SHRINK_RATIO;
let (selected_candidates, next_candidates) =
accounts.select_candidates_by_total_usage(&candidates, target_alive_ratio);
assert_eq!(1, selected_candidates.len());
assert_eq!(2, selected_candidates[&common_slot_id].len());
assert!(selected_candidates[&common_slot_id].contains(&store1.append_vec_id()));
assert!(selected_candidates[&common_slot_id].contains(&store2.append_vec_id()));
assert_eq!(0, next_candidates.len());
}
#[test]
fn test_select_candidates_by_total_usage_all_clean() {
// 2 candidates, they must be selected to achieve the target alive ratio
solana_logger::setup();
let accounts = AccountsDb::new_single_for_tests();
let mut candidates: ShrinkCandidates = HashMap::new();
let slot1 = 12;
let common_store_path = Path::new("");
let store_file_size = 4 * PAGE_SIZE;
let store1_id = 22;
let store1 = Arc::new(AccountStorageEntry::new(
common_store_path,
slot1,
store1_id,
store_file_size,
));
// store1 has 1 page-aligned alive bytes, its alive ratio is 1/4: 0.25
let store1_alive_bytes = (PAGE_SIZE - 1) as usize;
store1
.alive_bytes
.store(store1_alive_bytes, Ordering::Relaxed);
candidates
.entry(slot1)
.or_default()
.insert(store1.append_vec_id(), store1.clone());
let store2_id = 44;
let slot2 = 44;
let store2 = Arc::new(AccountStorageEntry::new(
common_store_path,
slot2,
store2_id,
store_file_size,
));
// store2 has 2 page-aligned bytes, its alive ratio is 2/4: 0.5
let store2_alive_bytes = (PAGE_SIZE + 1) as usize;
store2
.alive_bytes
.store(store2_alive_bytes, Ordering::Relaxed);
candidates
.entry(slot2)
.or_default()
.insert(store2.append_vec_id(), store2.clone());
// Set the target ratio to default (0.8), both stores from the two different slots must be selected.
let target_alive_ratio = DEFAULT_ACCOUNTS_SHRINK_RATIO;
let (selected_candidates, next_candidates) =
accounts.select_candidates_by_total_usage(&candidates, target_alive_ratio);
assert_eq!(2, selected_candidates.len());
assert_eq!(1, selected_candidates[&slot1].len());
assert_eq!(1, selected_candidates[&slot2].len());
assert!(selected_candidates[&slot1].contains(&store1.append_vec_id()));
assert!(selected_candidates[&slot2].contains(&store2.append_vec_id()));
assert_eq!(0, next_candidates.len());
} }
#[test] #[test]
@ -11501,18 +11694,24 @@ pub mod tests {
fn test_recycle_stores_expiration() { fn test_recycle_stores_expiration() {
solana_logger::setup(); solana_logger::setup();
let dummy_path = Path::new(""); let common_store_path = Path::new("");
let dummy_slot = 12; let common_slot_id = 12;
let dummy_size = 1000; let store_file_size = 1000;
let dummy_id1 = 22; let store1_id = 22;
let entry1 = Arc::new(AccountStorageEntry::new( let entry1 = Arc::new(AccountStorageEntry::new(
dummy_path, dummy_slot, dummy_id1, dummy_size, common_store_path,
common_slot_id,
store1_id,
store_file_size,
)); ));
let dummy_id2 = 44; let store2_id = 44;
let entry2 = Arc::new(AccountStorageEntry::new( let entry2 = Arc::new(AccountStorageEntry::new(
dummy_path, dummy_slot, dummy_id2, dummy_size, common_store_path,
common_slot_id,
store2_id,
store_file_size,
)); ));
let mut recycle_stores = RecycleStores::default(); let mut recycle_stores = RecycleStores::default();
@ -11534,10 +11733,10 @@ pub mod tests {
.iter() .iter()
.map(|(_, e)| e.append_vec_id()) .map(|(_, e)| e.append_vec_id())
.collect::<Vec<_>>(), .collect::<Vec<_>>(),
vec![dummy_id1, dummy_id2] vec![store1_id, store2_id]
); );
assert_eq!(recycle_stores.entry_count(), 2); assert_eq!(recycle_stores.entry_count(), 2);
assert_eq!(recycle_stores.total_bytes(), dummy_size * 2); assert_eq!(recycle_stores.total_bytes(), store_file_size * 2);
// expiration for only too old entries // expiration for only too old entries
recycle_stores.entries[0].0 = recycle_stores.entries[0].0 =
@ -11548,17 +11747,17 @@ pub mod tests {
.iter() .iter()
.map(|e| e.append_vec_id()) .map(|e| e.append_vec_id())
.collect::<Vec<_>>(), .collect::<Vec<_>>(),
vec![dummy_id1] vec![store1_id]
); );
assert_eq!( assert_eq!(
recycle_stores recycle_stores
.iter() .iter()
.map(|(_, e)| e.append_vec_id()) .map(|(_, e)| e.append_vec_id())
.collect::<Vec<_>>(), .collect::<Vec<_>>(),
vec![dummy_id2] vec![store2_id]
); );
assert_eq!(recycle_stores.entry_count(), 1); assert_eq!(recycle_stores.entry_count(), 1);
assert_eq!(recycle_stores.total_bytes(), dummy_size); assert_eq!(recycle_stores.total_bytes(), store_file_size);
} }
const RACY_SLEEP_MS: u64 = 10; const RACY_SLEEP_MS: u64 = 10;
@ -12122,9 +12321,14 @@ pub mod tests {
solana_logger::setup(); solana_logger::setup();
let mut accounts = AccountsDb::new_single_for_tests(); let mut accounts = AccountsDb::new_single_for_tests();
let dummy_path = Path::new(""); let common_store_path = Path::new("");
let dummy_size = 2 * PAGE_SIZE; let store_file_size = 2 * PAGE_SIZE;
let entry = Arc::new(AccountStorageEntry::new(dummy_path, 0, 1, dummy_size)); let entry = Arc::new(AccountStorageEntry::new(
common_store_path,
0,
1,
store_file_size,
));
match accounts.shrink_ratio { match accounts.shrink_ratio {
AccountShrinkThreshold::TotalSpace { shrink_ratio } => { AccountShrinkThreshold::TotalSpace { shrink_ratio } => {
assert_eq!( assert_eq!(