diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs
index d2a260cd99..d6b6ae1b39 100644
--- a/runtime/src/accounts_db.rs
+++ b/runtime/src/accounts_db.rs
@@ -2483,12 +2483,13 @@ impl AccountsDb {
     /// Given the input `ShrinkCandidates`, this function sorts the stores by their alive ratio
     /// in increasing order with the most sparse entries in the front. It will then simulate the
     /// shrinking by working on the most sparse entries first and if the overall alive ratio is
-    /// achieved, it will stop and return the filtered-down candidates.
+    /// achieved, it will stop and return the filtered-down candidates and the candidates which
+    /// are skipped in this round and might be eligible for a future shrink.
     fn select_candidates_by_total_usage(
         &self,
         shrink_slots: &ShrinkCandidates,
         shrink_ratio: f64,
-    ) -> ShrinkCandidates {
+    ) -> (ShrinkCandidates, ShrinkCandidates) {
         struct StoreUsageInfo {
             slot: Slot,
             alive_ratio: f64,
@@ -2499,6 +2500,7 @@ impl AccountsDb {
         let mut total_alive_bytes: u64 = 0;
         let mut candidates_count: usize = 0;
         let mut total_bytes: u64 = 0;
+        let mut total_candidate_stores: usize = 0;
         for (slot, slot_shrink_candidates) in shrink_slots {
             candidates_count += slot_shrink_candidates.len();
             for store in slot_shrink_candidates.values() {
@@ -2511,6 +2513,7 @@ impl AccountsDb {
                     alive_ratio,
                     store: store.clone(),
                 });
+                total_candidate_stores += 1;
             }
         }
         store_usage.sort_by(|a, b| {
@@ -2522,8 +2525,12 @@ impl AccountsDb {
         // Working from the beginning of store_usage which are the most sparse and see when we can stop
         // shrinking while still achieving the overall goals.
         let mut shrink_slots: ShrinkCandidates = HashMap::new();
+        let mut shrink_slots_next_batch: ShrinkCandidates = HashMap::new();
         for usage in &store_usage {
+            let store = &usage.store;
             let alive_ratio = (total_alive_bytes as f64) / (total_bytes as f64);
+            debug!("alive_ratio: {:?} store_id: {:?}, store_ratio: {:?} requirement: {:?}, total_bytes: {:?} total_alive_bytes: {:?}",
+                alive_ratio, usage.store.append_vec_id(), usage.alive_ratio, shrink_ratio, total_bytes, total_alive_bytes);
             if alive_ratio > shrink_ratio {
                 // we have reached our goal, stop
                 debug!(
@@ -2531,35 +2538,52 @@ impl AccountsDb {
                     total_bytes: {:?}, alive_ratio: {:}, shrink_ratio: {:?}",
                     usage.slot, total_alive_bytes, total_bytes, alive_ratio, shrink_ratio
                 );
-                break;
+                if usage.alive_ratio < shrink_ratio {
+                    shrink_slots_next_batch
+                        .entry(usage.slot)
+                        .or_default()
+                        .insert(store.append_vec_id(), store.clone());
+                } else {
+                    break;
+                }
+            } else {
+                let current_store_size = store.total_bytes();
+                let after_shrink_size = Self::page_align(store.alive_bytes() as u64);
+                let bytes_saved = current_store_size.saturating_sub(after_shrink_size);
+                total_bytes -= bytes_saved;
+                shrink_slots
+                    .entry(usage.slot)
+                    .or_default()
+                    .insert(store.append_vec_id(), store.clone());
             }
-            let store = &usage.store;
-            let current_store_size = store.total_bytes();
-            let after_shrink_size = Self::page_align(store.alive_bytes() as u64);
-            let bytes_saved = current_store_size.saturating_sub(after_shrink_size);
-            total_bytes -= bytes_saved;
-            shrink_slots
-                .entry(usage.slot)
-                .or_default()
-                .insert(store.append_vec_id(), store.clone());
         }
         measure.stop();
         inc_new_counter_info!(
-            "select_top_sparse_storage_entries-ms",
+            "shrink_select_top_sparse_storage_entries-ms",
             measure.as_ms() as usize
         );
-        inc_new_counter_info!("select_top_sparse_storage_entries-seeds", candidates_count);
-        shrink_slots
+        inc_new_counter_info!(
+            "shrink_select_top_sparse_storage_entries-seeds",
+            candidates_count
+        );
+        inc_new_counter_info!(
+            "shrink_total_preliminary_candidate_stores",
+            total_candidate_stores
+        );
+
+        (shrink_slots, shrink_slots_next_batch)
     }
 
     pub fn shrink_candidate_slots(&self) -> usize {
         let shrink_candidates_slots =
             std::mem::take(&mut *self.shrink_candidate_slots.lock().unwrap());
-        let shrink_slots = {
+        let (shrink_slots, shrink_slots_next_batch) = {
             if let AccountShrinkThreshold::TotalSpace { shrink_ratio } = self.shrink_ratio {
-                self.select_candidates_by_total_usage(&shrink_candidates_slots, shrink_ratio)
+                let (shrink_slots, shrink_slots_next_batch) =
+                    self.select_candidates_by_total_usage(&shrink_candidates_slots, shrink_ratio);
+                (shrink_slots, Some(shrink_slots_next_batch))
             } else {
-                shrink_candidates_slots
+                (shrink_candidates_slots, None)
             }
         };
 
@@ -2579,6 +2603,16 @@ impl AccountsDb {
             measure_shrink_all_candidates.as_ms() as usize
         );
         inc_new_counter_info!("shrink_all_candidate_slots-count", shrink_candidates_count);
+        let mut pended_counts: usize = 0;
+        if let Some(shrink_slots_next_batch) = shrink_slots_next_batch {
+            let mut shrink_slots = self.shrink_candidate_slots.lock().unwrap();
+            for (slot, stores) in shrink_slots_next_batch {
+                pended_counts += stores.len();
+                shrink_slots.entry(slot).or_default().extend(stores);
+            }
+        }
+        inc_new_counter_info!("shrink_pended_stores-count", pended_counts);
+
         num_candidates
     }
 
@@ -4516,6 +4550,8 @@ impl AccountsDb {
         let mut max_slot = 0;
         let mut newest_slot = 0;
         let mut oldest_slot = std::u64::MAX;
+        let mut total_bytes = 0;
+        let mut total_alive_bytes = 0;
         for iter_item in self.storage.0.iter() {
             let slot = iter_item.key();
             let slot_stores = iter_item.value().read().unwrap();
@@ -4536,9 +4572,21 @@ impl AccountsDb {
             if *slot < oldest_slot {
                 oldest_slot = *slot;
             }
+
+            for store in slot_stores.values() {
+                total_alive_bytes += Self::page_align(store.alive_bytes() as u64);
+                total_bytes += store.total_bytes();
+            }
         }
         info!("total_stores: {}, newest_slot: {}, oldest_slot: {}, max_slot: {} (num={}), min_slot: {} (num={})",
               total_count, newest_slot, oldest_slot, max_slot, max, min_slot, min);
+
+        let total_alive_ratio = if total_bytes > 0 {
+            total_alive_bytes as f64 / total_bytes as f64
+        } else {
+            0.
+        };
+
         datapoint_info!(
             "accounts_db-stores",
             ("total_count", total_count, i64),
@@ -4547,6 +4595,9 @@ impl AccountsDb {
                 self.recycle_stores.read().unwrap().entry_count() as u64,
                 i64
             ),
+            ("total_bytes", total_bytes, i64),
+            ("total_alive_bytes", total_alive_bytes, i64),
+            ("total_alive_ratio", total_alive_ratio, f64),
         );
         datapoint_info!(
             "accounts_db-perf-stats",
@@ -9916,87 +9967,229 @@ pub mod tests {
     }
 
     #[test]
-    fn test_select_candidates_by_total_usage() {
+    fn test_select_candidates_by_total_usage_no_candidates() {
+        // no input candidates -- none should be selected
         solana_logger::setup();
-
-        // case 1: no candidates
         let accounts = AccountsDb::new_single_for_tests();
+        let candidates: ShrinkCandidates = HashMap::new();
+        let (selected_candidates, next_candidates) =
+            accounts.select_candidates_by_total_usage(&candidates, DEFAULT_ACCOUNTS_SHRINK_RATIO);
+
+        assert_eq!(0, selected_candidates.len());
+        assert_eq!(0, next_candidates.len());
+    }
+
+    #[test]
+    fn test_select_candidates_by_total_usage_3_way_split_condition() {
+        // three candidates: one is selected for shrink, one is put back into the candidate list, and one is ignored
+        solana_logger::setup();
+        let accounts = AccountsDb::new_single_for_tests();
         let mut candidates: ShrinkCandidates = HashMap::new();
 
-        let output_candidates =
-            accounts.select_candidates_by_total_usage(&candidates, DEFAULT_ACCOUNTS_SHRINK_RATIO);
-        assert_eq!(0, output_candidates.len());
+        let common_store_path = Path::new("");
+        let common_slot_id = 12;
+        let store_file_size = 2 * PAGE_SIZE;
 
-        // case 2: two candidates, only one selected
-        let dummy_path = Path::new("");
-        let dummy_slot = 12;
-        let dummy_size = 2 * PAGE_SIZE;
-
-        let dummy_id1 = 22;
-        let entry1 = Arc::new(AccountStorageEntry::new(
-            dummy_path, dummy_slot, dummy_id1, dummy_size,
+        let store1_id = 22;
+        let store1 = Arc::new(AccountStorageEntry::new(
+            common_store_path,
+            common_slot_id,
+            store1_id,
+            store_file_size,
         ));
-        entry1.alive_bytes.store(8000, Ordering::Relaxed);
+        store1.alive_bytes.store(0, Ordering::Relaxed);
         candidates
-            .entry(dummy_slot)
+            .entry(common_slot_id)
             .or_default()
-            .insert(entry1.append_vec_id(), entry1.clone());
+            .insert(store1.append_vec_id(), store1.clone());
 
-        let dummy_id2 = 44;
-        let entry2 = Arc::new(AccountStorageEntry::new(
-            dummy_path, dummy_slot, dummy_id2, dummy_size,
+        let store2_id = 44;
+        let store2 = Arc::new(AccountStorageEntry::new(
+            common_store_path,
+            common_slot_id,
+            store2_id,
+            store_file_size,
         ));
-        entry2.alive_bytes.store(3000, Ordering::Relaxed);
+
+        // store2's alive_ratio is 0.5, as its page-aligned alive size is 1 page.
+        let store2_alive_bytes = (PAGE_SIZE - 1) as usize;
+        store2
+            .alive_bytes
+            .store(store2_alive_bytes, Ordering::Relaxed);
         candidates
-            .entry(dummy_slot)
+            .entry(common_slot_id)
             .or_default()
-            .insert(entry2.append_vec_id(), entry2.clone());
+            .insert(store2.append_vec_id(), store2.clone());
 
-        let output_candidates =
-            accounts.select_candidates_by_total_usage(&candidates, DEFAULT_ACCOUNTS_SHRINK_RATIO);
-        assert_eq!(1, output_candidates.len());
-        assert_eq!(1, output_candidates[&dummy_slot].len());
-        assert!(output_candidates[&dummy_slot].contains(&entry2.append_vec_id()));
-
-        // case 3: two candidates, both are selected
-        candidates.clear();
-        let dummy_size = 4 * PAGE_SIZE;
-        let dummy_id1 = 22;
-        let entry1 = Arc::new(AccountStorageEntry::new(
-            dummy_path, dummy_slot, dummy_id1, dummy_size,
+        let store3_id = 55;
+        let entry3 = Arc::new(AccountStorageEntry::new(
+            common_store_path,
+            common_slot_id,
+            store3_id,
+            store_file_size,
         ));
-        entry1.alive_bytes.store(3500, Ordering::Relaxed);
+
+        // store3's alive ratio is 1.0, as its page-aligned alive size is 2 pages
+        let store3_alive_bytes = (PAGE_SIZE + 1) as usize;
+        entry3
+            .alive_bytes
+            .store(store3_alive_bytes, Ordering::Relaxed);
         candidates
-            .entry(dummy_slot)
+            .entry(common_slot_id)
             .or_default()
-            .insert(entry1.append_vec_id(), entry1.clone());
+            .insert(entry3.append_vec_id(), entry3.clone());
 
-        let dummy_id2 = 44;
-        let dummy_slot2 = 44;
-        let entry2 = Arc::new(AccountStorageEntry::new(
-            dummy_path,
-            dummy_slot2,
-            dummy_id2,
-            dummy_size,
+        // Set the target alive ratio to 0.6 so that we only need to shrink store1: once it is gone, the
+        // overall alive ratio of the remaining two stores is 0.75 (3 alive pages / 4 total pages), which is
+        // above the target. The target is also larger than store2's own alive ratio (0.5), so store2 is
+        // added to the candidate list for the next round.
+        let target_alive_ratio = 0.6;
+        let (selected_candidates, next_candidates) =
+            accounts.select_candidates_by_total_usage(&candidates, target_alive_ratio);
+        assert_eq!(1, selected_candidates.len());
+        assert_eq!(1, selected_candidates[&common_slot_id].len());
+        assert!(selected_candidates[&common_slot_id].contains(&store1.append_vec_id()));
+        assert_eq!(1, next_candidates.len());
+        assert!(next_candidates[&common_slot_id].contains(&store2.append_vec_id()));
+    }
+
+    #[test]
+    fn test_select_candidates_by_total_usage_2_way_split_condition() {
+        // three candidates: two are selected for shrink, one is ignored
+        solana_logger::setup();
+        let accounts = AccountsDb::new_single_for_tests();
+        let mut candidates: ShrinkCandidates = HashMap::new();
+
+        let common_store_path = Path::new("");
+        let common_slot_id = 12;
+        let store_file_size = 2 * PAGE_SIZE;
+
+        let store1_id = 22;
+        let store1 = Arc::new(AccountStorageEntry::new(
+            common_store_path,
+            common_slot_id,
+            store1_id,
+            store_file_size,
         ));
-        entry2.alive_bytes.store(3000, Ordering::Relaxed);
+        store1.alive_bytes.store(0, Ordering::Relaxed);
         candidates
-            .entry(dummy_slot2)
+            .entry(common_slot_id)
             .or_default()
-            .insert(entry2.append_vec_id(), entry2.clone());
+            .insert(store1.append_vec_id(), store1.clone());
 
-        let output_candidates =
-            accounts.select_candidates_by_total_usage(&candidates, DEFAULT_ACCOUNTS_SHRINK_RATIO);
-        assert_eq!(2, output_candidates.len());
-        assert_eq!(1, output_candidates[&dummy_slot].len());
-        assert_eq!(1, output_candidates[&dummy_slot2].len());
+        let store2_id = 44;
+        let store2 = Arc::new(AccountStorageEntry::new(
+            common_store_path,
+            common_slot_id,
+            store2_id,
+            store_file_size,
+        ));
 
-        assert!(output_candidates[&dummy_slot].contains(&entry1.append_vec_id()));
-        assert!(output_candidates[&dummy_slot2].contains(&entry2.append_vec_id()));
+        // store2's alive_ratio is 0.5, as its page-aligned alive size is 1 page.
+        let store2_alive_bytes = (PAGE_SIZE - 1) as usize;
+        store2
+            .alive_bytes
+            .store(store2_alive_bytes, Ordering::Relaxed);
+        candidates
+            .entry(common_slot_id)
+            .or_default()
+            .insert(store2.append_vec_id(), store2.clone());
+
+        let store3_id = 55;
+        let entry3 = Arc::new(AccountStorageEntry::new(
+            common_store_path,
+            common_slot_id,
+            store3_id,
+            store_file_size,
+        ));
+
+        // store3's alive ratio is 1.0, as its page-aligned alive size is 2 pages
+        let store3_alive_bytes = (PAGE_SIZE + 1) as usize;
+        entry3
+            .alive_bytes
+            .store(store3_alive_bytes, Ordering::Relaxed);
+
+        candidates
+            .entry(common_slot_id)
+            .or_default()
+            .insert(entry3.append_vec_id(), entry3.clone());
+
+        // Set the target ratio to the default (0.8); both store1 and store2 must be selected and store3 is ignored.
+        let target_alive_ratio = DEFAULT_ACCOUNTS_SHRINK_RATIO;
+        let (selected_candidates, next_candidates) =
+            accounts.select_candidates_by_total_usage(&candidates, target_alive_ratio);
+        assert_eq!(1, selected_candidates.len());
+        assert_eq!(2, selected_candidates[&common_slot_id].len());
+        assert!(selected_candidates[&common_slot_id].contains(&store1.append_vec_id()));
+        assert!(selected_candidates[&common_slot_id].contains(&store2.append_vec_id()));
+        assert_eq!(0, next_candidates.len());
+    }
+
+    #[test]
+    fn test_select_candidates_by_total_usage_all_clean() {
+        // two candidates; both must be selected to achieve the target alive ratio
+        solana_logger::setup();
+        let accounts = AccountsDb::new_single_for_tests();
+        let mut candidates: ShrinkCandidates = HashMap::new();
+
+        let slot1 = 12;
+        let common_store_path = Path::new("");
+
+        let store_file_size = 4 * PAGE_SIZE;
+        let store1_id = 22;
+        let store1 = Arc::new(AccountStorageEntry::new(
+            common_store_path,
+            slot1,
+            store1_id,
+            store_file_size,
+        ));
+
+        // store1's page-aligned alive size is 1 page, so its alive ratio is 1/4 = 0.25
+        let store1_alive_bytes = (PAGE_SIZE - 1) as usize;
+        store1
+            .alive_bytes
+            .store(store1_alive_bytes, Ordering::Relaxed);
+
+        candidates
+            .entry(slot1)
+            .or_default()
+            .insert(store1.append_vec_id(), store1.clone());
+
+        let store2_id = 44;
+        let slot2 = 44;
+        let store2 = Arc::new(AccountStorageEntry::new(
+            common_store_path,
+            slot2,
+            store2_id,
+            store_file_size,
+        ));
+
+        // store2's page-aligned alive size is 2 pages, so its alive ratio is 2/4 = 0.5
+        let store2_alive_bytes = (PAGE_SIZE + 1) as usize;
+        store2
+            .alive_bytes
+            .store(store2_alive_bytes, Ordering::Relaxed);
+
+        candidates
+            .entry(slot2)
+            .or_default()
+            .insert(store2.append_vec_id(), store2.clone());
+
+        // Set the target ratio to the default (0.8); both stores, from two different slots, must be selected.
+        let target_alive_ratio = DEFAULT_ACCOUNTS_SHRINK_RATIO;
+        let (selected_candidates, next_candidates) =
+            accounts.select_candidates_by_total_usage(&candidates, target_alive_ratio);
+        assert_eq!(2, selected_candidates.len());
+        assert_eq!(1, selected_candidates[&slot1].len());
+        assert_eq!(1, selected_candidates[&slot2].len());
+
+        assert!(selected_candidates[&slot1].contains(&store1.append_vec_id()));
+        assert!(selected_candidates[&slot2].contains(&store2.append_vec_id()));
+        assert_eq!(0, next_candidates.len());
     }
 
     #[test]
@@ -11501,18 +11694,24 @@ pub mod tests {
     fn test_recycle_stores_expiration() {
         solana_logger::setup();
 
-        let dummy_path = Path::new("");
-        let dummy_slot = 12;
-        let dummy_size = 1000;
+        let common_store_path = Path::new("");
+        let common_slot_id = 12;
+        let store_file_size = 1000;
 
-        let dummy_id1 = 22;
+        let store1_id = 22;
         let entry1 = Arc::new(AccountStorageEntry::new(
-            dummy_path, dummy_slot, dummy_id1, dummy_size,
+            common_store_path,
+            common_slot_id,
+            store1_id,
+            store_file_size,
         ));
 
-        let dummy_id2 = 44;
+        let store2_id = 44;
         let entry2 = Arc::new(AccountStorageEntry::new(
-            dummy_path, dummy_slot, dummy_id2, dummy_size,
+            common_store_path,
+            common_slot_id,
+            store2_id,
+            store_file_size,
         ));
 
         let mut recycle_stores = RecycleStores::default();
@@ -11534,10 +11733,10 @@ pub mod tests {
                 .iter()
                 .map(|(_, e)| e.append_vec_id())
                 .collect::<Vec<_>>(),
-            vec![dummy_id1, dummy_id2]
+            vec![store1_id, store2_id]
         );
         assert_eq!(recycle_stores.entry_count(), 2);
-        assert_eq!(recycle_stores.total_bytes(), dummy_size * 2);
+        assert_eq!(recycle_stores.total_bytes(), store_file_size * 2);
 
         // expiration for only too old entries
         recycle_stores.entries[0].0 =
@@ -11548,17 +11747,17 @@ pub mod tests {
                 .iter()
                 .map(|e| e.append_vec_id())
                 .collect::<Vec<_>>(),
-            vec![dummy_id1]
+            vec![store1_id]
         );
         assert_eq!(
             recycle_stores
                 .iter()
                 .map(|(_, e)| e.append_vec_id())
                 .collect::<Vec<_>>(),
-            vec![dummy_id2]
+            vec![store2_id]
         );
         assert_eq!(recycle_stores.entry_count(), 1);
-        assert_eq!(recycle_stores.total_bytes(), dummy_size);
+        assert_eq!(recycle_stores.total_bytes(), store_file_size);
     }
 
     const RACY_SLEEP_MS: u64 = 10;
@@ -12122,9 +12321,14 @@ pub mod tests {
         solana_logger::setup();
         let mut accounts = AccountsDb::new_single_for_tests();
 
-        let dummy_path = Path::new("");
-        let dummy_size = 2 * PAGE_SIZE;
-        let entry = Arc::new(AccountStorageEntry::new(dummy_path, 0, 1, dummy_size));
+        let common_store_path = Path::new("");
+        let store_file_size = 2 * PAGE_SIZE;
+        let entry = Arc::new(AccountStorageEntry::new(
+            common_store_path,
+            0,
+            1,
+            store_file_size,
+        ));
         match accounts.shrink_ratio {
             AccountShrinkThreshold::TotalSpace { shrink_ratio } => {
                 assert_eq!(
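
Note on the selection rule (editor's sketch, not part of the patch): the new return value splits the sorted candidates three ways -- stores selected for shrinking now, stores deferred to the next round, and stores that need no attention. The standalone Rust sketch below mirrors only that arithmetic; Store and select_by_total_usage are hypothetical simplifications of AccountStorageEntry and select_candidates_by_total_usage, and alive_bytes is assumed to be page-aligned already so that page_align() can be omitted.

// Editor's sketch, not part of the diff above. Simplified stand-in for AccountStorageEntry.
struct Store {
    total_bytes: u64,
    alive_bytes: u64, // assumed already page-aligned, so page_align() is omitted
}

// Mirrors the decision rule of select_candidates_by_total_usage:
// returns (selected_for_shrink, next_batch).
fn select_by_total_usage(mut stores: Vec<Store>, shrink_ratio: f64) -> (Vec<Store>, Vec<Store>) {
    // Most sparse stores (lowest alive ratio) first, as in the patch.
    stores.sort_by(|a, b| {
        let ra = a.alive_bytes as f64 / a.total_bytes as f64;
        let rb = b.alive_bytes as f64 / b.total_bytes as f64;
        ra.partial_cmp(&rb).unwrap()
    });

    let mut total_bytes: u64 = stores.iter().map(|s| s.total_bytes).sum();
    let total_alive_bytes: u64 = stores.iter().map(|s| s.alive_bytes).sum();

    let (mut selected, mut next_batch) = (Vec::new(), Vec::new());
    for store in stores {
        let overall_alive_ratio = total_alive_bytes as f64 / total_bytes as f64;
        let store_alive_ratio = store.alive_bytes as f64 / store.total_bytes as f64;
        if overall_alive_ratio > shrink_ratio {
            // Goal already reached: an individually sparse store is deferred to the
            // next shrink round; a dense store ends the scan.
            if store_alive_ratio < shrink_ratio {
                next_batch.push(store);
            } else {
                break;
            }
        } else {
            // Simulate the shrink: the store would shrink down to its alive bytes.
            total_bytes -= store.total_bytes.saturating_sub(store.alive_bytes);
            selected.push(store);
        }
    }
    (selected, next_batch)
}

As in the patch, subtracting the simulated savings from total_bytes after each selection is what lets the loop detect the point where the overall alive ratio first exceeds shrink_ratio.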