From 9cd3bb0c4de929d636ba6350141ee948d829fd12 Mon Sep 17 00:00:00 2001 From: "Jeff Washington (jwash)" Date: Tue, 10 Jan 2023 14:01:37 -0600 Subject: [PATCH] never try to shrink a slot that is in the write cache (#29615) --- runtime/src/accounts_cache.rs | 4 ++++ runtime/src/accounts_db.rs | 14 ++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/runtime/src/accounts_cache.rs b/runtime/src/accounts_cache.rs index d7ecb6d42d..51fd8286ed 100644 --- a/runtime/src/accounts_cache.rs +++ b/runtime/src/accounts_cache.rs @@ -320,6 +320,10 @@ impl AccountsCache { slots } + pub fn contains(&self, slot: Slot) -> bool { + self.cache.contains_key(&slot) + } + pub fn num_slots(&self) -> usize { self.cache.len() } diff --git a/runtime/src/accounts_db.rs b/runtime/src/accounts_db.rs index 65984c6013..d60022dc0d 100644 --- a/runtime/src/accounts_db.rs +++ b/runtime/src/accounts_db.rs @@ -3818,6 +3818,20 @@ impl AccountsDb { } fn do_shrink_slot_store(&self, slot: Slot, store: &Arc) { + if self.accounts_cache.contains(slot) { + // It is not correct to shrink a slot while it it is in the write cache until flush is complete and the slot is removed from the write cache. + // There can exist a window after a slot is made a root and before the write cache flushing for that slot begins and then completes. + // There can also exist a window after a slot is being flushed from the write cache until the index is updated and the slot is removed from the write cache. + // During the second window, once an append vec has been created for the slot, it could be possible to try to shrink that slot. + // Shrink no-ops before this function if there is no store for the slot (notice this function requires 'store' to be passed). + // So, if we enter this function but the slot is still in the write cache, reasonable behavior is to skip shrinking this slot. + // Flush will ONLY write alive accounts to the append vec, which is what shrink does anyway. + // Flush then adds the slot to 'uncleaned_roots', which causes clean to take a look at the slot. + // Clean causes us to mark accounts as dead, which causes shrink to later take a look at the slot. + // This could be an assert, but it could lead to intermittency in tests. + // It is 'correct' to ignore calls to shrink when a slot is still in the write cache. + return; + } let mut stored_accounts = Vec::default(); debug!("do_shrink_slot_store: slot: {}", slot); let shrink_collect = self.shrink_collect(store, &mut stored_accounts, &self.shrink_stats);