From 23d67e4ac70d42c0e88956bb71378af89143a9ef Mon Sep 17 00:00:00 2001 From: "Jeff Washington (jwash)" <75863576+jeffwashington@users.noreply.github.com> Date: Thu, 29 Apr 2021 09:11:28 -0500 Subject: [PATCH] stretchy roots tracker (#16830) * stretchy roots tracker * rename hash to hash_set in tests * update comment * try 2 widths in test * bool iter * add assert * helper function for bitfield insert/remove * introduce RollingBitFieldTester * another bool iter replacement * map cleanup * map to cloned --- runtime/src/accounts_index.rs | 410 ++++++++++++++++++++++++---------- 1 file changed, 297 insertions(+), 113 deletions(-) diff --git a/runtime/src/accounts_index.rs b/runtime/src/accounts_index.rs index 771bd75b8f..baa11e239c 100644 --- a/runtime/src/accounts_index.rs +++ b/runtime/src/accounts_index.rs @@ -187,6 +187,10 @@ pub struct RollingBitField { max: u64, // exclusive bits: BitVec, count: usize, + // These are items that are true and lower than min. + // They would cause us to exceed max_width if we stored them in our bit field. + // We only expect these items in conditions where there is some other bug in the system. + excess: HashSet, } // functionally similar to a hashset // Relies on there being a sliding window of key values. The key values continue to increase. @@ -202,6 +206,7 @@ impl RollingBitField { count: 0, min: 0, max: 0, + excess: HashSet::new(), } } @@ -210,52 +215,86 @@ impl RollingBitField { key % self.max_width } - fn check_range(&self, key: u64) { - assert!( - self.count == 0 - || (self.max.saturating_sub(key) <= self.max_width as u64 - && key.saturating_sub(self.min) < self.max_width as u64), - "out of range: count: {}, min: {}, max: {}, width: {}, key: {}", - self.count, - self.min, - self.max, - self.max_width, - key - ); - } - pub fn range_width(&self) -> u64 { // note that max isn't updated on remove, so it can be above the current max self.max - self.min } pub fn insert(&mut self, key: u64) { - self.check_range(key); - let address = self.get_address(&key); - let value = self.bits.get(address); - if !value { - self.bits.set(address, true); - if self.count == 0 { - self.min = key; - self.max = key + 1; - } else { - self.min = std::cmp::min(self.min, key); - self.max = std::cmp::max(self.max, key + 1); + let bits_empty = self.count == 0 || self.count == self.excess.len(); + let update_bits = if bits_empty { + true // nothing in bits, so in range + } else if key < self.min { + // bits not empty and this insert is before min, so add to excess + if self.excess.insert(key) { + self.count += 1; + } + false + } else if key < self.max { + true // fits current bit field range + } else { + // key is >= max + let new_max = key + 1; + loop { + let new_width = new_max.saturating_sub(self.min); + if new_width <= self.max_width { + // this key will fit the max range + break; + } + + // move the min item from bits to excess and then purge from min to make room for this new max + let inserted = self.excess.insert(self.min); + assert!(inserted); + + let key = self.min; + let address = self.get_address(&key); + self.bits.set(address, false); + self.purge(&key); + } + + true // moved things to excess if necessary, so update bits with the new entry + }; + + if update_bits { + let address = self.get_address(&key); + let value = self.bits.get(address); + if !value { + self.bits.set(address, true); + if bits_empty { + self.min = key; + self.max = key + 1; + } else { + self.min = std::cmp::min(self.min, key); + self.max = std::cmp::max(self.max, key + 1); + } + self.count += 1; } - self.count += 1; } } pub fn remove(&mut self, key: &u64) -> bool { - self.check_range(*key); - let address = self.get_address(key); - let value = self.bits.get(address); - if value { - self.count -= 1; - self.bits.set(address, false); - self.purge(key); + if key >= &self.min { + // if asked to remove something bigger than max, then no-op + if key < &self.max { + let address = self.get_address(key); + let get = self.bits.get(address); + if get { + self.count -= 1; + self.bits.set(address, false); + self.purge(key); + } + get + } else { + false + } + } else { + // asked to remove something < min. would be in excess if it exists + let remove = self.excess.remove(key); + if remove { + self.count -= 1; + } + remove } - value } // after removing 'key' where 'key' = min, make min the correct new min value @@ -283,10 +322,15 @@ impl RollingBitField { } pub fn contains(&self, key: &u64) -> bool { - let result = self.contains_assume_in_range(key); - // A contains call outside min and max is allowed. The answer will be false. - // Only need to do range check if we found true. - result && (key >= &self.min && key < &self.max) + if key < &self.max { + if key >= &self.min { + self.contains_assume_in_range(key) + } else { + self.excess.contains(key) + } + } else { + false + } } pub fn len(&self) -> usize { @@ -304,6 +348,7 @@ impl RollingBitField { pub fn get_all(&self) -> Vec { let mut all = Vec::with_capacity(self.count); + self.excess.iter().for_each(|slot| all.push(*slot)); for key in self.min..self.max { if self.contains_assume_in_range(&key) { all.push(key); @@ -1469,50 +1514,77 @@ pub mod tests { let _ = RollingBitField::new(0); } - fn setup_wide(width: u64, start: u64) -> (RollingBitField, HashSet) { - let mut bitfield = RollingBitField::new(width); - let mut hash = HashSet::new(); + fn setup_empty(width: u64) -> RollingBitFieldTester { + let bitfield = RollingBitField::new(width); + let hash_set = HashSet::new(); + RollingBitFieldTester { bitfield, hash_set } + } - compare(&hash, &bitfield); + struct RollingBitFieldTester { + pub bitfield: RollingBitField, + pub hash_set: HashSet, + } - let mut slot = start; - bitfield.insert(slot); - hash.insert(slot); + impl RollingBitFieldTester { + fn insert(&mut self, slot: u64) { + self.bitfield.insert(slot); + self.hash_set.insert(slot); + assert!(self.bitfield.contains(&slot)); + compare(&self.hash_set, &self.bitfield); + } + fn remove(&mut self, slot: &u64) -> bool { + let result = self.bitfield.remove(slot); + assert_eq!(result, self.hash_set.remove(slot)); + assert!(!self.bitfield.contains(&slot)); + self.compare(); + result + } + fn compare(&self) { + compare(&self.hash_set, &self.bitfield); + } + } - compare(&hash, &bitfield); + fn setup_wide(width: u64, start: u64) -> RollingBitFieldTester { + let mut tester = setup_empty(width); - slot += 1; - bitfield.insert(slot); - hash.insert(slot); - - compare(&hash, &bitfield); - (bitfield, hash) + tester.compare(); + tester.insert(start); + tester.insert(start + 1); + tester } #[test] - #[should_panic(expected = "out of range")] fn test_bitfield_insert_wide() { solana_logger::setup(); let width = 16; let start = 0; - let (mut bitfield, _hash) = setup_wide(width, start); + let mut tester = setup_wide(width, start); let slot = start + width; - // assert here -- higher than max range - bitfield.insert(slot); + let all = tester.bitfield.get_all(); + // higher than max range by 1 + tester.insert(slot); + let bitfield = tester.bitfield; + for slot in all { + assert!(bitfield.contains(&slot)); + } + assert_eq!(bitfield.excess.len(), 1); + assert_eq!(bitfield.count, 3); } #[test] - #[should_panic(expected = "out of range")] fn test_bitfield_insert_wide_before() { solana_logger::setup(); let width = 16; let start = 100; - let (mut bitfield, _hash) = setup_wide(width, start); + let mut bitfield = setup_wide(width, start).bitfield; let slot = start + 1 - width; // assert here - would make min too low, causing too wide of a range bitfield.insert(slot); + assert_eq!(1, bitfield.excess.len()); + assert_eq!(3, bitfield.count); + assert!(bitfield.contains(&slot)); } #[test] @@ -1520,11 +1592,13 @@ pub mod tests { solana_logger::setup(); let width = 16; let start = 100; - let (mut bitfield, _hash) = setup_wide(width, start); + let mut bitfield = setup_wide(width, start).bitfield; - let slot = start + 2 - width; // this item would make our width exactly equal to what is allowed + let slot = start + 2 - width; // this item would make our width exactly equal to what is allowed, but it is also inserting prior to min bitfield.insert(slot); + assert_eq!(1, bitfield.excess.len()); assert!(bitfield.contains(&slot)); + assert_eq!(3, bitfield.count); } #[test] @@ -1532,7 +1606,7 @@ pub mod tests { { let width = 16; let start = 0; - let (bitfield, _hash) = setup_wide(width, start); + let bitfield = setup_wide(width, start).bitfield; let mut slot = width; assert!(!bitfield.contains(&slot)); @@ -1542,7 +1616,7 @@ pub mod tests { { let width = 16; let start = 100; - let (bitfield, _hash) = setup_wide(width, start); + let bitfield = setup_wide(width, start).bitfield; // too large let mut slot = width; @@ -1556,46 +1630,165 @@ pub mod tests { } #[test] - #[should_panic(expected = "out of range")] fn test_bitfield_remove_wide() { let width = 16; let start = 0; - let (mut bitfield, _hash) = setup_wide(width, start); + let mut tester = setup_wide(width, start); let slot = width; - assert!(!bitfield.remove(&slot)); + assert!(!tester.remove(&slot)); + } + + #[test] + fn test_bitfield_excess2() { + solana_logger::setup(); + let width = 16; + let mut tester = setup_empty(width); + let slot = 100; + // insert 1st slot + tester.insert(slot); + assert!(tester.bitfield.excess.is_empty()); + + // insert a slot before the previous one. this is 'excess' since we don't use this pattern in normal operation + let slot2 = slot - 1; + tester.insert(slot2); + assert_eq!(tester.bitfield.excess.len(), 1); + + // remove the 1st slot. we will be left with only excess + tester.remove(&slot); + assert!(tester.bitfield.contains(&slot2)); + assert_eq!(tester.bitfield.excess.len(), 1); + + // re-insert at valid range, making sure we don't insert into excess + tester.insert(slot); + assert_eq!(tester.bitfield.excess.len(), 1); + + // remove the excess slot. + tester.remove(&slot2); + assert!(tester.bitfield.contains(&slot)); + assert!(tester.bitfield.excess.is_empty()); + + // re-insert the excess slot + tester.insert(slot2); + assert_eq!(tester.bitfield.excess.len(), 1); + } + + #[test] + fn test_bitfield_excess() { + solana_logger::setup(); + // start at slot 0 or a separate, higher slot + for width in [16, 4194304].iter() { + let width = *width; + let mut tester = setup_empty(width); + for start in [0, width * 5].iter().cloned() { + // recreate means create empty bitfield with each iteration, otherwise re-use + for recreate in [false, true].iter().cloned() { + let max = start + 3; + // first root to add + for slot in start..max { + // subsequent roots to add + for slot2 in (slot + 1)..max { + // reverse_slots = 1 means add slots in reverse order (max to min). This causes us to add second and later slots to excess. + for reverse_slots in [false, true].iter().cloned() { + let maybe_reverse = |slot| { + if reverse_slots { + max - slot + } else { + slot + } + }; + if recreate { + let recreated = setup_empty(width); + tester = recreated; + } + + // insert + for slot in slot..=slot2 { + let slot_use = maybe_reverse(slot); + tester.insert(slot_use); + debug!( + "slot: {}, bitfield: {:?}, reverse: {}, len: {}, excess: {:?}", + slot_use, + tester.bitfield, + reverse_slots, + tester.bitfield.len(), + tester.bitfield.excess + ); + assert!( + (reverse_slots && tester.bitfield.len() > 1) + ^ tester.bitfield.excess.is_empty() + ); + } + if start > width * 2 { + assert!(!tester.bitfield.contains(&(start - width * 2))); + } + assert!(!tester.bitfield.contains(&(start + width * 2))); + let len = (slot2 - slot + 1) as usize; + assert_eq!(tester.bitfield.len(), len); + assert_eq!(tester.bitfield.count, len); + + // remove + for slot in slot..=slot2 { + let slot_use = maybe_reverse(slot); + assert!(tester.remove(&slot_use)); + assert!( + (reverse_slots && !tester.bitfield.is_empty()) + ^ tester.bitfield.excess.is_empty() + ); + } + assert!(tester.bitfield.is_empty()); + assert_eq!(tester.bitfield.count, 0); + if start > width * 2 { + assert!(!tester.bitfield.contains(&(start - width * 2))); + } + assert!(!tester.bitfield.contains(&(start + width * 2))); + } + } + } + } + } + } } #[test] - #[should_panic(expected = "out of range")] fn test_bitfield_remove_wide_before() { let width = 16; let start = 100; - let (mut bitfield, _hash) = setup_wide(width, start); + let mut tester = setup_wide(width, start); let slot = start + 1 - width; - assert!(!bitfield.remove(&slot)); + assert!(!tester.remove(&slot)); } fn compare(hashset: &HashSet, bitfield: &RollingBitField) { assert_eq!(hashset.len(), bitfield.len()); assert_eq!(hashset.is_empty(), bitfield.is_empty()); - let mut min = Slot::MAX; - let mut max = Slot::MIN; - for item in bitfield.get_all() { - min = std::cmp::min(min, item); - max = std::cmp::max(max, item); - assert!(hashset.contains(&item)); - } - assert!( - bitfield.range_width() - >= if bitfield.is_empty() { + if !bitfield.is_empty() { + let mut min = Slot::MAX; + let mut max = Slot::MIN; + for item in bitfield.get_all() { + assert!(hashset.contains(&item)); + if !bitfield.excess.contains(&item) { + min = std::cmp::min(min, item); + max = std::cmp::max(max, item); + } + } + assert_eq!(bitfield.get_all().len(), hashset.len()); + // range isn't tracked for excess items + if bitfield.excess.len() != bitfield.len() { + let width = if bitfield.is_empty() { 0 } else { max + 1 - min - }, - "hashset: {:?}, bitfield: {:?}", - hashset, - bitfield.get_all() - ); + }; + assert!( + bitfield.range_width() >= width, + "hashset: {:?}, bitfield: {:?}, bitfield.range_width: {}, width: {}", + hashset, + bitfield.get_all(), + bitfield.range_width(), + width, + ); + } + } } #[test] @@ -1613,61 +1806,52 @@ pub mod tests { 2 }; for width in 0..width_iteration_max { - let mut bitfield = RollingBitField::new(max_bitfield_width); - let mut hash = HashSet::new(); + let mut tester = setup_empty(max_bitfield_width); let min = 101_000; let dead = 19; - compare(&hash, &bitfield); - let mut slot = min; - while hash.len() < width { + while tester.hash_set.len() < width { slot += 1; if max_bitfield_width > 2 && slot % dead == 0 { // with max_bitfield_width of 1 and 2, there is no room for dead slots continue; } - hash.insert(slot); - bitfield.insert(slot); + tester.insert(slot); } let max = slot + 1; - compare(&hash, &bitfield); - for slot in (min - 10)..max + 100 { - assert_eq!(bitfield.contains(&slot), hash.contains(&slot)); + assert_eq!( + tester.bitfield.contains(&slot), + tester.hash_set.contains(&slot) + ); } if width > 0 { - hash.remove(&slot); - assert!(bitfield.remove(&slot)); - assert!(!bitfield.remove(&slot)); + assert!(tester.remove(&slot)); + assert!(!tester.remove(&slot)); } - compare(&hash, &bitfield); - let all = bitfield.get_all(); + let all = tester.bitfield.get_all(); // remove the rest, including a call that removes slot again for item in all.iter() { - assert!(hash.remove(&item)); - assert!(!hash.remove(&item)); - assert!(bitfield.remove(&item)); - assert!(!bitfield.remove(&item)); - compare(&hash, &bitfield); + assert!(tester.remove(&item)); + assert!(!tester.remove(&item)); } let min = max + ((width * 2) as u64) + 3; let slot = min; // several widths past previous min let max = slot + 1; - hash.insert(slot); - bitfield.insert(slot); - compare(&hash, &bitfield); - - assert!(hash.contains(&slot)); + tester.insert(slot); for slot in (min - 10)..max + 100 { - assert_eq!(bitfield.contains(&slot), hash.contains(&slot)); + assert_eq!( + tester.bitfield.contains(&slot), + tester.hash_set.contains(&slot) + ); } } } @@ -1776,18 +1960,18 @@ pub mod tests { for width in 0..34 { let mut bitfield = RollingBitField::new(4096); - let mut hash = HashSet::new(); + let mut hash_set = HashSet::new(); let min = 1_010_000; let dead = 19; let mut slot = min; - while hash.len() < width { + while hash_set.len() < width { slot += 1; if slot % dead == 0 { continue; } - hash.insert(slot); + hash_set.insert(slot); bitfield.insert(slot); } @@ -1796,7 +1980,7 @@ pub mod tests { let mut time = Measure::start(""); let mut count = 0; for slot in (min - 10)..max + 100 { - if hash.contains(&slot) { + if hash_set.contains(&slot) { count += 1; } }