stretchy roots tracker (#16830)

* stretchy roots tracker

* rename hash to hash_set in tests

* update comment

* try 2 widths in test

* bool iter

* add assert

* helper function for bitfield insert/remove

* introduce RollingBitFieldTester

* another bool iter replacement

* map cleanup

* map to cloned
This commit is contained in:
Jeff Washington (jwash) 2021-04-29 09:11:28 -05:00 committed by GitHub
parent a2fbb9cfef
commit 23d67e4ac7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 297 additions and 113 deletions

View File

@ -187,6 +187,10 @@ pub struct RollingBitField {
max: u64, // exclusive
bits: BitVec,
count: usize,
// These are items that are true and lower than min.
// They would cause us to exceed max_width if we stored them in our bit field.
// We only expect these items in conditions where there is some other bug in the system.
excess: HashSet<u64>,
}
// functionally similar to a hashset
// Relies on there being a sliding window of key values. The key values continue to increase.
@ -202,6 +206,7 @@ impl RollingBitField {
count: 0,
min: 0,
max: 0,
excess: HashSet::new(),
}
}
@ -210,52 +215,86 @@ impl RollingBitField {
key % self.max_width
}
fn check_range(&self, key: u64) {
assert!(
self.count == 0
|| (self.max.saturating_sub(key) <= self.max_width as u64
&& key.saturating_sub(self.min) < self.max_width as u64),
"out of range: count: {}, min: {}, max: {}, width: {}, key: {}",
self.count,
self.min,
self.max,
self.max_width,
key
);
}
pub fn range_width(&self) -> u64 {
// note that max isn't updated on remove, so it can be above the current max
self.max - self.min
}
pub fn insert(&mut self, key: u64) {
self.check_range(key);
let address = self.get_address(&key);
let value = self.bits.get(address);
if !value {
self.bits.set(address, true);
if self.count == 0 {
self.min = key;
self.max = key + 1;
} else {
self.min = std::cmp::min(self.min, key);
self.max = std::cmp::max(self.max, key + 1);
let bits_empty = self.count == 0 || self.count == self.excess.len();
let update_bits = if bits_empty {
true // nothing in bits, so in range
} else if key < self.min {
// bits not empty and this insert is before min, so add to excess
if self.excess.insert(key) {
self.count += 1;
}
false
} else if key < self.max {
true // fits current bit field range
} else {
// key is >= max
let new_max = key + 1;
loop {
let new_width = new_max.saturating_sub(self.min);
if new_width <= self.max_width {
// this key will fit the max range
break;
}
// move the min item from bits to excess and then purge from min to make room for this new max
let inserted = self.excess.insert(self.min);
assert!(inserted);
let key = self.min;
let address = self.get_address(&key);
self.bits.set(address, false);
self.purge(&key);
}
true // moved things to excess if necessary, so update bits with the new entry
};
if update_bits {
let address = self.get_address(&key);
let value = self.bits.get(address);
if !value {
self.bits.set(address, true);
if bits_empty {
self.min = key;
self.max = key + 1;
} else {
self.min = std::cmp::min(self.min, key);
self.max = std::cmp::max(self.max, key + 1);
}
self.count += 1;
}
self.count += 1;
}
}
pub fn remove(&mut self, key: &u64) -> bool {
self.check_range(*key);
let address = self.get_address(key);
let value = self.bits.get(address);
if value {
self.count -= 1;
self.bits.set(address, false);
self.purge(key);
if key >= &self.min {
// if asked to remove something bigger than max, then no-op
if key < &self.max {
let address = self.get_address(key);
let get = self.bits.get(address);
if get {
self.count -= 1;
self.bits.set(address, false);
self.purge(key);
}
get
} else {
false
}
} else {
// asked to remove something < min. would be in excess if it exists
let remove = self.excess.remove(key);
if remove {
self.count -= 1;
}
remove
}
value
}
// after removing 'key' where 'key' = min, make min the correct new min value
@ -283,10 +322,15 @@ impl RollingBitField {
}
pub fn contains(&self, key: &u64) -> bool {
let result = self.contains_assume_in_range(key);
// A contains call outside min and max is allowed. The answer will be false.
// Only need to do range check if we found true.
result && (key >= &self.min && key < &self.max)
if key < &self.max {
if key >= &self.min {
self.contains_assume_in_range(key)
} else {
self.excess.contains(key)
}
} else {
false
}
}
pub fn len(&self) -> usize {
@ -304,6 +348,7 @@ impl RollingBitField {
pub fn get_all(&self) -> Vec<u64> {
let mut all = Vec::with_capacity(self.count);
self.excess.iter().for_each(|slot| all.push(*slot));
for key in self.min..self.max {
if self.contains_assume_in_range(&key) {
all.push(key);
@ -1469,50 +1514,77 @@ pub mod tests {
let _ = RollingBitField::new(0);
}
fn setup_wide(width: u64, start: u64) -> (RollingBitField, HashSet<u64>) {
let mut bitfield = RollingBitField::new(width);
let mut hash = HashSet::new();
fn setup_empty(width: u64) -> RollingBitFieldTester {
let bitfield = RollingBitField::new(width);
let hash_set = HashSet::new();
RollingBitFieldTester { bitfield, hash_set }
}
compare(&hash, &bitfield);
struct RollingBitFieldTester {
pub bitfield: RollingBitField,
pub hash_set: HashSet<u64>,
}
let mut slot = start;
bitfield.insert(slot);
hash.insert(slot);
impl RollingBitFieldTester {
fn insert(&mut self, slot: u64) {
self.bitfield.insert(slot);
self.hash_set.insert(slot);
assert!(self.bitfield.contains(&slot));
compare(&self.hash_set, &self.bitfield);
}
fn remove(&mut self, slot: &u64) -> bool {
let result = self.bitfield.remove(slot);
assert_eq!(result, self.hash_set.remove(slot));
assert!(!self.bitfield.contains(&slot));
self.compare();
result
}
fn compare(&self) {
compare(&self.hash_set, &self.bitfield);
}
}
compare(&hash, &bitfield);
fn setup_wide(width: u64, start: u64) -> RollingBitFieldTester {
let mut tester = setup_empty(width);
slot += 1;
bitfield.insert(slot);
hash.insert(slot);
compare(&hash, &bitfield);
(bitfield, hash)
tester.compare();
tester.insert(start);
tester.insert(start + 1);
tester
}
#[test]
#[should_panic(expected = "out of range")]
fn test_bitfield_insert_wide() {
solana_logger::setup();
let width = 16;
let start = 0;
let (mut bitfield, _hash) = setup_wide(width, start);
let mut tester = setup_wide(width, start);
let slot = start + width;
// assert here -- higher than max range
bitfield.insert(slot);
let all = tester.bitfield.get_all();
// higher than max range by 1
tester.insert(slot);
let bitfield = tester.bitfield;
for slot in all {
assert!(bitfield.contains(&slot));
}
assert_eq!(bitfield.excess.len(), 1);
assert_eq!(bitfield.count, 3);
}
#[test]
#[should_panic(expected = "out of range")]
fn test_bitfield_insert_wide_before() {
solana_logger::setup();
let width = 16;
let start = 100;
let (mut bitfield, _hash) = setup_wide(width, start);
let mut bitfield = setup_wide(width, start).bitfield;
let slot = start + 1 - width;
// assert here - would make min too low, causing too wide of a range
bitfield.insert(slot);
assert_eq!(1, bitfield.excess.len());
assert_eq!(3, bitfield.count);
assert!(bitfield.contains(&slot));
}
#[test]
@ -1520,11 +1592,13 @@ pub mod tests {
solana_logger::setup();
let width = 16;
let start = 100;
let (mut bitfield, _hash) = setup_wide(width, start);
let mut bitfield = setup_wide(width, start).bitfield;
let slot = start + 2 - width; // this item would make our width exactly equal to what is allowed
let slot = start + 2 - width; // this item would make our width exactly equal to what is allowed, but it is also inserting prior to min
bitfield.insert(slot);
assert_eq!(1, bitfield.excess.len());
assert!(bitfield.contains(&slot));
assert_eq!(3, bitfield.count);
}
#[test]
@ -1532,7 +1606,7 @@ pub mod tests {
{
let width = 16;
let start = 0;
let (bitfield, _hash) = setup_wide(width, start);
let bitfield = setup_wide(width, start).bitfield;
let mut slot = width;
assert!(!bitfield.contains(&slot));
@ -1542,7 +1616,7 @@ pub mod tests {
{
let width = 16;
let start = 100;
let (bitfield, _hash) = setup_wide(width, start);
let bitfield = setup_wide(width, start).bitfield;
// too large
let mut slot = width;
@ -1556,46 +1630,165 @@ pub mod tests {
}
#[test]
#[should_panic(expected = "out of range")]
fn test_bitfield_remove_wide() {
let width = 16;
let start = 0;
let (mut bitfield, _hash) = setup_wide(width, start);
let mut tester = setup_wide(width, start);
let slot = width;
assert!(!bitfield.remove(&slot));
assert!(!tester.remove(&slot));
}
#[test]
fn test_bitfield_excess2() {
solana_logger::setup();
let width = 16;
let mut tester = setup_empty(width);
let slot = 100;
// insert 1st slot
tester.insert(slot);
assert!(tester.bitfield.excess.is_empty());
// insert a slot before the previous one. this is 'excess' since we don't use this pattern in normal operation
let slot2 = slot - 1;
tester.insert(slot2);
assert_eq!(tester.bitfield.excess.len(), 1);
// remove the 1st slot. we will be left with only excess
tester.remove(&slot);
assert!(tester.bitfield.contains(&slot2));
assert_eq!(tester.bitfield.excess.len(), 1);
// re-insert at valid range, making sure we don't insert into excess
tester.insert(slot);
assert_eq!(tester.bitfield.excess.len(), 1);
// remove the excess slot.
tester.remove(&slot2);
assert!(tester.bitfield.contains(&slot));
assert!(tester.bitfield.excess.is_empty());
// re-insert the excess slot
tester.insert(slot2);
assert_eq!(tester.bitfield.excess.len(), 1);
}
#[test]
fn test_bitfield_excess() {
solana_logger::setup();
// start at slot 0 or a separate, higher slot
for width in [16, 4194304].iter() {
let width = *width;
let mut tester = setup_empty(width);
for start in [0, width * 5].iter().cloned() {
// recreate means create empty bitfield with each iteration, otherwise re-use
for recreate in [false, true].iter().cloned() {
let max = start + 3;
// first root to add
for slot in start..max {
// subsequent roots to add
for slot2 in (slot + 1)..max {
// reverse_slots = 1 means add slots in reverse order (max to min). This causes us to add second and later slots to excess.
for reverse_slots in [false, true].iter().cloned() {
let maybe_reverse = |slot| {
if reverse_slots {
max - slot
} else {
slot
}
};
if recreate {
let recreated = setup_empty(width);
tester = recreated;
}
// insert
for slot in slot..=slot2 {
let slot_use = maybe_reverse(slot);
tester.insert(slot_use);
debug!(
"slot: {}, bitfield: {:?}, reverse: {}, len: {}, excess: {:?}",
slot_use,
tester.bitfield,
reverse_slots,
tester.bitfield.len(),
tester.bitfield.excess
);
assert!(
(reverse_slots && tester.bitfield.len() > 1)
^ tester.bitfield.excess.is_empty()
);
}
if start > width * 2 {
assert!(!tester.bitfield.contains(&(start - width * 2)));
}
assert!(!tester.bitfield.contains(&(start + width * 2)));
let len = (slot2 - slot + 1) as usize;
assert_eq!(tester.bitfield.len(), len);
assert_eq!(tester.bitfield.count, len);
// remove
for slot in slot..=slot2 {
let slot_use = maybe_reverse(slot);
assert!(tester.remove(&slot_use));
assert!(
(reverse_slots && !tester.bitfield.is_empty())
^ tester.bitfield.excess.is_empty()
);
}
assert!(tester.bitfield.is_empty());
assert_eq!(tester.bitfield.count, 0);
if start > width * 2 {
assert!(!tester.bitfield.contains(&(start - width * 2)));
}
assert!(!tester.bitfield.contains(&(start + width * 2)));
}
}
}
}
}
}
}
#[test]
#[should_panic(expected = "out of range")]
fn test_bitfield_remove_wide_before() {
let width = 16;
let start = 100;
let (mut bitfield, _hash) = setup_wide(width, start);
let mut tester = setup_wide(width, start);
let slot = start + 1 - width;
assert!(!bitfield.remove(&slot));
assert!(!tester.remove(&slot));
}
fn compare(hashset: &HashSet<u64>, bitfield: &RollingBitField) {
assert_eq!(hashset.len(), bitfield.len());
assert_eq!(hashset.is_empty(), bitfield.is_empty());
let mut min = Slot::MAX;
let mut max = Slot::MIN;
for item in bitfield.get_all() {
min = std::cmp::min(min, item);
max = std::cmp::max(max, item);
assert!(hashset.contains(&item));
}
assert!(
bitfield.range_width()
>= if bitfield.is_empty() {
if !bitfield.is_empty() {
let mut min = Slot::MAX;
let mut max = Slot::MIN;
for item in bitfield.get_all() {
assert!(hashset.contains(&item));
if !bitfield.excess.contains(&item) {
min = std::cmp::min(min, item);
max = std::cmp::max(max, item);
}
}
assert_eq!(bitfield.get_all().len(), hashset.len());
// range isn't tracked for excess items
if bitfield.excess.len() != bitfield.len() {
let width = if bitfield.is_empty() {
0
} else {
max + 1 - min
},
"hashset: {:?}, bitfield: {:?}",
hashset,
bitfield.get_all()
);
};
assert!(
bitfield.range_width() >= width,
"hashset: {:?}, bitfield: {:?}, bitfield.range_width: {}, width: {}",
hashset,
bitfield.get_all(),
bitfield.range_width(),
width,
);
}
}
}
#[test]
@ -1613,61 +1806,52 @@ pub mod tests {
2
};
for width in 0..width_iteration_max {
let mut bitfield = RollingBitField::new(max_bitfield_width);
let mut hash = HashSet::new();
let mut tester = setup_empty(max_bitfield_width);
let min = 101_000;
let dead = 19;
compare(&hash, &bitfield);
let mut slot = min;
while hash.len() < width {
while tester.hash_set.len() < width {
slot += 1;
if max_bitfield_width > 2 && slot % dead == 0 {
// with max_bitfield_width of 1 and 2, there is no room for dead slots
continue;
}
hash.insert(slot);
bitfield.insert(slot);
tester.insert(slot);
}
let max = slot + 1;
compare(&hash, &bitfield);
for slot in (min - 10)..max + 100 {
assert_eq!(bitfield.contains(&slot), hash.contains(&slot));
assert_eq!(
tester.bitfield.contains(&slot),
tester.hash_set.contains(&slot)
);
}
if width > 0 {
hash.remove(&slot);
assert!(bitfield.remove(&slot));
assert!(!bitfield.remove(&slot));
assert!(tester.remove(&slot));
assert!(!tester.remove(&slot));
}
compare(&hash, &bitfield);
let all = bitfield.get_all();
let all = tester.bitfield.get_all();
// remove the rest, including a call that removes slot again
for item in all.iter() {
assert!(hash.remove(&item));
assert!(!hash.remove(&item));
assert!(bitfield.remove(&item));
assert!(!bitfield.remove(&item));
compare(&hash, &bitfield);
assert!(tester.remove(&item));
assert!(!tester.remove(&item));
}
let min = max + ((width * 2) as u64) + 3;
let slot = min; // several widths past previous min
let max = slot + 1;
hash.insert(slot);
bitfield.insert(slot);
compare(&hash, &bitfield);
assert!(hash.contains(&slot));
tester.insert(slot);
for slot in (min - 10)..max + 100 {
assert_eq!(bitfield.contains(&slot), hash.contains(&slot));
assert_eq!(
tester.bitfield.contains(&slot),
tester.hash_set.contains(&slot)
);
}
}
}
@ -1776,18 +1960,18 @@ pub mod tests {
for width in 0..34 {
let mut bitfield = RollingBitField::new(4096);
let mut hash = HashSet::new();
let mut hash_set = HashSet::new();
let min = 1_010_000;
let dead = 19;
let mut slot = min;
while hash.len() < width {
while hash_set.len() < width {
slot += 1;
if slot % dead == 0 {
continue;
}
hash.insert(slot);
hash_set.insert(slot);
bitfield.insert(slot);
}
@ -1796,7 +1980,7 @@ pub mod tests {
let mut time = Measure::start("");
let mut count = 0;
for slot in (min - 10)..max + 100 {
if hash.contains(&slot) {
if hash_set.contains(&slot) {
count += 1;
}
}