From 105a6bfb469e2d91486173e7e3ccecd8a678f407 Mon Sep 17 00:00:00 2001 From: "Jeff Washington (jwash)" <75863576+jeffwashington@users.noreply.github.com> Date: Mon, 12 Apr 2021 11:11:33 -0600 Subject: [PATCH] Replace RootsTracker HashSet (#16310) * Replace RootsTracker HashSet * use bitvec * cleanup, add brenchmark test * test cleanup * add lots of tests * get rid of demo * change warp test constant * get rid of unused function * pr feedback * reorder use * rework get_all to remove range checks * add tests, fix bugs --- program-test/tests/warp.rs | 2 +- runtime/src/accounts_index.rs | 547 +++++++++++++++++++++++++++++++++- 2 files changed, 539 insertions(+), 10 deletions(-) diff --git a/program-test/tests/warp.rs b/program-test/tests/warp.rs index 59cf304d3..197b071ee 100644 --- a/program-test/tests/warp.rs +++ b/program-test/tests/warp.rs @@ -55,7 +55,7 @@ async fn clock_sysvar_updated_from_warp() { ); let mut context = program_test.start_with_context().await; - let expected_slot = 5_000_000; + let expected_slot = 100_000; let instruction = Instruction::new_with_bincode( program_id, &expected_slot, diff --git a/runtime/src/accounts_index.rs b/runtime/src/accounts_index.rs index e878c0400..a5bcbffd7 100644 --- a/runtime/src/accounts_index.rs +++ b/runtime/src/accounts_index.rs @@ -3,6 +3,7 @@ use crate::{ inline_spl_token_v2_0::{self, SPL_TOKEN_ACCOUNT_MINT_OFFSET, SPL_TOKEN_ACCOUNT_OWNER_OFFSET}, secondary_index::*, }; +use bv::BitVec; use dashmap::DashSet; use ouroboros::self_referencing; use solana_measure::measure::Measure; @@ -174,13 +175,150 @@ impl WriteAccountMapEntry { } #[derive(Debug, Default)] +pub struct RollingBitField { + max_width: u64, + min: u64, + max: u64, // exclusive + bits: BitVec, + count: usize, +} +// functionally similar to a hashset +// Relies on there being a sliding window of key values. The key values continue to increase. +// Old key values are removed from the lesser values and do not accumulate. 
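The struct above treats its BitVec as a ring: a key is stored at bit key % max_width, so the same fixed storage is reused as the window of live roots slides forward, while min and max record which keys the set bits currently belong to. The standalone sketch below is illustrative only and not part of the patch: it substitutes a plain Vec<bool> for bv::BitVec, omits remove/purge, and the name RollingWindowSketch is invented for this example.

// Illustrative sketch of the rolling-window addressing, simplified from RollingBitField.
struct RollingWindowSketch {
    max_width: u64, // capacity of the window; must be a power of two
    min: u64,       // smallest live key (inclusive)
    max: u64,       // one past the largest live key (exclusive)
    bits: Vec<bool>,
    count: usize,
}

impl RollingWindowSketch {
    fn new(max_width: u64) -> Self {
        // power of two, so the modulo below is just a mask
        assert!(max_width.is_power_of_two());
        Self {
            max_width,
            min: 0,
            max: 0,
            bits: vec![false; max_width as usize],
            count: 0,
        }
    }

    // key and key + max_width alias to the same bit; [min, max) decides which one is live
    fn address(&self, key: u64) -> usize {
        (key % self.max_width) as usize
    }

    fn insert(&mut self, key: u64) {
        // all live keys must fit in one window, i.e. max - min <= max_width
        assert!(
            self.count == 0
                || (key.saturating_sub(self.min) < self.max_width
                    && self.max.saturating_sub(key) <= self.max_width),
            "out of range"
        );
        let a = self.address(key);
        if !self.bits[a] {
            self.bits[a] = true;
            if self.count == 0 {
                self.min = key;
                self.max = key + 1;
            } else {
                self.min = self.min.min(key);
                self.max = self.max.max(key + 1);
            }
            self.count += 1;
        }
    }

    fn contains(&self, key: u64) -> bool {
        // the range check rejects aliased keys outside the live window
        key >= self.min && key < self.max && self.bits[self.address(key)]
    }
}

fn main() {
    let mut w = RollingWindowSketch::new(4);
    w.insert(10);
    w.insert(12);
    assert!(w.contains(10));
    // 14 % 4 == 10 % 4, but 14 lies outside [min, max) so the range check rejects it
    assert!(!w.contains(14));
    // 8 % 4 == 12 % 4, also rejected: it is below min
    assert!(!w.contains(8));
    println!("live keys occupy [{}, {}), count = {}", w.min, w.max, w.count);
}

The one maintenance step the sketch leaves out is removal of the current minimum: the real RollingBitField's purge scans forward from the old min to re-tighten the window, which keeps max - min bounded by max_width and therefore keeps get_all and the contains range check cheap.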
+impl RollingBitField { + pub fn new(max_width: u64) -> Self { + assert!(max_width > 0); + assert!(max_width.is_power_of_two()); // power of 2 to make dividing a shift + let bits = BitVec::new_fill(false, max_width); + Self { + max_width, + bits, + count: 0, + min: 0, + max: 0, + } + } + + // find the array index + fn get_address(&self, key: &u64) -> u64 { + key % self.max_width + } + + fn check_range(&self, key: u64) { + assert!( + self.count == 0 + || (self.max.saturating_sub(key) <= self.max_width as u64 + && key.saturating_sub(self.min) < self.max_width as u64), + "out of range" + ); + } + + pub fn insert(&mut self, key: u64) { + self.check_range(key); + let address = self.get_address(&key); + let value = self.bits.get(address); + if !value { + self.bits.set(address, true); + if self.count == 0 { + self.min = key; + self.max = key + 1; + } else { + self.min = std::cmp::min(self.min, key); + self.max = std::cmp::max(self.max, key + 1); + } + self.count += 1; + } + } + + pub fn remove(&mut self, key: &u64) { + self.check_range(*key); + let address = self.get_address(key); + let value = self.bits.get(address); + if value { + self.count -= 1; + self.bits.set(address, false); + self.purge(key); + } + } + + // after removing 'key' where 'key' = min, make min the correct new min value + fn purge(&mut self, key: &u64) { + if key == &self.min && self.count > 0 { + let start = self.min + 1; // min just got removed + for key in start..self.max { + if self.contains_assume_in_range(&key) { + self.min = key; + break; + } + } + } + } + + fn contains_assume_in_range(&self, key: &u64) -> bool { + // the result may be aliased. Caller is responsible for determining key is in range. + let address = self.get_address(key); + self.bits.get(address) + } + + pub fn contains(&self, key: &u64) -> bool { + let result = self.contains_assume_in_range(key); + // A contains call outside min and max is allowed. The answer will be false. + // Only need to do range check if we found true. + result && (key >= &self.min && key < &self.max) + } + + pub fn len(&self) -> usize { + self.count + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn clear(&mut self) { + let mut n = Self::new(self.max_width); + std::mem::swap(&mut n, self); + } + + pub fn get_all(&self) -> Vec { + let mut all = Vec::with_capacity(self.count); + for key in self.min..self.max { + if self.contains_assume_in_range(&key) { + all.push(key); + } + } + all + } +} + +#[derive(Debug)] pub struct RootsTracker { - roots: HashSet, + roots: RollingBitField, max_root: Slot, uncleaned_roots: HashSet, previous_uncleaned_roots: HashSet, } +impl Default for RootsTracker { + fn default() -> Self { + // we expect to keep a rolling set of 400k slots around at a time + // 2M gives us plenty of extra room to handle a width 5x what we should need. 
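+        // 5 * 400k = 2M; 2_097_152 = 2^21 is the next power of two at or above that (new() asserts a power-of-two width)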
+ // cost is 2M bits of memory + RootsTracker::new(2097152) + } +} + +impl RootsTracker { + pub fn new(max_width: u64) -> Self { + Self { + roots: RollingBitField::new(max_width), + max_root: 0, + uncleaned_roots: HashSet::new(), + previous_uncleaned_roots: HashSet::new(), + } + } +} + #[derive(Debug, Default)] pub struct AccountsIndexRootsStats { pub roots_len: usize, @@ -801,7 +939,7 @@ impl AccountsIndex { // Get the maximum root <= `max_allowed_root` from the given `slice` fn get_max_root( - roots: &HashSet, + roots: &RollingBitField, slice: SlotSlice, max_allowed_root: Option, ) -> Slot { @@ -1122,13 +1260,8 @@ impl AccountsIndex { } pub fn all_roots(&self) -> Vec { - self.roots_tracker - .read() - .unwrap() - .roots - .iter() - .cloned() - .collect() + let tracker = self.roots_tracker.read().unwrap(); + tracker.roots.get_all() } #[cfg(test)] @@ -1158,6 +1291,7 @@ impl AccountsIndex { #[cfg(test)] pub mod tests { use super::*; + use log::*; use solana_sdk::signature::{Keypair, Signer}; pub enum SecondaryIndexTypes<'a> { @@ -1201,6 +1335,401 @@ pub mod tests { ) } + #[test] + fn test_bitfield_permutations() { + solana_logger::setup(); + let mut bitfield = RollingBitField::new(2097152); + let mut hash = HashSet::new(); + + let min = 101_000; + let width = 400_000; + let dead = 19; + + let mut slot = min; + while hash.len() < width { + slot += 1; + if slot % dead == 0 { + continue; + } + hash.insert(slot); + bitfield.insert(slot); + } + compare(&hash, &bitfield); + + let max = slot + 1; + + let mut time = Measure::start(""); + let mut count = 0; + for slot in (min - 10)..max + 100 { + if hash.contains(&slot) { + count += 1; + } + } + time.stop(); + + let mut time2 = Measure::start(""); + let mut count2 = 0; + for slot in (min - 10)..max + 100 { + if bitfield.contains(&slot) { + count2 += 1; + } + } + time2.stop(); + info!( + "{}ms, {}ms, {} ratio", + time.as_ms(), + time2.as_ms(), + time.as_ns() / time2.as_ns() + ); + assert_eq!(count, count2); + } + + #[test] + #[should_panic(expected = "assertion failed: max_width.is_power_of_two()")] + fn test_bitfield_power_2() { + let _ = RollingBitField::new(3); + } + + #[test] + #[should_panic(expected = "assertion failed: max_width > 0")] + fn test_bitfield_0() { + let _ = RollingBitField::new(0); + } + + fn setup_wide(width: u64, start: u64) -> (RollingBitField, HashSet) { + let mut bitfield = RollingBitField::new(width); + let mut hash = HashSet::new(); + + compare(&hash, &bitfield); + + let mut slot = start; + bitfield.insert(slot); + hash.insert(slot); + + compare(&hash, &bitfield); + + slot += 1; + bitfield.insert(slot); + hash.insert(slot); + + compare(&hash, &bitfield); + (bitfield, hash) + } + + #[test] + #[should_panic(expected = "out of range")] + fn test_bitfield_insert_wide() { + solana_logger::setup(); + let width = 16; + let start = 0; + let (mut bitfield, _hash) = setup_wide(width, start); + + let slot = start + width; + // assert here -- higher than max range + bitfield.insert(slot); + } + + #[test] + #[should_panic(expected = "out of range")] + fn test_bitfield_insert_wide_before() { + solana_logger::setup(); + let width = 16; + let start = 100; + let (mut bitfield, _hash) = setup_wide(width, start); + + let slot = start + 1 - width; + // assert here - would make min too low, causing too wide of a range + bitfield.insert(slot); + } + + #[test] + fn test_bitfield_insert_wide_before_ok() { + solana_logger::setup(); + let width = 16; + let start = 100; + let (mut bitfield, _hash) = setup_wide(width, start); + + let slot = start + 2 
- width; // this item would make our width exactly equal to what is allowed + bitfield.insert(slot); + assert!(bitfield.contains(&slot)); + } + + #[test] + fn test_bitfield_contains_wide_no_assert() { + { + let width = 16; + let start = 0; + let (bitfield, _hash) = setup_wide(width, start); + + let mut slot = width; + assert!(!bitfield.contains(&slot)); + slot += 1; + assert!(!bitfield.contains(&slot)); + } + { + let width = 16; + let start = 100; + let (bitfield, _hash) = setup_wide(width, start); + + // too large + let mut slot = width; + assert!(!bitfield.contains(&slot)); + slot += 1; + assert!(!bitfield.contains(&slot)); + // too small, before min + slot = 0; + assert!(!bitfield.contains(&slot)); + } + } + + #[test] + #[should_panic(expected = "out of range")] + fn test_bitfield_remove_wide() { + let width = 16; + let start = 0; + let (mut bitfield, _hash) = setup_wide(width, start); + let slot = width; + // not set anyway, so no need to assert + bitfield.remove(&slot); + } + + #[test] + #[should_panic(expected = "out of range")] + fn test_bitfield_remove_wide_before() { + let width = 16; + let start = 100; + let (mut bitfield, _hash) = setup_wide(width, start); + let slot = start + 1 - width; + bitfield.remove(&slot); + } + + fn compare(hashset: &HashSet, bitfield: &RollingBitField) { + assert_eq!(hashset.len(), bitfield.len()); + assert_eq!(hashset.is_empty(), bitfield.is_empty()); + for item in bitfield.get_all() { + assert!(hashset.contains(&item)); + } + } + + #[test] + fn test_bitfield_functionality() { + solana_logger::setup(); + + // bitfield sizes are powers of 2, cycle through values of 1, 2, 4, .. 2^9 + for power in 0..10 { + let max_bitfield_width = 2u64.pow(power) as u64; + let width_iteration_max = if max_bitfield_width > 1 { + // add up to 2 items so we can test out multiple items + 3 + } else { + // 0 or 1 items is all we can fit with a width of 1 item + 2 + }; + for width in 0..width_iteration_max { + let mut bitfield = RollingBitField::new(max_bitfield_width); + let mut hash = HashSet::new(); + + let min = 101_000; + let dead = 19; + + compare(&hash, &bitfield); + + let mut slot = min; + while hash.len() < width { + slot += 1; + if max_bitfield_width > 2 && slot % dead == 0 { + // with max_bitfield_width of 1 and 2, there is no room for dead slots + continue; + } + hash.insert(slot); + bitfield.insert(slot); + } + let max = slot + 1; + + compare(&hash, &bitfield); + + for slot in (min - 10)..max + 100 { + assert_eq!(bitfield.contains(&slot), hash.contains(&slot)); + } + + let all = bitfield.get_all(); + + if width > 0 { + hash.remove(&slot); + bitfield.remove(&slot); + } + + compare(&hash, &bitfield); + + // remove the rest, including a call that removes slot again + for item in all.iter() { + hash.remove(&item); + bitfield.remove(&item); + compare(&hash, &bitfield); + } + + let min = max + ((width * 2) as u64) + 3; + let slot = min; // several widths past previous min + let max = slot + 1; + hash.insert(slot); + bitfield.insert(slot); + compare(&hash, &bitfield); + + assert!(hash.contains(&slot)); + + for slot in (min - 10)..max + 100 { + assert_eq!(bitfield.contains(&slot), hash.contains(&slot)); + } + } + } + } + + fn bitfield_insert_and_test(bitfield: &mut RollingBitField, slot: Slot) { + let len = bitfield.len(); + let old_all = bitfield.get_all(); + let (new_min, new_max) = if bitfield.is_empty() { + (slot, slot + 1) + } else { + ( + std::cmp::min(bitfield.min, slot), + std::cmp::max(bitfield.max, slot + 1), + ) + }; + bitfield.insert(slot); + 
assert_eq!(bitfield.min, new_min); + assert_eq!(bitfield.max, new_max); + assert_eq!(bitfield.len(), len + 1); + assert!(!bitfield.is_empty()); + assert!(bitfield.contains(&slot)); + // verify aliasing is what we expect + assert!(bitfield.contains_assume_in_range(&(slot + bitfield.max_width))); + let get_all = bitfield.get_all(); + old_all + .into_iter() + .for_each(|slot| assert!(get_all.contains(&slot))); + assert!(get_all.contains(&slot)); + assert!(get_all.len() == len + 1); + } + + #[test] + fn test_bitfield_clear() { + let mut bitfield = RollingBitField::new(4); + assert_eq!(bitfield.len(), 0); + assert!(bitfield.is_empty()); + bitfield_insert_and_test(&mut bitfield, 0); + bitfield.clear(); + assert_eq!(bitfield.len(), 0); + assert!(bitfield.is_empty()); + assert!(bitfield.get_all().is_empty()); + bitfield_insert_and_test(&mut bitfield, 1); + bitfield.clear(); + assert_eq!(bitfield.len(), 0); + assert!(bitfield.is_empty()); + assert!(bitfield.get_all().is_empty()); + bitfield_insert_and_test(&mut bitfield, 4); + } + + #[test] + fn test_bitfield_wrapping() { + let mut bitfield = RollingBitField::new(4); + assert_eq!(bitfield.len(), 0); + assert!(bitfield.is_empty()); + bitfield_insert_and_test(&mut bitfield, 0); + assert_eq!(bitfield.get_all(), vec![0]); + bitfield_insert_and_test(&mut bitfield, 2); + assert_eq!(bitfield.get_all(), vec![0, 2]); + bitfield_insert_and_test(&mut bitfield, 3); + bitfield.insert(3); // redundant insert + assert_eq!(bitfield.get_all(), vec![0, 2, 3]); + bitfield.remove(&0); + assert_eq!(bitfield.min, 2); + assert_eq!(bitfield.max, 4); + assert_eq!(bitfield.len(), 2); + bitfield.remove(&0); // redundant remove + assert_eq!(bitfield.len(), 2); + assert_eq!(bitfield.get_all(), vec![2, 3]); + bitfield.insert(4); // wrapped around value - same bit as '0' + assert_eq!(bitfield.min, 2); + assert_eq!(bitfield.max, 5); + assert_eq!(bitfield.len(), 3); + assert_eq!(bitfield.get_all(), vec![2, 3, 4]); + bitfield.remove(&2); + assert_eq!(bitfield.min, 3); + assert_eq!(bitfield.max, 5); + assert_eq!(bitfield.len(), 2); + assert_eq!(bitfield.get_all(), vec![3, 4]); + bitfield.remove(&3); + assert_eq!(bitfield.min, 4); + assert_eq!(bitfield.max, 5); + assert_eq!(bitfield.len(), 1); + assert_eq!(bitfield.get_all(), vec![4]); + bitfield.remove(&4); + assert_eq!(bitfield.len(), 0); + assert!(bitfield.is_empty()); + assert!(bitfield.get_all().is_empty()); + bitfield_insert_and_test(&mut bitfield, 8); + bitfield.remove(&8); + assert_eq!(bitfield.len(), 0); + assert!(bitfield.is_empty()); + assert!(bitfield.get_all().is_empty()); + bitfield_insert_and_test(&mut bitfield, 9); + bitfield.remove(&9); + assert_eq!(bitfield.len(), 0); + assert!(bitfield.is_empty()); + assert!(bitfield.get_all().is_empty()); + } + + #[test] + fn test_bitfield_smaller() { + // smaller bitfield, fewer entries, including 0 + solana_logger::setup(); + + for width in 0..34 { + let mut bitfield = RollingBitField::new(4096); + let mut hash = HashSet::new(); + + let min = 1_010_000; + let dead = 19; + + let mut slot = min; + while hash.len() < width { + slot += 1; + if slot % dead == 0 { + continue; + } + hash.insert(slot); + bitfield.insert(slot); + } + + let max = slot + 1; + + let mut time = Measure::start(""); + let mut count = 0; + for slot in (min - 10)..max + 100 { + if hash.contains(&slot) { + count += 1; + } + } + time.stop(); + + let mut time2 = Measure::start(""); + let mut count2 = 0; + for slot in (min - 10)..max + 100 { + if bitfield.contains(&slot) { + count2 += 1; + } + } + time2.stop(); + 
info!( + "{}, {}, {}", + time.as_ms(), + time2.as_ms(), + time.as_ns() / time2.as_ns() + ); + assert_eq!(count, count2); + } + } + #[test] fn test_get_empty() { let key = Keypair::new();