From 105a6bfb469e2d91486173e7e3ccecd8a678f407 Mon Sep 17 00:00:00 2001 From: "Jeff Washington (jwash)" <75863576+jeffwashington@users.noreply.github.com> Date: Mon, 12 Apr 2021 11:11:33 -0600 Subject: [PATCH] Replace RootsTracker HashSet (#16310) * Replace RootsTracker HashSet * use bitvec * cleanup, add brenchmark test * test cleanup * add lots of tests * get rid of demo * change warp test constant * get rid of unused function * pr feedback * reorder use * rework get_all to remove range checks * add tests, fix bugs --- program-test/tests/warp.rs | 2 +- runtime/src/accounts_index.rs | 547 +++++++++++++++++++++++++++++++++- 2 files changed, 539 insertions(+), 10 deletions(-) diff --git a/program-test/tests/warp.rs b/program-test/tests/warp.rs index 59cf304d3..197b071ee 100644 --- a/program-test/tests/warp.rs +++ b/program-test/tests/warp.rs @@ -55,7 +55,7 @@ async fn clock_sysvar_updated_from_warp() { ); let mut context = program_test.start_with_context().await; - let expected_slot = 5_000_000; + let expected_slot = 100_000; let instruction = Instruction::new_with_bincode( program_id, &expected_slot, diff --git a/runtime/src/accounts_index.rs b/runtime/src/accounts_index.rs index e878c0400..a5bcbffd7 100644 --- a/runtime/src/accounts_index.rs +++ b/runtime/src/accounts_index.rs @@ -3,6 +3,7 @@ use crate::{ inline_spl_token_v2_0::{self, SPL_TOKEN_ACCOUNT_MINT_OFFSET, SPL_TOKEN_ACCOUNT_OWNER_OFFSET}, secondary_index::*, }; +use bv::BitVec; use dashmap::DashSet; use ouroboros::self_referencing; use solana_measure::measure::Measure; @@ -174,13 +175,150 @@ impl WriteAccountMapEntry { } #[derive(Debug, Default)] +pub struct RollingBitField { + max_width: u64, + min: u64, + max: u64, // exclusive + bits: BitVec, + count: usize, +} +// functionally similar to a hashset +// Relies on there being a sliding window of key values. The key values continue to increase. +// Old key values are removed from the lesser values and do not accumulate. 
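The struct above treats its BitVec as a ring: a key is stored at bit key % max_width, so the same fixed storage is reused as the window of live roots slides forward, while min and max record which keys the set bits currently belong to. The standalone sketch below is illustrative only and not part of the patch: it substitutes a plain Vec<bool> for bv::BitVec, omits remove/purge, and the name RollingWindowSketch is invented for this example.

// Illustrative sketch of the rolling-window addressing, simplified from RollingBitField.
struct RollingWindowSketch {
    max_width: u64, // capacity of the window; must be a power of two
    min: u64,       // smallest live key (inclusive)
    max: u64,       // one past the largest live key (exclusive)
    bits: Vec<bool>,
    count: usize,
}

impl RollingWindowSketch {
    fn new(max_width: u64) -> Self {
        // power of two, so the modulo below is just a mask
        assert!(max_width.is_power_of_two());
        Self {
            max_width,
            min: 0,
            max: 0,
            bits: vec![false; max_width as usize],
            count: 0,
        }
    }

    // key and key + max_width alias to the same bit; [min, max) decides which one is live
    fn address(&self, key: u64) -> usize {
        (key % self.max_width) as usize
    }

    fn insert(&mut self, key: u64) {
        // all live keys must fit in one window, i.e. max - min <= max_width
        assert!(
            self.count == 0
                || (key.saturating_sub(self.min) < self.max_width
                    && self.max.saturating_sub(key) <= self.max_width),
            "out of range"
        );
        let a = self.address(key);
        if !self.bits[a] {
            self.bits[a] = true;
            if self.count == 0 {
                self.min = key;
                self.max = key + 1;
            } else {
                self.min = self.min.min(key);
                self.max = self.max.max(key + 1);
            }
            self.count += 1;
        }
    }

    fn contains(&self, key: u64) -> bool {
        // the range check rejects aliased keys outside the live window
        key >= self.min && key < self.max && self.bits[self.address(key)]
    }
}

fn main() {
    let mut w = RollingWindowSketch::new(4);
    w.insert(10);
    w.insert(12);
    assert!(w.contains(10));
    // 14 % 4 == 10 % 4, but 14 lies outside [min, max) so the range check rejects it
    assert!(!w.contains(14));
    // 8 % 4 == 12 % 4, also rejected: it is below min
    assert!(!w.contains(8));
    println!("live keys occupy [{}, {}), count = {}", w.min, w.max, w.count);
}

The one maintenance step the sketch leaves out is removal of the current minimum: the real RollingBitField's purge scans forward from the old min to re-tighten the window, which keeps max - min bounded by max_width and therefore keeps get_all and the contains range check cheap.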
+impl RollingBitField { + pub fn new(max_width: u64) -> Self { + assert!(max_width > 0); + assert!(max_width.is_power_of_two()); // power of 2 to make dividing a shift + let bits = BitVec::new_fill(false, max_width); + Self { + max_width, + bits, + count: 0, + min: 0, + max: 0, + } + } + + // find the array index + fn get_address(&self, key: &u64) -> u64 { + key % self.max_width + } + + fn check_range(&self, key: u64) { + assert!( + self.count == 0 + || (self.max.saturating_sub(key) <= self.max_width as u64 + && key.saturating_sub(self.min) < self.max_width as u64), + "out of range" + ); + } + + pub fn insert(&mut self, key: u64) { + self.check_range(key); + let address = self.get_address(&key); + let value = self.bits.get(address); + if !value { + self.bits.set(address, true); + if self.count == 0 { + self.min = key; + self.max = key + 1; + } else { + self.min = std::cmp::min(self.min, key); + self.max = std::cmp::max(self.max, key + 1); + } + self.count += 1; + } + } + + pub fn remove(&mut self, key: &u64) { + self.check_range(*key); + let address = self.get_address(key); + let value = self.bits.get(address); + if value { + self.count -= 1; + self.bits.set(address, false); + self.purge(key); + } + } + + // after removing 'key' where 'key' = min, make min the correct new min value + fn purge(&mut self, key: &u64) { + if key == &self.min && self.count > 0 { + let start = self.min + 1; // min just got removed + for key in start..self.max { + if self.contains_assume_in_range(&key) { + self.min = key; + break; + } + } + } + } + + fn contains_assume_in_range(&self, key: &u64) -> bool { + // the result may be aliased. Caller is responsible for determining key is in range. + let address = self.get_address(key); + self.bits.get(address) + } + + pub fn contains(&self, key: &u64) -> bool { + let result = self.contains_assume_in_range(key); + // A contains call outside min and max is allowed. The answer will be false. + // Only need to do range check if we found true. + result && (key >= &self.min && key < &self.max) + } + + pub fn len(&self) -> usize { + self.count + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn clear(&mut self) { + let mut n = Self::new(self.max_width); + std::mem::swap(&mut n, self); + } + + pub fn get_all(&self) -> Vec { + let mut all = Vec::with_capacity(self.count); + for key in self.min..self.max { + if self.contains_assume_in_range(&key) { + all.push(key); + } + } + all + } +} + +#[derive(Debug)] pub struct RootsTracker { - roots: HashSet, + roots: RollingBitField, max_root: Slot, uncleaned_roots: HashSet, previous_uncleaned_roots: HashSet, } +impl Default for RootsTracker { + fn default() -> Self { + // we expect to keep a rolling set of 400k slots around at a time + // 2M gives us plenty of extra room to handle a width 5x what we should need. 
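+        // 5 * 400k = 2M; 2_097_152 = 2^21 is the next power of two at or above that (new() asserts a power-of-two width)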
+ // cost is 2M bits of memory + RootsTracker::new(2097152) + } +} + +impl RootsTracker { + pub fn new(max_width: u64) -> Self { + Self { + roots: RollingBitField::new(max_width), + max_root: 0, + uncleaned_roots: HashSet::new(), + previous_uncleaned_roots: HashSet::new(), + } + } +} + #[derive(Debug, Default)] pub struct AccountsIndexRootsStats { pub roots_len: usize, @@ -801,7 +939,7 @@ impl AccountsIndex { // Get the maximum root <= `max_allowed_root` from the given `slice` fn get_max_root( - roots: &HashSet, + roots: &RollingBitField, slice: SlotSlice, max_allowed_root: Option, ) -> Slot { @@ -1122,13 +1260,8 @@ impl AccountsIndex { } pub fn all_roots(&self) -> Vec { - self.roots_tracker - .read() - .unwrap() - .roots - .iter() - .cloned() - .collect() + let tracker = self.roots_tracker.read().unwrap(); + tracker.roots.get_all() } #[cfg(test)] @@ -1158,6 +1291,7 @@ impl AccountsIndex { #[cfg(test)] pub mod tests { use super::*; + use log::*; use solana_sdk::signature::{Keypair, Signer}; pub enum SecondaryIndexTypes<'a> { @@ -1201,6 +1335,401 @@ pub mod tests { ) } + #[test] + fn test_bitfield_permutations() { + solana_logger::setup(); + let mut bitfield = RollingBitField::new(2097152); + let mut hash = HashSet::new(); + + let min = 101_000; + let width = 400_000; + let dead = 19; + + let mut slot = min; + while hash.len() < width { + slot += 1; + if slot % dead == 0 { + continue; + } + hash.insert(slot); + bitfield.insert(slot); + } + compare(&hash, &bitfield); + + let max = slot + 1; + + let mut time = Measure::start(""); + let mut count = 0; + for slot in (min - 10)..max + 100 { + if hash.contains(&slot) { + count += 1; + } + } + time.stop(); + + let mut time2 = Measure::start(""); + let mut count2 = 0; + for slot in (min - 10)..max + 100 { + if bitfield.contains(&slot) { + count2 += 1; + } + } + time2.stop(); + info!( + "{}ms, {}ms, {} ratio", + time.as_ms(), + time2.as_ms(), + time.as_ns() / time2.as_ns() + ); + assert_eq!(count, count2); + } + + #[test] + #[should_panic(expected = "assertion failed: max_width.is_power_of_two()")] + fn test_bitfield_power_2() { + let _ = RollingBitField::new(3); + } + + #[test] + #[should_panic(expected = "assertion failed: max_width > 0")] + fn test_bitfield_0() { + let _ = RollingBitField::new(0); + } + + fn setup_wide(width: u64, start: u64) -> (RollingBitField, HashSet) { + let mut bitfield = RollingBitField::new(width); + let mut hash = HashSet::new(); + + compare(&hash, &bitfield); + + let mut slot = start; + bitfield.insert(slot); + hash.insert(slot); + + compare(&hash, &bitfield); + + slot += 1; + bitfield.insert(slot); + hash.insert(slot); + + compare(&hash, &bitfield); + (bitfield, hash) + } + + #[test] + #[should_panic(expected = "out of range")] + fn test_bitfield_insert_wide() { + solana_logger::setup(); + let width = 16; + let start = 0; + let (mut bitfield, _hash) = setup_wide(width, start); + + let slot = start + width; + // assert here -- higher than max range + bitfield.insert(slot); + } + + #[test] + #[should_panic(expected = "out of range")] + fn test_bitfield_insert_wide_before() { + solana_logger::setup(); + let width = 16; + let start = 100; + let (mut bitfield, _hash) = setup_wide(width, start); + + let slot = start + 1 - width; + // assert here - would make min too low, causing too wide of a range + bitfield.insert(slot); + } + + #[test] + fn test_bitfield_insert_wide_before_ok() { + solana_logger::setup(); + let width = 16; + let start = 100; + let (mut bitfield, _hash) = setup_wide(width, start); + + let slot = start + 2 
- width; // this item would make our width exactly equal to what is allowed + bitfield.insert(slot); + assert!(bitfield.contains(&slot)); + } + + #[test] + fn test_bitfield_contains_wide_no_assert() { + { + let width = 16; + let start = 0; + let (bitfield, _hash) = setup_wide(width, start); + + let mut slot = width; + assert!(!bitfield.contains(&slot)); + slot += 1; + assert!(!bitfield.contains(&slot)); + } + { + let width = 16; + let start = 100; + let (bitfield, _hash) = setup_wide(width, start); + + // too large + let mut slot = width; + assert!(!bitfield.contains(&slot)); + slot += 1; + assert!(!bitfield.contains(&slot)); + // too small, before min + slot = 0; + assert!(!bitfield.contains(&slot)); + } + } + + #[test] + #[should_panic(expected = "out of range")] + fn test_bitfield_remove_wide() { + let width = 16; + let start = 0; + let (mut bitfield, _hash) = setup_wide(width, start); + let slot = width; + // not set anyway, so no need to assert + bitfield.remove(&slot); + } + + #[test] + #[should_panic(expected = "out of range")] + fn test_bitfield_remove_wide_before() { + let width = 16; + let start = 100; + let (mut bitfield, _hash) = setup_wide(width, start); + let slot = start + 1 - width; + bitfield.remove(&slot); + } + + fn compare(hashset: &HashSet, bitfield: &RollingBitField) { + assert_eq!(hashset.len(), bitfield.len()); + assert_eq!(hashset.is_empty(), bitfield.is_empty()); + for item in bitfield.get_all() { + assert!(hashset.contains(&item)); + } + } + + #[test] + fn test_bitfield_functionality() { + solana_logger::setup(); + + // bitfield sizes are powers of 2, cycle through values of 1, 2, 4, .. 2^9 + for power in 0..10 { + let max_bitfield_width = 2u64.pow(power) as u64; + let width_iteration_max = if max_bitfield_width > 1 { + // add up to 2 items so we can test out multiple items + 3 + } else { + // 0 or 1 items is all we can fit with a width of 1 item + 2 + }; + for width in 0..width_iteration_max { + let mut bitfield = RollingBitField::new(max_bitfield_width); + let mut hash = HashSet::new(); + + let min = 101_000; + let dead = 19; + + compare(&hash, &bitfield); + + let mut slot = min; + while hash.len() < width { + slot += 1; + if max_bitfield_width > 2 && slot % dead == 0 { + // with max_bitfield_width of 1 and 2, there is no room for dead slots + continue; + } + hash.insert(slot); + bitfield.insert(slot); + } + let max = slot + 1; + + compare(&hash, &bitfield); + + for slot in (min - 10)..max + 100 { + assert_eq!(bitfield.contains(&slot), hash.contains(&slot)); + } + + let all = bitfield.get_all(); + + if width > 0 { + hash.remove(&slot); + bitfield.remove(&slot); + } + + compare(&hash, &bitfield); + + // remove the rest, including a call that removes slot again + for item in all.iter() { + hash.remove(&item); + bitfield.remove(&item); + compare(&hash, &bitfield); + } + + let min = max + ((width * 2) as u64) + 3; + let slot = min; // several widths past previous min + let max = slot + 1; + hash.insert(slot); + bitfield.insert(slot); + compare(&hash, &bitfield); + + assert!(hash.contains(&slot)); + + for slot in (min - 10)..max + 100 { + assert_eq!(bitfield.contains(&slot), hash.contains(&slot)); + } + } + } + } + + fn bitfield_insert_and_test(bitfield: &mut RollingBitField, slot: Slot) { + let len = bitfield.len(); + let old_all = bitfield.get_all(); + let (new_min, new_max) = if bitfield.is_empty() { + (slot, slot + 1) + } else { + ( + std::cmp::min(bitfield.min, slot), + std::cmp::max(bitfield.max, slot + 1), + ) + }; + bitfield.insert(slot); + 
assert_eq!(bitfield.min, new_min); + assert_eq!(bitfield.max, new_max); + assert_eq!(bitfield.len(), len + 1); + assert!(!bitfield.is_empty()); + assert!(bitfield.contains(&slot)); + // verify aliasing is what we expect + assert!(bitfield.contains_assume_in_range(&(slot + bitfield.max_width))); + let get_all = bitfield.get_all(); + old_all + .into_iter() + .for_each(|slot| assert!(get_all.contains(&slot))); + assert!(get_all.contains(&slot)); + assert!(get_all.len() == len + 1); + } + + #[test] + fn test_bitfield_clear() { + let mut bitfield = RollingBitField::new(4); + assert_eq!(bitfield.len(), 0); + assert!(bitfield.is_empty()); + bitfield_insert_and_test(&mut bitfield, 0); + bitfield.clear(); + assert_eq!(bitfield.len(), 0); + assert!(bitfield.is_empty()); + assert!(bitfield.get_all().is_empty()); + bitfield_insert_and_test(&mut bitfield, 1); + bitfield.clear(); + assert_eq!(bitfield.len(), 0); + assert!(bitfield.is_empty()); + assert!(bitfield.get_all().is_empty()); + bitfield_insert_and_test(&mut bitfield, 4); + } + + #[test] + fn test_bitfield_wrapping() { + let mut bitfield = RollingBitField::new(4); + assert_eq!(bitfield.len(), 0); + assert!(bitfield.is_empty()); + bitfield_insert_and_test(&mut bitfield, 0); + assert_eq!(bitfield.get_all(), vec![0]); + bitfield_insert_and_test(&mut bitfield, 2); + assert_eq!(bitfield.get_all(), vec![0, 2]); + bitfield_insert_and_test(&mut bitfield, 3); + bitfield.insert(3); // redundant insert + assert_eq!(bitfield.get_all(), vec![0, 2, 3]); + bitfield.remove(&0); + assert_eq!(bitfield.min, 2); + assert_eq!(bitfield.max, 4); + assert_eq!(bitfield.len(), 2); + bitfield.remove(&0); // redundant remove + assert_eq!(bitfield.len(), 2); + assert_eq!(bitfield.get_all(), vec![2, 3]); + bitfield.insert(4); // wrapped around value - same bit as '0' + assert_eq!(bitfield.min, 2); + assert_eq!(bitfield.max, 5); + assert_eq!(bitfield.len(), 3); + assert_eq!(bitfield.get_all(), vec![2, 3, 4]); + bitfield.remove(&2); + assert_eq!(bitfield.min, 3); + assert_eq!(bitfield.max, 5); + assert_eq!(bitfield.len(), 2); + assert_eq!(bitfield.get_all(), vec![3, 4]); + bitfield.remove(&3); + assert_eq!(bitfield.min, 4); + assert_eq!(bitfield.max, 5); + assert_eq!(bitfield.len(), 1); + assert_eq!(bitfield.get_all(), vec![4]); + bitfield.remove(&4); + assert_eq!(bitfield.len(), 0); + assert!(bitfield.is_empty()); + assert!(bitfield.get_all().is_empty()); + bitfield_insert_and_test(&mut bitfield, 8); + bitfield.remove(&8); + assert_eq!(bitfield.len(), 0); + assert!(bitfield.is_empty()); + assert!(bitfield.get_all().is_empty()); + bitfield_insert_and_test(&mut bitfield, 9); + bitfield.remove(&9); + assert_eq!(bitfield.len(), 0); + assert!(bitfield.is_empty()); + assert!(bitfield.get_all().is_empty()); + } + + #[test] + fn test_bitfield_smaller() { + // smaller bitfield, fewer entries, including 0 + solana_logger::setup(); + + for width in 0..34 { + let mut bitfield = RollingBitField::new(4096); + let mut hash = HashSet::new(); + + let min = 1_010_000; + let dead = 19; + + let mut slot = min; + while hash.len() < width { + slot += 1; + if slot % dead == 0 { + continue; + } + hash.insert(slot); + bitfield.insert(slot); + } + + let max = slot + 1; + + let mut time = Measure::start(""); + let mut count = 0; + for slot in (min - 10)..max + 100 { + if hash.contains(&slot) { + count += 1; + } + } + time.stop(); + + let mut time2 = Measure::start(""); + let mut count2 = 0; + for slot in (min - 10)..max + 100 { + if bitfield.contains(&slot) { + count2 += 1; + } + } + time2.stop(); + 
info!( + "{}, {}, {}", + time.as_ms(), + time2.as_ms(), + time.as_ns() / time2.as_ns() + ); + assert_eq!(count, count2); + } + } + #[test] fn test_get_empty() { let key = Keypair::new();