2021-09-18 07:54:00 -07:00
|
|
|
use crate::accounts_index::{AccountsIndexConfig, IndexValue};
|
2021-09-12 15:14:59 -07:00
|
|
|
use crate::bucket_map_holder_stats::BucketMapHolderStats;
|
2021-09-22 10:40:30 -07:00
|
|
|
use crate::in_mem_accounts_index::{InMemAccountsIndex, SlotT};
|
2021-09-17 08:41:30 -07:00
|
|
|
use crate::waitable_condvar::WaitableCondvar;
|
2021-09-20 06:40:10 -07:00
|
|
|
use solana_bucket_map::bucket_map::{BucketMap, BucketMapConfig};
|
2021-09-23 11:37:14 -07:00
|
|
|
use solana_measure::measure::Measure;
|
2021-09-20 12:29:34 -07:00
|
|
|
use solana_sdk::clock::SLOT_MS;
|
|
|
|
use solana_sdk::timing::AtomicInterval;
|
2021-09-12 15:14:59 -07:00
|
|
|
use std::fmt::Debug;
|
2021-09-18 10:55:57 -07:00
|
|
|
use std::sync::atomic::{AtomicBool, AtomicU8, AtomicUsize, Ordering};
|
2021-09-22 10:40:30 -07:00
|
|
|
use std::sync::{Arc, Mutex};
|
|
|
|
use std::time::Duration;
|
2021-09-17 13:11:07 -07:00
|
|
|
/// Age counter type for the in-mem index; a `u8`, so ages run 0..=255 and then wrap back to 0.
pub type Age = u8;
|
2021-09-12 15:14:59 -07:00
|
|
|
|
2021-09-22 07:39:49 -07:00
|
|
|
/// Length of one age in milliseconds, set equal to `SLOT_MS`.
pub const AGE_MS: u64 = SLOT_MS; // match one age per slot time
|
2021-09-20 12:29:34 -07:00
|
|
|
|
2021-09-14 15:51:07 -07:00
|
|
|
pub struct BucketMapHolder<T: IndexValue> {
|
2021-09-20 06:40:10 -07:00
|
|
|
pub disk: Option<BucketMap<SlotT<T>>>,
|
|
|
|
|
2021-09-17 13:11:07 -07:00
|
|
|
pub count_ages_flushed: AtomicUsize,
|
|
|
|
pub age: AtomicU8,
|
2021-09-12 15:14:59 -07:00
|
|
|
pub stats: BucketMapHolderStats,
|
2021-09-17 08:41:30 -07:00
|
|
|
|
2021-09-20 12:29:34 -07:00
|
|
|
age_timer: AtomicInterval,
|
|
|
|
|
2021-09-17 08:41:30 -07:00
|
|
|
// used by bg processing to know when any bucket has become dirty
|
2021-09-20 07:58:20 -07:00
|
|
|
pub wait_dirty_or_aged: WaitableCondvar,
|
2021-09-17 08:41:30 -07:00
|
|
|
next_bucket_to_flush: Mutex<usize>,
|
|
|
|
bins: usize,
|
2021-09-18 10:55:57 -07:00
|
|
|
|
2021-09-23 17:56:44 -07:00
|
|
|
_threads: usize,
|
|
|
|
|
2021-09-19 16:00:15 -07:00
|
|
|
// how much mb are we allowed to keep in the in-mem index?
|
|
|
|
// Rest goes to disk.
|
|
|
|
pub mem_budget_mb: Option<usize>,
|
2021-09-19 18:22:09 -07:00
|
|
|
ages_to_stay_in_cache: Age,
|
2021-09-19 16:00:15 -07:00
|
|
|
|
2021-09-18 10:55:57 -07:00
|
|
|
/// startup is a special time for flush to focus on moving everything to disk as fast and efficiently as possible
|
|
|
|
/// with less thread count limitations. LRU and access patterns are not important. Freeing memory
|
|
|
|
/// and writing to disk in parallel are.
|
|
|
|
/// Note startup is an optimization and is not required for correctness.
|
|
|
|
startup: AtomicBool,
|
2021-09-12 15:14:59 -07:00
|
|
|
}
|
|
|
|
|
2021-09-15 07:54:16 -07:00
|
|
|
/// `Debug` for `BucketMapHolder` writes nothing to the formatter and always reports success,
/// so `{:?}` of a holder produces an empty string.
impl<T: IndexValue> Debug for BucketMapHolder<T> {
    fn fmt(&self, _formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // no fields are printed
        Ok(())
    }
}
|
|
|
|
|
2021-09-17 08:41:30 -07:00
|
|
|
#[allow(clippy::mutex_atomic)]
|
2021-09-14 15:51:07 -07:00
|
|
|
impl<T: IndexValue> BucketMapHolder<T> {
|
2021-09-17 13:11:07 -07:00
|
|
|
pub fn increment_age(&self) {
|
2021-09-21 08:52:39 -07:00
|
|
|
// since we are about to change age, there are now 0 buckets that have been flushed at this age
|
|
|
|
// this should happen before the age.fetch_add
|
|
|
|
let previous = self.count_ages_flushed.swap(0, Ordering::Acquire);
|
2021-09-17 13:11:07 -07:00
|
|
|
// fetch_add is defined to wrap.
|
|
|
|
// That's what we want. 0..255, then back to 0.
|
2021-09-21 08:52:39 -07:00
|
|
|
self.age.fetch_add(1, Ordering::Release);
|
2021-09-17 13:11:07 -07:00
|
|
|
assert!(previous >= self.bins); // we should not have increased age before previous age was fully flushed
|
2021-09-20 07:58:20 -07:00
|
|
|
self.wait_dirty_or_aged.notify_all(); // notify all because we can age scan in parallel
|
2021-09-17 13:11:07 -07:00
|
|
|
}
|
|
|
|
|
2021-09-19 18:22:09 -07:00
|
|
|
pub fn future_age_to_flush(&self) -> Age {
|
|
|
|
self.current_age().wrapping_add(self.ages_to_stay_in_cache)
|
|
|
|
}
|
2021-09-20 12:29:34 -07:00
|
|
|
|
|
|
|
fn has_age_interval_elapsed(&self) -> bool {
|
|
|
|
// note that when this returns true, state of age_timer is modified
|
|
|
|
self.age_timer.should_update(AGE_MS)
|
|
|
|
}
|
|
|
|
|
2021-09-18 10:55:57 -07:00
|
|
|
/// used by bg processes to determine # active threads and how aggressively to flush
|
|
|
|
pub fn get_startup(&self) -> bool {
|
|
|
|
self.startup.load(Ordering::Relaxed)
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn set_startup(&self, value: bool) {
|
2021-09-18 20:08:58 -07:00
|
|
|
if !value {
|
|
|
|
self.wait_for_idle();
|
|
|
|
}
|
2021-09-18 10:55:57 -07:00
|
|
|
self.startup.store(value, Ordering::Relaxed)
|
|
|
|
}
|
|
|
|
|
2021-09-18 20:08:58 -07:00
|
|
|
pub(crate) fn wait_for_idle(&self) {
|
|
|
|
assert!(self.get_startup());
|
|
|
|
}
|
|
|
|
|
2021-09-17 13:11:07 -07:00
|
|
|
pub fn current_age(&self) -> Age {
|
2021-09-21 08:52:39 -07:00
|
|
|
self.age.load(Ordering::Acquire)
|
2021-09-17 13:11:07 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn bucket_flushed_at_current_age(&self) {
|
2021-09-21 08:52:39 -07:00
|
|
|
self.count_ages_flushed.fetch_add(1, Ordering::Release);
|
|
|
|
self.maybe_advance_age();
|
2021-09-17 13:11:07 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// have all buckets been flushed at the current age?
|
|
|
|
pub fn all_buckets_flushed_at_current_age(&self) -> bool {
|
2021-09-21 06:40:55 -07:00
|
|
|
self.count_ages_flushed() >= self.bins
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn count_ages_flushed(&self) -> usize {
|
2021-09-21 08:52:39 -07:00
|
|
|
self.count_ages_flushed.load(Ordering::Acquire)
|
2021-09-17 13:11:07 -07:00
|
|
|
}
|
|
|
|
|
2021-09-20 12:29:34 -07:00
|
|
|
pub fn maybe_advance_age(&self) -> bool {
|
|
|
|
// check has_age_interval_elapsed last as calling it modifies state on success
|
|
|
|
if self.all_buckets_flushed_at_current_age() && self.has_age_interval_elapsed() {
|
|
|
|
self.increment_age();
|
|
|
|
true
|
|
|
|
} else {
|
|
|
|
false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-09-23 17:56:44 -07:00
|
|
|
pub fn new(bins: usize, config: &Option<AccountsIndexConfig>, threads: usize) -> Self {
|
2021-09-19 18:22:09 -07:00
|
|
|
const DEFAULT_AGE_TO_STAY_IN_CACHE: Age = 5;
|
|
|
|
let ages_to_stay_in_cache = config
|
|
|
|
.as_ref()
|
|
|
|
.and_then(|config| config.ages_to_stay_in_cache)
|
|
|
|
.unwrap_or(DEFAULT_AGE_TO_STAY_IN_CACHE);
|
2021-09-20 06:40:10 -07:00
|
|
|
|
|
|
|
let mut bucket_config = BucketMapConfig::new(bins);
|
|
|
|
bucket_config.drives = config.as_ref().and_then(|config| config.drives.clone());
|
|
|
|
let mem_budget_mb = config.as_ref().and_then(|config| config.index_limit_mb);
|
|
|
|
// only allocate if mem_budget_mb is Some
|
|
|
|
let disk = mem_budget_mb.map(|_| BucketMap::new(bucket_config));
|
2021-09-15 11:07:53 -07:00
|
|
|
Self {
|
2021-09-20 06:40:10 -07:00
|
|
|
disk,
|
2021-09-19 18:22:09 -07:00
|
|
|
ages_to_stay_in_cache,
|
2021-09-17 13:11:07 -07:00
|
|
|
count_ages_flushed: AtomicUsize::default(),
|
|
|
|
age: AtomicU8::default(),
|
2021-09-22 06:55:07 -07:00
|
|
|
stats: BucketMapHolderStats::new(bins),
|
2021-09-20 07:58:20 -07:00
|
|
|
wait_dirty_or_aged: WaitableCondvar::default(),
|
2021-09-17 08:41:30 -07:00
|
|
|
next_bucket_to_flush: Mutex::new(0),
|
2021-09-20 12:29:34 -07:00
|
|
|
age_timer: AtomicInterval::default(),
|
2021-09-17 08:41:30 -07:00
|
|
|
bins,
|
2021-09-18 10:55:57 -07:00
|
|
|
startup: AtomicBool::default(),
|
2021-09-20 06:40:10 -07:00
|
|
|
mem_budget_mb,
|
2021-09-23 17:56:44 -07:00
|
|
|
_threads: threads,
|
2021-09-15 11:07:53 -07:00
|
|
|
}
|
2021-09-12 15:14:59 -07:00
|
|
|
}
|
2021-09-17 08:41:30 -07:00
|
|
|
|
|
|
|
// get the next bucket to flush, with the idea that the previous bucket
|
|
|
|
// is perhaps being flushed by another thread already.
|
|
|
|
pub fn next_bucket_to_flush(&self) -> usize {
|
|
|
|
// could be lock-free as an optimization
|
|
|
|
// wrapping is tricky
|
|
|
|
let mut lock = self.next_bucket_to_flush.lock().unwrap();
|
|
|
|
let result = *lock;
|
|
|
|
*lock = (result + 1) % self.bins;
|
|
|
|
result
|
|
|
|
}
|
2021-09-22 10:40:30 -07:00
|
|
|
|
|
|
|
// intended to execute in a bg thread
|
|
|
|
pub fn background(&self, exit: Arc<AtomicBool>, in_mem: Vec<Arc<InMemAccountsIndex<T>>>) {
|
|
|
|
let bins = in_mem.len();
|
|
|
|
let flush = self.disk.is_some();
|
|
|
|
loop {
|
2021-09-24 11:19:06 -07:00
|
|
|
if self.all_buckets_flushed_at_current_age() {
|
2021-09-24 14:00:41 -07:00
|
|
|
let wait = std::cmp::min(
|
|
|
|
self.age_timer.remaining_until_next_interval(AGE_MS),
|
|
|
|
self.stats.remaining_until_next_interval(),
|
|
|
|
);
|
|
|
|
|
2021-09-24 11:19:06 -07:00
|
|
|
let mut m = Measure::start("wait");
|
|
|
|
self.wait_dirty_or_aged
|
2021-09-24 14:00:41 -07:00
|
|
|
.wait_timeout(Duration::from_millis(wait));
|
2021-09-24 11:19:06 -07:00
|
|
|
m.stop();
|
|
|
|
self.stats
|
|
|
|
.bg_waiting_us
|
|
|
|
.fetch_add(m.as_us(), Ordering::Relaxed);
|
|
|
|
}
|
2021-09-23 11:37:14 -07:00
|
|
|
|
2021-09-22 10:40:30 -07:00
|
|
|
if exit.load(Ordering::Relaxed) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
self.stats.active_threads.fetch_add(1, Ordering::Relaxed);
|
|
|
|
for _ in 0..bins {
|
|
|
|
if flush {
|
|
|
|
let index = self.next_bucket_to_flush();
|
|
|
|
in_mem[index].flush();
|
|
|
|
}
|
|
|
|
self.stats.report_stats(self);
|
2021-09-24 11:19:06 -07:00
|
|
|
if self.all_buckets_flushed_at_current_age() {
|
|
|
|
break;
|
|
|
|
}
|
2021-09-22 10:40:30 -07:00
|
|
|
}
|
|
|
|
self.stats.active_threads.fetch_sub(1, Ordering::Relaxed);
|
|
|
|
}
|
|
|
|
}
|
2021-09-17 08:41:30 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
pub mod tests {
    use super::*;
    use rayon::prelude::*;
    use std::sync::atomic::{AtomicUsize, Ordering};
    use std::time::Instant;

    /// Many threads calling `next_bucket_to_flush` concurrently must hit every bin
    /// the same number of times.
    #[test]
    fn test_next_bucket_to_flush() {
        solana_logger::setup();
        let bins = 4;
        let test = BucketMapHolder::<u64>::new(bins, &Some(AccountsIndexConfig::default()), 1);
        let visited = (0..bins)
            .map(|_| AtomicUsize::default())
            .collect::<Vec<_>>();
        let iterations = bins * 30;
        let threads = bins * 4;
        let expected = threads * iterations / bins;

        (0..threads).into_par_iter().for_each(|_| {
            (0..iterations).for_each(|_| {
                let bin = test.next_bucket_to_flush();
                visited[bin].fetch_add(1, Ordering::Relaxed);
            });
        });
        visited.iter().enumerate().for_each(|(bin, visited)| {
            assert_eq!(visited.load(Ordering::Relaxed), expected, "bin: {}", bin)
        });
    }

    /// `increment_age` steps the age by one each call and wraps after 255.
    #[test]
    fn test_age_increment() {
        solana_logger::setup();
        let bins = 4;
        let test = BucketMapHolder::<u64>::new(bins, &Some(AccountsIndexConfig::default()), 1);
        for age in 0..513 {
            assert_eq!(test.current_age(), (age % 256) as Age);

            // nothing has been flushed at this age yet
            assert!(!test.all_buckets_flushed_at_current_age());
            // cannot call this because based on timing, it may fire: test.bucket_flushed_at_current_age();

            // this would normally happen once time went off and all buckets had been flushed at the previous age
            test.count_ages_flushed.fetch_add(bins, Ordering::Release);
            test.increment_age();
        }
    }

    /// With a single bin that is re-flushed right after every advance, the age should
    /// advance once per `AGE_MS` of wall-clock time, even with two threads racing.
    #[test]
    fn test_age_time() {
        solana_logger::setup();
        let bins = 1;
        let test = BucketMapHolder::<u64>::new(bins, &Some(AccountsIndexConfig::default()), 1);
        let threads = 2;
        let time = AGE_MS * 5 / 2;
        let expected = (time / AGE_MS) as Age;
        let now = Instant::now();
        test.bucket_flushed_at_current_age(); // done with age 0
        (0..threads).into_par_iter().for_each(|_| {
            while now.elapsed().as_millis() < (time as u128) {
                if test.maybe_advance_age() {
                    test.bucket_flushed_at_current_age();
                }
            }
        });
        assert_eq!(test.current_age(), expected);
    }

    /// After every bin is flushed and the age interval has passed, `maybe_advance_age`
    /// moves to age 1 and the flushed count starts over.
    #[test]
    fn test_age_broad() {
        solana_logger::setup();
        let bins = 4;
        let test = BucketMapHolder::<u64>::new(bins, &Some(AccountsIndexConfig::default()), 1);
        assert_eq!(test.current_age(), 0);
        for _ in 0..bins {
            assert!(!test.all_buckets_flushed_at_current_age());
            test.bucket_flushed_at_current_age();
        }
        std::thread::sleep(std::time::Duration::from_millis(AGE_MS * 2));
        test.maybe_advance_age();
        assert_eq!(test.current_age(), 1);
        assert!(!test.all_buckets_flushed_at_current_age());
    }
}
|