421 lines
13 KiB
Rust
421 lines
13 KiB
Rust
use {
|
|
crate::{bucket_stats::BucketStats, MaxSearch},
|
|
memmap2::MmapMut,
|
|
rand::{thread_rng, Rng},
|
|
solana_measure::measure::Measure,
|
|
std::{
|
|
fs::{remove_file, OpenOptions},
|
|
io::{Seek, SeekFrom, Write},
|
|
path::PathBuf,
|
|
sync::{
|
|
atomic::{AtomicU64, Ordering},
|
|
Arc,
|
|
},
|
|
},
|
|
};
|
|
|
|
/*
capacity_pow2 -> number of cells (2^capacity_pow2)
 1           2
 2           4
 3           8
 4          16
 5          32
 6          64
 7         128
 8         256
 9         512
10       1,024
11       2,048
12       4,096
13       8,192
14      16,384
...
23   8,388,608
24  16,777,216
*/
|
|
/// Default capacity exponent: storage created with `BucketStorage::new` holds
/// 2^5 = 32 cells.
pub const DEFAULT_CAPACITY_POW2: u8 = 5;
|
|
|
|
/// Identifier stored in a cell's header lock word to mark the cell as locked.
pub(crate) type Uid = u64;

/// A Header UID of 0 indicates that the header is unlocked
const UID_UNLOCKED: Uid = 0;

/// Header placed at the start of every cell in the mmapped file.
///
/// `lock` holds the `Uid` that locked the cell, or `UID_UNLOCKED` when the cell is free.
#[repr(C)]
struct Header {
    lock: Uid,
}

impl Header {
    /// try to lock this entry with 'uid'
    /// return true if it could be locked
    #[must_use]
    fn try_lock(&mut self, uid: Uid) -> bool {
        if self.is_unlocked() {
            self.lock = uid;
            true
        } else {
            false
        }
    }

    /// mark this entry as unlocked
    ///
    /// Panics if the entry is not currently locked by 'expected'.
    fn unlock(&mut self, expected: Uid) {
        assert_eq!(expected, self.lock);
        self.lock = UID_UNLOCKED;
    }

    /// uid that has locked this entry or None if unlocked
    fn uid(&self) -> Option<Uid> {
        if self.is_unlocked() {
            None
        } else {
            Some(self.lock)
        }
    }

    /// true if this entry is unlocked
    fn is_unlocked(&self) -> bool {
        self.lock == UID_UNLOCKED
    }
}
|
|
|
|
pub struct BucketStorage {
|
|
path: PathBuf,
|
|
mmap: MmapMut,
|
|
pub cell_size: u64,
|
|
pub capacity_pow2: u8,
|
|
pub count: Arc<AtomicU64>,
|
|
pub stats: Arc<BucketStats>,
|
|
pub max_search: MaxSearch,
|
|
}
|
|
|
|
/// Errors returned by `BucketStorage` operations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum BucketStorageError {
    /// The requested cell is already locked, so it cannot be allocated again.
    AlreadyAllocated,
}

impl std::fmt::Display for BucketStorageError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::AlreadyAllocated => f.write_str("bucket storage cell is already allocated"),
        }
    }
}

// Lets callers propagate this error with `?` into `Box<dyn Error>` and similar.
impl std::error::Error for BucketStorageError {}
|
|
|
|
impl Drop for BucketStorage {
|
|
fn drop(&mut self) {
|
|
let _ = remove_file(&self.path);
|
|
}
|
|
}
|
|
|
|
impl BucketStorage {
|
|
pub fn new_with_capacity(
|
|
drives: Arc<Vec<PathBuf>>,
|
|
num_elems: u64,
|
|
elem_size: u64,
|
|
capacity_pow2: u8,
|
|
max_search: MaxSearch,
|
|
stats: Arc<BucketStats>,
|
|
count: Arc<AtomicU64>,
|
|
) -> Self {
|
|
let cell_size = elem_size * num_elems + std::mem::size_of::<Header>() as u64;
|
|
let (mmap, path) = Self::new_map(&drives, cell_size as usize, capacity_pow2, &stats);
|
|
Self {
|
|
path,
|
|
mmap,
|
|
cell_size,
|
|
count,
|
|
capacity_pow2,
|
|
stats,
|
|
max_search,
|
|
}
|
|
}
|
|
|
|
pub fn max_search(&self) -> u64 {
|
|
self.max_search as u64
|
|
}
|
|
|
|
pub fn new(
|
|
drives: Arc<Vec<PathBuf>>,
|
|
num_elems: u64,
|
|
elem_size: u64,
|
|
max_search: MaxSearch,
|
|
stats: Arc<BucketStats>,
|
|
count: Arc<AtomicU64>,
|
|
) -> Self {
|
|
Self::new_with_capacity(
|
|
drives,
|
|
num_elems,
|
|
elem_size,
|
|
DEFAULT_CAPACITY_POW2,
|
|
max_search,
|
|
stats,
|
|
count,
|
|
)
|
|
}
|
|
|
|
/// return ref to header of item 'ix' in mmapped file
|
|
fn header_ptr(&self, ix: u64) -> &Header {
|
|
let ix = (ix * self.cell_size) as usize;
|
|
let hdr_slice: &[u8] = &self.mmap[ix..ix + std::mem::size_of::<Header>()];
|
|
unsafe {
|
|
let hdr = hdr_slice.as_ptr() as *const Header;
|
|
hdr.as_ref().unwrap()
|
|
}
|
|
}
|
|
|
|
/// return ref to header of item 'ix' in mmapped file
|
|
#[allow(clippy::mut_from_ref)]
|
|
fn header_mut_ptr(&self, ix: u64) -> &mut Header {
|
|
let ix = (ix * self.cell_size) as usize;
|
|
let hdr_slice: &[u8] = &self.mmap[ix..ix + std::mem::size_of::<Header>()];
|
|
unsafe {
|
|
let hdr = hdr_slice.as_ptr() as *mut Header;
|
|
hdr.as_mut().unwrap()
|
|
}
|
|
}
|
|
|
|
/// return uid allocated at index 'ix' or None if vacant
|
|
pub fn uid(&self, ix: u64) -> Option<Uid> {
|
|
assert!(ix < self.capacity(), "bad index size");
|
|
self.header_ptr(ix).uid()
|
|
}
|
|
|
|
/// true if the entry at index 'ix' is free (as opposed to being allocated)
|
|
pub fn is_free(&self, ix: u64) -> bool {
|
|
// note that the terminology in the implementation is locked or unlocked.
|
|
// but our api is allocate/free
|
|
self.header_ptr(ix).is_unlocked()
|
|
}
|
|
|
|
/// caller knows id is not empty
|
|
pub fn uid_unchecked(&self, ix: u64) -> Uid {
|
|
self.uid(ix).unwrap()
|
|
}
|
|
|
|
/// 'is_resizing' true if caller is resizing the index (so don't increment count)
|
|
/// 'is_resizing' false if caller is adding an item to the index (so increment count)
|
|
pub fn allocate(&self, ix: u64, uid: Uid, is_resizing: bool) -> Result<(), BucketStorageError> {
|
|
assert!(ix < self.capacity(), "allocate: bad index size");
|
|
assert!(UID_UNLOCKED != uid, "allocate: bad uid");
|
|
let mut e = Err(BucketStorageError::AlreadyAllocated);
|
|
//debug!("ALLOC {} {}", ix, uid);
|
|
if self.header_mut_ptr(ix).try_lock(uid) {
|
|
e = Ok(());
|
|
if !is_resizing {
|
|
self.count.fetch_add(1, Ordering::Relaxed);
|
|
}
|
|
}
|
|
e
|
|
}
|
|
|
|
/// Unlock the cell at index 'ix', which must currently be locked by 'uid', and decrement
/// the shared count.
///
/// Panics if 'ix' is out of range, 'uid' is the unlocked marker, or the cell is not locked
/// by 'uid'.
pub fn free(&mut self, ix: u64, uid: Uid) {
    assert!(ix < self.capacity(), "bad index size");
    assert!(UID_UNLOCKED != uid, "free: bad uid");
    let header = self.header_mut_ptr(ix);
    header.unlock(uid);
    self.count.fetch_sub(1, Ordering::Relaxed);
}
|
|
|
|
pub fn get<T: Sized>(&self, ix: u64) -> &T {
|
|
assert!(ix < self.capacity(), "bad index size");
|
|
let start = (ix * self.cell_size) as usize + std::mem::size_of::<Header>();
|
|
let end = start + std::mem::size_of::<T>();
|
|
let item_slice: &[u8] = &self.mmap[start..end];
|
|
unsafe {
|
|
let item = item_slice.as_ptr() as *const T;
|
|
&*item
|
|
}
|
|
}
|
|
|
|
pub fn get_empty_cell_slice<T: Sized>(&self) -> &[T] {
|
|
let len = 0;
|
|
let item_slice: &[u8] = &self.mmap[0..0];
|
|
unsafe {
|
|
let item = item_slice.as_ptr() as *const T;
|
|
std::slice::from_raw_parts(item, len as usize)
|
|
}
|
|
}
|
|
|
|
pub fn get_cell_slice<T: Sized>(&self, ix: u64, len: u64) -> &[T] {
|
|
assert!(ix < self.capacity(), "bad index size");
|
|
let ix = self.cell_size * ix;
|
|
let start = ix as usize + std::mem::size_of::<Header>();
|
|
let end = start + std::mem::size_of::<T>() * len as usize;
|
|
//debug!("GET slice {} {}", start, end);
|
|
let item_slice: &[u8] = &self.mmap[start..end];
|
|
unsafe {
|
|
let item = item_slice.as_ptr() as *const T;
|
|
std::slice::from_raw_parts(item, len as usize)
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::mut_from_ref)]
|
|
pub fn get_mut<T: Sized>(&self, ix: u64) -> &mut T {
|
|
assert!(ix < self.capacity(), "bad index size");
|
|
let start = (ix * self.cell_size) as usize + std::mem::size_of::<Header>();
|
|
let end = start + std::mem::size_of::<T>();
|
|
let item_slice: &[u8] = &self.mmap[start..end];
|
|
unsafe {
|
|
let item = item_slice.as_ptr() as *mut T;
|
|
&mut *item
|
|
}
|
|
}
|
|
|
|
#[allow(clippy::mut_from_ref)]
|
|
pub fn get_mut_cell_slice<T: Sized>(&self, ix: u64, len: u64) -> &mut [T] {
|
|
assert!(ix < self.capacity(), "bad index size");
|
|
let ix = self.cell_size * ix;
|
|
let start = ix as usize + std::mem::size_of::<Header>();
|
|
let end = start + std::mem::size_of::<T>() * len as usize;
|
|
//debug!("GET mut slice {} {}", start, end);
|
|
let item_slice: &[u8] = &self.mmap[start..end];
|
|
unsafe {
|
|
let item = item_slice.as_ptr() as *mut T;
|
|
std::slice::from_raw_parts_mut(item, len as usize)
|
|
}
|
|
}
|
|
|
|
fn new_map(
|
|
drives: &[PathBuf],
|
|
cell_size: usize,
|
|
capacity_pow2: u8,
|
|
stats: &BucketStats,
|
|
) -> (MmapMut, PathBuf) {
|
|
let mut measure_new_file = Measure::start("measure_new_file");
|
|
let capacity = 1u64 << capacity_pow2;
|
|
let r = thread_rng().gen_range(0, drives.len());
|
|
let drive = &drives[r];
|
|
let pos = format!("{}", thread_rng().gen_range(0, u128::MAX),);
|
|
let file = drive.join(pos);
|
|
let mut data = OpenOptions::new()
|
|
.read(true)
|
|
.write(true)
|
|
.create(true)
|
|
.open(file.clone())
|
|
.map_err(|e| {
|
|
panic!(
|
|
"Unable to create data file {} in current dir({:?}): {:?}",
|
|
file.display(),
|
|
std::env::current_dir(),
|
|
e
|
|
);
|
|
})
|
|
.unwrap();
|
|
|
|
// Theoretical performance optimization: write a zero to the end of
|
|
// the file so that we won't have to resize it later, which may be
|
|
// expensive.
|
|
//debug!("GROWING file {}", capacity * cell_size as u64);
|
|
data.seek(SeekFrom::Start(capacity * cell_size as u64 - 1))
|
|
.unwrap();
|
|
data.write_all(&[0]).unwrap();
|
|
data.seek(SeekFrom::Start(0)).unwrap();
|
|
measure_new_file.stop();
|
|
let mut measure_flush = Measure::start("measure_flush");
|
|
data.flush().unwrap(); // can we skip this?
|
|
measure_flush.stop();
|
|
let mut measure_mmap = Measure::start("measure_mmap");
|
|
let res = (unsafe { MmapMut::map_mut(&data).unwrap() }, file);
|
|
measure_mmap.stop();
|
|
stats
|
|
.new_file_us
|
|
.fetch_add(measure_new_file.as_us(), Ordering::Relaxed);
|
|
stats
|
|
.flush_file_us
|
|
.fetch_add(measure_flush.as_us(), Ordering::Relaxed);
|
|
stats
|
|
.mmap_us
|
|
.fetch_add(measure_mmap.as_us(), Ordering::Relaxed);
|
|
res
|
|
}
|
|
|
|
/// copy contents from 'old_bucket' to 'self'
|
|
fn copy_contents(&mut self, old_bucket: &Self) {
|
|
let mut m = Measure::start("grow");
|
|
let old_cap = old_bucket.capacity();
|
|
let old_map = &old_bucket.mmap;
|
|
|
|
let increment = self.capacity_pow2 - old_bucket.capacity_pow2;
|
|
let index_grow = 1 << increment;
|
|
(0..old_cap as usize).into_iter().for_each(|i| {
|
|
let old_ix = i * old_bucket.cell_size as usize;
|
|
let new_ix = old_ix * index_grow;
|
|
let dst_slice: &[u8] = &self.mmap[new_ix..new_ix + old_bucket.cell_size as usize];
|
|
let src_slice: &[u8] = &old_map[old_ix..old_ix + old_bucket.cell_size as usize];
|
|
|
|
unsafe {
|
|
let dst = dst_slice.as_ptr() as *mut u8;
|
|
let src = src_slice.as_ptr() as *const u8;
|
|
std::ptr::copy_nonoverlapping(src, dst, old_bucket.cell_size as usize);
|
|
};
|
|
});
|
|
m.stop();
|
|
self.stats.resizes.fetch_add(1, Ordering::Relaxed);
|
|
self.stats.resize_us.fetch_add(m.as_us(), Ordering::Relaxed);
|
|
}
|
|
|
|
/// allocate a new bucket, copying data from 'bucket'
|
|
pub fn new_resized(
|
|
drives: &Arc<Vec<PathBuf>>,
|
|
max_search: MaxSearch,
|
|
bucket: Option<&Self>,
|
|
capacity_pow_2: u8,
|
|
num_elems: u64,
|
|
elem_size: u64,
|
|
stats: &Arc<BucketStats>,
|
|
) -> Self {
|
|
let mut new_bucket = Self::new_with_capacity(
|
|
Arc::clone(drives),
|
|
num_elems,
|
|
elem_size,
|
|
capacity_pow_2,
|
|
max_search,
|
|
Arc::clone(stats),
|
|
bucket
|
|
.map(|bucket| Arc::clone(&bucket.count))
|
|
.unwrap_or_default(),
|
|
);
|
|
if let Some(bucket) = bucket {
|
|
new_bucket.copy_contents(bucket);
|
|
}
|
|
let sz = new_bucket.capacity();
|
|
{
|
|
let mut max = new_bucket.stats.max_size.lock().unwrap();
|
|
*max = std::cmp::max(*max, sz);
|
|
}
|
|
new_bucket
|
|
}
|
|
|
|
/// Return the number of cells currently allocated
|
|
pub fn capacity(&self) -> u64 {
|
|
1 << self.capacity_pow2
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod test {
    use super::*;

    /// Runs the allocate / free cycle on cell 0 with two different uids.
    #[test]
    fn test_bucket_storage() {
        let tmpdir1 = std::env::temp_dir().join("bucket_map_test_mt");
        let mut paths: Vec<PathBuf> = Vec::new();
        if std::fs::create_dir_all(&tmpdir1).is_ok() {
            paths.push(tmpdir1);
        }
        assert!(!paths.is_empty());

        let mut storage =
            BucketStorage::new(Arc::new(paths), 1, 1, 1, Arc::default(), Arc::default());
        let ix = 0;
        for &uid in [Uid::MAX, 1].iter() {
            assert!(storage.is_free(ix));
            assert!(storage.allocate(ix, uid, false).is_ok());
            // a second allocation of the same cell must be rejected
            assert!(storage.allocate(ix, uid, false).is_err());
            assert!(!storage.is_free(ix));
            assert_eq!(storage.uid(ix), Some(uid));
            assert_eq!(storage.uid_unchecked(ix), uid);
            storage.free(ix, uid);
            assert!(storage.is_free(ix));
            assert_eq!(storage.uid(ix), None);
        }
    }
}
|