2022-01-12 12:37:34 -08:00
|
|
|
#![allow(dead_code)]
|
2022-04-14 20:22:08 -07:00
|
|
|
|
2021-12-03 09:00:31 -08:00
|
|
|
use {
|
2023-03-27 11:07:02 -07:00
|
|
|
crate::{
|
|
|
|
bucket_storage::{BucketOccupied, BucketStorage},
|
|
|
|
RefCount,
|
|
|
|
},
|
2023-03-28 12:17:28 -07:00
|
|
|
bv::BitVec,
|
2022-01-12 12:37:34 -08:00
|
|
|
modular_bitfield::prelude::*,
|
2021-12-03 09:00:31 -08:00
|
|
|
solana_sdk::{clock::Slot, pubkey::Pubkey},
|
2023-03-22 06:30:44 -07:00
|
|
|
std::fmt::Debug,
|
2021-12-03 09:00:31 -08:00
|
|
|
};
|
2021-09-17 13:11:27 -07:00
|
|
|
|
2023-03-27 11:07:02 -07:00
|
|
|
/// in use/occupied
const OCCUPIED_OCCUPIED: u64 = 1;
/// free, i.e. not occupied
const OCCUPIED_FREE: u64 = 0;
|
|
|
|
|
|
|
|
/// header for elements in a bucket
/// needs to be multiple of size_of::<u64>()
#[repr(C)]
struct OccupiedHeader {
    /// OCCUPIED_OCCUPIED or OCCUPIED_FREE
    occupied: u64,
}
|
|
|
|
|
|
|
|
/// allocated in `contents` in a BucketStorage
|
2023-03-28 12:17:28 -07:00
|
|
|
pub struct BucketWithBitVec {
|
|
|
|
pub occupied: BitVec,
|
|
|
|
}
|
2023-03-27 11:07:02 -07:00
|
|
|
|
2023-03-28 12:17:28 -07:00
|
|
|
impl BucketOccupied for BucketWithBitVec {
|
|
|
|
fn occupy(&mut self, element: &mut [u8], ix: usize) {
|
|
|
|
assert!(self.is_free(element, ix));
|
|
|
|
self.occupied.set(ix as u64, true);
|
2023-03-27 11:07:02 -07:00
|
|
|
}
|
2023-03-28 12:17:28 -07:00
|
|
|
fn free(&mut self, element: &mut [u8], ix: usize) {
|
|
|
|
assert!(!self.is_free(element, ix));
|
|
|
|
self.occupied.set(ix as u64, false);
|
2023-03-27 11:07:02 -07:00
|
|
|
}
|
2023-03-28 12:17:28 -07:00
|
|
|
fn is_free(&self, _element: &[u8], ix: usize) -> bool {
|
|
|
|
!self.occupied.get(ix as u64)
|
2023-03-27 11:07:02 -07:00
|
|
|
}
|
|
|
|
fn offset_to_first_data() -> usize {
|
2023-03-28 12:17:28 -07:00
|
|
|
// no header, nothing stored in data stream
|
|
|
|
0
|
2023-03-27 11:07:02 -07:00
|
|
|
}
|
2023-03-28 12:17:28 -07:00
|
|
|
fn new(num_elements: usize) -> Self {
|
|
|
|
Self {
|
|
|
|
occupied: BitVec::new_fill(false, num_elements as u64),
|
|
|
|
}
|
2023-03-27 11:07:02 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-28 12:17:28 -07:00
|
|
|
pub type DataBucket = BucketWithBitVec;
|
|
|
|
pub type IndexBucket = BucketWithBitVec;
|
2023-03-27 11:07:02 -07:00
|
|
|
|
2023-03-29 13:41:10 -07:00
|
|
|
/// contains the index of an entry in the index bucket.
/// This type allows us to call methods to interact with the index entry on this type.
pub struct IndexEntryPlaceInBucket {
    /// index of the entry within the index bucket
    pub ix: u64,
}
|
|
|
|
|
2021-09-17 13:11:27 -07:00
|
|
|
#[repr(C)]
|
2022-05-22 18:00:42 -07:00
|
|
|
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
2021-09-17 13:11:27 -07:00
|
|
|
// one instance of this per item in the index
|
|
|
|
// stored in the index bucket
|
|
|
|
pub struct IndexEntry {
|
|
|
|
pub key: Pubkey, // can this be smaller if we have reduced the keys into buckets already?
|
2023-03-29 13:41:10 -07:00
|
|
|
ref_count: RefCount, // can this be smaller? Do we ever need more than 4B refcounts?
|
2022-01-12 12:37:34 -08:00
|
|
|
storage_cap_and_offset: PackedStorage,
|
2021-09-17 13:11:27 -07:00
|
|
|
// if the bucket doubled, the index can be recomputed using create_bucket_capacity_pow2
|
2023-03-29 13:41:10 -07:00
|
|
|
num_slots: Slot, // can this be smaller? epoch size should ~ be the max len. this is the num elements in the slot list
|
2021-09-17 13:11:27 -07:00
|
|
|
}
|
|
|
|
|
2022-01-12 12:37:34 -08:00
|
|
|
/// Pack the storage offset and capacity-when-crated-pow2 fields into a single u64
|
|
|
|
#[bitfield(bits = 64)]
|
|
|
|
#[repr(C)]
|
|
|
|
#[derive(Debug, Default, Copy, Clone, Eq, PartialEq)]
|
|
|
|
struct PackedStorage {
|
|
|
|
capacity_when_created_pow2: B8,
|
|
|
|
offset: B56,
|
|
|
|
}
|
|
|
|
|
2021-09-17 13:11:27 -07:00
|
|
|
impl IndexEntry {
|
2022-04-14 20:22:08 -07:00
|
|
|
/// return closest bucket index fit for the slot slice.
|
|
|
|
/// Since bucket size is 2^index, the return value is
|
|
|
|
/// min index, such that 2^index >= num_slots
|
|
|
|
/// index = ceiling(log2(num_slots))
|
|
|
|
/// special case, when slot slice empty, return 0th index.
|
2021-09-17 13:11:27 -07:00
|
|
|
pub fn data_bucket_from_num_slots(num_slots: Slot) -> u64 {
|
2022-04-14 20:22:08 -07:00
|
|
|
// Compute the ceiling of log2 for integer
|
|
|
|
if num_slots == 0 {
|
|
|
|
0
|
|
|
|
} else {
|
|
|
|
(Slot::BITS - (num_slots - 1).leading_zeros()) as u64
|
|
|
|
}
|
2021-09-17 13:11:27 -07:00
|
|
|
}
|
2023-03-29 13:41:10 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
impl IndexEntryPlaceInBucket {
|
|
|
|
pub fn init(&self, index_bucket: &mut BucketStorage<IndexBucket>, pubkey: &Pubkey) {
|
|
|
|
let index_entry = index_bucket.get_mut::<IndexEntry>(self.ix);
|
|
|
|
index_entry.key = *pubkey;
|
|
|
|
index_entry.ref_count = 0;
|
|
|
|
index_entry.storage_cap_and_offset = PackedStorage::default();
|
|
|
|
index_entry.num_slots = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn set_storage_capacity_when_created_pow2(
|
|
|
|
&self,
|
|
|
|
index_bucket: &mut BucketStorage<IndexBucket>,
|
|
|
|
storage_capacity_when_created_pow2: u8,
|
|
|
|
) {
|
|
|
|
index_bucket
|
|
|
|
.get_mut::<IndexEntry>(self.ix)
|
|
|
|
.storage_cap_and_offset
|
|
|
|
.set_capacity_when_created_pow2(storage_capacity_when_created_pow2)
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn set_storage_offset(
|
|
|
|
&self,
|
|
|
|
index_bucket: &mut BucketStorage<IndexBucket>,
|
|
|
|
storage_offset: u64,
|
|
|
|
) {
|
|
|
|
index_bucket
|
|
|
|
.get_mut::<IndexEntry>(self.ix)
|
|
|
|
.storage_cap_and_offset
|
|
|
|
.set_offset_checked(storage_offset)
|
|
|
|
.expect("New storage offset must fit into 7 bytes!");
|
|
|
|
}
|
2021-09-17 13:11:27 -07:00
|
|
|
|
2023-03-29 13:41:10 -07:00
|
|
|
pub fn data_bucket_ix(&self, index_bucket: &BucketStorage<IndexBucket>) -> u64 {
|
|
|
|
IndexEntry::data_bucket_from_num_slots(self.num_slots(index_bucket))
|
2021-09-17 13:11:27 -07:00
|
|
|
}
|
|
|
|
|
2023-03-29 13:41:10 -07:00
|
|
|
pub fn ref_count(&self, index_bucket: &BucketStorage<IndexBucket>) -> RefCount {
|
|
|
|
let index_entry = index_bucket.get::<IndexEntry>(self.ix);
|
|
|
|
index_entry.ref_count
|
2021-09-17 13:11:27 -07:00
|
|
|
}
|
|
|
|
|
2023-03-29 13:41:10 -07:00
|
|
|
fn storage_capacity_when_created_pow2(&self, index_bucket: &BucketStorage<IndexBucket>) -> u8 {
|
|
|
|
let index_entry = index_bucket.get::<IndexEntry>(self.ix);
|
|
|
|
index_entry
|
|
|
|
.storage_cap_and_offset
|
|
|
|
.capacity_when_created_pow2()
|
2022-01-12 12:37:34 -08:00
|
|
|
}
|
|
|
|
|
2023-03-29 13:41:10 -07:00
|
|
|
pub fn storage_offset(&self, index_bucket: &BucketStorage<IndexBucket>) -> u64 {
|
|
|
|
index_bucket
|
|
|
|
.get::<IndexEntry>(self.ix)
|
|
|
|
.storage_cap_and_offset
|
|
|
|
.offset()
|
2022-01-06 13:45:47 -08:00
|
|
|
}
|
|
|
|
|
2023-03-29 13:41:10 -07:00
|
|
|
/// This function maps the original data location into an index in the current bucket storage.
|
|
|
|
/// This is coupled with how we resize bucket storages.
|
|
|
|
pub fn data_loc(
|
|
|
|
&self,
|
|
|
|
index_bucket: &BucketStorage<IndexBucket>,
|
|
|
|
storage: &BucketStorage<DataBucket>,
|
|
|
|
) -> u64 {
|
|
|
|
let index_entry = index_bucket.get::<IndexEntry>(self.ix);
|
|
|
|
self.storage_offset(index_bucket)
|
|
|
|
<< (storage.capacity_pow2
|
|
|
|
- index_entry
|
|
|
|
.storage_cap_and_offset
|
|
|
|
.capacity_when_created_pow2())
|
2021-09-17 13:11:27 -07:00
|
|
|
}
|
|
|
|
|
2023-03-29 13:41:10 -07:00
|
|
|
pub fn read_value<'a, T>(
|
|
|
|
&self,
|
|
|
|
index_bucket: &BucketStorage<IndexBucket>,
|
|
|
|
data_buckets: &'a [BucketStorage<DataBucket>],
|
|
|
|
) -> Option<(&'a [T], RefCount)> {
|
|
|
|
let num_slots = self.num_slots(index_bucket);
|
|
|
|
let slice = if num_slots > 0 {
|
|
|
|
let data_bucket_ix = self.data_bucket_ix(index_bucket);
|
|
|
|
let data_bucket = &data_buckets[data_bucket_ix as usize];
|
|
|
|
let loc = self.data_loc(index_bucket, data_bucket);
|
2023-03-22 06:30:44 -07:00
|
|
|
assert!(!data_bucket.is_free(loc));
|
2023-03-29 13:41:10 -07:00
|
|
|
data_bucket.get_cell_slice(loc, num_slots)
|
2021-09-17 13:11:27 -07:00
|
|
|
} else {
|
|
|
|
// num_slots is 0. This means we don't have an actual allocation.
|
2023-03-27 11:07:02 -07:00
|
|
|
&[]
|
2021-09-17 13:11:27 -07:00
|
|
|
};
|
2023-03-29 13:41:10 -07:00
|
|
|
Some((slice, self.ref_count(index_bucket)))
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn new(ix: u64) -> Self {
|
|
|
|
Self { ix }
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn key<'a>(&self, index_bucket: &'a BucketStorage<IndexBucket>) -> &'a Pubkey {
|
|
|
|
let entry: &IndexEntry = index_bucket.get(self.ix);
|
|
|
|
&entry.key
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn set_ref_count(
|
|
|
|
&self,
|
|
|
|
index_bucket: &mut BucketStorage<IndexBucket>,
|
|
|
|
ref_count: RefCount,
|
|
|
|
) {
|
|
|
|
let index_entry = index_bucket.get_mut::<IndexEntry>(self.ix);
|
|
|
|
index_entry.ref_count = ref_count;
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn num_slots(&self, index_bucket: &BucketStorage<IndexBucket>) -> Slot {
|
|
|
|
index_bucket.get::<IndexEntry>(self.ix).num_slots
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn set_num_slots(&self, index_bucket: &mut BucketStorage<IndexBucket>, num_slots: Slot) {
|
|
|
|
index_bucket.get_mut::<IndexEntry>(self.ix).num_slots = num_slots;
|
2021-09-17 13:11:27 -07:00
|
|
|
}
|
|
|
|
}
|
2022-01-06 13:45:47 -08:00
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use {
        super::*,
        std::{path::PathBuf, sync::Arc},
        tempfile::tempdir,
    };

    impl IndexEntry {
        /// test-only constructor: an entry for `key` with every other field zeroed
        pub fn new(key: Pubkey) -> Self {
            IndexEntry {
                key,
                ref_count: 0,
                storage_cap_and_offset: PackedStorage::default(),
                num_slots: 0,
            }
        }
    }

    /// verify that accessors for storage_offset and capacity_when_created are
    /// correct and independent
    #[test]
    fn test_api() {
        for offset in [0, 1, u32::MAX as u64] {
            let (mut index_bucket, index) = index_entry_for_testing();
            if offset != 0 {
                index.set_storage_offset(&mut index_bucket, offset);
            }
            assert_eq!(index.storage_offset(&index_bucket,), offset);
            assert_eq!(index.storage_capacity_when_created_pow2(&index_bucket,), 0);
            for pow in [1, 255, 0] {
                index.set_storage_capacity_when_created_pow2(&mut index_bucket, pow);
                assert_eq!(index.storage_offset(&index_bucket,), offset);
                assert_eq!(
                    index.storage_capacity_when_created_pow2(&index_bucket,),
                    pow
                );
            }
        }
    }

    /// the packed field must stay 8 bytes and the whole entry 56 bytes
    #[test]
    fn test_size() {
        assert_eq!(std::mem::size_of::<PackedStorage>(), 1 + 7);
        assert_eq!(std::mem::size_of::<IndexEntry>(), 32 + 8 + 8 + 8);
    }

    /// build an index bucket backed by a file in a fresh temp dir
    fn index_bucket_for_testing() -> BucketStorage<IndexBucket> {
        let tmpdir = tempdir().unwrap();
        let paths: Vec<PathBuf> = vec![tmpdir.path().to_path_buf()];
        assert!(!paths.is_empty());

        // `new` here creates a file in `tmpdir`. Once the file is created, `tmpdir` can be dropped without issue.
        BucketStorage::<IndexBucket>::new(
            Arc::new(paths),
            1,
            std::mem::size_of::<IndexEntry>() as u64,
            1,
            Arc::default(),
            Arc::default(),
        )
    }

    /// an index bucket plus a handle to its entry 0
    fn index_entry_for_testing() -> (BucketStorage<IndexBucket>, IndexEntryPlaceInBucket) {
        (index_bucket_for_testing(), IndexEntryPlaceInBucket::new(0))
    }

    #[test]
    #[should_panic(expected = "New storage offset must fit into 7 bytes!")]
    fn test_set_storage_offset_value_too_large() {
        // smallest value that does not fit into the 56-bit offset field
        let too_big = 1 << 56;
        let (mut index_bucket, index) = index_entry_for_testing();
        index.set_storage_offset(&mut index_bucket, too_big);
    }

    #[test]
    fn test_data_bucket_from_num_slots() {
        for n in 0..512 {
            assert_eq!(
                IndexEntry::data_bucket_from_num_slots(n),
                (n as f64).log2().ceil() as u64
            );
        }
        assert_eq!(IndexEntry::data_bucket_from_num_slots(u32::MAX as u64), 32);
        assert_eq!(
            IndexEntry::data_bucket_from_num_slots(u32::MAX as u64 + 1),
            32
        );
        assert_eq!(
            IndexEntry::data_bucket_from_num_slots(u32::MAX as u64 + 2),
            33
        );
    }
}
|