disk index: batch insert (#31094)
commit d63359a3ff
parent ce21a58b65
@@ -30,7 +30,6 @@ use {
 
 pub struct ReallocatedItems<I: BucketOccupied, D: BucketOccupied> {
     // Some if the index was reallocated
-    // u64 is random associated with the new index
     pub index: Option<BucketStorage<I>>,
     // Some for a data bucket reallocation
     // u64 is data bucket index
@@ -104,6 +103,10 @@ pub struct Bucket<T: Copy + 'static> {
     anticipated_size: u64,
 
     pub reallocated: Reallocated<IndexBucket<T>, DataBucket>,
+
+    /// set to true once any entries have been deleted from the index.
+    /// Deletes indicate that there can be free slots and that the full search range must be searched for an entry.
+    at_least_one_entry_deleted: bool,
 }
 
 impl<'b, T: Clone + Copy + 'static> Bucket<T> {
@@ -131,6 +134,7 @@ impl<'b, T: Clone + Copy + 'static> Bucket<T> {
             stats,
             reallocated: Reallocated::default(),
             anticipated_size: 0,
+            at_least_one_entry_deleted: false,
         }
     }
 
@@ -269,12 +273,124 @@ impl<'b, T: Clone + Copy + 'static> Bucket<T> {
         Err(BucketMapError::IndexNoSpace(index.contents.capacity()))
     }
 
-    pub fn read_value(&self, key: &Pubkey) -> Option<(&[T], RefCount)> {
+    pub(crate) fn read_value(&self, key: &Pubkey) -> Option<(&[T], RefCount)> {
         //debug!("READ_VALUE: {:?}", key);
         let (elem, _) = self.find_index_entry(key)?;
         Some(elem.read_value(&self.index, &self.data))
     }
 
+    /// for each item in `items`, get the hash value when hashed with `random`.
+    /// Return a vec of tuples:
+    /// (hash_value, key, value)
+    fn index_entries(
+        items: impl Iterator<Item = (Pubkey, T)>,
+        count: usize,
+        random: u64,
+    ) -> Vec<(u64, Pubkey, T)> {
+        let mut inserts = Vec::with_capacity(count);
+        items.for_each(|(key, v)| {
+            let ix = Self::bucket_index_ix(&key, random);
+            inserts.push((ix, key, v));
+        });
+        inserts
+    }
+
+    /// insert all of `items` into the index.
+    /// return duplicates
+    pub(crate) fn batch_insert_non_duplicates(
+        &mut self,
+        items: impl Iterator<Item = (Pubkey, T)>,
+        count: usize,
+    ) -> Vec<(Pubkey, T, T)> {
+        assert!(
+            !self.at_least_one_entry_deleted,
+            "efficient batch insertion can only occur prior to any deletes"
+        );
+        let current_len = self.index.count.load(Ordering::Relaxed);
+        let anticipated = count as u64;
+        self.set_anticipated_count((anticipated).saturating_add(current_len));
+        let mut entries = Self::index_entries(items, count, self.random);
+        let mut duplicates = Vec::default();
+        // insert, but resizes may be necessary
+        loop {
+            let cap = self.index.capacity();
+            // sort entries by their index % cap, so we'll search over the same spots in the file close to each other
+            // `reverse()` is so we can efficiently pop off the end but get ascending order index values
+            // sort before calling to make `batch_insert_non_duplicates_internal` easier to test.
+            entries.sort_unstable_by(|a, b| (a.0 % cap).cmp(&(b.0 % cap)).reverse());
+
+            let result = Self::batch_insert_non_duplicates_internal(
+                &mut self.index,
+                &self.data,
+                &mut entries,
+                &mut duplicates,
+            );
+            match result {
+                Ok(_result) => {
+                    // everything added
+                    self.set_anticipated_count(0);
+                    return duplicates;
+                }
+                Err(error) => {
+                    // resize and add more
+                    // `entries` will have had items removed from it
+                    self.grow(error);
+                    self.handle_delayed_grows();
+                }
+            }
+        }
+    }
+
+    /// sort `entries` by hash value
+    /// insert as much of `entries` as possible into `index`.
+    /// return an error if the index needs to resize.
+    /// for every entry that already exists in `index`, add it (and the value already in the index) to `duplicates`
+    pub fn batch_insert_non_duplicates_internal(
+        index: &mut BucketStorage<IndexBucket<T>>,
+        data_buckets: &[BucketStorage<DataBucket>],
+        reverse_sorted_entries: &mut Vec<(u64, Pubkey, T)>,
+        duplicates: &mut Vec<(Pubkey, T, T)>,
+    ) -> Result<(), BucketMapError> {
+        let max_search = index.max_search();
+        let cap = index.capacity();
+        let search_end = max_search.min(cap);
+
+        // pop one entry at a time to insert
+        'outer: while let Some((ix_entry_raw, k, v)) = reverse_sorted_entries.pop() {
+            let ix_entry = ix_entry_raw % cap;
+            // search for an empty spot starting at `ix_entry`
+            for search in 0..search_end {
+                let ix_index = (ix_entry + search) % cap;
+                let elem = IndexEntryPlaceInBucket::new(ix_index);
+                if index.try_lock(ix_index) {
+                    // found free element and occupied it
+                    // These fields will be overwritten after allocation by callers.
+                    // Since this part of the mmapped file could have previously been used by someone else, there can be garbage here.
+                    elem.init(index, &k);
+
+                    // new data stored should be stored in IndexEntry and NOT in data file
+                    // new data len is 1
+                    elem.set_slot_count_enum_value(index, OccupiedEnum::OneSlotInIndex(&v));
+                    continue 'outer; // this 'insertion' is completed: inserted successfully
+                } else {
+                    // occupied, see if the key already exists here
+                    if elem.key(index) == &k {
+                        let (v_existing, _ref_count_existing) =
+                            elem.read_value(index, data_buckets);
+                        duplicates.push((k, v, *v_existing.first().unwrap()));
+                        continue 'outer; // this 'insertion' is completed: found a duplicate entry
+                    }
+                }
+            }
+            // search loop ended without finding a spot to insert this key
+            // so, remember the item we were trying to insert for next time after resizing
+            reverse_sorted_entries.push((ix_entry_raw, k, v));
+            return Err(BucketMapError::IndexNoSpace(cap));
+        }
+
+        Ok(())
+    }
+
     pub fn try_write(
         &mut self,
         key: &Pubkey,
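The `batch_insert_non_duplicates` loop above keeps retrying whatever remains in `entries` whenever the index reports `IndexNoSpace`, growing the index between attempts. Below is a minimal standalone sketch of that insert/grow cycle, not part of the diff, using a hypothetical in-memory `Store` with linear probing; duplicate detection and the real mmapped `BucketStorage` details are deliberately omitted.

```rust
// Standalone sketch of the insert/grow retry cycle (hypothetical `Store` type;
// linear probing like the real index, but without duplicate handling or mmap).
struct Store {
    slots: Vec<Option<(u64, u64)>>, // (key, value); None = free
    max_search: usize,
}

enum StoreError {
    NoSpace(usize), // capacity at the time we ran out of probes
}

impl Store {
    fn try_insert_all(&mut self, pending: &mut Vec<(u64, u64)>) -> Result<(), StoreError> {
        let cap = self.slots.len();
        'outer: while let Some((key, value)) = pending.pop() {
            let start = (key as usize) % cap;
            for probe in 0..self.max_search.min(cap) {
                let ix = (start + probe) % cap;
                if self.slots[ix].is_none() {
                    // found a free slot within the search range: occupy it
                    self.slots[ix] = Some((key, value));
                    continue 'outer;
                }
            }
            // no free slot within max_search: put the item back and ask the caller to grow
            pending.push((key, value));
            return Err(StoreError::NoSpace(cap));
        }
        Ok(())
    }

    fn grow(&mut self) {
        // a real implementation would rehash existing entries into the larger storage
        let new_len = self.slots.len() * 2;
        self.slots.resize(new_len, None);
    }
}

fn batch_insert(store: &mut Store, mut pending: Vec<(u64, u64)>) {
    loop {
        match store.try_insert_all(&mut pending) {
            Ok(()) => return,                            // everything added
            Err(StoreError::NoSpace(_)) => store.grow(), // resize, then retry what's left
        }
    }
}
```

Popping from the back of a reverse-sorted list is what lets the real code above visit file offsets in ascending order while still removing completed entries cheaply.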
@@ -417,6 +533,7 @@ impl<'b, T: Clone + Copy + 'static> Bucket<T> {
 
     pub fn delete_key(&mut self, key: &Pubkey) {
         if let Some((elem, elem_ix)) = self.find_index_entry(key) {
+            self.at_least_one_entry_deleted = true;
             if let OccupiedEnum::MultipleSlots(multiple_slots) =
                 elem.get_slot_count_enum(&self.index)
             {
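The flag exists because a delete can open a hole anywhere in the probe range, which breaks the append-only assumption behind the fast batch path. A toy model of the ordering rule enforced by the assert in `batch_insert_non_duplicates` above (hypothetical `Index` type, not the real `Bucket`):

```rust
// Toy model of the new ordering rule: batch insert is only legal before any delete.
struct Index {
    at_least_one_entry_deleted: bool,
}

impl Index {
    fn batch_insert(&mut self /* , items ... */) {
        assert!(
            !self.at_least_one_entry_deleted,
            "efficient batch insertion can only occur prior to any deletes"
        );
        // ... probe for free slots without re-searching previously freed ones ...
    }

    fn delete_key(&mut self /* , key ... */) {
        self.at_least_one_entry_deleted = true;
        // ... free the slot ...
    }
}
```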
@@ -637,3 +754,153 @@ impl<'b, T: Clone + Copy + 'static> Bucket<T> {
         self.insert(key, (&new, refct));
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use {super::*, tempfile::tempdir};
+
+    #[test]
+    fn test_index_entries() {
+        for v in 10..12u64 {
+            for random in 1..3 {
+                for len in 1..3 {
+                    let raw = (0..len)
+                        .map(|l| {
+                            let k = Pubkey::from([l as u8; 32]);
+                            (k, v + (l as u64))
+                        })
+                        .collect::<Vec<_>>();
+                    let hashed = Bucket::index_entries(raw.clone().into_iter(), len, random);
+                    assert_eq!(hashed.len(), len);
+                    (0..len).for_each(|i| {
+                        let raw = raw[i];
+                        let hashed = hashed[i];
+                        assert_eq!(Bucket::<u64>::bucket_index_ix(&raw.0, random), hashed.0);
+                        assert_eq!(raw.0, hashed.1);
+                        assert_eq!(raw.1, hashed.2);
+                    });
+                }
+            }
+        }
+    }
+
+    fn create_test_index(max_search: Option<u8>) -> BucketStorage<IndexBucket<u64>> {
+        let tmpdir = tempdir().unwrap();
+        let paths: Vec<PathBuf> = vec![tmpdir.path().to_path_buf()];
+        assert!(!paths.is_empty());
+        let max_search = max_search.unwrap_or(2);
+        BucketStorage::<IndexBucket<u64>>::new(
+            Arc::new(paths),
+            1,
+            std::mem::size_of::<crate::index_entry::IndexEntry<u64>>() as u64,
+            max_search,
+            Arc::default(),
+            Arc::default(),
+        )
+    }
+
+    #[test]
+    fn batch_insert_non_duplicates_internal_simple() {
+        solana_logger::setup();
+        // add 2 entries, make sure they are added in the buckets we expect
+        let random = 1;
+        let data_buckets = Vec::default();
+        for v in 10..12u64 {
+            for len in 1..3 {
+                let raw = (0..len)
+                    .map(|l| {
+                        let k = Pubkey::from([l as u8; 32]);
+                        (k, v + (l as u64))
+                    })
+                    .collect::<Vec<_>>();
+                let mut hashed = Bucket::index_entries(raw.clone().into_iter(), len, random);
+                let hashed_raw = hashed.clone();
+
+                let mut index = create_test_index(None);
+
+                let mut duplicates = Vec::default();
+                assert!(Bucket::<u64>::batch_insert_non_duplicates_internal(
+                    &mut index,
+                    &Vec::default(),
+                    &mut hashed,
+                    &mut duplicates,
+                )
+                .is_ok());
+
+                assert_eq!(hashed.len(), 0);
+                (0..len).for_each(|i| {
+                    let raw = hashed_raw[i];
+                    let elem = IndexEntryPlaceInBucket::new(raw.0 % index.capacity());
+                    let (value, ref_count) = elem.read_value(&index, &data_buckets);
+                    assert_eq!(ref_count, 1);
+                    assert_eq!(value, &[hashed_raw[i].2]);
+                });
+            }
+        }
+    }
+
+    #[test]
+    fn batch_insert_non_duplicates_internal_same_ix_exceeds_max_search() {
+        solana_logger::setup();
+        // add `len` entries with the same ix, make sure they are added in subsequent buckets.
+        // adjust `max_search`. If we try to add an entry that causes us to exceed `max_search`, then assert that the adding fails with an error and
+        // the colliding item remains in `entries`
+        let random = 1;
+        let data_buckets = Vec::default();
+        for max_search in [2usize, 3] {
+            for v in 10..12u64 {
+                for len in 1..(max_search + 1) {
+                    let raw = (0..len)
+                        .map(|l| {
+                            let k = Pubkey::from([l as u8; 32]);
+                            (k, v + (l as u64))
+                        })
+                        .collect::<Vec<_>>();
+                    let mut hashed = Bucket::index_entries(raw.clone().into_iter(), len, random);
+                    let common_ix = 2; // both are put at same ix
+                    hashed.iter_mut().for_each(|mut v| {
+                        v.0 = common_ix;
+                    });
+                    let hashed_raw = hashed.clone();
+
+                    let mut index = create_test_index(Some(max_search as u8));
+
+                    let mut duplicates = Vec::default();
+                    let result = Bucket::<u64>::batch_insert_non_duplicates_internal(
+                        &mut index,
+                        &Vec::default(),
+                        &mut hashed,
+                        &mut duplicates,
+                    );
+
+                    assert_eq!(
+                        hashed.len(),
+                        if len > max_search { 1 } else { 0 },
+                        "len: {len}"
+                    );
+                    (0..len).for_each(|i| {
+                        assert!(if len > max_search {
+                            result.is_err()
+                        } else {
+                            result.is_ok()
+                        });
+                        let raw = hashed_raw[i];
+                        if i == 0 && len > max_search {
+                            // max search was exceeded and the first entry was unable to be inserted, so it remained in `hashed`
+                            assert_eq!(hashed[0], hashed_raw[0]);
+                        } else {
+                            // we insert in reverse order when ix values are equal, so we expect to find item[1] in item[1]'s expected ix and item[0] will be 1 search distance away from expected ix
+                            let search_required = (len - i - 1) as u64;
+                            let elem = IndexEntryPlaceInBucket::new(
+                                (raw.0 + search_required) % index.capacity(),
+                            );
+                            let (value, ref_count) = elem.read_value(&index, &data_buckets);
+                            assert_eq!(ref_count, 1);
+                            assert_eq!(value, &[hashed_raw[i].2]);
+                        }
+                    });
+                }
+            }
+        }
+    }
+}
@@ -118,6 +118,20 @@ impl<T: Clone + Copy> BucketApi<T> {
         bucket.as_mut().unwrap().set_anticipated_count(count);
     }
 
+    /// batch insert of `items`. Assumption is a single slot list element and ref_count == 1.
+    /// For any pubkeys that already exist, the failed insertion data and the existing data are returned.
+    pub fn batch_insert_non_duplicates(
+        &self,
+        items: impl Iterator<Item = (Pubkey, T)>,
+        count: usize,
+    ) -> Vec<(Pubkey, T, T)> {
+        let mut bucket = self.get_write_bucket();
+        bucket
+            .as_mut()
+            .unwrap()
+            .batch_insert_non_duplicates(items, count)
+    }
+
     pub fn update<F>(&self, key: &Pubkey, updatefn: F)
     where
         F: FnMut(Option<(&[T], RefCount)>) -> Option<(Vec<T>, RefCount)>,
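For callers outside the bucket itself, the new `BucketApi::batch_insert_non_duplicates` is the entry point. A hedged usage sketch follows; the module paths, the constructor calls, and the visibility of `get_bucket_from_index` are assumptions here, since in this change the method is only exercised by the crate's own tests and by `InMemAccountsIndex`.

```rust
// Hypothetical caller of the new BucketApi method (paths and visibility assumed).
use {
    solana_bucket_map::bucket_map::{BucketMap, BucketMapConfig},
    solana_sdk::pubkey::Pubkey,
};

fn preload(map: &BucketMap<u64>, items: Vec<(Pubkey, u64)>) {
    let count = items.len();
    // batch insert targets one bucket at a time, which is why the test below only
    // uses it when the map is configured with a single bucket
    let duplicates = map
        .get_bucket_from_index(0)
        .batch_insert_non_duplicates(items.into_iter(), count);
    // each duplicate reports (key, value we tried to insert, value already stored)
    for (key, attempted, existing) in duplicates {
        eprintln!("{key}: tried to insert {attempted}, index already holds {existing}");
    }
}

fn main() {
    let map: BucketMap<u64> = BucketMap::new(BucketMapConfig::new(1)); // single bucket
    let items: Vec<(Pubkey, u64)> = (0..4u64).map(|i| (Pubkey::new_unique(), i)).collect();
    preload(&map, items);
}
```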
@@ -355,6 +355,7 @@ mod tests {
     fn hashmap_compare() {
         use std::sync::Mutex;
         solana_logger::setup();
+        for mut use_batch_insert in [true, false] {
         let maps = (0..2)
             .map(|max_buckets_pow2| {
                 let config = BucketMapConfig::new(1 << max_buckets_pow2);
@@ -429,21 +430,72 @@ mod tests {
                 assert!(map.is_empty());
             }
         };
-        let mut initial = 100; // put this many items in to start
+        let mut initial: usize = 100; // put this many items in to start
+        if use_batch_insert {
+            // insert a lot more when inserting with batch to make sure we hit resizing during batch
+            initial *= 3;
+        }
+
         // do random operations: insert, update, delete, add/unref in random order
         // verify consistency between hashmap and all bucket maps
         for i in 0..10000 {
-            if initial > 0 {
-                initial -= 1;
-            }
+            initial = initial.saturating_sub(1);
             if initial > 0 || thread_rng().gen_range(0, 5) == 0 {
                 // insert
+                let mut to_add = 1;
+                if initial > 1 && use_batch_insert {
+                    to_add = thread_rng().gen_range(1, (initial / 4).max(2));
+                    initial -= to_add;
+                }
+
+                let additions = (0..to_add)
+                    .map(|_| {
                 let k = solana_sdk::pubkey::new_rand();
-                let v = gen_rand_value();
-                hash_map.write().unwrap().insert(k, v.clone());
+                let mut v = gen_rand_value();
+                if use_batch_insert {
+                    // refcount has to be 1 to use batch insert
+                    v.1 = 1;
+                    // len has to be 1 to use batch insert
+                    if v.0.len() > 1 {
+                        v.0.truncate(1);
+                    } else if v.0.is_empty() {
+                        loop {
+                            let mut new_v = gen_rand_value();
+                            if !new_v.0.is_empty() {
+                                v.0 = vec![new_v.0.pop().unwrap()];
+                                break;
+                            }
+                        }
+                    }
+                }
+                (k, v)
+                    })
+                    .collect::<Vec<_>>();
+
+                additions.clone().into_iter().for_each(|(k, v)| {
+                    hash_map.write().unwrap().insert(k, v);
+                    return_key(k);
+                });
                 let insert = thread_rng().gen_range(0, 2) == 0;
                 maps.iter().for_each(|map| {
+                    // batch insert can only work for the map with only 1 bucket so that we can batch add to a single bucket
+                    let batch_insert_now = map.buckets.len() == 1
+                        && use_batch_insert
+                        && thread_rng().gen_range(0, 2) == 0;
+                    if batch_insert_now {
+                        // batch insert into the map with 1 bucket 50% of the time
+                        assert!(map
+                            .get_bucket_from_index(0)
+                            .batch_insert_non_duplicates(
+                                additions
+                                    .clone()
+                                    .into_iter()
+                                    .map(|(k, mut v)| (k, v.0.pop().unwrap())),
+                                to_add,
+                            )
+                            .is_empty());
+                    } else {
+                        additions.clone().into_iter().for_each(|(k, v)| {
                 if insert {
                     map.insert(&k, (&v.0, v.1))
                 } else {
@@ -453,7 +505,18 @@ mod tests {
                             })
                         }
                     });
-                return_key(k);
+                        }
+                });
+
+                if use_batch_insert && initial == 1 {
+                    // done using batch insert once we have added the initial entries
+                    // now, the test can remove, update, addref, etc.
+                    use_batch_insert = false;
+                }
+            }
+            if use_batch_insert && initial > 0 {
+                // if we are using batch insert, it is illegal to update, delete, or addref/unref an account until all batch inserts are complete
+                continue;
             }
             if thread_rng().gen_range(0, 10) == 0 {
                 // update
@@ -513,3 +576,4 @@ mod tests {
             verify();
         }
     }
+    }
@@ -218,7 +218,8 @@ impl<O: BucketOccupied> BucketStorage<O> {
         self.contents.is_free(entry, ix as usize)
     }
 
-    fn try_lock(&mut self, ix: u64) -> bool {
+    /// try to occupy `ix`. return true if successful
+    pub(crate) fn try_lock(&mut self, ix: u64) -> bool {
         let start = self.get_start_offset_with_header(ix);
         let entry = &mut self.mmap[start..];
         if self.contents.is_free(entry, ix as usize) {
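The doc comment added above describes `try_lock` as occupy-if-free. A toy stand-in for that semantics, not from the diff, using a plain `Vec<bool>` in place of the per-entry headers in the mmapped file:

```rust
// Toy stand-in for the occupy-if-free semantics of `try_lock`
// (a Vec<bool> instead of the real per-entry headers in the mmapped file).
struct Occupancy {
    occupied: Vec<bool>,
}

impl Occupancy {
    fn is_free(&self, ix: u64) -> bool {
        !self.occupied[ix as usize]
    }

    /// try to occupy `ix`. return true if successful
    fn try_lock(&mut self, ix: u64) -> bool {
        if self.is_free(ix) {
            self.occupied[ix as usize] = true;
            true
        } else {
            false
        }
    }
}
```

Making the method `pub(crate)` is what lets the batch-insert probe loop in `bucket.rs` claim a free slot directly instead of going through a higher-level write path.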
@@ -2491,8 +2491,8 @@ pub mod tests {
         assert_eq!(num, 1);
 
         // not zero lamports
-        let index = AccountsIndex::<AccountInfoTest, AccountInfoTest>::default_for_tests();
-        let account_info: AccountInfoTest = 0 as AccountInfoTest;
+        let index = AccountsIndex::<bool, bool>::default_for_tests();
+        let account_info = false;
         let items = vec![(*pubkey, account_info)];
         index.set_startup(Startup::Startup);
         index.insert_new_if_missing_into_primary_index(slot, items.len(), items.into_iter());
@@ -2516,7 +2516,7 @@ pub mod tests {
         assert!(index
             .get_for_tests(pubkey, Some(&ancestors), None)
             .is_some());
-        assert_eq!(index.ref_count_from_storage(pubkey), 0); // cached, so 0
+        assert_eq!(index.ref_count_from_storage(pubkey), 1);
         index.unchecked_scan_accounts(
             "",
             &ancestors,
@@ -1060,36 +1060,21 @@ impl<T: IndexValue, U: DiskIndexValue + From<T> + Into<T>> InMemAccountsIndex<T,
 
         // merge all items into the disk index now
         let disk = self.bucket.as_ref().unwrap();
-        let mut count = 0;
-        let current_len = disk.bucket_len();
-        let anticipated = insert.len();
-        disk.set_anticipated_count((anticipated as u64).saturating_add(current_len));
-        insert.into_iter().for_each(|(slot, k, v)| {
-            let entry = (slot, v);
-            let new_ref_count = u64::from(!v.is_cached());
-            disk.update(&k, |current| {
-                match current {
-                    Some((current_slot_list, ref_count)) => {
-                        // already on disk, so remember the new (slot, info) for later
-                        duplicates.duplicates.push((slot, k, entry.1));
-                        if let Some((slot, _)) = current_slot_list.first() {
+        let mut count = insert.len() as u64;
+        for (k, entry, duplicate_entry) in disk.batch_insert_non_duplicates(
+            insert.into_iter().map(|(slot, k, v)| (k, (slot, v.into()))),
+            count as usize,
+        ) {
+            duplicates.duplicates.push((entry.0, k, entry.1.into()));
             // accurately account for there being a duplicate for the first entry that was previously added to the disk index.
             // That entry could not have known yet that it was a duplicate.
             // It is important to capture each slot with a duplicate because of slot limits applied to clean.
-                            duplicates.duplicates_put_on_disk.insert((*slot, k));
+            duplicates
+                .duplicates_put_on_disk
+                .insert((duplicate_entry.0, k));
+            count -= 1;
         }
-                        Some((current_slot_list.to_vec(), ref_count))
-                    }
-                    None => {
-                        count += 1;
-                        // not on disk, insert it
-                        Some((vec![(entry.0, entry.1.into())], new_ref_count))
-                    }
-                }
-            });
-        });
-        // remove the guidance for how many entries the bucket will eventually contain since we have added all we knew about
-        disk.set_anticipated_count(0);
         self.stats().inc_insert_count(count);
     }
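The rewrite above replaces the per-key `disk.update` closure with a single call to `batch_insert_non_duplicates`: the insert count starts at `insert.len()` and is decremented once per reported duplicate. A self-contained sketch of that accounting, with a `HashMap` standing in for the disk bucket (the stub below is not the real `BucketApi` method):

```rust
// HashMap stand-in for the disk bucket; mirrors the count/duplicate accounting above.
use std::collections::{hash_map::Entry, HashMap};

type Slot = u64;
type Info = u64;

fn batch_insert_non_duplicates(
    disk: &mut HashMap<u64, (Slot, Info)>,
    items: impl Iterator<Item = (u64, (Slot, Info))>,
) -> Vec<(u64, (Slot, Info), (Slot, Info))> {
    let mut duplicates = Vec::new();
    for (key, value) in items {
        match disk.entry(key) {
            // already present: report (key, attempted value, existing value), keep the old one
            Entry::Occupied(existing) => duplicates.push((key, value, *existing.get())),
            Entry::Vacant(slot) => {
                slot.insert(value);
            }
        }
    }
    duplicates
}

fn merge(disk: &mut HashMap<u64, (Slot, Info)>, insert: Vec<(Slot, u64, Info)>) -> u64 {
    // assume every entry will be newly inserted...
    let mut count = insert.len() as u64;
    let items = insert.into_iter().map(|(slot, k, v)| (k, (slot, v)));
    for (_k, _attempted, _existing) in batch_insert_non_duplicates(disk, items) {
        // ...then take one back for each entry the index already held
        count -= 1;
    }
    count // what inc_insert_count would be fed
}
```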