diff --git a/bucket_map/src/bucket.rs b/bucket_map/src/bucket.rs
index 959ed1bbe7..d71444b889 100644
--- a/bucket_map/src/bucket.rs
+++ b/bucket_map/src/bucket.rs
@@ -803,6 +803,50 @@ mod tests {
         )
     }
 
+    #[test]
+    fn batch_insert_duplicates_internal_simple() {
+        solana_logger::setup();
+        // add the same duplicate key several times.
+        // make sure the resulting index and returned `duplicates` is correct.
+        let random = 1;
+        let data_buckets = Vec::default();
+        let k = Pubkey::from([1u8; 32]);
+        for v in 10..12u64 {
+            for len in 1..4 {
+                let raw = (0..len).map(|l| (k, v + (l as u64))).collect::<Vec<_>>();
+                let mut hashed = Bucket::index_entries(raw.clone().into_iter(), len, random);
+                let hashed_raw = hashed.clone();
+
+                let mut index = create_test_index(None);
+
+                let mut duplicates = Vec::default();
+                assert!(Bucket::<u64>::batch_insert_non_duplicates_internal(
+                    &mut index,
+                    &Vec::default(),
+                    &mut hashed,
+                    &mut duplicates,
+                )
+                .is_ok());
+
+                assert_eq!(duplicates.len(), len - 1);
+                assert_eq!(hashed.len(), 0);
+                let single_hashed_raw_inserted = hashed_raw.last().unwrap();
+                let elem =
+                    IndexEntryPlaceInBucket::new(single_hashed_raw_inserted.0 % index.capacity());
+                let (value, ref_count) = elem.read_value(&index, &data_buckets);
+                assert_eq!(ref_count, 1);
+                assert_eq!(value, &[single_hashed_raw_inserted.2]);
+                let expected_duplicates = hashed_raw
+                    .iter()
+                    .rev()
+                    .skip(1)
+                    .map(|(_hash, k, v)| (*k, *v, single_hashed_raw_inserted.2))
+                    .collect::<Vec<_>>();
+                assert_eq!(expected_duplicates, duplicates);
+            }
+        }
+    }
+
     #[test]
     fn batch_insert_non_duplicates_internal_simple() {
         solana_logger::setup();
@@ -907,4 +951,24 @@ mod tests {
             }
         }
     }
+
+    #[should_panic(expected = "batch insertion can only occur prior to any deletes")]
+    #[test]
+    fn batch_insert_after_delete() {
+        solana_logger::setup();
+
+        let tmpdir = tempdir().unwrap();
+        let paths: Vec<PathBuf> = vec![tmpdir.path().to_path_buf()];
+        assert!(!paths.is_empty());
+        let max_search = 2;
+        let mut bucket = Bucket::new(Arc::new(paths), max_search, Arc::default(), Arc::default());
+
+        let key = Pubkey::new_unique();
+        assert_eq!(bucket.read_value(&key), None);
+
+        bucket.update(&key, |_| Some((vec![0], 0)));
+        bucket.delete_key(&key);
+
+        bucket.batch_insert_non_duplicates(std::iter::empty(), 0);
+    }
 }
diff --git a/bucket_map/src/bucket_map.rs b/bucket_map/src/bucket_map.rs
index 7eb9cfdd73..76827f45a1 100644
--- a/bucket_map/src/bucket_map.rs
+++ b/bucket_map/src/bucket_map.rs
@@ -489,16 +489,34 @@ mod tests {
                             && thread_rng().gen_range(0, 2) == 0;
                         if batch_insert_now {
                             // batch insert into the map with 1 bucket 50% of the time
-                            assert!(map
-                                .get_bucket_from_index(0)
-                                .batch_insert_non_duplicates(
-                                    additions
-                                        .clone()
-                                        .into_iter()
-                                        .map(|(k, mut v)| (k, v.0.pop().unwrap())),
-                                    to_add,
-                                )
-                                .is_empty());
+                            let mut batch_additions = additions
+                                .clone()
+                                .into_iter()
+                                .map(|(k, mut v)| (k, v.0.pop().unwrap()))
+                                .collect::<Vec<_>>();
+                            let mut duplicates = 0;
+                            if batch_additions.len() > 1 && thread_rng().gen_range(0, 2) == 0 {
+                                // insert a duplicate sometimes
+                                let item_to_duplicate =
+                                    thread_rng().gen_range(0, batch_additions.len());
+                                let where_to_insert_duplicate =
+                                    thread_rng().gen_range(0, batch_additions.len());
+                                batch_additions.insert(
+                                    where_to_insert_duplicate,
+                                    batch_additions[item_to_duplicate],
+                                );
+                                duplicates += 1;
+                            }
+                            let count = batch_additions.len();
+                            assert_eq!(
+                                map.get_bucket_from_index(0)
+                                    .batch_insert_non_duplicates(
+                                        batch_additions.into_iter(),
+                                        count,
+                                    )
+                                    .len(),
+                                duplicates
+                            );
                         } else {
                             additions.clone().into_iter().for_each(|(k, v)| {
                                 if insert {
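
For context, the two `bucket.rs` tests above pin down the duplicate-handling contract: when a batch contains the same key more than once, exactly one entry lands in the index and every other occurrence comes back in `duplicates` as a `(key, rejected_value, stored_value)` triple. Below is a minimal sketch of that contract, assuming a plain `HashMap` in place of the real hashed, fixed-capacity index; the function name `batch_insert_sketch` and the `Key` alias are hypothetical and not part of the bucket_map API.

```rust
use std::collections::HashMap;

/// Hypothetical stand-in for `Pubkey` (a 32-byte key in the real crate).
type Key = [u8; 32];

/// Drain the batch from the back, mirroring the test's expectations:
/// `hashed_raw.last()` is the entry that gets stored, and
/// `hashed_raw.iter().rev().skip(1)` are the duplicates, each paired
/// with the value that was actually kept.
fn batch_insert_sketch(
    map: &mut HashMap<Key, u64>,
    mut batch: Vec<(Key, u64)>,
) -> Vec<(Key, u64, u64)> {
    let mut duplicates = Vec::new();
    while let Some((k, v)) = batch.pop() {
        match map.get(&k) {
            // Key already present: report (key, rejected value, stored value).
            Some(&existing) => duplicates.push((k, v, existing)),
            None => {
                map.insert(k, v);
            }
        }
    }
    duplicates
}

fn main() {
    let mut map = HashMap::new();
    let k = [1u8; 32];
    let dups = batch_insert_sketch(&mut map, vec![(k, 10), (k, 11), (k, 12)]);
    assert_eq!(map[&k], 12); // the last entry in the batch is the one stored
    assert_eq!(dups, vec![(k, 11, 12), (k, 10, 12)]);
}
```

The randomized `bucket_map.rs` change exercises the same contract end to end: it occasionally re-inserts one element of the batch at a random position and asserts that `batch_insert_non_duplicates` returns exactly that many duplicates.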