get unique keys while finding dups (#33039)
* get unique keys while finding dups * pr feedback
This commit is contained in:
parent
9b4feddb55
commit
9bc09c9610
|
@ -9263,6 +9263,9 @@ impl AccountsDb {
|
||||||
let mut index_flush_us = 0;
|
let mut index_flush_us = 0;
|
||||||
let total_duplicate_slot_keys = AtomicU64::default();
|
let total_duplicate_slot_keys = AtomicU64::default();
|
||||||
let mut populate_duplicate_keys_us = 0;
|
let mut populate_duplicate_keys_us = 0;
|
||||||
|
// outer vec is accounts index bin (determined by pubkey value)
|
||||||
|
// inner vec is the pubkeys within that bin that are present in > 1 slot
|
||||||
|
let unique_pubkeys_by_bin = Mutex::new(Vec::<Vec<Pubkey>>::default());
|
||||||
if pass == 0 {
|
if pass == 0 {
|
||||||
// tell accounts index we are done adding the initial accounts at startup
|
// tell accounts index we are done adding the initial accounts at startup
|
||||||
let mut m = Measure::start("accounts_index_idle_us");
|
let mut m = Measure::start("accounts_index_idle_us");
|
||||||
|
@ -9277,6 +9280,8 @@ impl AccountsDb {
|
||||||
.populate_and_retrieve_duplicate_keys_from_startup(|slot_keys| {
|
.populate_and_retrieve_duplicate_keys_from_startup(|slot_keys| {
|
||||||
total_duplicate_slot_keys
|
total_duplicate_slot_keys
|
||||||
.fetch_add(slot_keys.len() as u64, Ordering::Relaxed);
|
.fetch_add(slot_keys.len() as u64, Ordering::Relaxed);
|
||||||
|
let unique_keys =
|
||||||
|
HashSet::<Pubkey>::from_iter(slot_keys.iter().map(|(_, key)| *key));
|
||||||
for (slot, key) in slot_keys {
|
for (slot, key) in slot_keys {
|
||||||
match self.uncleaned_pubkeys.entry(slot) {
|
match self.uncleaned_pubkeys.entry(slot) {
|
||||||
Occupied(mut occupied) => occupied.get_mut().push(key),
|
Occupied(mut occupied) => occupied.get_mut().push(key),
|
||||||
|
@ -9285,10 +9290,18 @@ impl AccountsDb {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
let unique_pubkeys_by_bin_inner =
|
||||||
|
unique_keys.into_iter().collect::<Vec<_>>();
|
||||||
|
// does not matter that this is not ordered by slot
|
||||||
|
unique_pubkeys_by_bin
|
||||||
|
.lock()
|
||||||
|
.unwrap()
|
||||||
|
.push(unique_pubkeys_by_bin_inner);
|
||||||
});
|
});
|
||||||
})
|
})
|
||||||
.1;
|
.1;
|
||||||
}
|
}
|
||||||
|
let unique_pubkeys_by_bin = unique_pubkeys_by_bin.into_inner().unwrap();
|
||||||
|
|
||||||
let storage_info_timings = storage_info_timings.into_inner().unwrap();
|
let storage_info_timings = storage_info_timings.into_inner().unwrap();
|
||||||
let mut timings = GenerateIndexTimings {
|
let mut timings = GenerateIndexTimings {
|
||||||
|
@ -9315,15 +9328,10 @@ impl AccountsDb {
|
||||||
Measure::start("handle accounts data len duplicates");
|
Measure::start("handle accounts data len duplicates");
|
||||||
let uncleaned_roots = Mutex::new(HashSet::<Slot>::default());
|
let uncleaned_roots = Mutex::new(HashSet::<Slot>::default());
|
||||||
if pass == 0 {
|
if pass == 0 {
|
||||||
let mut unique_pubkeys = HashSet::<Pubkey>::default();
|
let accounts_data_len_from_duplicates = unique_pubkeys_by_bin
|
||||||
self.uncleaned_pubkeys.iter().for_each(|entry| {
|
.par_iter()
|
||||||
entry.value().iter().for_each(|pubkey| {
|
.map(|unique_keys| {
|
||||||
unique_pubkeys.insert(*pubkey);
|
unique_keys
|
||||||
})
|
|
||||||
});
|
|
||||||
let accounts_data_len_from_duplicates = unique_pubkeys
|
|
||||||
.into_iter()
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
.par_chunks(4096)
|
.par_chunks(4096)
|
||||||
.map(|pubkeys| {
|
.map(|pubkeys| {
|
||||||
let (count, uncleaned_roots_this_group) = self
|
let (count, uncleaned_roots_this_group) = self
|
||||||
|
@ -9338,6 +9346,8 @@ impl AccountsDb {
|
||||||
});
|
});
|
||||||
count
|
count
|
||||||
})
|
})
|
||||||
|
.sum::<u64>()
|
||||||
|
})
|
||||||
.sum();
|
.sum();
|
||||||
accounts_data_len.fetch_sub(accounts_data_len_from_duplicates, Ordering::Relaxed);
|
accounts_data_len.fetch_sub(accounts_data_len_from_duplicates, Ordering::Relaxed);
|
||||||
info!(
|
info!(
|
||||||
|
|
Loading…
Reference in New Issue