Fix packet accounting after dedup (#25357)

* Fix packet accounting after dedup
* Rename function to better represent intent
This commit is contained in:
Brennan Watt 2022-05-20 17:00:13 -07:00 committed by GitHub
parent 8caf0aabd1
commit e025376719
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 11 additions and 10 deletions

View File

@ -249,9 +249,9 @@ impl SigVerifyStage {
);
let mut dedup_time = Measure::start("sigverify_dedup_time");
let dedup_fail = deduper.dedup_packets(&mut batches) as usize;
let discard_or_dedup_fail = deduper.dedup_packets_and_count_discards(&mut batches) as usize;
dedup_time.stop();
let num_unique = num_packets.saturating_sub(dedup_fail);
let num_unique = num_packets.saturating_sub(discard_or_dedup_fail);
let mut discard_time = Measure::start("sigverify_discard_time");
let mut num_valid_packets = num_unique;
@ -308,7 +308,7 @@ impl SigVerifyStage {
stats.packets_hist.increment(num_packets as u64).unwrap();
stats.total_batches += batches_len;
stats.total_packets += num_packets;
stats.total_dedup += dedup_fail;
stats.total_dedup += discard_or_dedup_fail;
stats.total_valid_packets += num_valid_packets;
stats.total_excess_fail += excess_fail;
stats.total_shrinks += total_shrinks;

View File

@ -26,7 +26,7 @@ fn do_bench_dedup_packets(bencher: &mut Bencher, mut batches: Vec<PacketBatch>)
// verify packets
let mut deduper = sigverify::Deduper::new(1_000_000, Duration::from_millis(2_000));
bencher.iter(|| {
let _ans = deduper.dedup_packets(&mut batches);
let _ans = deduper.dedup_packets_and_count_discards(&mut batches);
deduper.reset();
batches
.iter_mut()

View File

@ -471,10 +471,11 @@ impl Deduper {
}
}
// Deduplicates packets and returns 1 if the packet is to be discarded, 0 otherwise.
fn dedup_packet(&self, packet: &mut Packet) -> u64 {
// If this packet was already marked as discard, drop it
if packet.meta.discard() {
return 0;
return 1;
}
let mut hasher = AHasher::new_with_keys(self.seed.0, self.seed.1);
hasher.write(&packet.data[0..packet.meta.size]);
@ -495,7 +496,7 @@ impl Deduper {
0
}
pub fn dedup_packets(&self, batches: &mut [PacketBatch]) -> u64 {
pub fn dedup_packets_and_count_discards(&self, batches: &mut [PacketBatch]) -> u64 {
batches
.iter_mut()
.flat_map(|batch| batch.packets.iter_mut().map(|p| self.dedup_packet(p)))
@ -1415,7 +1416,7 @@ mod tests {
to_packet_batches(&std::iter::repeat(tx).take(1024).collect::<Vec<_>>(), 128);
let packet_count = sigverify::count_packets_in_batches(&batches);
let filter = Deduper::new(1_000_000, Duration::from_millis(0));
let discard = filter.dedup_packets(&mut batches) as usize;
let discard = filter.dedup_packets_and_count_discards(&mut batches) as usize;
assert_eq!(packet_count, discard + 1);
}
@ -1424,7 +1425,7 @@ mod tests {
let mut filter = Deduper::new(1_000_000, Duration::from_millis(0));
let mut batches = to_packet_batches(&(0..1024).map(|_| test_tx()).collect::<Vec<_>>(), 128);
let discard = filter.dedup_packets(&mut batches) as usize;
let discard = filter.dedup_packets_and_count_discards(&mut batches) as usize;
// because dedup uses a threadpool, there may be up to N threads of txs that go through
assert_eq!(discard, 0);
filter.reset();
@ -1442,7 +1443,7 @@ mod tests {
for i in 0..1000 {
let mut batches =
to_packet_batches(&(0..1000).map(|_| test_tx()).collect::<Vec<_>>(), 128);
discard += filter.dedup_packets(&mut batches) as usize;
discard += filter.dedup_packets_and_count_discards(&mut batches) as usize;
debug!("{} {}", i, discard);
if filter.saturated.load(Ordering::Relaxed) {
break;
@ -1458,7 +1459,7 @@ mod tests {
for i in 0..10 {
let mut batches =
to_packet_batches(&(0..1024).map(|_| test_tx()).collect::<Vec<_>>(), 128);
discard += filter.dedup_packets(&mut batches) as usize;
discard += filter.dedup_packets_and_count_discards(&mut batches) as usize;
debug!("false positive rate: {}/{}", discard, i * 1024);
}
//allow for 1 false positive even if extremely unlikely