Use LRU cache and blake3 hash of shreds to filter duplicates (#13976)

This commit is contained in:
sakridge 2020-12-07 16:42:39 -08:00 committed by GitHub
parent 6e9dbb4f6e
commit f6600810d7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 83 additions and 40 deletions

31
Cargo.lock generated
View File

@ -40,6 +40,17 @@ version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6789e291be47ace86a60303502173d84af8327e3627ecf334356ee0f87a164c"
[[package]]
name = "ahash"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "865f8b0b3fced577b7df82e9b0eb7609595d7209c0b39e78d0646672e244b1b1"
dependencies = [
"getrandom 0.2.0",
"lazy_static",
"version_check 0.9.2",
]
[[package]]
name = "aho-corasick"
version = "0.7.10"
@ -616,7 +627,7 @@ version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25e4c606eb459dd29f7c57b2e0879f2b6f14ee130918c2b78ccb58a9624e6c7a"
dependencies = [
"getrandom",
"getrandom 0.1.14",
"proc-macro-hack",
]
@ -1302,6 +1313,17 @@ dependencies = [
"wasi",
]
[[package]]
name = "getrandom"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee8025cf36f917e6a52cce185b7c7177689b838b7ec138364e50cc2277a56cf4"
dependencies = [
"cfg-if 0.1.10",
"libc",
"wasi",
]
[[package]]
name = "gimli"
version = "0.21.0"
@ -2862,7 +2884,7 @@ version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
dependencies = [
"getrandom",
"getrandom 0.1.14",
"libc",
"rand_chacha 0.2.2",
"rand_core 0.5.1",
@ -2911,7 +2933,7 @@ version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
dependencies = [
"getrandom",
"getrandom 0.1.14",
]
[[package]]
@ -3049,7 +3071,7 @@ version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09b23093265f8d200fa7b4c2c76297f47e681c655f6f1285a8780d6a022f7431"
dependencies = [
"getrandom",
"getrandom 0.1.14",
"redox_syscall",
"rust-argon2",
]
@ -3941,6 +3963,7 @@ dependencies = [
name = "solana-core"
version = "1.5.0"
dependencies = [
"ahash 0.6.1",
"base64 0.12.3",
"bincode",
"bs58",

View File

@ -14,6 +14,7 @@ edition = "2018"
codecov = { repository = "solana-labs/solana", branch = "master", service = "github" }
[dependencies]
ahash = "0.6.1"
base64 = "0.12.3"
bincode = "1.3.1"
bv = { version = "0.11.1", features = ["serde"] }

View File

@ -1,6 +1,10 @@
//! The `shred_fetch_stage` pulls shreds from UDP sockets and sends it to a channel.
use bv::BitVec;
use ahash::AHasher;
use lru::LruCache;
use rand::{thread_rng, Rng};
use std::hash::Hasher;
use solana_ledger::blockstore::MAX_DATA_SHREDS_PER_SLOT;
use solana_ledger::shred::{
CODING_SHRED, DATA_SHRED, OFFSET_OF_SHRED_INDEX, OFFSET_OF_SHRED_SLOT, OFFSET_OF_SHRED_TYPE,
@ -10,9 +14,8 @@ use solana_perf::cuda_runtime::PinnedVec;
use solana_perf::packet::{limited_deserialize, Packet, PacketsRecycler};
use solana_perf::recycler::Recycler;
use solana_runtime::bank_forks::BankForks;
use solana_sdk::clock::Slot;
use solana_sdk::clock::{Slot, DEFAULT_MS_PER_SLOT};
use solana_streamer::streamer::{self, PacketReceiver, PacketSender};
use std::collections::HashMap;
use std::net::UdpSocket;
use std::sync::atomic::AtomicBool;
use std::sync::mpsc::channel;
@ -21,7 +24,8 @@ use std::sync::RwLock;
use std::thread::{self, Builder, JoinHandle};
use std::time::Instant;
pub type ShredsReceived = HashMap<(Slot, u8), BitVec<u64>>;
const DEFAULT_LRU_SIZE: usize = 10_000;
pub type ShredsReceived = LruCache<u64, ()>;
#[derive(Default)]
struct ShredFetchStats {
@ -73,11 +77,12 @@ impl ShredFetchStage {
last_slot: Slot,
slots_per_epoch: u64,
modify: &F,
seeds: (u128, u128),
) where
F: Fn(&mut Packet),
{
p.meta.discard = true;
if let Some((slot, index)) = Self::get_slot_index(p, stats) {
if let Some((slot, _index)) = Self::get_slot_index(p, stats) {
// Seems reasonable to limit shreds to 2 epochs away
if slot > last_root
&& slot < (last_slot + 2 * slots_per_epoch)
@ -86,16 +91,15 @@ impl ShredFetchStage {
let shred_type = p.data[OFFSET_OF_SHRED_TYPE];
if shred_type == DATA_SHRED || shred_type == CODING_SHRED {
// Shred filter
let slot_received =
shreds_received
.entry((slot, shred_type))
.or_insert_with(|| {
BitVec::new_fill(false, MAX_DATA_SHREDS_PER_SLOT as u64)
});
if !slot_received.get(index.into()) {
let mut hasher = AHasher::new_with_keys(seeds.0, seeds.1);
hasher.write(&p.data[0..p.meta.size]);
let hash = hasher.finish();
if shreds_received.get(&hash).is_none() {
shreds_received.put(hash, ());
p.meta.discard = false;
modify(p);
slot_received.set(index.into(), true);
} else {
stats.duplicate_shred += 1;
}
@ -116,8 +120,8 @@ impl ShredFetchStage {
) where
F: Fn(&mut Packet),
{
let mut shreds_received = ShredsReceived::default();
let mut last_cleared = Instant::now();
let mut shreds_received = LruCache::new(DEFAULT_LRU_SIZE);
let mut last_updated = Instant::now();
// In the case of bank_forks=None, setup to accept any slot range
let mut last_root = 0;
@ -126,11 +130,13 @@ impl ShredFetchStage {
let mut last_stats = Instant::now();
let mut stats = ShredFetchStats::default();
let mut seeds = (thread_rng().gen::<u128>(), thread_rng().gen::<u128>());
while let Some(mut p) = recvr.iter().next() {
if last_cleared.elapsed().as_millis() > 200 {
if last_updated.elapsed().as_millis() as u64 > DEFAULT_MS_PER_SLOT {
last_updated = Instant::now();
seeds = (thread_rng().gen::<u128>(), thread_rng().gen::<u128>());
shreds_received.clear();
last_cleared = Instant::now();
if let Some(bank_forks) = bank_forks.as_ref() {
let bank_forks_r = bank_forks.read().unwrap();
last_root = bank_forks_r.root();
@ -150,6 +156,7 @@ impl ShredFetchStage {
last_slot,
slots_per_epoch,
&modify,
seeds,
);
});
if last_stats.elapsed().as_millis() > 1000 {
@ -215,15 +222,15 @@ impl ShredFetchStage {
) -> Self {
let recycler: PacketsRecycler = Recycler::warmed(100, 1024);
let tvu_threads = sockets.into_iter().map(|socket| {
streamer::receiver(
socket,
&exit,
sender.clone(),
recycler.clone(),
"shred_fetch_stage",
)
});
let (mut tvu_threads, tvu_filter) = Self::packet_modifier(
sockets,
&exit,
sender.clone(),
recycler.clone(),
bank_forks.clone(),
"shred_fetch",
|_| {},
);
let (tvu_forwards_threads, fwd_thread_hdl) = Self::packet_modifier(
forward_sockets,
@ -239,20 +246,21 @@ impl ShredFetchStage {
vec![repair_socket],
&exit,
sender.clone(),
recycler.clone(),
recycler,
bank_forks,
"shred_fetch_repair",
|p| p.meta.repair = true,
);
let mut thread_hdls: Vec<_> = tvu_threads
.chain(tvu_forwards_threads.into_iter())
.collect();
thread_hdls.extend(repair_receiver.into_iter());
thread_hdls.push(fwd_thread_hdl);
thread_hdls.push(repair_handler);
tvu_threads.extend(tvu_forwards_threads.into_iter());
tvu_threads.extend(repair_receiver.into_iter());
tvu_threads.push(tvu_filter);
tvu_threads.push(fwd_thread_hdl);
tvu_threads.push(repair_handler);
Self { thread_hdls }
Self {
thread_hdls: tvu_threads,
}
}
pub fn join(self) -> thread::Result<()> {
@ -271,7 +279,7 @@ mod tests {
#[test]
fn test_data_code_same_index() {
solana_logger::setup();
let mut shreds_received = ShredsReceived::default();
let mut shreds_received = LruCache::new(DEFAULT_LRU_SIZE);
let mut packet = Packet::default();
let mut stats = ShredFetchStats::default();
@ -279,6 +287,8 @@ mod tests {
let shred = Shred::new_from_data(slot, 3, 0, None, true, true, 0, 0, 0);
shred.copy_to_packet(&mut packet);
let seeds = (thread_rng().gen::<u128>(), thread_rng().gen::<u128>());
let last_root = 0;
let last_slot = 100;
let slots_per_epoch = 10;
@ -290,6 +300,7 @@ mod tests {
last_slot,
slots_per_epoch,
&|_p| {},
seeds,
);
assert!(!packet.meta.discard);
@ -304,6 +315,7 @@ mod tests {
last_slot,
slots_per_epoch,
&|_p| {},
seeds,
);
assert!(!packet.meta.discard);
}
@ -311,12 +323,13 @@ mod tests {
#[test]
fn test_shred_filter() {
solana_logger::setup();
let mut shreds_received = ShredsReceived::default();
let mut shreds_received = LruCache::new(DEFAULT_LRU_SIZE);
let mut packet = Packet::default();
let mut stats = ShredFetchStats::default();
let last_root = 0;
let last_slot = 100;
let slots_per_epoch = 10;
let seeds = (thread_rng().gen::<u128>(), thread_rng().gen::<u128>());
// packet size is 0, so cannot get index
ShredFetchStage::process_packet(
&mut packet,
@ -326,6 +339,7 @@ mod tests {
last_slot,
slots_per_epoch,
&|_p| {},
seeds,
);
assert_eq!(stats.index_overrun, 1);
assert!(packet.meta.discard);
@ -341,6 +355,7 @@ mod tests {
last_slot,
slots_per_epoch,
&|_p| {},
seeds,
);
assert!(packet.meta.discard);
@ -353,6 +368,7 @@ mod tests {
last_slot,
slots_per_epoch,
&|_p| {},
seeds,
);
assert!(!packet.meta.discard);
@ -365,6 +381,7 @@ mod tests {
last_slot,
slots_per_epoch,
&|_p| {},
seeds,
);
assert!(packet.meta.discard);
@ -380,6 +397,7 @@ mod tests {
last_slot,
slots_per_epoch,
&|_p| {},
seeds,
);
assert!(packet.meta.discard);
@ -394,6 +412,7 @@ mod tests {
last_slot,
slots_per_epoch,
&|_p| {},
seeds,
);
assert!(packet.meta.discard);
}