filters crds values in parallel when responding to gossip pull-requests (#18877)

When responding to gossip pull-requests, filter_crds_values takes a significant
amount of time while holding onto the crds read-lock:
https://github.com/solana-labs/solana/blob/f51d64868/gossip/src/crds_gossip_pull.rs#L509-L566

This commit filters crds values in parallel using a rayon thread-pool.
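
As a rough illustration of the pattern (a minimal, hypothetical sketch, not the actual Solana code: the toy filter_values function, the modulo-based filter, the fixed thread count, and the example values are all invented for illustration, and it only assumes the rayon crate as a dependency), the read-lock is taken once, each request is filtered on a bounded rayon thread-pool via par_iter, and the shared response-size budget becomes an AtomicI64 so worker threads can decrement it without a mutable borrow:

    use {
        rayon::{prelude::*, ThreadPoolBuilder},
        std::sync::{
            atomic::{AtomicI64, Ordering},
            RwLock,
        },
    };

    // Toy stand-in for the crds table: for each "request" (a small divisor used
    // as the filter), return the values that do NOT match it, up to a shared limit.
    fn filter_values(
        values: &RwLock<Vec<u64>>,
        requests: &[u64],
        output_size_limit: usize,
    ) -> Vec<Vec<u64>> {
        // The budget is shared across worker threads, so it becomes an AtomicI64;
        // it is debited after each response, so it may briefly go negative.
        let output_size_limit = AtomicI64::new(output_size_limit.min(i64::MAX as usize) as i64);
        // The read-lock is taken once and held while all requests are served.
        let values = values.read().unwrap();
        let apply_filter = |filter: &u64| {
            if output_size_limit.load(Ordering::Relaxed) <= 0 {
                return Vec::default();
            }
            let out: Vec<u64> = values
                .iter()
                .filter(|value| **value % *filter != 0)
                .take(output_size_limit.load(Ordering::Relaxed).max(0) as usize)
                .copied()
                .collect();
            output_size_limit.fetch_sub(out.len() as i64, Ordering::Relaxed);
            out
        };
        // Small fixed pool here; the real code caps the pool at 8 threads.
        let thread_pool = ThreadPoolBuilder::new().num_threads(4).build().unwrap();
        thread_pool.install(|| requests.par_iter().map(apply_filter).collect())
    }

    fn main() {
        let values: RwLock<Vec<u64>> = RwLock::new((1..=100).collect());
        for (filter, response) in [2u64, 3, 5].iter().zip(filter_values(&values, &[2, 3, 5], 50)) {
            println!("filter {}: {} values returned", filter, response.len());
        }
    }

Because the budget is checked before and debited after each response, concurrent requests can overshoot the limit slightly; a signed atomic keeps the subtraction from underflowing, which mirrors the AtomicI64 change in the filter_crds_values diff below.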
Author: behzad nouri
Date:   2021-07-26 17:13:11 +00:00 (committed by GitHub)
Parent: f51d648681
Commit: f1198fc6d5
5 changed files with 108 additions and 76 deletions

View File

@@ -1635,9 +1635,8 @@ impl ClusterInfo {
         bank_forks: Option<Arc<RwLock<BankForks>>>,
         sender: PacketSender,
         gossip_validators: Option<HashSet<Pubkey>>,
-        exit: &Arc<AtomicBool>,
+        exit: Arc<AtomicBool>,
     ) -> JoinHandle<()> {
-        let exit = exit.clone();
         let thread_pool = ThreadPoolBuilder::new()
             .num_threads(std::cmp::min(get_thread_count(), 8))
             .thread_name(|i| format!("ClusterInfo::gossip-{}", i))
@@ -1812,8 +1811,13 @@ impl ClusterInfo {
         self.stats
             .pull_requests_count
             .add_relaxed(requests.len() as u64);
-        let response =
-            self.handle_pull_requests(recycler, requests, stakes, require_stake_for_gossip);
+        let response = self.handle_pull_requests(
+            thread_pool,
+            recycler,
+            requests,
+            stakes,
+            require_stake_for_gossip,
+        );
         if !response.is_empty() {
             self.stats
                 .packets_sent_pull_responses_count
@@ -1883,6 +1887,7 @@ impl ClusterInfo {
     // and tries to send back to them the values it detects are missing.
     fn handle_pull_requests(
         &self,
+        thread_pool: &ThreadPool,
         recycler: &PacketsRecycler,
         requests: Vec<PullData>,
         stakes: &HashMap<Pubkey, u64>,
@@ -1914,8 +1919,12 @@ impl ClusterInfo {
         let self_id = self.id();
         let mut pull_responses = {
             let _st = ScopedTimer::from(&self.stats.generate_pull_responses);
-            self.gossip
-                .generate_pull_responses(&caller_and_filters, output_size_limit, now)
+            self.gossip.generate_pull_responses(
+                thread_pool,
+                &caller_and_filters,
+                output_size_limit,
+                now,
+            )
         };
         if require_stake_for_gossip {
             for resp in &mut pull_responses {
@@ -2516,6 +2525,9 @@ impl ClusterInfo {
                     match self.run_socket_consume(&receiver, &sender, &thread_pool) {
                         Err(GossipError::RecvTimeoutError(RecvTimeoutError::Disconnected)) => break,
                         Err(GossipError::RecvTimeoutError(RecvTimeoutError::Timeout)) => (),
+                        // A send operation can only fail if the receiving end of a
+                        // channel is disconnected.
+                        Err(GossipError::SendError) => break,
                         Err(err) => error!("gossip consume: {}", err),
                         Ok(()) => (),
                     }
@@ -2531,19 +2543,18 @@ impl ClusterInfo {
         requests_receiver: Receiver<Vec<(/*from:*/ SocketAddr, Protocol)>>,
         response_sender: PacketSender,
         should_check_duplicate_instance: bool,
-        exit: &Arc<AtomicBool>,
+        exit: Arc<AtomicBool>,
     ) -> JoinHandle<()> {
-        let exit = exit.clone();
+        let mut last_print = Instant::now();
         let recycler = PacketsRecycler::default();
+        let thread_pool = ThreadPoolBuilder::new()
+            .num_threads(get_thread_count().min(8))
+            .thread_name(|i| format!("sol-gossip-work-{}", i))
+            .build()
+            .unwrap();
         Builder::new()
             .name("solana-listen".to_string())
             .spawn(move || {
-                let thread_pool = ThreadPoolBuilder::new()
-                    .num_threads(std::cmp::min(get_thread_count(), 8))
-                    .thread_name(|i| format!("sol-gossip-work-{}", i))
-                    .build()
-                    .unwrap();
-                let mut last_print = Instant::now();
                 while !exit.load(Ordering::Relaxed) {
                     if let Err(err) = self.run_listen(
                         &recycler,

View File

@@ -240,11 +240,18 @@ impl CrdsGossip {
     pub fn generate_pull_responses(
         &self,
+        thread_pool: &ThreadPool,
         filters: &[(CrdsValue, CrdsFilter)],
         output_size_limit: usize, // Limit number of crds values returned.
         now: u64,
     ) -> Vec<Vec<CrdsValue>> {
-        CrdsGossipPull::generate_pull_responses(&self.crds, filters, output_size_limit, now)
+        CrdsGossipPull::generate_pull_responses(
+            thread_pool,
+            &self.crds,
+            filters,
+            output_size_limit,
+            now,
+        )
     }

     pub fn filter_pull_responses(

View File

@@ -22,7 +22,6 @@ use {
         ping_pong::PingCache,
         weighted_shuffle::WeightedShuffle,
     },
-    itertools::Itertools,
     lru::LruCache,
     rand::Rng,
     rayon::{prelude::*, ThreadPool},
@@ -39,7 +38,7 @@ use {
         iter::{repeat, repeat_with},
         net::SocketAddr,
         sync::{
-            atomic::{AtomicUsize, Ordering},
+            atomic::{AtomicI64, AtomicUsize, Ordering},
             Mutex, RwLock,
         },
         time::{Duration, Instant},
@@ -356,12 +355,13 @@ impl CrdsGossipPull {
     /// Create gossip responses to pull requests
     pub(crate) fn generate_pull_responses(
+        thread_pool: &ThreadPool,
         crds: &RwLock<Crds>,
         requests: &[(CrdsValue, CrdsFilter)],
         output_size_limit: usize, // Limit number of crds values returned.
         now: u64,
     ) -> Vec<Vec<CrdsValue>> {
-        Self::filter_crds_values(crds, requests, output_size_limit, now)
+        Self::filter_crds_values(thread_pool, crds, requests, output_size_limit, now)
     }

     // Checks if responses should be inserted and
@@ -508,9 +508,10 @@ impl CrdsGossipPull {
     /// Filter values that fail the bloom filter up to `max_bytes`.
     fn filter_crds_values(
+        thread_pool: &ThreadPool,
         crds: &RwLock<Crds>,
         filters: &[(CrdsValue, CrdsFilter)],
-        mut output_size_limit: usize, // Limit number of crds values returned.
+        output_size_limit: usize, // Limit number of crds values returned.
         now: u64,
     ) -> Vec<Vec<CrdsValue>> {
         let msg_timeout = CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS;
@@ -518,50 +519,57 @@ impl CrdsGossipPull {
         //skip filters from callers that are too old
         let caller_wallclock_window =
             now.saturating_sub(msg_timeout)..now.saturating_add(msg_timeout);
-        let mut dropped_requests = 0;
-        let mut total_skipped = 0;
+        let dropped_requests = AtomicUsize::default();
+        let total_skipped = AtomicUsize::default();
+        let output_size_limit = output_size_limit.try_into().unwrap_or(i64::MAX);
+        let output_size_limit = AtomicI64::new(output_size_limit);
         let crds = crds.read().unwrap();
-        let ret: Vec<_> = filters
-            .iter()
-            .map(|(caller, filter)| {
-                if output_size_limit == 0 {
-                    return None;
-                }
-                let caller_wallclock = caller.wallclock();
-                if !caller_wallclock_window.contains(&caller_wallclock) {
-                    dropped_requests += 1;
-                    return Some(vec![]);
-                }
-                let caller_pubkey = caller.pubkey();
-                let caller_wallclock = caller_wallclock.checked_add(jitter).unwrap_or(0);
-                let pred = |entry: &&VersionedCrdsValue| {
-                    debug_assert!(filter.test_mask(&entry.value_hash));
-                    // Skip values that are too new.
-                    if entry.value.wallclock() > caller_wallclock {
-                        total_skipped += 1;
-                        false
-                    } else {
-                        !filter.filter_contains(&entry.value_hash)
-                            && (entry.value.pubkey() != caller_pubkey
-                                || entry.value.should_force_push(&caller_pubkey))
-                    }
-                };
-                let out: Vec<_> = crds
-                    .filter_bitmask(filter.mask, filter.mask_bits)
-                    .filter(pred)
-                    .map(|entry| entry.value.clone())
-                    .take(output_size_limit)
-                    .collect();
-                output_size_limit -= out.len();
-                Some(out)
-            })
-            .while_some()
-            .collect();
+        let apply_filter = |caller: &CrdsValue, filter: &CrdsFilter| {
+            if output_size_limit.load(Ordering::Relaxed) <= 0 {
+                return Vec::default();
+            }
+            let caller_wallclock = caller.wallclock();
+            if !caller_wallclock_window.contains(&caller_wallclock) {
+                dropped_requests.fetch_add(1, Ordering::Relaxed);
+                return Vec::default();
+            }
+            let caller_pubkey = caller.pubkey();
+            let caller_wallclock = caller_wallclock.checked_add(jitter).unwrap_or(0);
+            let pred = |entry: &&VersionedCrdsValue| {
+                debug_assert!(filter.test_mask(&entry.value_hash));
+                // Skip values that are too new.
+                if entry.value.wallclock() > caller_wallclock {
+                    total_skipped.fetch_add(1, Ordering::Relaxed);
+                    false
+                } else {
+                    !filter.filter_contains(&entry.value_hash)
+                        && (entry.value.pubkey() != caller_pubkey
+                            || entry.value.should_force_push(&caller_pubkey))
+                }
+            };
+            let out: Vec<_> = crds
+                .filter_bitmask(filter.mask, filter.mask_bits)
+                .filter(pred)
+                .map(|entry| entry.value.clone())
+                .take(output_size_limit.load(Ordering::Relaxed).max(0) as usize)
+                .collect();
+            output_size_limit.fetch_sub(out.len() as i64, Ordering::Relaxed);
+            out
+        };
+        let ret: Vec<_> = thread_pool.install(|| {
+            filters
+                .par_iter()
+                .map(|(caller, filter)| apply_filter(caller, filter))
+                .collect()
+        });
         inc_new_counter_info!(
             "gossip_filter_crds_values-dropped_requests",
-            dropped_requests + filters.len() - ret.len()
+            dropped_requests.into_inner()
         );
-        inc_new_counter_info!("gossip_filter_crds_values-dropped_values", total_skipped);
+        inc_new_counter_info!(
+            "gossip_filter_crds_values-dropped_values",
+            total_skipped.into_inner()
+        );
         ret
     }
@@ -1213,10 +1221,11 @@ pub(crate) mod tests {
         let (_, filters) = req.unwrap();
         let mut filters: Vec<_> = filters.into_iter().map(|f| (caller.clone(), f)).collect();
         let rsp = CrdsGossipPull::generate_pull_responses(
+            &thread_pool,
             &dest_crds,
             &filters,
-            /*output_size_limit=*/ usize::MAX,
-            0,
+            usize::MAX, // output_size_limit
+            0,          // now
         );
         assert_eq!(rsp[0].len(), 0);
@@ -1233,10 +1242,11 @@ pub(crate) mod tests {
         //should skip new value since caller is to old
         let rsp = CrdsGossipPull::generate_pull_responses(
+            &thread_pool,
             &dest_crds,
             &filters,
-            /*output_size_limit=*/ usize::MAX,
-            CRDS_GOSSIP_PULL_MSG_TIMEOUT_MS,
+            usize::MAX,                      // output_size_limit
+            CRDS_GOSSIP_PULL_MSG_TIMEOUT_MS, // now
         );
         assert_eq!(rsp[0].len(), 0);
         assert_eq!(filters.len(), MIN_NUM_BLOOM_FILTERS);
@@ -1251,9 +1261,10 @@ pub(crate) mod tests {
                 .collect::<Vec<_>>()
         });
         let rsp = CrdsGossipPull::generate_pull_responses(
+            &thread_pool,
             &dest_crds,
             &filters,
-            /*output_size_limit=*/ usize::MAX,
+            usize::MAX, // output_size_limit
             CRDS_GOSSIP_PULL_MSG_TIMEOUT_MS,
         );
         assert_eq!(rsp.len(), 2 * MIN_NUM_BLOOM_FILTERS);
@@ -1304,10 +1315,11 @@ pub(crate) mod tests {
         let (_, filters) = req.unwrap();
         let filters: Vec<_> = filters.into_iter().map(|f| (caller.clone(), f)).collect();
         let rsp = CrdsGossipPull::generate_pull_responses(
+            &thread_pool,
             &dest_crds,
             &filters,
-            /*output_size_limit=*/ usize::MAX,
-            0,
+            usize::MAX, // output_size_limit
+            0,          // now
         );
         let callers = filters.into_iter().map(|(caller, _)| caller);
         CrdsGossipPull::process_pull_requests(&dest_crds, callers, 1);
@@ -1382,10 +1394,11 @@ pub(crate) mod tests {
         let (_, filters) = req.unwrap();
         let filters: Vec<_> = filters.into_iter().map(|f| (caller.clone(), f)).collect();
         let rsp = CrdsGossipPull::generate_pull_responses(
+            &thread_pool,
             &dest_crds,
             &filters,
-            /*output_size_limit=*/ usize::MAX,
-            0,
+            usize::MAX, // output_size_limit
+            0,          // now
         );
         CrdsGossipPull::process_pull_requests(
             &dest_crds,

View File

@@ -58,27 +58,27 @@ impl GossipService {
             1,
             false,
         );
-        let (response_sender, response_receiver) = channel();
         let (consume_sender, listen_receiver) = channel();
+        // https://github.com/rust-lang/rust/issues/39364#issuecomment-634545136
+        let _consume_sender = consume_sender.clone();
         let t_socket_consume = cluster_info.clone().start_socket_consume_thread(
             request_receiver,
             consume_sender,
             exit.clone(),
         );
-        let t_listen = ClusterInfo::listen(
-            cluster_info.clone(),
+        let (response_sender, response_receiver) = channel();
+        let t_listen = cluster_info.clone().listen(
             bank_forks.clone(),
             listen_receiver,
             response_sender.clone(),
             should_check_duplicate_instance,
-            exit,
+            exit.clone(),
         );
-        let t_gossip = ClusterInfo::gossip(
-            cluster_info.clone(),
+        let t_gossip = cluster_info.clone().gossip(
             bank_forks,
             response_sender,
             gossip_validators,
-            exit,
+            exit.clone(),
         );
         // To work around:
         // https://github.com/rust-lang/rust/issues/54267

View File

@@ -505,7 +505,7 @@ fn network_run_pull(
                 .collect()
         };
         let transfered: Vec<_> = requests
-            .into_par_iter()
+            .into_iter()
            .map(|(to, filters, caller_info)| {
                 let mut bytes: usize = 0;
                 let mut msgs: usize = 0;
@@ -527,8 +527,9 @@
                 let rsp = node
                     .gossip
                     .generate_pull_responses(
+                        thread_pool,
                         &filters,
-                        /*output_size_limit=*/ usize::MAX,
+                        usize::MAX, // output_size_limit
                         now,
                     )
                     .into_iter()
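
A side note on the Err(GossipError::SendError) => break arm added to the socket-consume loop above: the commit's own comment observes that a send can only fail when the receiving end of the channel is disconnected, so the error reliably signals that the downstream thread has exited and the loop can stop. The same holds for the std mpsc channel used in this small, self-contained sketch of that shutdown pattern (hypothetical code, not part of the commit):

    use std::{sync::mpsc::channel, thread};

    fn main() {
        let (sender, receiver) = channel::<u64>();
        // Downstream worker: consume a couple of messages, then exit,
        // dropping the receiver and thereby disconnecting the channel.
        let consumer = thread::spawn(move || {
            for _ in 0..2 {
                let _ = receiver.recv();
            }
        });
        let mut sent = 0u64;
        loop {
            // send() can only fail once the receiving end is disconnected, so
            // an error here means the consumer is gone and we should stop.
            if sender.send(sent).is_err() {
                break;
            }
            sent += 1;
        }
        consumer.join().unwrap();
        println!("producer stopped after {} sends", sent);
    }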