improves threads' utilization in processing gossip packets (#12962)
ClusterInfo::process_packets handles incoming packets in a thread_pool: https://github.com/solana-labs/solana/blob/87311cce7/core/src/cluster_info.rs#L2118-L2134 However, profiling runtime shows that threads are not well utilized and a lot of the processing is done sequentially. This commit redistributes the work done in parallel. Testing on a gce cluster shows 20%+ improvement in processing gossip packets with much smaller variations.
This commit is contained in:
parent
cca318f805
commit
75d62ca095
|
@ -211,6 +211,7 @@ struct GossipStats {
|
||||||
new_push_requests2: Counter,
|
new_push_requests2: Counter,
|
||||||
new_push_requests_num: Counter,
|
new_push_requests_num: Counter,
|
||||||
filter_pull_response: Counter,
|
filter_pull_response: Counter,
|
||||||
|
process_gossip_packets_time: Counter,
|
||||||
process_pull_response: Counter,
|
process_pull_response: Counter,
|
||||||
process_pull_response_count: Counter,
|
process_pull_response_count: Counter,
|
||||||
process_pull_response_len: Counter,
|
process_pull_response_len: Counter,
|
||||||
|
@ -366,6 +367,59 @@ enum Protocol {
|
||||||
PruneMessage(Pubkey, PruneData),
|
PruneMessage(Pubkey, PruneData),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Protocol {
|
||||||
|
fn par_verify(self) -> Option<Self> {
|
||||||
|
match self {
|
||||||
|
Protocol::PullRequest(_, ref caller) => {
|
||||||
|
if caller.verify() {
|
||||||
|
Some(self)
|
||||||
|
} else {
|
||||||
|
inc_new_counter_info!("cluster_info-gossip_pull_request_verify_fail", 1);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Protocol::PullResponse(from, data) => {
|
||||||
|
let size = data.len();
|
||||||
|
let data: Vec<_> = data.into_par_iter().filter(Signable::verify).collect();
|
||||||
|
if size != data.len() {
|
||||||
|
inc_new_counter_info!(
|
||||||
|
"cluster_info-gossip_pull_response_verify_fail",
|
||||||
|
size - data.len()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if data.is_empty() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(Protocol::PullResponse(from, data))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Protocol::PushMessage(from, data) => {
|
||||||
|
let size = data.len();
|
||||||
|
let data: Vec<_> = data.into_par_iter().filter(Signable::verify).collect();
|
||||||
|
if size != data.len() {
|
||||||
|
inc_new_counter_info!(
|
||||||
|
"cluster_info-gossip_push_msg_verify_fail",
|
||||||
|
size - data.len()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
if data.is_empty() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(Protocol::PushMessage(from, data))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Protocol::PruneMessage(_, ref data) => {
|
||||||
|
if data.verify() {
|
||||||
|
Some(self)
|
||||||
|
} else {
|
||||||
|
inc_new_counter_debug!("cluster_info-gossip_prune_msg_verify_fail", 1);
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Sanitize for Protocol {
|
impl Sanitize for Protocol {
|
||||||
fn sanitize(&self) -> std::result::Result<(), SanitizeError> {
|
fn sanitize(&self) -> std::result::Result<(), SanitizeError> {
|
||||||
match self {
|
match self {
|
||||||
|
@ -1650,7 +1704,7 @@ impl ClusterInfo {
|
||||||
&self,
|
&self,
|
||||||
recycler: &PacketsRecycler,
|
recycler: &PacketsRecycler,
|
||||||
stakes: &HashMap<Pubkey, u64>,
|
stakes: &HashMap<Pubkey, u64>,
|
||||||
packets: Packets,
|
packets: Vec<(SocketAddr, Protocol)>,
|
||||||
response_sender: &PacketSender,
|
response_sender: &PacketSender,
|
||||||
feature_set: Option<&FeatureSet>,
|
feature_set: Option<&FeatureSet>,
|
||||||
epoch_time_ms: u64,
|
epoch_time_ms: u64,
|
||||||
|
@ -1664,20 +1718,11 @@ impl ClusterInfo {
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.make_timeouts(&stakes, epoch_time_ms);
|
.make_timeouts(&stakes, epoch_time_ms);
|
||||||
let mut pull_responses = HashMap::new();
|
let mut pull_responses = HashMap::new();
|
||||||
packets.packets.iter().for_each(|packet| {
|
for (from_addr, packet) in packets {
|
||||||
let from_addr = packet.meta.addr();
|
match packet {
|
||||||
limited_deserialize(&packet.data[..packet.meta.size])
|
|
||||||
.into_iter()
|
|
||||||
.filter(|r: &Protocol| r.sanitize().is_ok())
|
|
||||||
.for_each(|request| match request {
|
|
||||||
Protocol::PullRequest(filter, caller) => {
|
Protocol::PullRequest(filter, caller) => {
|
||||||
let start = allocated.get();
|
let start = allocated.get();
|
||||||
if !caller.verify() {
|
if let Some(contact_info) = caller.contact_info() {
|
||||||
inc_new_counter_info!(
|
|
||||||
"cluster_info-gossip_pull_request_verify_fail",
|
|
||||||
1
|
|
||||||
);
|
|
||||||
} else if let Some(contact_info) = caller.contact_info() {
|
|
||||||
if contact_info.id == self.id() {
|
if contact_info.id == self.id() {
|
||||||
warn!("PullRequest ignored, I'm talking to myself");
|
warn!("PullRequest ignored, I'm talking to myself");
|
||||||
inc_new_counter_debug!("cluster_info-window-request-loopback", 1);
|
inc_new_counter_debug!("cluster_info-window-request-loopback", 1);
|
||||||
|
@ -1699,18 +1744,8 @@ impl ClusterInfo {
|
||||||
("pull_request", (allocated.get() - start) as i64, i64),
|
("pull_request", (allocated.get() - start) as i64, i64),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
Protocol::PullResponse(from, mut data) => {
|
Protocol::PullResponse(from, data) => {
|
||||||
let start = allocated.get();
|
let start = allocated.get();
|
||||||
data.retain(|v| {
|
|
||||||
let ret = v.verify();
|
|
||||||
if !ret {
|
|
||||||
inc_new_counter_info!(
|
|
||||||
"cluster_info-gossip_pull_response_verify_fail",
|
|
||||||
1
|
|
||||||
);
|
|
||||||
}
|
|
||||||
ret
|
|
||||||
});
|
|
||||||
let pull_entry = pull_responses.entry(from).or_insert_with(Vec::new);
|
let pull_entry = pull_responses.entry(from).or_insert_with(Vec::new);
|
||||||
pull_entry.extend(data);
|
pull_entry.extend(data);
|
||||||
datapoint_debug!(
|
datapoint_debug!(
|
||||||
|
@ -1718,18 +1753,8 @@ impl ClusterInfo {
|
||||||
("pull_response", (allocated.get() - start) as i64, i64),
|
("pull_response", (allocated.get() - start) as i64, i64),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
Protocol::PushMessage(from, mut data) => {
|
Protocol::PushMessage(from, data) => {
|
||||||
let start = allocated.get();
|
let start = allocated.get();
|
||||||
data.retain(|v| {
|
|
||||||
let ret = v.verify();
|
|
||||||
if !ret {
|
|
||||||
inc_new_counter_info!(
|
|
||||||
"cluster_info-gossip_push_msg_verify_fail",
|
|
||||||
1
|
|
||||||
);
|
|
||||||
}
|
|
||||||
ret
|
|
||||||
});
|
|
||||||
let rsp = self.handle_push_message(recycler, &from, data, stakes);
|
let rsp = self.handle_push_message(recycler, &from, data, stakes);
|
||||||
if let Some(rsp) = rsp {
|
if let Some(rsp) = rsp {
|
||||||
let _ignore_disconnect = response_sender.send(rsp);
|
let _ignore_disconnect = response_sender.send(rsp);
|
||||||
|
@ -1741,7 +1766,6 @@ impl ClusterInfo {
|
||||||
}
|
}
|
||||||
Protocol::PruneMessage(from, data) => {
|
Protocol::PruneMessage(from, data) => {
|
||||||
let start = allocated.get();
|
let start = allocated.get();
|
||||||
if data.verify() {
|
|
||||||
self.stats.prune_message_count.add_relaxed(1);
|
self.stats.prune_message_count.add_relaxed(1);
|
||||||
self.stats
|
self.stats
|
||||||
.prune_message_len
|
.prune_message_len
|
||||||
|
@ -1763,16 +1787,13 @@ impl ClusterInfo {
|
||||||
}
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
inc_new_counter_debug!("cluster_info-gossip_prune_msg_verify_fail", 1);
|
|
||||||
}
|
|
||||||
datapoint_debug!(
|
datapoint_debug!(
|
||||||
"solana-gossip-listen-memory",
|
"solana-gossip-listen-memory",
|
||||||
("prune_message", (allocated.get() - start) as i64, i64),
|
("prune_message", (allocated.get() - start) as i64, i64),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
})
|
}
|
||||||
});
|
}
|
||||||
|
|
||||||
for (from, data) in pull_responses {
|
for (from, data) in pull_responses {
|
||||||
self.handle_pull_response(&from, data, &timeouts);
|
self.handle_pull_response(&from, data, &timeouts);
|
||||||
|
@ -2125,12 +2146,31 @@ impl ClusterInfo {
|
||||||
feature_set: Option<&FeatureSet>,
|
feature_set: Option<&FeatureSet>,
|
||||||
epoch_time_ms: u64,
|
epoch_time_ms: u64,
|
||||||
) {
|
) {
|
||||||
let sender = response_sender.clone();
|
let mut timer = Measure::start("process_gossip_packets_time");
|
||||||
thread_pool.install(|| {
|
let packets: Vec<_> = thread_pool.install(|| {
|
||||||
requests.into_par_iter().for_each_with(sender, |s, reqs| {
|
requests
|
||||||
self.handle_packets(&recycler, &stakes, reqs, s, feature_set, epoch_time_ms)
|
.into_par_iter()
|
||||||
});
|
.flat_map(|request| request.packets.into_par_iter())
|
||||||
|
.filter_map(|packet| {
|
||||||
|
let protocol: Protocol =
|
||||||
|
limited_deserialize(&packet.data[..packet.meta.size]).ok()?;
|
||||||
|
protocol.sanitize().ok()?;
|
||||||
|
let protocol = protocol.par_verify()?;
|
||||||
|
Some((packet.meta.addr(), protocol))
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
});
|
});
|
||||||
|
self.handle_packets(
|
||||||
|
recycler,
|
||||||
|
&stakes,
|
||||||
|
packets,
|
||||||
|
response_sender,
|
||||||
|
feature_set,
|
||||||
|
epoch_time_ms,
|
||||||
|
);
|
||||||
|
self.stats
|
||||||
|
.process_gossip_packets_time
|
||||||
|
.add_measure(&mut timer);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Process messages from the network
|
/// Process messages from the network
|
||||||
|
@ -2241,6 +2281,11 @@ impl ClusterInfo {
|
||||||
i64
|
i64
|
||||||
),
|
),
|
||||||
("purge", self.stats.purge.clear(), i64),
|
("purge", self.stats.purge.clear(), i64),
|
||||||
|
(
|
||||||
|
"process_gossip_packets_time",
|
||||||
|
self.stats.process_gossip_packets_time.clear(),
|
||||||
|
i64
|
||||||
|
),
|
||||||
(
|
(
|
||||||
"process_pull_resp",
|
"process_pull_resp",
|
||||||
self.stats.process_pull_response.clear(),
|
self.stats.process_pull_response.clear(),
|
||||||
|
|
|
@ -273,9 +273,9 @@ impl EntryVerificationState {
|
||||||
.zip(entries)
|
.zip(entries)
|
||||||
.all(|((hash, tx_hash), answer)| {
|
.all(|((hash, tx_hash), answer)| {
|
||||||
if answer.num_hashes == 0 {
|
if answer.num_hashes == 0 {
|
||||||
*hash == answer.hash
|
hash == answer.hash
|
||||||
} else {
|
} else {
|
||||||
let mut poh = Poh::new(*hash, None);
|
let mut poh = Poh::new(hash, None);
|
||||||
if let Some(mixin) = tx_hash {
|
if let Some(mixin) = tx_hash {
|
||||||
poh.record(*mixin).unwrap().hash == answer.hash
|
poh.record(*mixin).unwrap().hash == answer.hash
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -165,6 +165,21 @@ impl<'a, T: Clone + Send + Sync + Default + Sized> IntoParallelIterator for &'a
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<T: Clone + Default + Send + Sized> IntoParallelIterator for PinnedVec<T> {
|
||||||
|
type Item = T;
|
||||||
|
type Iter = rayon::vec::IntoIter<T>;
|
||||||
|
|
||||||
|
fn into_par_iter(mut self) -> Self::Iter {
|
||||||
|
if self.pinned {
|
||||||
|
unpin(self.x.as_mut_ptr());
|
||||||
|
self.pinned = false;
|
||||||
|
}
|
||||||
|
self.pinnable = false;
|
||||||
|
self.recycler = None;
|
||||||
|
std::mem::take(&mut self.x).into_par_iter()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl<T: Clone + Default + Sized> PinnedVec<T> {
|
impl<T: Clone + Default + Sized> PinnedVec<T> {
|
||||||
pub fn reserve_and_pin(&mut self, size: usize) {
|
pub fn reserve_and_pin(&mut self, size: usize) {
|
||||||
if self.x.capacity() < size {
|
if self.x.capacity() < size {
|
||||||
|
|
Loading…
Reference in New Issue