Budget for gossip traffic (#9550)
This commit is contained in:
parent
3205361163
commit
65a9658b13
|
@ -24,6 +24,11 @@ use crate::{
|
||||||
result::{Error, Result},
|
result::{Error, Result},
|
||||||
weighted_shuffle::weighted_shuffle,
|
weighted_shuffle::weighted_shuffle,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use rand::distributions::{Distribution, WeightedIndex};
|
||||||
|
use rand::SeedableRng;
|
||||||
|
use rand_chacha::ChaChaRng;
|
||||||
|
|
||||||
use bincode::{serialize, serialized_size};
|
use bincode::{serialize, serialized_size};
|
||||||
use core::cmp;
|
use core::cmp;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
|
@ -31,6 +36,7 @@ use rayon::iter::IntoParallelIterator;
|
||||||
use rayon::iter::ParallelIterator;
|
use rayon::iter::ParallelIterator;
|
||||||
use rayon::ThreadPool;
|
use rayon::ThreadPool;
|
||||||
use solana_ledger::{bank_forks::BankForks, staking_utils};
|
use solana_ledger::{bank_forks::BankForks, staking_utils};
|
||||||
|
use solana_measure::measure::Measure;
|
||||||
use solana_measure::thread_mem_usage;
|
use solana_measure::thread_mem_usage;
|
||||||
use solana_metrics::{datapoint_debug, inc_new_counter_debug, inc_new_counter_error};
|
use solana_metrics::{datapoint_debug, inc_new_counter_debug, inc_new_counter_error};
|
||||||
use solana_net_utils::{
|
use solana_net_utils::{
|
||||||
|
@ -94,6 +100,12 @@ pub enum ClusterInfoError {
|
||||||
BadGossipAddress,
|
BadGossipAddress,
|
||||||
}
|
}
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
|
/// Byte budget used to rate-limit outbound gossip pull responses.
///
/// `handle_pull_requests` tops `bytes` up (capped at a multiple of the
/// per-interval allowance) once more than its refill interval has elapsed
/// since `last_timestamp_ms`, and subtracts the size of the packets it
/// queues for sending.
pub struct DataBudget {
|
||||||
|
bytes: usize, // number of bytes currently available in the budget to send
|
||||||
|
last_timestamp_ms: u64, // Last time (in ms) that the bytes count was increased,
|
||||||
|
// used to detect when the bytes budget should be increased again
|
||||||
|
}
|
||||||
|
#[derive(Clone)]
|
||||||
pub struct ClusterInfo {
|
pub struct ClusterInfo {
|
||||||
/// The network
|
/// The network
|
||||||
pub gossip: CrdsGossip,
|
pub gossip: CrdsGossip,
|
||||||
|
@ -101,6 +113,8 @@ pub struct ClusterInfo {
|
||||||
pub(crate) keypair: Arc<Keypair>,
|
pub(crate) keypair: Arc<Keypair>,
|
||||||
/// The network entrypoint
|
/// The network entrypoint
|
||||||
entrypoint: Option<ContactInfo>,
|
entrypoint: Option<ContactInfo>,
|
||||||
|
|
||||||
|
outbound_budget: DataBudget,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default, Clone)]
|
#[derive(Default, Clone)]
|
||||||
|
@ -197,6 +211,17 @@ enum Protocol {
|
||||||
PruneMessage(Pubkey, PruneData),
|
PruneMessage(Pubkey, PruneData),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Rating for a single pull response.
|
||||||
|
// The pull responses form a 2-d table:
|
||||||
|
// one row per target node, each row holding
|
||||||
|
// the list of responses for that node.
|
||||||
|
// A (to, responses_index) pair is a location in that table.
|
||||||
|
struct ResponseScore {
|
||||||
|
to: usize, // row index: which target node the response is addressed to
|
||||||
|
responses_index: usize, // index into the list of responses for the node selected by `to`
|
||||||
|
score: u64, // Relative score of the response; used as its weight when sampling which responses to send
|
||||||
|
}
|
||||||
|
|
||||||
impl ClusterInfo {
|
impl ClusterInfo {
|
||||||
/// Without a valid keypair gossip will not function. Only useful for tests.
|
/// Without a valid keypair gossip will not function. Only useful for tests.
|
||||||
pub fn new_with_invalid_keypair(contact_info: ContactInfo) -> Self {
|
pub fn new_with_invalid_keypair(contact_info: ContactInfo) -> Self {
|
||||||
|
@ -208,6 +233,10 @@ impl ClusterInfo {
|
||||||
gossip: CrdsGossip::default(),
|
gossip: CrdsGossip::default(),
|
||||||
keypair,
|
keypair,
|
||||||
entrypoint: None,
|
entrypoint: None,
|
||||||
|
outbound_budget: DataBudget {
|
||||||
|
bytes: 0,
|
||||||
|
last_timestamp_ms: 0,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
let id = contact_info.id;
|
let id = contact_info.id;
|
||||||
me.gossip.set_self(&id);
|
me.gossip.set_self(&id);
|
||||||
|
@ -1335,20 +1364,43 @@ impl ClusterInfo {
|
||||||
})
|
})
|
||||||
});
|
});
|
||||||
// process the collected pulls together
|
// process the collected pulls together
|
||||||
let rsp = Self::handle_pull_requests(me, recycler, gossip_pull_data);
|
let rsp = Self::handle_pull_requests(me, recycler, gossip_pull_data, stakes);
|
||||||
if let Some(rsp) = rsp {
|
if let Some(rsp) = rsp {
|
||||||
let _ignore_disconnect = response_sender.send(rsp);
|
let _ignore_disconnect = response_sender.send(rsp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Pull requests take an incoming bloom filter of contained entries from a node
|
||||||
|
// and try to send back to that node the values found to be missing.
|
||||||
fn handle_pull_requests(
|
fn handle_pull_requests(
|
||||||
me: &Arc<RwLock<Self>>,
|
me: &Arc<RwLock<Self>>,
|
||||||
recycler: &PacketsRecycler,
|
recycler: &PacketsRecycler,
|
||||||
requests: Vec<PullData>,
|
requests: Vec<PullData>,
|
||||||
|
stakes: &HashMap<Pubkey, u64>,
|
||||||
) -> Option<Packets> {
|
) -> Option<Packets> {
|
||||||
// split the requests into addrs and filters
|
// split the requests into addrs and filters
|
||||||
let mut caller_and_filters = vec![];
|
let mut caller_and_filters = vec![];
|
||||||
let mut addrs = vec![];
|
let mut addrs = vec![];
|
||||||
|
let mut time = Measure::start("handle_pull_requests");
|
||||||
|
{
|
||||||
|
let mut cluster_info = me.write().unwrap();
|
||||||
|
|
||||||
|
let now = timestamp();
|
||||||
|
const INTERVAL_MS: u64 = 100;
|
||||||
|
// allow 50kBps per staked validator, epoch slots + votes ~= 1.5kB/slot ~= 4kB/s
|
||||||
|
const BYTES_PER_INTERVAL: usize = 5000;
|
||||||
|
const MAX_BUDGET_MULTIPLE: usize = 5; // allow budget build-up to 5x the interval default
|
||||||
|
|
||||||
|
if now - cluster_info.outbound_budget.last_timestamp_ms > INTERVAL_MS {
|
||||||
|
let len = std::cmp::max(stakes.len(), 2);
|
||||||
|
cluster_info.outbound_budget.bytes += len * BYTES_PER_INTERVAL;
|
||||||
|
cluster_info.outbound_budget.bytes = std::cmp::min(
|
||||||
|
cluster_info.outbound_budget.bytes,
|
||||||
|
MAX_BUDGET_MULTIPLE * len * BYTES_PER_INTERVAL,
|
||||||
|
);
|
||||||
|
cluster_info.outbound_budget.last_timestamp_ms = now;
|
||||||
|
}
|
||||||
|
}
|
||||||
for pull_data in requests {
|
for pull_data in requests {
|
||||||
caller_and_filters.push((pull_data.caller, pull_data.filter));
|
caller_and_filters.push((pull_data.caller, pull_data.filter));
|
||||||
addrs.push(pull_data.from_addr);
|
addrs.push(pull_data.from_addr);
|
||||||
|
@ -1360,30 +1412,101 @@ impl ClusterInfo {
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.gossip
|
.gossip
|
||||||
.process_pull_requests(caller_and_filters, now);
|
.process_pull_requests(caller_and_filters, now);
|
||||||
let mut packets = Packets::new_with_recycler(recycler.clone(), 64, "handle_pull_requests");
|
|
||||||
pull_responses
|
// Filter out responses with a bad destination address or an empty response list
|
||||||
|
let pull_responses: Vec<_> = pull_responses
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.zip(addrs.into_iter())
|
.zip(addrs.into_iter())
|
||||||
.for_each(|(response, from_addr)| {
|
.filter_map(|(responses, from_addr)| {
|
||||||
if !from_addr.ip().is_unspecified() && from_addr.port() != 0 {
|
if !from_addr.ip().is_unspecified()
|
||||||
let len = response.len();
|
&& from_addr.port() != 0
|
||||||
trace!("get updates since response {}", len);
|
&& !responses.is_empty()
|
||||||
inc_new_counter_debug!("cluster_info-pull_request-rsp", len);
|
{
|
||||||
Self::split_gossip_messages(response)
|
Some((responses, from_addr))
|
||||||
.into_iter()
|
|
||||||
.for_each(|payload| {
|
|
||||||
let protocol = Protocol::PullResponse(self_id, payload);
|
|
||||||
// The remote node may not know its public IP:PORT. Instead of responding to the caller's
|
|
||||||
// gossip addr, respond to the origin addr. The last origin addr is picked from the list of
|
|
||||||
// addrs.
|
|
||||||
packets
|
|
||||||
.packets
|
|
||||||
.push(Packet::from_data(&from_addr, protocol))
|
|
||||||
})
|
|
||||||
} else {
|
} else {
|
||||||
trace!("Dropping Gossip pull response, as destination is unknown");
|
None
|
||||||
}
|
}
|
||||||
});
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
if pull_responses.is_empty() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut stats: Vec<_> = pull_responses
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.map(|(i, (responses, _from_addr))| {
|
||||||
|
let score: u64 = if stakes.get(&responses[0].pubkey()).is_some() {
|
||||||
|
2
|
||||||
|
} else {
|
||||||
|
1
|
||||||
|
};
|
||||||
|
responses
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.map(|(j, _response)| ResponseScore {
|
||||||
|
to: i,
|
||||||
|
responses_index: j,
|
||||||
|
score,
|
||||||
|
})
|
||||||
|
.collect::<Vec<ResponseScore>>()
|
||||||
|
})
|
||||||
|
.flatten()
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
stats.sort_by(|a, b| a.score.cmp(&b.score));
|
||||||
|
let weights: Vec<_> = stats.iter().map(|stat| stat.score).collect();
|
||||||
|
|
||||||
|
let seed = [48u8; 32];
|
||||||
|
let rng = &mut ChaChaRng::from_seed(seed);
|
||||||
|
let weighted_index = WeightedIndex::new(weights).unwrap();
|
||||||
|
|
||||||
|
let mut packets = Packets::new_with_recycler(recycler.clone(), 64, "handle_pull_requests");
|
||||||
|
let mut total_bytes = 0;
|
||||||
|
let outbound_budget = me.read().unwrap().outbound_budget.bytes;
|
||||||
|
let mut sent = HashSet::new();
|
||||||
|
while sent.len() < stats.len() {
|
||||||
|
let index = weighted_index.sample(rng);
|
||||||
|
if sent.contains(&index) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
sent.insert(index);
|
||||||
|
let stat = &stats[index];
|
||||||
|
let from_addr = pull_responses[stat.to].1;
|
||||||
|
let response = pull_responses[stat.to].0[stat.responses_index].clone();
|
||||||
|
let protocol = Protocol::PullResponse(self_id, vec![response]);
|
||||||
|
packets
|
||||||
|
.packets
|
||||||
|
.push(Packet::from_data(&from_addr, protocol));
|
||||||
|
let len = packets.packets.len();
|
||||||
|
total_bytes += packets.packets[len - 1].meta.size;
|
||||||
|
|
||||||
|
if total_bytes > outbound_budget {
|
||||||
|
inc_new_counter_info!("gossip_pull_request-no_budget", 1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
let mut cluster_info = me.write().unwrap();
|
||||||
|
cluster_info.outbound_budget.bytes = cluster_info
|
||||||
|
.outbound_budget
|
||||||
|
.bytes
|
||||||
|
.saturating_sub(total_bytes);
|
||||||
|
}
|
||||||
|
time.stop();
|
||||||
|
inc_new_counter_info!("gossip_pull_request-sent_requests", sent.len());
|
||||||
|
inc_new_counter_info!(
|
||||||
|
"gossip_pull_request-dropped_requests",
|
||||||
|
stats.len() - sent.len()
|
||||||
|
);
|
||||||
|
debug!(
|
||||||
|
"handle_pull_requests: {} sent: {} total: {} total_bytes: {}",
|
||||||
|
time,
|
||||||
|
sent.len(),
|
||||||
|
stats.len(),
|
||||||
|
total_bytes
|
||||||
|
);
|
||||||
if packets.is_empty() {
|
if packets.is_empty() {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue