tracks number of staked/stale/dead nodes in turbine cluster-nodes (#27915)

This commit is contained in:
behzad nouri 2022-09-19 18:16:04 +00:00 committed by GitHub
parent 5afefe7ea9
commit abfb996135
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 31 additions and 30 deletions

View File

@ -381,14 +381,7 @@ fn update_peer_stats(
last_datapoint_submit: &AtomicInterval, last_datapoint_submit: &AtomicInterval,
) { ) {
if last_datapoint_submit.should_update(1000) { if last_datapoint_submit.should_update(1000) {
let now = timestamp(); cluster_nodes.submit_metrics("cluster_nodes_broadcast", timestamp());
let num_live_peers = cluster_nodes.num_peers_live(now);
let broadcast_len = cluster_nodes.num_peers() + 1;
datapoint_info!(
"cluster_info-num_nodes",
("live_count", num_live_peers, i64),
("broadcast_count", broadcast_len, i64)
);
} }
} }

View File

@ -87,25 +87,34 @@ impl Node {
} }
impl<T> ClusterNodes<T> { impl<T> ClusterNodes<T> {
pub(crate) fn num_peers(&self) -> usize { pub(crate) fn submit_metrics(&self, name: &'static str, now: u64) {
self.nodes.len().saturating_sub(1) let mut num_nodes_dead = 0;
} let mut num_nodes_staked = 0;
let mut num_nodes_stale = 0;
// A peer is considered live if they generated their contact info recently. for node in &self.nodes {
pub(crate) fn num_peers_live(&self, now: u64) -> usize { if node.stake != 0u64 {
self.nodes num_nodes_staked += 1;
.iter() }
.filter(|node| node.pubkey() != self.pubkey) match node.contact_info() {
.filter_map(|node| node.contact_info()) None => {
.filter(|node| { num_nodes_dead += 1;
let elapsed = if node.wallclock < now { }
now - node.wallclock Some(node) => {
} else { let age = now.saturating_sub(node.wallclock);
node.wallclock - now if age > CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS {
}; num_nodes_stale += 1;
elapsed < CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS }
}) }
.count() }
}
num_nodes_stale += num_nodes_dead;
datapoint_info!(
name,
("num_nodes", self.nodes.len(), i64),
("num_nodes_dead", num_nodes_dead, i64),
("num_nodes_staked", num_nodes_staked, i64),
("num_nodes_stale", num_nodes_stale, i64),
);
} }
} }

View File

@ -89,10 +89,9 @@ impl RetransmitStats {
if self.since.elapsed() < SUBMIT_CADENCE { if self.since.elapsed() < SUBMIT_CADENCE {
return; return;
} }
let num_peers = cluster_nodes_cache cluster_nodes_cache
.get(root_bank.slot(), root_bank, working_bank, cluster_info) .get(root_bank.slot(), root_bank, working_bank, cluster_info)
.num_peers(); .submit_metrics("cluster_nodes_retransmit", timestamp());
datapoint_info!("retransmit-num_nodes", ("count", num_peers, i64));
datapoint_info!( datapoint_info!(
"retransmit-stage", "retransmit-stage",
("total_time", self.total_time, i64), ("total_time", self.total_time, i64),