tracks number of staked/stale/dead nodes in turbine cluster-nodes (#27915)

This commit is contained in:
behzad nouri 2022-09-19 18:16:04 +00:00 committed by GitHub
parent 5afefe7ea9
commit abfb996135
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 31 additions and 30 deletions

View File

@ -381,14 +381,7 @@ fn update_peer_stats(
last_datapoint_submit: &AtomicInterval,
) {
if last_datapoint_submit.should_update(1000) {
let now = timestamp();
let num_live_peers = cluster_nodes.num_peers_live(now);
let broadcast_len = cluster_nodes.num_peers() + 1;
datapoint_info!(
"cluster_info-num_nodes",
("live_count", num_live_peers, i64),
("broadcast_count", broadcast_len, i64)
);
cluster_nodes.submit_metrics("cluster_nodes_broadcast", timestamp());
}
}

View File

@ -87,25 +87,34 @@ impl Node {
}
impl<T> ClusterNodes<T> {
/// Returns the number of tracked nodes minus one, clamped at zero when the
/// node set is empty.
// NOTE(review): the entry being discounted is presumably this node's own
// record in `self.nodes` -- confirm against the constructor.
pub(crate) fn num_peers(&self) -> usize {
    match self.nodes.len() {
        0 => 0,
        total => total - 1,
    }
}
// A peer is considered live if they generated their contact info recently.
pub(crate) fn num_peers_live(&self, now: u64) -> usize {
self.nodes
.iter()
.filter(|node| node.pubkey() != self.pubkey)
.filter_map(|node| node.contact_info())
.filter(|node| {
let elapsed = if node.wallclock < now {
now - node.wallclock
} else {
node.wallclock - now
};
elapsed < CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS
})
.count()
/// Submits a datapoint called `name` summarizing the nodes held by this
/// instance.
///
/// Reported fields:
/// * `num_nodes`: total number of entries in `self.nodes`.
/// * `num_nodes_dead`: entries for which `contact_info()` returns `None`.
/// * `num_nodes_staked`: entries whose `stake` is non-zero.
/// * `num_nodes_stale`: entries whose contact-info `wallclock` trails `now`
///   by more than `CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS`, plus every dead entry.
///
/// `now` is compared directly against `wallclock`; both are presumably
/// millisecond timestamps (the threshold constant is named `_MS`) -- confirm.
pub(crate) fn submit_metrics(&self, name: &'static str, now: u64) {
    let num_nodes_staked = self
        .nodes
        .iter()
        .filter(|entry| entry.stake != 0u64)
        .count();
    // Single pass over the contact infos: classify each entry as dead (no
    // contact info), expired (contact info too old), or neither.
    let (num_nodes_dead, num_nodes_expired) = self
        .nodes
        .iter()
        .map(|entry| entry.contact_info())
        .fold((0usize, 0usize), |(dead, expired), info| match info {
            None => (dead + 1, expired),
            // A wallclock ahead of `now` saturates to an age of zero, so such
            // an entry is never counted as expired.
            Some(info)
                if now.saturating_sub(info.wallclock) > CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS =>
            {
                (dead, expired + 1)
            }
            Some(_) => (dead, expired),
        });
    // Dead entries are reported as stale as well.
    let num_nodes_stale = num_nodes_expired + num_nodes_dead;
    datapoint_info!(
        name,
        ("num_nodes", self.nodes.len(), i64),
        ("num_nodes_dead", num_nodes_dead, i64),
        ("num_nodes_staked", num_nodes_staked, i64),
        ("num_nodes_stale", num_nodes_stale, i64),
    );
}
}

View File

@ -89,10 +89,9 @@ impl RetransmitStats {
if self.since.elapsed() < SUBMIT_CADENCE {
return;
}
let num_peers = cluster_nodes_cache
cluster_nodes_cache
.get(root_bank.slot(), root_bank, working_bank, cluster_info)
.num_peers();
datapoint_info!("retransmit-num_nodes", ("count", num_peers, i64));
.submit_metrics("cluster_nodes_retransmit", timestamp());
datapoint_info!(
"retransmit-stage",
("total_time", self.total_time, i64),