tracks number of staked/stale/dead nodes in turbine cluster-nodes (#27915)
This commit is contained in:
parent
5afefe7ea9
commit
abfb996135
|
@ -381,14 +381,7 @@ fn update_peer_stats(
|
||||||
last_datapoint_submit: &AtomicInterval,
|
last_datapoint_submit: &AtomicInterval,
|
||||||
) {
|
) {
|
||||||
if last_datapoint_submit.should_update(1000) {
|
if last_datapoint_submit.should_update(1000) {
|
||||||
let now = timestamp();
|
cluster_nodes.submit_metrics("cluster_nodes_broadcast", timestamp());
|
||||||
let num_live_peers = cluster_nodes.num_peers_live(now);
|
|
||||||
let broadcast_len = cluster_nodes.num_peers() + 1;
|
|
||||||
datapoint_info!(
|
|
||||||
"cluster_info-num_nodes",
|
|
||||||
("live_count", num_live_peers, i64),
|
|
||||||
("broadcast_count", broadcast_len, i64)
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -87,25 +87,34 @@ impl Node {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T> ClusterNodes<T> {
|
impl<T> ClusterNodes<T> {
|
||||||
pub(crate) fn num_peers(&self) -> usize {
|
pub(crate) fn submit_metrics(&self, name: &'static str, now: u64) {
|
||||||
self.nodes.len().saturating_sub(1)
|
let mut num_nodes_dead = 0;
|
||||||
}
|
let mut num_nodes_staked = 0;
|
||||||
|
let mut num_nodes_stale = 0;
|
||||||
// A peer is considered live if they generated their contact info recently.
|
for node in &self.nodes {
|
||||||
pub(crate) fn num_peers_live(&self, now: u64) -> usize {
|
if node.stake != 0u64 {
|
||||||
self.nodes
|
num_nodes_staked += 1;
|
||||||
.iter()
|
}
|
||||||
.filter(|node| node.pubkey() != self.pubkey)
|
match node.contact_info() {
|
||||||
.filter_map(|node| node.contact_info())
|
None => {
|
||||||
.filter(|node| {
|
num_nodes_dead += 1;
|
||||||
let elapsed = if node.wallclock < now {
|
}
|
||||||
now - node.wallclock
|
Some(node) => {
|
||||||
} else {
|
let age = now.saturating_sub(node.wallclock);
|
||||||
node.wallclock - now
|
if age > CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS {
|
||||||
};
|
num_nodes_stale += 1;
|
||||||
elapsed < CRDS_GOSSIP_PULL_CRDS_TIMEOUT_MS
|
}
|
||||||
})
|
}
|
||||||
.count()
|
}
|
||||||
|
}
|
||||||
|
num_nodes_stale += num_nodes_dead;
|
||||||
|
datapoint_info!(
|
||||||
|
name,
|
||||||
|
("num_nodes", self.nodes.len(), i64),
|
||||||
|
("num_nodes_dead", num_nodes_dead, i64),
|
||||||
|
("num_nodes_staked", num_nodes_staked, i64),
|
||||||
|
("num_nodes_stale", num_nodes_stale, i64),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -89,10 +89,9 @@ impl RetransmitStats {
|
||||||
if self.since.elapsed() < SUBMIT_CADENCE {
|
if self.since.elapsed() < SUBMIT_CADENCE {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
let num_peers = cluster_nodes_cache
|
cluster_nodes_cache
|
||||||
.get(root_bank.slot(), root_bank, working_bank, cluster_info)
|
.get(root_bank.slot(), root_bank, working_bank, cluster_info)
|
||||||
.num_peers();
|
.submit_metrics("cluster_nodes_retransmit", timestamp());
|
||||||
datapoint_info!("retransmit-num_nodes", ("count", num_peers, i64));
|
|
||||||
datapoint_info!(
|
datapoint_info!(
|
||||||
"retransmit-stage",
|
"retransmit-stage",
|
||||||
("total_time", self.total_time, i64),
|
("total_time", self.total_time, i64),
|
||||||
|
|
Loading…
Reference in New Issue