Add Gossip Loop metrics (#26195)
* add three gossip metrics measuring gossip loop times
* add 5 metrics
* rm space
* rm space
* Update SECURITY.md
  - fix nav link
  - add bounty split policy for duplicate reports
* Add transaction index in slot to geyser plugin TransactionInfo (#25688)
* Define shuffle to prep using same shuffle for multiple slices
* Determine transaction indexes and plumb to execute_batch
* Pair transaction_index with transaction in TransactionStatusService
* Add new ReplicaTransactionInfoVersion
* Plumb transaction_indexes through BankingStage
* Prepare BankingStage to receive transaction indexes from PohRecorder
* Determine transaction indexes in PohRecorder; add field to WorkingBank
* Add PohRecorder::record unit test
* Only pass starting_transaction_index around PohRecorder
* Add helper structs to simplify test DashMap
* Pass entry and starting-index into process_entries_with_callback together
* Add tx-index checks to test_rebatch_transactions
* Revert shuffle definition and use zip/unzip
* Only zip/unzip if randomize
* Add confirm_slot_entries test
* Review nits
* Add type alias to make sender docs more clear
* Update SECURITY.md finish filling out the table....
* rpc: fix possible deadlock in rpc (#26051)
* Add StatusCache::root_slot_deltas() and use it (#26170)
* Remove InMemAccountsIndex::map() and use map_internal directly (#26189)
* [quic] Decrement total_streams correctly (#26158)
* remove comment
* alphabetical metrics. no abbreviations
* remove trailing white space
* cargo fmt to update code format/readability

Co-authored-by: Trent Nelson <trent@solana.com>
Co-authored-by: Tyera Eulberg <tyera@solana.com>
Co-authored-by: Boqin Qin(秦 伯钦) <Bobbqqin@gmail.com>
Co-authored-by: Brooks Prumo <brooks@solana.com>
Co-authored-by: Miles Obare <bdhobare@gmail.com>
This commit is contained in:
parent
c01a8f271e
commit
032bee13ab
|
@ -1556,6 +1556,7 @@ impl ClusterInfo {
|
||||||
sender: &PacketBatchSender,
|
sender: &PacketBatchSender,
|
||||||
generate_pull_requests: bool,
|
generate_pull_requests: bool,
|
||||||
) -> Result<(), GossipError> {
|
) -> Result<(), GossipError> {
|
||||||
|
let _st = ScopedTimer::from(&self.stats.gossip_transmit_loop_time);
|
||||||
let reqs = self.generate_new_gossip_requests(
|
let reqs = self.generate_new_gossip_requests(
|
||||||
thread_pool,
|
thread_pool,
|
||||||
gossip_validators,
|
gossip_validators,
|
||||||
|
@ -1573,6 +1574,9 @@ impl ClusterInfo {
|
||||||
.add_relaxed(packet_batch.len() as u64);
|
.add_relaxed(packet_batch.len() as u64);
|
||||||
sender.send(packet_batch)?;
|
sender.send(packet_batch)?;
|
||||||
}
|
}
|
||||||
|
self.stats
|
||||||
|
.gossip_transmit_loop_iterations_since_last_report
|
||||||
|
.add_relaxed(1);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2435,6 +2439,9 @@ impl ClusterInfo {
|
||||||
stakes,
|
stakes,
|
||||||
response_sender,
|
response_sender,
|
||||||
);
|
);
|
||||||
|
self.stats
|
||||||
|
.process_gossip_packets_iterations_since_last_report
|
||||||
|
.add_relaxed(1);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2490,6 +2497,7 @@ impl ClusterInfo {
|
||||||
last_print: &mut Instant,
|
last_print: &mut Instant,
|
||||||
should_check_duplicate_instance: bool,
|
should_check_duplicate_instance: bool,
|
||||||
) -> Result<(), GossipError> {
|
) -> Result<(), GossipError> {
|
||||||
|
let _st = ScopedTimer::from(&self.stats.gossip_listen_loop_time);
|
||||||
const RECV_TIMEOUT: Duration = Duration::from_secs(1);
|
const RECV_TIMEOUT: Duration = Duration::from_secs(1);
|
||||||
const SUBMIT_GOSSIP_STATS_INTERVAL: Duration = Duration::from_secs(2);
|
const SUBMIT_GOSSIP_STATS_INTERVAL: Duration = Duration::from_secs(2);
|
||||||
let mut packets = VecDeque::from(receiver.recv_timeout(RECV_TIMEOUT)?);
|
let mut packets = VecDeque::from(receiver.recv_timeout(RECV_TIMEOUT)?);
|
||||||
|
@ -2528,6 +2536,9 @@ impl ClusterInfo {
|
||||||
submit_gossip_stats(&self.stats, &self.gossip, &stakes);
|
submit_gossip_stats(&self.stats, &self.gossip, &stakes);
|
||||||
*last_print = Instant::now();
|
*last_print = Instant::now();
|
||||||
}
|
}
|
||||||
|
self.stats
|
||||||
|
.gossip_listen_loop_iterations_since_last_report
|
||||||
|
.add_relaxed(1);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -103,6 +103,8 @@ pub struct GossipStats {
|
||||||
pub(crate) get_epoch_duration_no_working_bank: Counter,
|
pub(crate) get_epoch_duration_no_working_bank: Counter,
|
||||||
pub(crate) get_votes: Counter,
|
pub(crate) get_votes: Counter,
|
||||||
pub(crate) get_votes_count: Counter,
|
pub(crate) get_votes_count: Counter,
|
||||||
|
pub(crate) gossip_listen_loop_iterations_since_last_report: Counter,
|
||||||
|
pub(crate) gossip_listen_loop_time: Counter,
|
||||||
pub(crate) gossip_packets_dropped_count: Counter,
|
pub(crate) gossip_packets_dropped_count: Counter,
|
||||||
pub(crate) gossip_ping_msg_verify_fail: Counter,
|
pub(crate) gossip_ping_msg_verify_fail: Counter,
|
||||||
pub(crate) gossip_pong_msg_verify_fail: Counter,
|
pub(crate) gossip_pong_msg_verify_fail: Counter,
|
||||||
|
@ -113,6 +115,8 @@ pub struct GossipStats {
|
||||||
pub(crate) gossip_pull_request_verify_fail: Counter,
|
pub(crate) gossip_pull_request_verify_fail: Counter,
|
||||||
pub(crate) gossip_pull_response_verify_fail: Counter,
|
pub(crate) gossip_pull_response_verify_fail: Counter,
|
||||||
pub(crate) gossip_push_msg_verify_fail: Counter,
|
pub(crate) gossip_push_msg_verify_fail: Counter,
|
||||||
|
pub(crate) gossip_transmit_loop_iterations_since_last_report: Counter,
|
||||||
|
pub(crate) gossip_transmit_loop_time: Counter,
|
||||||
pub(crate) handle_batch_ping_messages_time: Counter,
|
pub(crate) handle_batch_ping_messages_time: Counter,
|
||||||
pub(crate) handle_batch_pong_messages_time: Counter,
|
pub(crate) handle_batch_pong_messages_time: Counter,
|
||||||
pub(crate) handle_batch_prune_messages_time: Counter,
|
pub(crate) handle_batch_prune_messages_time: Counter,
|
||||||
|
@ -137,6 +141,7 @@ pub struct GossipStats {
|
||||||
pub(crate) packets_sent_pull_requests_count: Counter,
|
pub(crate) packets_sent_pull_requests_count: Counter,
|
||||||
pub(crate) packets_sent_pull_responses_count: Counter,
|
pub(crate) packets_sent_pull_responses_count: Counter,
|
||||||
pub(crate) packets_sent_push_messages_count: Counter,
|
pub(crate) packets_sent_push_messages_count: Counter,
|
||||||
|
pub(crate) process_gossip_packets_iterations_since_last_report: Counter,
|
||||||
pub(crate) process_gossip_packets_time: Counter,
|
pub(crate) process_gossip_packets_time: Counter,
|
||||||
pub(crate) process_prune: Counter,
|
pub(crate) process_prune: Counter,
|
||||||
pub(crate) process_pull_requests: Counter,
|
pub(crate) process_pull_requests: Counter,
|
||||||
|
@ -385,6 +390,37 @@ pub(crate) fn submit_gossip_stats(
|
||||||
stats.gossip_pull_request_dropped_requests.clear(),
|
stats.gossip_pull_request_dropped_requests.clear(),
|
||||||
i64
|
i64
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
"gossip_transmit_loop_time",
|
||||||
|
stats.gossip_transmit_loop_time.clear(),
|
||||||
|
i64
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"gossip_transmit_loop_iterations_since_last_report",
|
||||||
|
stats
|
||||||
|
.gossip_transmit_loop_iterations_since_last_report
|
||||||
|
.clear(),
|
||||||
|
i64
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"gossip_listen_loop_time",
|
||||||
|
stats.gossip_listen_loop_time.clear(),
|
||||||
|
i64
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"gossip_listen_loop_iterations_since_last_report",
|
||||||
|
stats
|
||||||
|
.gossip_listen_loop_iterations_since_last_report
|
||||||
|
.clear(),
|
||||||
|
i64
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"process_gossip_packets_iterations_since_last_report",
|
||||||
|
stats
|
||||||
|
.process_gossip_packets_iterations_since_last_report
|
||||||
|
.clear(),
|
||||||
|
i64
|
||||||
|
),
|
||||||
);
|
);
|
||||||
datapoint_info!(
|
datapoint_info!(
|
||||||
"cluster_info_stats4",
|
"cluster_info_stats4",
|
||||||
|
|
Loading…
Reference in New Issue