Add Gossip Loop metrics (#26195)

* add three gossip metrics measuring gossip loop times

* add 5 metrics

* rm space

* rm space

* Update SECURITY.md

- fix nav link
- add bounty split policy for duplicate reports

* Add transaction index in slot to geyser plugin TransactionInfo (#25688)

* Define shuffle to prep using same shuffle for multiple slices

* Determine transaction indexes and plumb to execute_batch

* Pair transaction_index with transaction in TransactionStatusService

* Add new ReplicaTransactionInfoVersion

* Plumb transaction_indexes through BankingStage

* Prepare BankingStage to receive transaction indexes from PohRecorder

* Determine transaction indexes in PohRecorder; add field to WorkingBank

* Add PohRecorder::record unit test

* Only pass starting_transaction_index around PohRecorder

* Add helper structs to simplify test DashMap

* Pass entry and starting-index into process_entries_with_callback together

* Add tx-index checks to test_rebatch_transactions

* Revert shuffle definition and use zip/unzip

* Only zip/unzip if randomize

* Add confirm_slot_entries test

* Review nits

* Add type alias to make sender docs clearer

* Update SECURITY.md

finish filling out the table...

* rpc: fix possible deadlock in rpc (#26051)

* Add StatusCache::root_slot_deltas() and use it (#26170)

* Remove InMemAccountsIndex::map() and use map_internal directly (#26189)

* [quic] Decrement total_streams correctly (#26158)

* remove comment

* alphabetize metrics; no abbreviations

* remove trailing white space

* cargo fmt to update code format/readability

Co-authored-by: Trent Nelson <trent@solana.com>
Co-authored-by: Tyera Eulberg <tyera@solana.com>
Co-authored-by: Boqin Qin(秦 伯钦) <Bobbqqin@gmail.com>
Co-authored-by: Brooks Prumo <brooks@solana.com>
Co-authored-by: Miles Obare <bdhobare@gmail.com>
This commit is contained in:
Greg Cusack 2022-06-29 11:55:41 -06:00 committed by GitHub
parent c01a8f271e
commit 032bee13ab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 47 additions and 0 deletions

View File

@ -1556,6 +1556,7 @@ impl ClusterInfo {
sender: &PacketBatchSender,
generate_pull_requests: bool,
) -> Result<(), GossipError> {
let _st = ScopedTimer::from(&self.stats.gossip_transmit_loop_time);
let reqs = self.generate_new_gossip_requests(
thread_pool,
gossip_validators,
@ -1573,6 +1574,9 @@ impl ClusterInfo {
.add_relaxed(packet_batch.len() as u64);
sender.send(packet_batch)?;
}
self.stats
.gossip_transmit_loop_iterations_since_last_report
.add_relaxed(1);
Ok(())
}
@ -2435,6 +2439,9 @@ impl ClusterInfo {
stakes,
response_sender,
);
self.stats
.process_gossip_packets_iterations_since_last_report
.add_relaxed(1);
Ok(())
}
@ -2490,6 +2497,7 @@ impl ClusterInfo {
last_print: &mut Instant,
should_check_duplicate_instance: bool,
) -> Result<(), GossipError> {
let _st = ScopedTimer::from(&self.stats.gossip_listen_loop_time);
const RECV_TIMEOUT: Duration = Duration::from_secs(1);
const SUBMIT_GOSSIP_STATS_INTERVAL: Duration = Duration::from_secs(2);
let mut packets = VecDeque::from(receiver.recv_timeout(RECV_TIMEOUT)?);
@ -2528,6 +2536,9 @@ impl ClusterInfo {
submit_gossip_stats(&self.stats, &self.gossip, &stakes);
*last_print = Instant::now();
}
self.stats
.gossip_listen_loop_iterations_since_last_report
.add_relaxed(1);
Ok(())
}

View File

@ -103,6 +103,8 @@ pub struct GossipStats {
pub(crate) get_epoch_duration_no_working_bank: Counter,
pub(crate) get_votes: Counter,
pub(crate) get_votes_count: Counter,
pub(crate) gossip_listen_loop_iterations_since_last_report: Counter,
pub(crate) gossip_listen_loop_time: Counter,
pub(crate) gossip_packets_dropped_count: Counter,
pub(crate) gossip_ping_msg_verify_fail: Counter,
pub(crate) gossip_pong_msg_verify_fail: Counter,
@ -113,6 +115,8 @@ pub struct GossipStats {
pub(crate) gossip_pull_request_verify_fail: Counter,
pub(crate) gossip_pull_response_verify_fail: Counter,
pub(crate) gossip_push_msg_verify_fail: Counter,
pub(crate) gossip_transmit_loop_iterations_since_last_report: Counter,
pub(crate) gossip_transmit_loop_time: Counter,
pub(crate) handle_batch_ping_messages_time: Counter,
pub(crate) handle_batch_pong_messages_time: Counter,
pub(crate) handle_batch_prune_messages_time: Counter,
@ -137,6 +141,7 @@ pub struct GossipStats {
pub(crate) packets_sent_pull_requests_count: Counter,
pub(crate) packets_sent_pull_responses_count: Counter,
pub(crate) packets_sent_push_messages_count: Counter,
pub(crate) process_gossip_packets_iterations_since_last_report: Counter,
pub(crate) process_gossip_packets_time: Counter,
pub(crate) process_prune: Counter,
pub(crate) process_pull_requests: Counter,
@ -385,6 +390,37 @@ pub(crate) fn submit_gossip_stats(
stats.gossip_pull_request_dropped_requests.clear(),
i64
),
(
"gossip_transmit_loop_time",
stats.gossip_transmit_loop_time.clear(),
i64
),
(
"gossip_transmit_loop_iterations_since_last_report",
stats
.gossip_transmit_loop_iterations_since_last_report
.clear(),
i64
),
(
"gossip_listen_loop_time",
stats.gossip_listen_loop_time.clear(),
i64
),
(
"gossip_listen_loop_iterations_since_last_report",
stats
.gossip_listen_loop_iterations_since_last_report
.clear(),
i64
),
(
"process_gossip_packets_iterations_since_last_report",
stats
.process_gossip_packets_iterations_since_last_report
.clear(),
i64
),
);
datapoint_info!(
"cluster_info_stats4",