Add execute timings (#23097)

This commit is contained in:
carllin 2022-02-17 01:14:32 -05:00 committed by GitHub
parent fa680a35ea
commit 619335df1a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 1015 additions and 326 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,5 @@
use {
crate::leader_slot_banking_stage_timing_metrics::*,
solana_poh::poh_recorder::BankStart,
solana_sdk::{clock::Slot, saturating_add_assign},
std::time::Instant,
@ -38,41 +39,12 @@ pub(crate) struct ProcessTransactionsSummary {
// The number of transactions filtered out by the cost model
pub cost_model_throttled_transactions_count: usize,
}
// Metrics capturing wallclock time spent in various parts of BankingStage during this
// validator's leader slot
#[derive(Debug)]
struct LeaderSlotTimingMetrics {
bank_detected_time: Instant,
// Total amount of time spent running the cost model
pub cost_model_us: u64,
// Delay from when the bank was created to when this thread detected it
bank_detected_delay_us: u64,
}
impl LeaderSlotTimingMetrics {
fn new(bank_creation_time: &Instant) -> Self {
Self {
bank_detected_time: Instant::now(),
bank_detected_delay_us: bank_creation_time.elapsed().as_micros() as u64,
}
}
fn report(&self, id: u32, slot: Slot) {
let bank_detected_to_now = self.bank_detected_time.elapsed().as_micros() as u64;
datapoint_info!(
"banking_stage-leader_slot_loop_timings",
("id", id as i64, i64),
("slot", slot as i64, i64),
("bank_detected_to_now_us", bank_detected_to_now, i64),
(
"bank_creation_to_now_us",
bank_detected_to_now + self.bank_detected_delay_us,
i64
),
("bank_detected_delay_us", self.bank_detected_delay_us, i64),
);
}
// Breakdown of time spent executing and committing transactions
pub execute_and_commit_timings: LeaderExecuteAndCommitTimings,
}
// Metrics describing packets ingested/processed in various parts of BankingStage during this
@ -362,6 +334,8 @@ impl LeaderSlotMetricsTracker {
failed_commit_count,
ref retryable_transaction_indexes,
cost_model_throttled_transactions_count,
cost_model_us,
ref execute_and_commit_timings,
..
} = process_transactions_summary;
@ -415,9 +389,23 @@ impl LeaderSlotMetricsTracker {
.cost_model_throttled_transactions_count,
*cost_model_throttled_transactions_count as u64
);
saturating_add_assign!(
leader_slot_metrics
.timing_metrics
.process_packets_timings
.cost_model_us,
*cost_model_us as u64
);
leader_slot_metrics
.timing_metrics
.execute_and_commit_timings
.accumulate(execute_and_commit_timings);
}
}
// Packet inflow/outflow/processing metrics
pub(crate) fn increment_total_new_valid_packets(&mut self, count: u64) {
if let Some(leader_slot_metrics) = &mut self.leader_slot_metrics {
saturating_add_assign!(
@ -527,6 +515,166 @@ impl LeaderSlotMetricsTracker {
);
}
}
// Outermost banking thread's loop timing metrics
/// Adds `us` (saturating) to the outer-loop `process_buffered_packets_us` timing.
///
/// Does nothing when `leader_slot_metrics` is `None`.
pub(crate) fn increment_process_buffered_packets_us(&mut self, us: u64) {
    if let Some(metrics) = self.leader_slot_metrics.as_mut() {
        let timings = &mut metrics.timing_metrics.outer_loop_timings;
        saturating_add_assign!(timings.process_buffered_packets_us, us);
    }
}
/// Adds `us` (saturating) to the outer-loop `slot_metrics_check_slot_boundary_us`
/// timing.
///
/// Does nothing when `leader_slot_metrics` is `None`.
pub(crate) fn increment_slot_metrics_check_slot_boundary_us(&mut self, us: u64) {
    if let Some(metrics) = self.leader_slot_metrics.as_mut() {
        let timings = &mut metrics.timing_metrics.outer_loop_timings;
        saturating_add_assign!(timings.slot_metrics_check_slot_boundary_us, us);
    }
}
/// Adds `us` (saturating) to the outer-loop `receive_and_buffer_packets_us` timing.
///
/// Does nothing when `leader_slot_metrics` is `None`.
pub(crate) fn increment_receive_and_buffer_packets_us(&mut self, us: u64) {
    if let Some(metrics) = self.leader_slot_metrics.as_mut() {
        let timings = &mut metrics.timing_metrics.outer_loop_timings;
        saturating_add_assign!(timings.receive_and_buffer_packets_us, us);
    }
}
// Processing buffer timing metrics
/// Adds `us` (saturating) to the `make_decision_us` timing of the
/// process-buffered-packets group.
///
/// Does nothing when `leader_slot_metrics` is `None`.
pub(crate) fn increment_make_decision_us(&mut self, us: u64) {
    if let Some(metrics) = self.leader_slot_metrics.as_mut() {
        let timings = &mut metrics.timing_metrics.process_buffered_packets_timings;
        saturating_add_assign!(timings.make_decision_us, us);
    }
}
/// Adds `us` (saturating) to the `consume_buffered_packets_us` timing of the
/// process-buffered-packets group.
///
/// Does nothing when `leader_slot_metrics` is `None`.
pub(crate) fn increment_consume_buffered_packets_us(&mut self, us: u64) {
    if let Some(metrics) = self.leader_slot_metrics.as_mut() {
        let timings = &mut metrics.timing_metrics.process_buffered_packets_timings;
        saturating_add_assign!(timings.consume_buffered_packets_us, us);
    }
}
/// Adds `us` (saturating) to the `forward_us` timing of the
/// process-buffered-packets group.
///
/// Does nothing when `leader_slot_metrics` is `None`.
pub(crate) fn increment_forward_us(&mut self, us: u64) {
    if let Some(metrics) = self.leader_slot_metrics.as_mut() {
        let timings = &mut metrics.timing_metrics.process_buffered_packets_timings;
        saturating_add_assign!(timings.forward_us, us);
    }
}
/// Adds `us` (saturating) to the `forward_and_hold_us` timing of the
/// process-buffered-packets group.
///
/// Does nothing when `leader_slot_metrics` is `None`.
pub(crate) fn increment_forward_and_hold_us(&mut self, us: u64) {
    if let Some(metrics) = self.leader_slot_metrics.as_mut() {
        let timings = &mut metrics.timing_metrics.process_buffered_packets_timings;
        saturating_add_assign!(timings.forward_and_hold_us, us);
    }
}
// Consuming buffered packets timing metrics
/// Adds `us` (saturating) to the `end_of_slot_filtering_us` timing of the
/// consume-buffered-packets group.
///
/// Does nothing when `leader_slot_metrics` is `None`.
pub(crate) fn increment_end_of_slot_filtering_us(&mut self, us: u64) {
    if let Some(metrics) = self.leader_slot_metrics.as_mut() {
        let timings = &mut metrics.timing_metrics.consume_buffered_packets_timings;
        saturating_add_assign!(timings.end_of_slot_filtering_us, us);
    }
}
/// Adds `us` (saturating) to the `poh_recorder_lock_us` timing of the
/// consume-buffered-packets group.
///
/// Does nothing when `leader_slot_metrics` is `None`.
pub(crate) fn increment_consume_buffered_packets_poh_recorder_lock_us(&mut self, us: u64) {
    if let Some(metrics) = self.leader_slot_metrics.as_mut() {
        let timings = &mut metrics.timing_metrics.consume_buffered_packets_timings;
        saturating_add_assign!(timings.poh_recorder_lock_us, us);
    }
}
/// Adds `us` (saturating) to the `process_packets_transactions_us` timing of the
/// consume-buffered-packets group.
///
/// Does nothing when `leader_slot_metrics` is `None`.
pub(crate) fn increment_process_packets_transactions_us(&mut self, us: u64) {
    if let Some(metrics) = self.leader_slot_metrics.as_mut() {
        let timings = &mut metrics.timing_metrics.consume_buffered_packets_timings;
        saturating_add_assign!(timings.process_packets_transactions_us, us);
    }
}
// Processing packets timing metrics
/// Adds `us` (saturating) to the `transactions_from_packets_us` timing of the
/// process-packets group.
///
/// Does nothing when `leader_slot_metrics` is `None`.
pub(crate) fn increment_transactions_from_packets_us(&mut self, us: u64) {
    if let Some(metrics) = self.leader_slot_metrics.as_mut() {
        let timings = &mut metrics.timing_metrics.process_packets_timings;
        saturating_add_assign!(timings.transactions_from_packets_us, us);
    }
}
/// Adds `us` (saturating) to the `process_transactions_us` timing of the
/// process-packets group.
///
/// Does nothing when `leader_slot_metrics` is `None`.
pub(crate) fn increment_process_transactions_us(&mut self, us: u64) {
    if let Some(metrics) = self.leader_slot_metrics.as_mut() {
        let timings = &mut metrics.timing_metrics.process_packets_timings;
        saturating_add_assign!(timings.process_transactions_us, us);
    }
}
/// Adds `us` (saturating) to the `filter_retryable_packets_us` timing of the
/// process-packets group.
///
/// Does nothing when `leader_slot_metrics` is `None`.
pub(crate) fn increment_filter_retryable_packets_us(&mut self, us: u64) {
    if let Some(metrics) = self.leader_slot_metrics.as_mut() {
        let timings = &mut metrics.timing_metrics.process_packets_timings;
        saturating_add_assign!(timings.filter_retryable_packets_us, us);
    }
}
}
#[cfg(test)]

View File

@ -0,0 +1,286 @@
use {
solana_program_runtime::timings::ExecuteTimings,
solana_sdk::{clock::Slot, saturating_add_assign},
std::time::Instant,
};
/// Timing counters for the execute-and-commit path, merged with
/// [`accumulate`](Self::accumulate) and emitted with [`report`](Self::report).
#[derive(Default, Debug)]
pub struct LeaderExecuteAndCommitTimings {
    pub collect_balances_us: u64,
    pub load_execute_us: u64,
    pub freeze_lock_us: u64,
    pub record_us: u64,
    pub commit_us: u64,
    pub find_and_send_votes_us: u64,
    // Emitted by `report` under the `banking_stage-leader_slot_record_timings` datapoint
    pub record_transactions_timings: RecordTransactionsTimings,
    // NOTE(review): merged in `accumulate` but not emitted by `report` below
    pub execute_timings: ExecuteTimings,
}

impl LeaderExecuteAndCommitTimings {
    /// Merges the timings in `other` into `self`.
    ///
    /// Every scalar counter is added exactly once with saturating addition;
    /// the nested `record_transactions_timings` and `execute_timings` are
    /// merged through their own `accumulate` methods.
    pub fn accumulate(&mut self, other: &LeaderExecuteAndCommitTimings) {
        saturating_add_assign!(self.collect_balances_us, other.collect_balances_us);
        saturating_add_assign!(self.load_execute_us, other.load_execute_us);
        saturating_add_assign!(self.freeze_lock_us, other.freeze_lock_us);
        saturating_add_assign!(self.record_us, other.record_us);
        // `commit_us` is added once only; adding it a second time would
        // double-count the commit time in the reported total.
        saturating_add_assign!(self.commit_us, other.commit_us);
        saturating_add_assign!(self.find_and_send_votes_us, other.find_and_send_votes_us);
        self.record_transactions_timings
            .accumulate(&other.record_transactions_timings);
        self.execute_timings.accumulate(&other.execute_timings);
    }

    /// Emits two datapoints carrying `id` and `slot` fields: the scalar
    /// counters under `banking_stage-leader_slot_execute_and_commit_timings`
    /// and the record breakdown under `banking_stage-leader_slot_record_timings`.
    pub fn report(&self, id: u32, slot: Slot) {
        datapoint_info!(
            "banking_stage-leader_slot_execute_and_commit_timings",
            ("id", id as i64, i64),
            ("slot", slot as i64, i64),
            ("collect_balances_us", self.collect_balances_us as i64, i64),
            ("load_execute_us", self.load_execute_us as i64, i64),
            ("freeze_lock_us", self.freeze_lock_us as i64, i64),
            ("record_us", self.record_us as i64, i64),
            ("commit_us", self.commit_us as i64, i64),
            (
                "find_and_send_votes_us",
                self.find_and_send_votes_us as i64,
                i64
            ),
        );

        datapoint_info!(
            "banking_stage-leader_slot_record_timings",
            ("id", id as i64, i64),
            ("slot", slot as i64, i64),
            (
                "execution_results_to_transactions_us",
                self.record_transactions_timings
                    .execution_results_to_transactions_us as i64,
                i64
            ),
            (
                "hash_us",
                self.record_transactions_timings.hash_us as i64,
                i64
            ),
            (
                "poh_record_us",
                self.record_transactions_timings.poh_record_us as i64,
                i64
            ),
        );
    }
}
/// Timing counters for the transaction-recording step, merged with
/// [`accumulate`](Self::accumulate).
#[derive(Default, Debug)]
pub struct RecordTransactionsTimings {
    pub execution_results_to_transactions_us: u64,
    pub hash_us: u64,
    pub poh_record_us: u64,
}

impl RecordTransactionsTimings {
    /// Adds each counter of `other` into the matching counter of `self` using
    /// saturating addition.
    pub fn accumulate(&mut self, other: &RecordTransactionsTimings) {
        // Exhaustive destructuring: adding a field to the struct without
        // handling it here becomes a compile error.
        let RecordTransactionsTimings {
            execution_results_to_transactions_us,
            hash_us,
            poh_record_us,
        } = other;

        saturating_add_assign!(
            self.execution_results_to_transactions_us,
            *execution_results_to_transactions_us
        );
        saturating_add_assign!(self.hash_us, *hash_us);
        saturating_add_assign!(self.poh_record_us, *poh_record_us);
    }
}
/// Metrics capturing wallclock time spent in various parts of BankingStage
/// during this validator's leader slot, grouped into one timing struct per
/// processing stage.
#[derive(Debug)]
pub(crate) struct LeaderSlotTimingMetrics {
    pub outer_loop_timings: OuterLoopTimings,
    pub process_buffered_packets_timings: ProcessBufferedPacketsTimings,
    pub consume_buffered_packets_timings: ConsumeBufferedPacketsTimings,
    pub process_packets_timings: ProcessPacketsTimings,
    pub execute_and_commit_timings: LeaderExecuteAndCommitTimings,
}

impl LeaderSlotTimingMetrics {
    /// Builds the metrics for a newly detected bank: the outer-loop timings
    /// are seeded from `bank_creation_time`, every other group starts at its
    /// `Default` value.
    pub(crate) fn new(bank_creation_time: &Instant) -> Self {
        let outer_loop_timings = OuterLoopTimings::new(bank_creation_time);
        Self {
            outer_loop_timings,
            process_buffered_packets_timings: Default::default(),
            consume_buffered_packets_timings: Default::default(),
            process_packets_timings: Default::default(),
            execute_and_commit_timings: Default::default(),
        }
    }

    /// Reports every timing group in turn, passing `id` and `slot` through to
    /// each group's own `report`.
    pub(crate) fn report(&self, id: u32, slot: Slot) {
        let Self {
            outer_loop_timings,
            process_buffered_packets_timings,
            consume_buffered_packets_timings,
            process_packets_timings,
            execute_and_commit_timings,
        } = self;

        outer_loop_timings.report(id, slot);
        process_buffered_packets_timings.report(id, slot);
        consume_buffered_packets_timings.report(id, slot);
        process_packets_timings.report(id, slot);
        execute_and_commit_timings.report(id, slot);
    }
}
/// Timings for the outermost banking thread loop, emitted under the
/// `banking_stage-leader_slot_loop_timings` datapoint.
#[derive(Debug)]
pub(crate) struct OuterLoopTimings {
    // Instant at which this struct was constructed (see `new`)
    pub bank_detected_time: Instant,

    // Delay from when the bank was created to when this thread detected it
    pub bank_detected_delay_us: u64,

    // Time spent processing buffered packets
    pub process_buffered_packets_us: u64,

    // Time spent checking for slot boundary and reporting leader slot metrics
    pub slot_metrics_check_slot_boundary_us: u64,

    // Time spent processing new incoming packets to the banking thread
    pub receive_and_buffer_packets_us: u64,
}

impl OuterLoopTimings {
    /// Captures the current instant as `bank_detected_time` and the
    /// microseconds elapsed since `bank_creation_time` as the detection delay;
    /// all accumulated counters start at zero.
    fn new(bank_creation_time: &Instant) -> Self {
        let bank_detected_time = Instant::now();
        let bank_detected_delay_us = bank_creation_time.elapsed().as_micros() as u64;
        Self {
            bank_detected_time,
            bank_detected_delay_us,
            process_buffered_packets_us: 0,
            slot_metrics_check_slot_boundary_us: 0,
            receive_and_buffer_packets_us: 0,
        }
    }

    /// Emits the loop timings with `id` and `slot` fields. The two
    /// "slot end detected" fields are measured at the moment this method runs.
    fn report(&self, id: u32, slot: Slot) {
        let Self {
            bank_detected_time,
            bank_detected_delay_us,
            process_buffered_packets_us,
            slot_metrics_check_slot_boundary_us,
            receive_and_buffer_packets_us,
        } = *self;
        let since_detection_us = bank_detected_time.elapsed().as_micros() as u64;

        datapoint_info!(
            "banking_stage-leader_slot_loop_timings",
            ("id", id as i64, i64),
            ("slot", slot as i64, i64),
            (
                "bank_detected_to_slot_end_detected_us",
                since_detection_us,
                i64
            ),
            (
                "bank_creation_to_slot_end_detected_us",
                since_detection_us + bank_detected_delay_us,
                i64
            ),
            ("bank_detected_delay_us", bank_detected_delay_us, i64),
            (
                "process_buffered_packets_us",
                process_buffered_packets_us,
                i64
            ),
            (
                "slot_metrics_check_slot_boundary_us",
                slot_metrics_check_slot_boundary_us,
                i64
            ),
            (
                "receive_and_buffer_packets_us",
                receive_and_buffer_packets_us,
                i64
            ),
        );
    }
}
/// Timing counters for processing buffered packets, emitted under the
/// `banking_stage-leader_slot_process_buffered_packets_timings` datapoint.
#[derive(Debug, Default)]
pub(crate) struct ProcessBufferedPacketsTimings {
    pub make_decision_us: u64,
    pub consume_buffered_packets_us: u64,
    pub forward_us: u64,
    pub forward_and_hold_us: u64,
}

impl ProcessBufferedPacketsTimings {
    /// Emits every counter as an `i64` field, together with `id` and `slot`.
    fn report(&self, id: u32, slot: Slot) {
        let Self {
            make_decision_us,
            consume_buffered_packets_us,
            forward_us,
            forward_and_hold_us,
        } = *self;

        datapoint_info!(
            "banking_stage-leader_slot_process_buffered_packets_timings",
            ("id", id as i64, i64),
            ("slot", slot as i64, i64),
            ("make_decision_us", make_decision_us as i64, i64),
            (
                "consume_buffered_packets_us",
                consume_buffered_packets_us as i64,
                i64
            ),
            ("forward_us", forward_us as i64, i64),
            ("forward_and_hold_us", forward_and_hold_us as i64, i64),
        );
    }
}
/// Timing counters for consuming buffered packets, emitted under the
/// `banking_stage-leader_slot_consume_buffered_packets_timings` datapoint.
#[derive(Debug, Default)]
pub(crate) struct ConsumeBufferedPacketsTimings {
    // Time spent grabbing poh recorder lock
    pub poh_recorder_lock_us: u64,

    // Time spent filtering invalid packets after leader slot has ended
    pub end_of_slot_filtering_us: u64,

    // Time spent processing transactions
    pub process_packets_transactions_us: u64,
}

impl ConsumeBufferedPacketsTimings {
    /// Emits every counter as an `i64` field, together with `id` and `slot`.
    fn report(&self, id: u32, slot: Slot) {
        let Self {
            poh_recorder_lock_us,
            end_of_slot_filtering_us,
            process_packets_transactions_us,
        } = *self;

        datapoint_info!(
            "banking_stage-leader_slot_consume_buffered_packets_timings",
            ("id", id as i64, i64),
            ("slot", slot as i64, i64),
            ("poh_recorder_lock_us", poh_recorder_lock_us as i64, i64),
            (
                "end_of_slot_filtering_us",
                end_of_slot_filtering_us as i64,
                i64
            ),
            (
                "process_packets_transactions_us",
                process_packets_transactions_us as i64,
                i64
            ),
        );
    }
}
/// Timing counters for processing packets, emitted under the
/// `banking_stage-leader_slot_process_packets_timings` datapoint.
#[derive(Debug, Default)]
pub(crate) struct ProcessPacketsTimings {
    // Time spent converting packets to transactions
    pub transactions_from_packets_us: u64,

    // Time spent processing transactions
    pub process_transactions_us: u64,

    // Time spent filtering retryable packets that were returned after transaction
    // processing
    pub filter_retryable_packets_us: u64,

    // Time spent running the cost model in processing transactions before executing
    // transactions
    pub cost_model_us: u64,
}

impl ProcessPacketsTimings {
    /// Emits every counter as an `i64` field, together with `id` and `slot`.
    fn report(&self, id: u32, slot: Slot) {
        let Self {
            transactions_from_packets_us,
            process_transactions_us,
            filter_retryable_packets_us,
            cost_model_us,
        } = *self;

        datapoint_info!(
            "banking_stage-leader_slot_process_packets_timings",
            ("id", id as i64, i64),
            ("slot", slot as i64, i64),
            (
                "transactions_from_packets_us",
                transactions_from_packets_us,
                i64
            ),
            ("process_transactions_us", process_transactions_us, i64),
            (
                "filter_retryable_packets_us",
                filter_retryable_packets_us,
                i64
            ),
            ("cost_model_us", cost_model_us, i64),
        );
    }
}

View File

@ -29,6 +29,7 @@ pub mod gen_keys;
pub mod heaviest_subtree_fork_choice;
pub mod latest_validator_votes_for_frozen_banks;
pub mod leader_slot_banking_stage_metrics;
pub mod leader_slot_banking_stage_timing_metrics;
pub mod ledger_cleanup_service;
pub mod optimistic_confirmation_verifier;
pub mod outstanding_requests;

View File

@ -129,6 +129,7 @@ pub struct ExecuteDetailsTimings {
pub create_executor_jit_compile_us: u64,
pub per_program_timings: HashMap<Pubkey, ProgramTiming>,
}
impl ExecuteDetailsTimings {
pub fn accumulate(&mut self, other: &ExecuteDetailsTimings) {
saturating_add_assign!(self.serialize_us, other.serialize_us);