diff --git a/core/src/system_monitor_service.rs b/core/src/system_monitor_service.rs index 5586196846..c545718b2c 100644 --- a/core/src/system_monitor_service.rs +++ b/core/src/system_monitor_service.rs @@ -12,6 +12,7 @@ use { thread::{self, sleep, Builder, JoinHandle}, time::Duration, }, + sys_info::{Error, LoadAvg}, }; const MS_PER_S: u64 = 1_000; @@ -20,6 +21,7 @@ const MS_PER_H: u64 = MS_PER_M * 60; const SAMPLE_INTERVAL_UDP_MS: u64 = 2 * MS_PER_S; const SAMPLE_INTERVAL_OS_NETWORK_LIMITS_MS: u64 = MS_PER_H; const SAMPLE_INTERVAL_MEM_MS: u64 = MS_PER_S; +const SAMPLE_INTERVAL_CPU_MS: u64 = MS_PER_S; const SLEEP_INTERVAL: Duration = Duration::from_millis(500); #[cfg(target_os = "linux")] @@ -41,6 +43,13 @@ struct UdpStats { ignored_multi: usize, } +struct CpuInfo { + cpu_num: u32, + cpu_freq_mhz: u64, + load_avg: LoadAvg, + num_threads: u64, +} + impl UdpStats { fn from_map(udp_stats: &HashMap) -> Self { Self { @@ -121,12 +130,18 @@ impl SystemMonitorService { exit: Arc, report_os_memory_stats: bool, report_os_network_stats: bool, + report_os_cpu_stats: bool, ) -> Self { info!("Starting SystemMonitorService"); let thread_hdl = Builder::new() .name("system-monitor".to_string()) .spawn(move || { - Self::run(exit, report_os_memory_stats, report_os_network_stats); + Self::run( + exit, + report_os_memory_stats, + report_os_network_stats, + report_os_cpu_stats, + ); }) .unwrap(); @@ -335,11 +350,45 @@ impl SystemMonitorService { } } - pub fn run(exit: Arc, report_os_memory_stats: bool, report_os_network_stats: bool) { + fn cpu_info() -> Result { + let cpu_num = sys_info::cpu_num()?; + let cpu_freq_mhz = sys_info::cpu_speed()?; + let load_avg = sys_info::loadavg()?; + let num_threads = sys_info::proc_total()?; + + Ok(CpuInfo { + cpu_num, + cpu_freq_mhz, + load_avg, + num_threads, + }) + } + + fn report_cpu_stats() { + if let Ok(info) = Self::cpu_info() { + datapoint_info!( + "cpu-stats", + ("cpu_num", info.cpu_num as i64, i64), + ("cpu0_freq_mhz", info.cpu_freq_mhz as i64, i64), + ("average_load_one_minute", info.load_avg.one, f64), + ("average_load_five_minutes", info.load_avg.five, f64), + ("average_load_fifteen_minutes", info.load_avg.fifteen, f64), + ("total_num_threads", info.num_threads as i64, i64), + ) + } + } + + pub fn run( + exit: Arc, + report_os_memory_stats: bool, + report_os_network_stats: bool, + report_os_cpu_stats: bool, + ) { let mut udp_stats = None; let network_limits_timer = AtomicInterval::default(); let udp_timer = AtomicInterval::default(); let mem_timer = AtomicInterval::default(); + let cpu_timer = AtomicInterval::default(); loop { if exit.load(Ordering::Relaxed) { @@ -356,6 +405,9 @@ impl SystemMonitorService { if report_os_memory_stats && mem_timer.should_update(SAMPLE_INTERVAL_MEM_MS) { Self::report_mem_stats(); } + if report_os_cpu_stats && cpu_timer.should_update(SAMPLE_INTERVAL_CPU_MS) { + Self::report_cpu_stats(); + } sleep(SLEEP_INTERVAL); } } diff --git a/core/src/validator.rs b/core/src/validator.rs index fe08d11ab8..01fa201cf7 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -159,6 +159,7 @@ pub struct ValidatorConfig { pub no_poh_speed_test: bool, pub no_os_memory_stats_reporting: bool, pub no_os_network_stats_reporting: bool, + pub no_os_cpu_stats_reporting: bool, pub poh_pinned_cpu_core: usize, pub poh_hashes_per_batch: u64, pub account_indexes: AccountSecondaryIndexes, @@ -219,6 +220,7 @@ impl Default for ValidatorConfig { no_poh_speed_test: true, no_os_memory_stats_reporting: true, no_os_network_stats_reporting: true, + no_os_cpu_stats_reporting: true, poh_pinned_cpu_core: poh_service::DEFAULT_PINNED_CPU_CORE, poh_hashes_per_batch: poh_service::DEFAULT_HASHES_PER_BATCH, account_indexes: AccountSecondaryIndexes::default(), @@ -497,6 +499,7 @@ impl Validator { Arc::clone(&exit), !config.no_os_memory_stats_reporting, !config.no_os_network_stats_reporting, + !config.no_os_cpu_stats_reporting, )); let (poh_timing_point_sender, poh_timing_point_receiver) = unbounded(); diff --git a/ledger-tool/src/main.rs b/ledger-tool/src/main.rs index 722a6412af..ace00ef1d0 100644 --- a/ledger-tool/src/main.rs +++ b/ledger-tool/src/main.rs @@ -2182,6 +2182,7 @@ fn main() { Arc::clone(&exit_signal), !no_os_memory_stats_reporting, false, + false, ); accounts_index_config.index_limit_mb = if let Some(limit) = diff --git a/local-cluster/src/validator_configs.rs b/local-cluster/src/validator_configs.rs index 4cad12dfc8..76f56251b4 100644 --- a/local-cluster/src/validator_configs.rs +++ b/local-cluster/src/validator_configs.rs @@ -47,6 +47,7 @@ pub fn safe_clone_config(config: &ValidatorConfig) -> ValidatorConfig { no_poh_speed_test: config.no_poh_speed_test, no_os_memory_stats_reporting: config.no_os_memory_stats_reporting, no_os_network_stats_reporting: config.no_os_network_stats_reporting, + no_os_cpu_stats_reporting: config.no_os_cpu_stats_reporting, poh_pinned_cpu_core: config.poh_pinned_cpu_core, account_indexes: config.account_indexes.clone(), accounts_db_caching_enabled: config.accounts_db_caching_enabled, diff --git a/validator/src/main.rs b/validator/src/main.rs index 4f0cd24b40..5cb11628f3 100644 --- a/validator/src/main.rs +++ b/validator/src/main.rs @@ -965,6 +965,11 @@ pub fn main() { .long("no-os-network-stats-reporting") .help("Disable reporting of OS network statistics.") ) + .arg( + Arg::with_name("no_os_cpu_stats_reporting") + .long("no-os-cpu-stats-reporting") + .help("Disable reporting of OS CPU statistics.") + ) .arg( Arg::with_name("accounts-hash-interval-slots") .long("accounts-hash-interval-slots") @@ -2529,6 +2534,7 @@ pub fn main() { no_poh_speed_test: matches.is_present("no_poh_speed_test"), no_os_memory_stats_reporting: matches.is_present("no_os_memory_stats_reporting"), no_os_network_stats_reporting: matches.is_present("no_os_network_stats_reporting"), + no_os_cpu_stats_reporting: matches.is_present("no_os_cpu_stats_reporting"), poh_pinned_cpu_core: value_of(&matches, "poh_pinned_cpu_core") .unwrap_or(poh_service::DEFAULT_PINNED_CPU_CORE), poh_hashes_per_batch: value_of(&matches, "poh_hashes_per_batch")