Add CPU metrics (#25802)
Add CPU utilization metrics: number of vCPUs, clock frequency, load averages over one-, five-, and fifteen-minute intervals, and total number of threads.
This commit is contained in:
parent
d4e4871c47
commit
ba04063956
|
@ -12,6 +12,7 @@ use {
|
|||
thread::{self, sleep, Builder, JoinHandle},
|
||||
time::Duration,
|
||||
},
|
||||
sys_info::{Error, LoadAvg},
|
||||
};
|
||||
|
||||
const MS_PER_S: u64 = 1_000;
|
||||
|
@ -20,6 +21,7 @@ const MS_PER_H: u64 = MS_PER_M * 60;
|
|||
const SAMPLE_INTERVAL_UDP_MS: u64 = 2 * MS_PER_S;
|
||||
const SAMPLE_INTERVAL_OS_NETWORK_LIMITS_MS: u64 = MS_PER_H;
|
||||
const SAMPLE_INTERVAL_MEM_MS: u64 = MS_PER_S;
|
||||
const SAMPLE_INTERVAL_CPU_MS: u64 = MS_PER_S;
|
||||
const SLEEP_INTERVAL: Duration = Duration::from_millis(500);
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
|
@ -41,6 +43,13 @@ struct UdpStats {
|
|||
ignored_multi: usize,
|
||||
}
|
||||
|
||||
/// CPU-related values gathered in one pass by `cpu_info` and emitted by
/// `report_cpu_stats` as the `cpu-stats` datapoint.
struct CpuInfo {
|
||||
// Value returned by `sys_info::cpu_num()`; reported as `cpu_num`.
cpu_num: u32,
|
||||
// Value returned by `sys_info::cpu_speed()`; reported as `cpu0_freq_mhz`.
cpu_freq_mhz: u64,
|
||||
// Value returned by `sys_info::loadavg()`; its `one`, `five` and `fifteen`
// fields are reported individually.
load_avg: LoadAvg,
|
||||
// Value returned by `sys_info::proc_total()`; reported as `total_num_threads`.
// NOTE(review): confirm whether `proc_total` counts threads or processes on
// the target OS, since the metric name says "threads".
num_threads: u64,
|
||||
}
|
||||
|
||||
impl UdpStats {
|
||||
fn from_map(udp_stats: &HashMap<String, usize>) -> Self {
|
||||
Self {
|
||||
|
@ -121,12 +130,18 @@ impl SystemMonitorService {
|
|||
exit: Arc<AtomicBool>,
|
||||
report_os_memory_stats: bool,
|
||||
report_os_network_stats: bool,
|
||||
report_os_cpu_stats: bool,
|
||||
) -> Self {
|
||||
info!("Starting SystemMonitorService");
|
||||
let thread_hdl = Builder::new()
|
||||
.name("system-monitor".to_string())
|
||||
.spawn(move || {
|
||||
Self::run(exit, report_os_memory_stats, report_os_network_stats);
|
||||
Self::run(
|
||||
exit,
|
||||
report_os_memory_stats,
|
||||
report_os_network_stats,
|
||||
report_os_cpu_stats,
|
||||
);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
|
@ -335,11 +350,45 @@ impl SystemMonitorService {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn run(exit: Arc<AtomicBool>, report_os_memory_stats: bool, report_os_network_stats: bool) {
|
||||
/// Queries `sys_info` for the CPU count, CPU speed, load averages and process
/// total, and bundles them into a `CpuInfo`.
///
/// # Errors
///
/// Returns the `sys_info::Error` from the first query that fails; because each
/// call uses `?`, the remaining queries are not attempted after a failure.
fn cpu_info() -> Result<CpuInfo, Error> {
|
||||
let cpu_num = sys_info::cpu_num()?;
|
||||
let cpu_freq_mhz = sys_info::cpu_speed()?;
|
||||
let load_avg = sys_info::loadavg()?;
|
||||
let num_threads = sys_info::proc_total()?;
|
||||
|
||||
// All four queries succeeded; package the results.
Ok(CpuInfo {
|
||||
cpu_num,
|
||||
cpu_freq_mhz,
|
||||
load_avg,
|
||||
num_threads,
|
||||
})
|
||||
}
|
||||
|
||||
/// Emits a `cpu-stats` datapoint built from `Self::cpu_info()`.
///
/// If `cpu_info` returns an error, nothing is reported and the error is
/// discarded without being logged.
fn report_cpu_stats() {
|
||||
if let Ok(info) = Self::cpu_info() {
|
||||
// Integer fields are declared as `i64` in the datapoint, so the unsigned
// `CpuInfo` values are cast; the load averages are passed through as `f64`.
datapoint_info!(
|
||||
"cpu-stats",
|
||||
("cpu_num", info.cpu_num as i64, i64),
|
||||
("cpu0_freq_mhz", info.cpu_freq_mhz as i64, i64),
|
||||
("average_load_one_minute", info.load_avg.one, f64),
|
||||
("average_load_five_minutes", info.load_avg.five, f64),
|
||||
("average_load_fifteen_minutes", info.load_avg.fifteen, f64),
|
||||
("total_num_threads", info.num_threads as i64, i64),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn run(
|
||||
exit: Arc<AtomicBool>,
|
||||
report_os_memory_stats: bool,
|
||||
report_os_network_stats: bool,
|
||||
report_os_cpu_stats: bool,
|
||||
) {
|
||||
let mut udp_stats = None;
|
||||
let network_limits_timer = AtomicInterval::default();
|
||||
let udp_timer = AtomicInterval::default();
|
||||
let mem_timer = AtomicInterval::default();
|
||||
let cpu_timer = AtomicInterval::default();
|
||||
|
||||
loop {
|
||||
if exit.load(Ordering::Relaxed) {
|
||||
|
@ -356,6 +405,9 @@ impl SystemMonitorService {
|
|||
if report_os_memory_stats && mem_timer.should_update(SAMPLE_INTERVAL_MEM_MS) {
|
||||
Self::report_mem_stats();
|
||||
}
|
||||
if report_os_cpu_stats && cpu_timer.should_update(SAMPLE_INTERVAL_CPU_MS) {
|
||||
Self::report_cpu_stats();
|
||||
}
|
||||
sleep(SLEEP_INTERVAL);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -159,6 +159,7 @@ pub struct ValidatorConfig {
|
|||
pub no_poh_speed_test: bool,
|
||||
pub no_os_memory_stats_reporting: bool,
|
||||
pub no_os_network_stats_reporting: bool,
|
||||
pub no_os_cpu_stats_reporting: bool,
|
||||
pub poh_pinned_cpu_core: usize,
|
||||
pub poh_hashes_per_batch: u64,
|
||||
pub account_indexes: AccountSecondaryIndexes,
|
||||
|
@ -219,6 +220,7 @@ impl Default for ValidatorConfig {
|
|||
no_poh_speed_test: true,
|
||||
no_os_memory_stats_reporting: true,
|
||||
no_os_network_stats_reporting: true,
|
||||
no_os_cpu_stats_reporting: true,
|
||||
poh_pinned_cpu_core: poh_service::DEFAULT_PINNED_CPU_CORE,
|
||||
poh_hashes_per_batch: poh_service::DEFAULT_HASHES_PER_BATCH,
|
||||
account_indexes: AccountSecondaryIndexes::default(),
|
||||
|
@ -497,6 +499,7 @@ impl Validator {
|
|||
Arc::clone(&exit),
|
||||
!config.no_os_memory_stats_reporting,
|
||||
!config.no_os_network_stats_reporting,
|
||||
!config.no_os_cpu_stats_reporting,
|
||||
));
|
||||
|
||||
let (poh_timing_point_sender, poh_timing_point_receiver) = unbounded();
|
||||
|
|
|
@ -2182,6 +2182,7 @@ fn main() {
|
|||
Arc::clone(&exit_signal),
|
||||
!no_os_memory_stats_reporting,
|
||||
false,
|
||||
false,
|
||||
);
|
||||
|
||||
accounts_index_config.index_limit_mb = if let Some(limit) =
|
||||
|
|
|
@ -47,6 +47,7 @@ pub fn safe_clone_config(config: &ValidatorConfig) -> ValidatorConfig {
|
|||
no_poh_speed_test: config.no_poh_speed_test,
|
||||
no_os_memory_stats_reporting: config.no_os_memory_stats_reporting,
|
||||
no_os_network_stats_reporting: config.no_os_network_stats_reporting,
|
||||
no_os_cpu_stats_reporting: config.no_os_cpu_stats_reporting,
|
||||
poh_pinned_cpu_core: config.poh_pinned_cpu_core,
|
||||
account_indexes: config.account_indexes.clone(),
|
||||
accounts_db_caching_enabled: config.accounts_db_caching_enabled,
|
||||
|
|
|
@ -965,6 +965,11 @@ pub fn main() {
|
|||
.long("no-os-network-stats-reporting")
|
||||
.help("Disable reporting of OS network statistics.")
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("no_os_cpu_stats_reporting")
|
||||
.long("no-os-cpu-stats-reporting")
|
||||
.help("Disable reporting of OS CPU statistics.")
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("accounts-hash-interval-slots")
|
||||
.long("accounts-hash-interval-slots")
|
||||
|
@ -2529,6 +2534,7 @@ pub fn main() {
|
|||
no_poh_speed_test: matches.is_present("no_poh_speed_test"),
|
||||
no_os_memory_stats_reporting: matches.is_present("no_os_memory_stats_reporting"),
|
||||
no_os_network_stats_reporting: matches.is_present("no_os_network_stats_reporting"),
|
||||
no_os_cpu_stats_reporting: matches.is_present("no_os_cpu_stats_reporting"),
|
||||
poh_pinned_cpu_core: value_of(&matches, "poh_pinned_cpu_core")
|
||||
.unwrap_or(poh_service::DEFAULT_PINNED_CPU_CORE),
|
||||
poh_hashes_per_batch: value_of(&matches, "poh_hashes_per_batch")
|
||||
|
|
Loading…
Reference in New Issue