add monitoring for open file descriptors stat
This commit is contained in:
parent
703511c3e8
commit
e1ba5a2a63
|
@ -5202,6 +5202,7 @@ dependencies = [
|
|||
"matches",
|
||||
"min-max-heap",
|
||||
"num_enum",
|
||||
"procfs",
|
||||
"rand 0.7.3",
|
||||
"rand_chacha 0.2.2",
|
||||
"raptorq",
|
||||
|
|
|
@ -31,6 +31,7 @@ log = "0.4.17"
|
|||
lru = "0.7.7"
|
||||
min-max-heap = "1.3.0"
|
||||
num_enum = "0.5.7"
|
||||
procfs = "0.14.1"
|
||||
rand = "0.7.0"
|
||||
rand_chacha = "0.2.2"
|
||||
rayon = "1.5.3"
|
||||
|
|
|
@ -7,6 +7,7 @@ use num_enum::{IntoPrimitive, TryFromPrimitive};
|
|||
#[cfg(target_os = "linux")]
|
||||
use std::{fs::File, io::BufReader};
|
||||
use {
|
||||
procfs::process::{LimitValue, Process},
|
||||
solana_sdk::timing::AtomicInterval,
|
||||
std::{
|
||||
collections::HashMap,
|
||||
|
@ -29,6 +30,7 @@ const SAMPLE_INTERVAL_OS_NETWORK_LIMITS_MS: u64 = MS_PER_H;
|
|||
const SAMPLE_INTERVAL_MEM_MS: u64 = 5 * MS_PER_S;
|
||||
const SAMPLE_INTERVAL_CPU_MS: u64 = 10 * MS_PER_S;
|
||||
const SAMPLE_INTERVAL_DISK_MS: u64 = 5 * MS_PER_S;
|
||||
const SAMPLE_INTERVAL_OPEN_FD_MS: u64 = 30 * MS_PER_S;
|
||||
const SLEEP_INTERVAL: Duration = Duration::from_millis(500);
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
|
@ -392,6 +394,7 @@ impl SystemMonitorService {
|
|||
report_os_network_stats: bool,
|
||||
report_os_cpu_stats: bool,
|
||||
report_os_disk_stats: bool,
|
||||
report_os_open_fd_stats: bool,
|
||||
) -> Self {
|
||||
info!("Starting SystemMonitorService");
|
||||
let thread_hdl = Builder::new()
|
||||
|
@ -403,6 +406,7 @@ impl SystemMonitorService {
|
|||
report_os_network_stats,
|
||||
report_os_cpu_stats,
|
||||
report_os_disk_stats,
|
||||
report_os_open_fd_stats,
|
||||
);
|
||||
})
|
||||
.unwrap();
|
||||
|
@ -832,6 +836,40 @@ impl SystemMonitorService {
|
|||
Self::report_cpuid_values();
|
||||
}
|
||||
|
||||
fn get_open_fd_stats() -> Option<(usize, usize, usize)> {
|
||||
let proc = Process::myself().ok()?;
|
||||
let curr_num_open_fd = proc.fd_count().unwrap();
|
||||
let max_open_fd_limit = proc.limits().unwrap().max_open_files;
|
||||
|
||||
let max_open_fd_soft_limit = match max_open_fd_limit.soft_limit {
|
||||
LimitValue::Unlimited => usize::MAX,
|
||||
LimitValue::Value(x) => x as usize,
|
||||
};
|
||||
let max_open_fd_hard_limit = match max_open_fd_limit.hard_limit {
|
||||
LimitValue::Unlimited => usize::MAX,
|
||||
LimitValue::Value(x) => x as usize,
|
||||
};
|
||||
|
||||
Some((
|
||||
curr_num_open_fd,
|
||||
max_open_fd_soft_limit,
|
||||
max_open_fd_hard_limit,
|
||||
))
|
||||
}
|
||||
|
||||
fn report_open_fd_stats() {
|
||||
if let Some((curr_num_open_fd, max_open_fd_soft_limit, max_open_fd_hard_limit)) =
|
||||
Self::get_open_fd_stats()
|
||||
{
|
||||
datapoint_info!(
|
||||
"open-fd-stats",
|
||||
("number_open_files", curr_num_open_fd, i64),
|
||||
("max_open_files_hard_limit", max_open_fd_hard_limit, i64),
|
||||
("max_open_files_soft_limit", max_open_fd_soft_limit, i64),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
fn process_disk_stats(disk_stats: &mut Option<DiskStats>) {
|
||||
match read_disk_stats() {
|
||||
|
@ -973,6 +1011,7 @@ impl SystemMonitorService {
|
|||
report_os_network_stats: bool,
|
||||
report_os_cpu_stats: bool,
|
||||
report_os_disk_stats: bool,
|
||||
report_os_open_fd_stats: bool,
|
||||
) {
|
||||
let mut udp_stats = None;
|
||||
let mut disk_stats = None;
|
||||
|
@ -981,6 +1020,7 @@ impl SystemMonitorService {
|
|||
let mem_timer = AtomicInterval::default();
|
||||
let cpu_timer = AtomicInterval::default();
|
||||
let disk_timer = AtomicInterval::default();
|
||||
let open_fd_timer = AtomicInterval::default();
|
||||
|
||||
loop {
|
||||
if exit.load(Ordering::Relaxed) {
|
||||
|
@ -1003,6 +1043,9 @@ impl SystemMonitorService {
|
|||
if report_os_disk_stats && disk_timer.should_update(SAMPLE_INTERVAL_DISK_MS) {
|
||||
Self::process_disk_stats(&mut disk_stats);
|
||||
}
|
||||
if report_os_open_fd_stats && open_fd_timer.should_update(SAMPLE_INTERVAL_OPEN_FD_MS) {
|
||||
Self::report_open_fd_stats();
|
||||
}
|
||||
sleep(SLEEP_INTERVAL);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -156,6 +156,7 @@ pub struct ValidatorConfig {
|
|||
pub no_os_network_stats_reporting: bool,
|
||||
pub no_os_cpu_stats_reporting: bool,
|
||||
pub no_os_disk_stats_reporting: bool,
|
||||
pub no_os_open_fd_stats_reporting: bool,
|
||||
pub poh_pinned_cpu_core: usize,
|
||||
pub poh_hashes_per_batch: u64,
|
||||
pub process_ledger_before_services: bool,
|
||||
|
@ -218,6 +219,7 @@ impl Default for ValidatorConfig {
|
|||
no_os_network_stats_reporting: true,
|
||||
no_os_cpu_stats_reporting: true,
|
||||
no_os_disk_stats_reporting: true,
|
||||
no_os_open_fd_stats_reporting: true,
|
||||
poh_pinned_cpu_core: poh_service::DEFAULT_PINNED_CPU_CORE,
|
||||
poh_hashes_per_batch: poh_service::DEFAULT_HASHES_PER_BATCH,
|
||||
process_ledger_before_services: false,
|
||||
|
@ -500,6 +502,7 @@ impl Validator {
|
|||
!config.no_os_network_stats_reporting,
|
||||
!config.no_os_cpu_stats_reporting,
|
||||
!config.no_os_disk_stats_reporting,
|
||||
!config.no_os_open_fd_stats_reporting,
|
||||
));
|
||||
|
||||
let (poh_timing_point_sender, poh_timing_point_receiver) = unbounded();
|
||||
|
|
|
@ -2711,6 +2711,7 @@ fn main() {
|
|||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
);
|
||||
|
||||
accounts_index_config.index_limit_mb = if let Some(limit) =
|
||||
|
|
|
@ -46,6 +46,7 @@ pub fn safe_clone_config(config: &ValidatorConfig) -> ValidatorConfig {
|
|||
no_os_network_stats_reporting: config.no_os_network_stats_reporting,
|
||||
no_os_cpu_stats_reporting: config.no_os_cpu_stats_reporting,
|
||||
no_os_disk_stats_reporting: config.no_os_disk_stats_reporting,
|
||||
no_os_open_fd_stats_reporting: config.no_os_open_fd_stats_reporting,
|
||||
poh_pinned_cpu_core: config.poh_pinned_cpu_core,
|
||||
account_indexes: config.account_indexes.clone(),
|
||||
accounts_db_caching_enabled: config.accounts_db_caching_enabled,
|
||||
|
|
|
@ -4491,6 +4491,7 @@ dependencies = [
|
|||
"lru",
|
||||
"min-max-heap",
|
||||
"num_enum",
|
||||
"procfs",
|
||||
"rand 0.7.3",
|
||||
"rand_chacha 0.2.2",
|
||||
"rayon",
|
||||
|
|
|
@ -513,6 +513,11 @@ pub fn app<'a>(version: &'a str, default_args: &'a DefaultArgs) -> App<'a, 'a> {
|
|||
.long("no-os-disk-stats-reporting")
|
||||
.help("Disable reporting of OS disk statistics.")
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("no_os_open_fd_stats_reporting")
|
||||
.long("no-os-open-fd-stats-reporting")
|
||||
.help("Disable reporting of open file descriptors statistics for current process.")
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("accounts-hash-interval-slots")
|
||||
.long("accounts-hash-interval-slots")
|
||||
|
|
|
@ -1141,6 +1141,7 @@ pub fn main() {
|
|||
no_os_network_stats_reporting: matches.is_present("no_os_network_stats_reporting"),
|
||||
no_os_cpu_stats_reporting: matches.is_present("no_os_cpu_stats_reporting"),
|
||||
no_os_disk_stats_reporting: matches.is_present("no_os_disk_stats_reporting"),
|
||||
no_os_open_fd_stats_reporting: matches.is_present("no_os_open_fd_stats_reporting"),
|
||||
poh_pinned_cpu_core: value_of(&matches, "poh_pinned_cpu_core")
|
||||
.unwrap_or(poh_service::DEFAULT_PINNED_CPU_CORE),
|
||||
poh_hashes_per_batch: value_of(&matches, "poh_hashes_per_batch")
|
||||
|
|
Loading…
Reference in New Issue