report mem stats (#21258)
This commit is contained in:
parent
778b5d736c
commit
f8dcb2f38b
|
@ -4782,6 +4782,7 @@ dependencies = [
|
|||
"solana-version",
|
||||
"solana-vote-program",
|
||||
"static_assertions",
|
||||
"sys-info",
|
||||
"systemstat",
|
||||
"tempfile",
|
||||
"thiserror",
|
||||
|
@ -6294,6 +6295,16 @@ dependencies = [
|
|||
"unicode-xid 0.2.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sys-info"
|
||||
version = "0.9.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0b3a0d0aba8bf96a0e1ddfdc352fc53b3df7f39318c71854910c3c4b024ae52c"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sysctl"
|
||||
version = "0.4.3"
|
||||
|
|
|
@ -58,6 +58,7 @@ solana-vote-program = { path = "../programs/vote", version = "=1.9.0" }
|
|||
tempfile = "3.2.0"
|
||||
thiserror = "1.0"
|
||||
solana-rayon-threadlimit = { path = "../rayon-threadlimit", version = "=1.9.0" }
|
||||
sys-info = "0.9.1"
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
trees = "0.4.2"
|
||||
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use solana_sdk::timing::AtomicInterval;
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
io::BufRead,
|
||||
|
@ -6,13 +7,15 @@ use std::{
|
|||
Arc,
|
||||
},
|
||||
thread::{self, sleep, Builder, JoinHandle},
|
||||
time::{Duration, Instant},
|
||||
time::Duration,
|
||||
};
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
use std::{fs::File, io::BufReader, path::Path};
|
||||
|
||||
const SAMPLE_INTERVAL: Duration = Duration::from_secs(60);
|
||||
const MS_PER_S: u64 = 1_000;
|
||||
const SAMPLE_INTERVAL_UDP_MS: u64 = 60 * MS_PER_S;
|
||||
const SAMPLE_INTERVAL_MEM_MS: u64 = MS_PER_S;
|
||||
const SLEEP_INTERVAL: Duration = Duration::from_millis(500);
|
||||
|
||||
#[cfg(target_os = "linux")]
|
||||
|
@ -173,21 +176,68 @@ impl SystemMonitorService {
|
|||
);
|
||||
}
|
||||
|
||||
/// Expresses `numerator` as a percentage of `denom`.
///
/// Returns `0.0` when `denom` is zero, so the division is never performed
/// with a zero divisor.
fn calc_percent(numerator: u64, denom: u64) -> f32 {
    match denom {
        0 => 0.0,
        whole => {
            let fraction = numerator as f32 / whole as f32;
            fraction * 100.0
        }
    }
}
|
||||
|
||||
fn report_mem_stats() {
|
||||
if let Ok(info) = sys_info::mem_info() {
|
||||
datapoint_info!(
|
||||
"memory-stats",
|
||||
("total", info.total, i64),
|
||||
("swap_total", info.swap_total, i64),
|
||||
(
|
||||
"free_percent",
|
||||
Self::calc_percent(info.free, info.total),
|
||||
f64
|
||||
),
|
||||
("used_bytes", info.total.saturating_sub(info.avail), i64),
|
||||
(
|
||||
"avail_percent",
|
||||
Self::calc_percent(info.avail, info.total),
|
||||
f64
|
||||
),
|
||||
(
|
||||
"buffers_percent",
|
||||
Self::calc_percent(info.buffers, info.total),
|
||||
f64
|
||||
),
|
||||
(
|
||||
"cached_percent",
|
||||
Self::calc_percent(info.cached, info.total),
|
||||
f64
|
||||
),
|
||||
(
|
||||
"swap_free_percent",
|
||||
Self::calc_percent(info.swap_free, info.swap_total),
|
||||
f64
|
||||
),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn run(exit: Arc<AtomicBool>) {
|
||||
let mut udp_stats = None;
|
||||
|
||||
let mut now = Instant::now();
|
||||
let udp_timer = AtomicInterval::default();
|
||||
let mem_timer = AtomicInterval::default();
|
||||
loop {
|
||||
if exit.load(Ordering::Relaxed) {
|
||||
break;
|
||||
}
|
||||
|
||||
if now.elapsed() >= SAMPLE_INTERVAL {
|
||||
now = Instant::now();
|
||||
|
||||
if udp_timer.should_update(SAMPLE_INTERVAL_UDP_MS) {
|
||||
SystemMonitorService::process_udp_stats(&mut udp_stats);
|
||||
}
|
||||
|
||||
if mem_timer.should_update(SAMPLE_INTERVAL_MEM_MS) {
|
||||
SystemMonitorService::report_mem_stats();
|
||||
}
|
||||
|
||||
sleep(SLEEP_INTERVAL);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -125,7 +125,7 @@ impl BucketMapHolderStats {
|
|||
}
|
||||
}
|
||||
|
||||
fn calc_percent(&self, ms: u64, elapsed_ms: u64) -> f32 {
|
||||
fn calc_percent(ms: u64, elapsed_ms: u64) -> f32 {
|
||||
if elapsed_ms == 0 {
|
||||
0.0
|
||||
} else {
|
||||
|
@ -183,7 +183,7 @@ impl BucketMapHolderStats {
|
|||
("count", self.count.load(Ordering::Relaxed), i64),
|
||||
(
|
||||
"bg_waiting_percent",
|
||||
self.calc_percent(
|
||||
Self::calc_percent(
|
||||
self.bg_waiting_us.swap(0, Ordering::Relaxed) / US_PER_MS,
|
||||
thread_time_elapsed_ms
|
||||
),
|
||||
|
@ -191,7 +191,7 @@ impl BucketMapHolderStats {
|
|||
),
|
||||
(
|
||||
"bg_throttling_wait_percent",
|
||||
self.calc_percent(
|
||||
Self::calc_percent(
|
||||
self.bg_throttling_wait_us.swap(0, Ordering::Relaxed) / US_PER_MS,
|
||||
thread_time_elapsed_ms
|
||||
),
|
||||
|
|
|
@ -28,6 +28,7 @@ pub fn duration_as_s(d: &Duration) -> f32 {
|
|||
d.as_secs() as f32 + (d.subsec_nanos() as f32 / 1_000_000_000.0)
|
||||
}
|
||||
|
||||
/// Returns the current time as milliseconds since the UNIX epoch.
|
||||
pub fn timestamp() -> u64 {
|
||||
let now = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
|
@ -66,14 +67,18 @@ pub struct AtomicInterval {
|
|||
}
|
||||
|
||||
impl AtomicInterval {
|
||||
pub fn should_update(&self, interval_time: u64) -> bool {
|
||||
self.should_update_ext(interval_time, true)
|
||||
/// Returns true if 'interval_time_ms' has elapsed since the last time this returned true, provided that at least 'interval_time_ms' has also elapsed since this struct was created.
|
||||
pub fn should_update(&self, interval_time_ms: u64) -> bool {
|
||||
self.should_update_ext(interval_time_ms, true)
|
||||
}
|
||||
|
||||
pub fn should_update_ext(&self, interval_time: u64, skip_first: bool) -> bool {
|
||||
/// a primary use case is periodic metric reporting, potentially from different threads
|
||||
/// true if 'interval_time_ms' has elapsed since last time we returned true
|
||||
/// except, if skip_first=true, returns false until 'interval_time_ms' has elapsed since this struct was created
|
||||
pub fn should_update_ext(&self, interval_time_ms: u64, skip_first: bool) -> bool {
|
||||
let now = timestamp();
|
||||
let last = self.last_update.load(Ordering::Relaxed);
|
||||
now.saturating_sub(last) > interval_time
|
||||
now.saturating_sub(last) > interval_time_ms
|
||||
&& self
|
||||
.last_update
|
||||
.compare_exchange(last, now, Ordering::Relaxed, Ordering::Relaxed)
|
||||
|
|
Loading…
Reference in New Issue