report mem stats (#21258)
This commit is contained in:
parent
778b5d736c
commit
f8dcb2f38b
|
@ -4782,6 +4782,7 @@ dependencies = [
|
||||||
"solana-version",
|
"solana-version",
|
||||||
"solana-vote-program",
|
"solana-vote-program",
|
||||||
"static_assertions",
|
"static_assertions",
|
||||||
|
"sys-info",
|
||||||
"systemstat",
|
"systemstat",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
|
@ -6294,6 +6295,16 @@ dependencies = [
|
||||||
"unicode-xid 0.2.2",
|
"unicode-xid 0.2.2",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "sys-info"
|
||||||
|
version = "0.9.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "0b3a0d0aba8bf96a0e1ddfdc352fc53b3df7f39318c71854910c3c4b024ae52c"
|
||||||
|
dependencies = [
|
||||||
|
"cc",
|
||||||
|
"libc",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "sysctl"
|
name = "sysctl"
|
||||||
version = "0.4.3"
|
version = "0.4.3"
|
||||||
|
|
|
@ -58,6 +58,7 @@ solana-vote-program = { path = "../programs/vote", version = "=1.9.0" }
|
||||||
tempfile = "3.2.0"
|
tempfile = "3.2.0"
|
||||||
thiserror = "1.0"
|
thiserror = "1.0"
|
||||||
solana-rayon-threadlimit = { path = "../rayon-threadlimit", version = "=1.9.0" }
|
solana-rayon-threadlimit = { path = "../rayon-threadlimit", version = "=1.9.0" }
|
||||||
|
sys-info = "0.9.1"
|
||||||
tokio = { version = "1", features = ["full"] }
|
tokio = { version = "1", features = ["full"] }
|
||||||
trees = "0.4.2"
|
trees = "0.4.2"
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
use solana_sdk::timing::AtomicInterval;
|
||||||
use std::{
|
use std::{
|
||||||
collections::HashMap,
|
collections::HashMap,
|
||||||
io::BufRead,
|
io::BufRead,
|
||||||
|
@ -6,13 +7,15 @@ use std::{
|
||||||
Arc,
|
Arc,
|
||||||
},
|
},
|
||||||
thread::{self, sleep, Builder, JoinHandle},
|
thread::{self, sleep, Builder, JoinHandle},
|
||||||
time::{Duration, Instant},
|
time::Duration,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
use std::{fs::File, io::BufReader, path::Path};
|
use std::{fs::File, io::BufReader, path::Path};
|
||||||
|
|
||||||
const SAMPLE_INTERVAL: Duration = Duration::from_secs(60);
|
const MS_PER_S: u64 = 1_000;
|
||||||
|
const SAMPLE_INTERVAL_UDP_MS: u64 = 60 * MS_PER_S;
|
||||||
|
const SAMPLE_INTERVAL_MEM_MS: u64 = MS_PER_S;
|
||||||
const SLEEP_INTERVAL: Duration = Duration::from_millis(500);
|
const SLEEP_INTERVAL: Duration = Duration::from_millis(500);
|
||||||
|
|
||||||
#[cfg(target_os = "linux")]
|
#[cfg(target_os = "linux")]
|
||||||
|
@ -173,21 +176,68 @@ impl SystemMonitorService {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Express `numerator` as a percentage of `denom`.
///
/// A zero `denom` yields `0.0` rather than dividing by zero.
fn calc_percent(numerator: u64, denom: u64) -> f32 {
    match denom {
        0 => 0.0,
        whole => (numerator as f32 / whole as f32) * 100.0,
    }
}
|
||||||
|
|
||||||
|
fn report_mem_stats() {
|
||||||
|
if let Ok(info) = sys_info::mem_info() {
|
||||||
|
datapoint_info!(
|
||||||
|
"memory-stats",
|
||||||
|
("total", info.total, i64),
|
||||||
|
("swap_total", info.swap_total, i64),
|
||||||
|
(
|
||||||
|
"free_percent",
|
||||||
|
Self::calc_percent(info.free, info.total),
|
||||||
|
f64
|
||||||
|
),
|
||||||
|
("used_bytes", info.total.saturating_sub(info.avail), i64),
|
||||||
|
(
|
||||||
|
"avail_percent",
|
||||||
|
Self::calc_percent(info.avail, info.total),
|
||||||
|
f64
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"buffers_percent",
|
||||||
|
Self::calc_percent(info.buffers, info.total),
|
||||||
|
f64
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"cached_percent",
|
||||||
|
Self::calc_percent(info.cached, info.total),
|
||||||
|
f64
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"swap_free_percent",
|
||||||
|
Self::calc_percent(info.swap_free, info.swap_total),
|
||||||
|
f64
|
||||||
|
),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn run(exit: Arc<AtomicBool>) {
|
pub fn run(exit: Arc<AtomicBool>) {
|
||||||
let mut udp_stats = None;
|
let mut udp_stats = None;
|
||||||
|
|
||||||
let mut now = Instant::now();
|
let udp_timer = AtomicInterval::default();
|
||||||
|
let mem_timer = AtomicInterval::default();
|
||||||
loop {
|
loop {
|
||||||
if exit.load(Ordering::Relaxed) {
|
if exit.load(Ordering::Relaxed) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if now.elapsed() >= SAMPLE_INTERVAL {
|
if udp_timer.should_update(SAMPLE_INTERVAL_UDP_MS) {
|
||||||
now = Instant::now();
|
|
||||||
|
|
||||||
SystemMonitorService::process_udp_stats(&mut udp_stats);
|
SystemMonitorService::process_udp_stats(&mut udp_stats);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if mem_timer.should_update(SAMPLE_INTERVAL_MEM_MS) {
|
||||||
|
SystemMonitorService::report_mem_stats();
|
||||||
|
}
|
||||||
|
|
||||||
sleep(SLEEP_INTERVAL);
|
sleep(SLEEP_INTERVAL);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -125,7 +125,7 @@ impl BucketMapHolderStats {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn calc_percent(&self, ms: u64, elapsed_ms: u64) -> f32 {
|
fn calc_percent(ms: u64, elapsed_ms: u64) -> f32 {
|
||||||
if elapsed_ms == 0 {
|
if elapsed_ms == 0 {
|
||||||
0.0
|
0.0
|
||||||
} else {
|
} else {
|
||||||
|
@ -183,7 +183,7 @@ impl BucketMapHolderStats {
|
||||||
("count", self.count.load(Ordering::Relaxed), i64),
|
("count", self.count.load(Ordering::Relaxed), i64),
|
||||||
(
|
(
|
||||||
"bg_waiting_percent",
|
"bg_waiting_percent",
|
||||||
self.calc_percent(
|
Self::calc_percent(
|
||||||
self.bg_waiting_us.swap(0, Ordering::Relaxed) / US_PER_MS,
|
self.bg_waiting_us.swap(0, Ordering::Relaxed) / US_PER_MS,
|
||||||
thread_time_elapsed_ms
|
thread_time_elapsed_ms
|
||||||
),
|
),
|
||||||
|
@ -191,7 +191,7 @@ impl BucketMapHolderStats {
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
"bg_throttling_wait_percent",
|
"bg_throttling_wait_percent",
|
||||||
self.calc_percent(
|
Self::calc_percent(
|
||||||
self.bg_throttling_wait_us.swap(0, Ordering::Relaxed) / US_PER_MS,
|
self.bg_throttling_wait_us.swap(0, Ordering::Relaxed) / US_PER_MS,
|
||||||
thread_time_elapsed_ms
|
thread_time_elapsed_ms
|
||||||
),
|
),
|
||||||
|
|
|
@ -28,6 +28,7 @@ pub fn duration_as_s(d: &Duration) -> f32 {
|
||||||
d.as_secs() as f32 + (d.subsec_nanos() as f32 / 1_000_000_000.0)
|
d.as_secs() as f32 + (d.subsec_nanos() as f32 / 1_000_000_000.0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// return timestamp as ms
|
||||||
pub fn timestamp() -> u64 {
|
pub fn timestamp() -> u64 {
|
||||||
let now = SystemTime::now()
|
let now = SystemTime::now()
|
||||||
.duration_since(UNIX_EPOCH)
|
.duration_since(UNIX_EPOCH)
|
||||||
|
@ -66,14 +67,18 @@ pub struct AtomicInterval {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl AtomicInterval {
|
impl AtomicInterval {
|
||||||
pub fn should_update(&self, interval_time: u64) -> bool {
|
/// true if 'interval_time_ms' has elapsed since the last time we returned true, provided at least 'interval_time_ms' has also elapsed since this struct was created
|
||||||
self.should_update_ext(interval_time, true)
|
pub fn should_update(&self, interval_time_ms: u64) -> bool {
|
||||||
|
self.should_update_ext(interval_time_ms, true)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn should_update_ext(&self, interval_time: u64, skip_first: bool) -> bool {
|
/// a primary use case is periodic metric reporting, potentially from different threads
|
||||||
|
/// true if 'interval_time_ms' has elapsed since last time we returned true
|
||||||
|
/// except, if skip_first=true, false until 'interval_time_ms' has elapsed since this struct was created
|
||||||
|
pub fn should_update_ext(&self, interval_time_ms: u64, skip_first: bool) -> bool {
|
||||||
let now = timestamp();
|
let now = timestamp();
|
||||||
let last = self.last_update.load(Ordering::Relaxed);
|
let last = self.last_update.load(Ordering::Relaxed);
|
||||||
now.saturating_sub(last) > interval_time
|
now.saturating_sub(last) > interval_time_ms
|
||||||
&& self
|
&& self
|
||||||
.last_update
|
.last_update
|
||||||
.compare_exchange(last, now, Ordering::Relaxed, Ordering::Relaxed)
|
.compare_exchange(last, now, Ordering::Relaxed, Ordering::Relaxed)
|
||||||
|
|
Loading…
Reference in New Issue