report mem stats (#21258)

This commit is contained in:
Jeff Washington (jwash) 2021-11-12 18:59:41 -06:00 committed by GitHub
parent 778b5d736c
commit f8dcb2f38b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 80 additions and 13 deletions

11
Cargo.lock generated
View File

@ -4782,6 +4782,7 @@ dependencies = [
"solana-version",
"solana-vote-program",
"static_assertions",
"sys-info",
"systemstat",
"tempfile",
"thiserror",
@ -6294,6 +6295,16 @@ dependencies = [
"unicode-xid 0.2.2",
]
[[package]]
name = "sys-info"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b3a0d0aba8bf96a0e1ddfdc352fc53b3df7f39318c71854910c3c4b024ae52c"
dependencies = [
"cc",
"libc",
]
[[package]]
name = "sysctl"
version = "0.4.3"

View File

@ -58,6 +58,7 @@ solana-vote-program = { path = "../programs/vote", version = "=1.9.0" }
tempfile = "3.2.0"
thiserror = "1.0"
solana-rayon-threadlimit = { path = "../rayon-threadlimit", version = "=1.9.0" }
sys-info = "0.9.1"
tokio = { version = "1", features = ["full"] }
trees = "0.4.2"

View File

@ -1,3 +1,4 @@
use solana_sdk::timing::AtomicInterval;
use std::{
collections::HashMap,
io::BufRead,
@ -6,13 +7,15 @@ use std::{
Arc,
},
thread::{self, sleep, Builder, JoinHandle},
time::{Duration, Instant},
time::Duration,
};
#[cfg(target_os = "linux")]
use std::{fs::File, io::BufReader, path::Path};
const SAMPLE_INTERVAL: Duration = Duration::from_secs(60);
const MS_PER_S: u64 = 1_000;
const SAMPLE_INTERVAL_UDP_MS: u64 = 60 * MS_PER_S;
const SAMPLE_INTERVAL_MEM_MS: u64 = MS_PER_S;
const SLEEP_INTERVAL: Duration = Duration::from_millis(500);
#[cfg(target_os = "linux")]
@ -173,21 +176,68 @@ impl SystemMonitorService {
);
}
/// Expresses `numerator` as a percentage of `denom`.
///
/// Returns `0.0` when `denom` is zero so callers never divide by zero.
/// Both operands are converted to `f32` before dividing, so very large
/// inputs are subject to `f32` rounding.
fn calc_percent(numerator: u64, denom: u64) -> f32 {
    match denom {
        // nothing to take a share of: report zero percent
        0 => 0.0,
        whole => (numerator as f32 / whole as f32) * 100.0,
    }
}
fn report_mem_stats() {
if let Ok(info) = sys_info::mem_info() {
datapoint_info!(
"memory-stats",
("total", info.total, i64),
("swap_total", info.swap_total, i64),
(
"free_percent",
Self::calc_percent(info.free, info.total),
f64
),
("used_bytes", info.total.saturating_sub(info.avail), i64),
(
"avail_percent",
Self::calc_percent(info.avail, info.total),
f64
),
(
"buffers_percent",
Self::calc_percent(info.buffers, info.total),
f64
),
(
"cached_percent",
Self::calc_percent(info.cached, info.total),
f64
),
(
"swap_free_percent",
Self::calc_percent(info.swap_free, info.swap_total),
f64
),
)
}
}
pub fn run(exit: Arc<AtomicBool>) {
let mut udp_stats = None;
let mut now = Instant::now();
let udp_timer = AtomicInterval::default();
let mem_timer = AtomicInterval::default();
loop {
if exit.load(Ordering::Relaxed) {
break;
}
if now.elapsed() >= SAMPLE_INTERVAL {
now = Instant::now();
if udp_timer.should_update(SAMPLE_INTERVAL_UDP_MS) {
SystemMonitorService::process_udp_stats(&mut udp_stats);
}
if mem_timer.should_update(SAMPLE_INTERVAL_MEM_MS) {
SystemMonitorService::report_mem_stats();
}
sleep(SLEEP_INTERVAL);
}
}

View File

@ -125,7 +125,7 @@ impl BucketMapHolderStats {
}
}
fn calc_percent(&self, ms: u64, elapsed_ms: u64) -> f32 {
fn calc_percent(ms: u64, elapsed_ms: u64) -> f32 {
if elapsed_ms == 0 {
0.0
} else {
@ -183,7 +183,7 @@ impl BucketMapHolderStats {
("count", self.count.load(Ordering::Relaxed), i64),
(
"bg_waiting_percent",
self.calc_percent(
Self::calc_percent(
self.bg_waiting_us.swap(0, Ordering::Relaxed) / US_PER_MS,
thread_time_elapsed_ms
),
@ -191,7 +191,7 @@ impl BucketMapHolderStats {
),
(
"bg_throttling_wait_percent",
self.calc_percent(
Self::calc_percent(
self.bg_throttling_wait_us.swap(0, Ordering::Relaxed) / US_PER_MS,
thread_time_elapsed_ms
),

View File

@ -28,6 +28,7 @@ pub fn duration_as_s(d: &Duration) -> f32 {
d.as_secs() as f32 + (d.subsec_nanos() as f32 / 1_000_000_000.0)
}
/// return timestamp as ms
pub fn timestamp() -> u64 {
let now = SystemTime::now()
.duration_since(UNIX_EPOCH)
@ -66,14 +67,18 @@ pub struct AtomicInterval {
}
impl AtomicInterval {
pub fn should_update(&self, interval_time: u64) -> bool {
self.should_update_ext(interval_time, true)
/// true if 'interval_time_ms' has elapsed since the last time we returned true, as long as it has been 'interval_time_ms' since this struct was created
pub fn should_update(&self, interval_time_ms: u64) -> bool {
self.should_update_ext(interval_time_ms, true)
}
pub fn should_update_ext(&self, interval_time: u64, skip_first: bool) -> bool {
/// a primary use case is periodic metric reporting, potentially from different threads
/// true if 'interval_time_ms' has elapsed since last time we returned true
/// except, if skip_first=true, false until 'interval_time_ms' has elapsed since this struct was created
pub fn should_update_ext(&self, interval_time_ms: u64, skip_first: bool) -> bool {
let now = timestamp();
let last = self.last_update.load(Ordering::Relaxed);
now.saturating_sub(last) > interval_time
now.saturating_sub(last) > interval_time_ms
&& self
.last_update
.compare_exchange(last, now, Ordering::Relaxed, Ordering::Relaxed)