// metrics/metrics-runtime/src/data/histogram.rs

use crate::common::{Delta, ValueHandle};
use crate::helper::duration_as_nanos;
use atomic_shim::AtomicU64;
use crossbeam_utils::Backoff;
use metrics_util::{AtomicBucket, StreamingIntegers};
use quanta::Clock;
use std::cmp;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::Duration;

/// A reference to a [`Histogram`].
///
/// A [`Histogram`] is used for directly updating a histogram, without any lookup overhead.
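///
/// A minimal usage sketch (hypothetical: in practice the `ValueHandle` comes from the
/// runtime's registry rather than being constructed by hand):
///
/// ```rust,ignore
/// let histogram = Histogram::from(handle);
///
/// // Record a raw value directly.
/// histogram.record_value(42);
///
/// // Or record the delta between two points in time, e.g. nanosecond timestamps
/// // taken from a `quanta::Clock`.
/// histogram.record_timing(start, end);
/// ```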
#[derive(Clone)]
pub struct Histogram {
handle: ValueHandle,
}

impl Histogram {
/// Records a timing for the histogram.
pub fn record_timing<D: Delta>(&self, start: D, end: D) {
let value = end.delta(start);
self.handle.update_histogram(value);
}

/// Records a value for the histogram.
pub fn record_value(&self, value: u64) {
self.handle.update_histogram(value);
}
}

impl From<ValueHandle> for Histogram {
fn from(handle: ValueHandle) -> Self {
Self { handle }
}
}

/// An atomic windowed histogram.
///
/// This histogram provides a windowed view of values that rolls forward over time, dropping old
/// values as they exceed the window of the histogram. Both writes into the histogram and
/// snapshots of it are lock-free.
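///
/// For example, with a 5 second window and 1 second granularity, a value recorded now stays
/// visible to snapshots for roughly the window duration, after which its bucket is cleared
/// and reused for new writes.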
#[derive(Debug)]
pub struct AtomicWindowedHistogram {
buckets: Vec<AtomicBucket<u64>>,
bucket_count: usize,
granularity: u64,
upkeep_index: AtomicUsize,
index: AtomicUsize,
next_upkeep: AtomicU64,
clock: Clock,
}

impl AtomicWindowedHistogram {
/// Creates a new [`AtomicWindowedHistogram`].
///
/// Internally, a number of buckets will be created, based on how many times `granularity` goes
/// into `window`. As time passes, buckets will be cleared to avoid values older than the
/// `window` duration.
///
/// As buckets will hold values representing a period of time up to `granularity`, the
/// granularity can be lowered or raised to roll values off more precisely, or less precisely,
/// against the provided clock.
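///
/// For example (a sketch; a mocked `Clock`, as in the tests below, works as well):
///
/// ```rust,ignore
/// // A 10 second window at 1 second granularity yields 11 internal buckets:
/// // 10s / 1s = 10 buckets of history, plus one currently accepting writes.
/// let h = AtomicWindowedHistogram::new(
///     Duration::from_secs(10),
///     Duration::from_secs(1),
///     Clock::new(),
/// );
/// ```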
///
/// # Panics
/// Panics if `granularity` is larger than or equal to `window`.
pub fn new(window: Duration, granularity: Duration, clock: Clock) -> Self {
let window_ns = duration_as_nanos(window);
let granularity_ns = duration_as_nanos(granularity);
assert!(window_ns > granularity_ns);
let now = clock.recent();
let bucket_count = ((window_ns / granularity_ns) as usize) + 1;
let mut buckets = Vec::new();
for _ in 0..bucket_count {
buckets.push(AtomicBucket::new());
}
let next_upkeep = now + granularity_ns;
AtomicWindowedHistogram {
buckets,
bucket_count,
granularity: granularity_ns,
upkeep_index: AtomicUsize::new(0),
index: AtomicUsize::new(0),
next_upkeep: AtomicU64::new(next_upkeep),
clock,
}
}

/// Takes a snapshot of the current histogram.
///
/// Returns a [`StreamingIntegers`] value, representing all observed values in the
/// histogram. As writes happen concurrently, along with buckets being cleared, a snapshot is
/// not guaranteed to have all values present at the time the method was called.
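///
/// A sketch of reading a snapshot back out (mirrors the simple test below):
///
/// ```rust,ignore
/// h.record(1245);
/// let snapshot = h.snapshot();
/// // Decompress to recover the raw observed values.
/// let values = snapshot.decompress();
/// assert_eq!(values, vec![1245]);
/// ```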
pub fn snapshot(&self) -> StreamingIntegers {
// Run upkeep to make sure our window reflects any time passage since the last write.
let index = self.upkeep();
let mut streaming = StreamingIntegers::new();
// Start from the bucket just ahead of the currently-written-to bucket, so that we read
// the oldest values first, before any concurrent upkeep has a chance to clear them.
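// Concretely: with 6 buckets and a current index of 2, we visit buckets 3, 4, 5, 0, 1,
// and finally 2, ending on the active bucket.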
for i in 0..self.bucket_count {
let bucket_index = (index + i + 1) % self.bucket_count;
let bucket = &self.buckets[bucket_index];
bucket.data_with(|block| streaming.compress(block));
}
streaming
}

/// Records a value to the histogram.
pub fn record(&self, value: u64) {
let index = self.upkeep();
self.buckets[index].push(value);
}

fn upkeep(&self) -> usize {
let backoff = Backoff::new();
loop {
// Start by figuring out if the histogram needs to perform upkeep.
let now = self.clock.recent();
let next_upkeep = self.next_upkeep.load(Ordering::Acquire);
if now <= next_upkeep {
let index = self.index.load(Ordering::Acquire);
let actual_index = index % self.bucket_count;
return actual_index;
}
// We do need to perform upkeep, but someone *else* might actually be doing it already,
// so go ahead and wait until the upkeep index catches up with the index: the index
// will be ahead of the upkeep index until upkeep is complete.
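// As a hypothetical trace: a racing thread that CAS'd `index` from 10 to 12 leaves
// `upkeep_index` at 10 while it clears the affected buckets, and only then stores 12
// into `upkeep_index`, releasing anyone snoozing here.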
let mut upkeep_in_progress = false;
let mut index;
loop {
index = self.index.load(Ordering::Acquire);
let upkeep_index = self.upkeep_index.load(Ordering::Acquire);
if index == upkeep_index {
break;
}
upkeep_in_progress = true;
backoff.snooze();
}
// If we waited for another upkeep operation to complete, then there's the chance that
// enough time has passed that we're due for upkeep again, so restart our loop.
if upkeep_in_progress {
continue;
}
// Figure out how many buckets, up to the maximum, need to be cleared based on the
// delta between the target upkeep time and the actual time. We always clear at least
// one bucket, but may need to clear them all.
let delta = now - next_upkeep;
let bucket_depth = cmp::min((delta / self.granularity) as usize, self.bucket_count) + 1;
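// For example, with 1 second granularity and a delta of 3.5 seconds, integer division
// gives 3, so bucket_depth is min(3, bucket_count) + 1 = 4 buckets to clear (given a
// sufficiently large bucket_count).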
// Now that we know how many buckets we need to clear, update the index to point
// writers at the next bucket past the last one that we will be clearing.
let new_index = index + bucket_depth;
let prev_index = self
.index
.compare_and_swap(index, new_index, Ordering::SeqCst);
if prev_index == index {
// Clear the target bucket first, and then update the upkeep target time so new
// writers can proceed. We may still have other buckets to clean up if we had
// multiple rounds worth of upkeep to do, but this will let new writes proceed as
// soon as possible.
let clear_index = new_index % self.bucket_count;
self.buckets[clear_index].clear();
let now = self.clock.now();
let next_upkeep = now + self.granularity;
self.next_upkeep.store(next_upkeep, Ordering::Release);
// Now that we've cleared the actual bucket that writers will use going forward, we
// have to clear any older buckets that we skipped over. If our granularity was 1
// second, and we skipped over 4 seconds worth of buckets, we would still have
// 3 buckets to clear, etc.
let last_index = new_index - 1;
while index < last_index {
index += 1;
let clear_index = index % self.bucket_count;
self.buckets[clear_index].clear();
}
// We've cleared the old buckets, so upkeep is done. Push our upkeep index forward
// so that writers who were blocked waiting for upkeep to conclude can restart.
self.upkeep_index.store(new_index, Ordering::Release);
}
}
}
}

#[cfg(test)]
mod tests {
use super::{AtomicWindowedHistogram, Clock};
use crossbeam_utils::thread;
use std::time::Duration;

#[test]
fn test_histogram_simple_update() {
let (clock, _ctl) = Clock::mock();
let h = AtomicWindowedHistogram::new(Duration::from_secs(5), Duration::from_secs(1), clock);
h.record(1245);
let snapshot = h.snapshot();
assert_eq!(snapshot.len(), 1);
let values = snapshot.decompress();
assert_eq!(values.len(), 1);
assert_eq!(values.get(0).unwrap(), &1245);
}

#[test]
fn test_histogram_complex_update() {
let (clock, _ctl) = Clock::mock();
let h = AtomicWindowedHistogram::new(Duration::from_secs(5), Duration::from_secs(1), clock);
h.record(1245);
h.record(213);
h.record(1022);
h.record(1248);
let snapshot = h.snapshot();
assert_eq!(snapshot.len(), 4);
let values = snapshot.decompress();
assert_eq!(values.len(), 4);
assert_eq!(values.get(0).unwrap(), &1245);
assert_eq!(values.get(1).unwrap(), &213);
assert_eq!(values.get(2).unwrap(), &1022);
assert_eq!(values.get(3).unwrap(), &1248);
}

#[test]
fn test_windowed_histogram_rollover() {
let (clock, ctl) = Clock::mock();
// Set our granularity at right below a second, so that when we add a second, we don't
// land on the same exact value, and our "now" time should always be ahead of the upkeep
// time when we expect it to be.
let h =
AtomicWindowedHistogram::new(Duration::from_secs(5), Duration::from_millis(999), clock);
// Histogram is empty, snapshot is empty.
let snapshot = h.snapshot();
assert_eq!(snapshot.len(), 0);
// Immediately add two values, and observe the histogram and snapshot having two values.
h.record(1);
h.record(2);
let snapshot = h.snapshot();
assert_eq!(snapshot.len(), 2);
let total: u64 = snapshot.decompress().iter().sum();
assert_eq!(total, 3);
// Roll forward 3 seconds, should still have everything.
ctl.increment(Duration::from_secs(3));
let snapshot = h.snapshot();
assert_eq!(snapshot.len(), 2);
let total: u64 = snapshot.decompress().iter().sum();
assert_eq!(total, 3);
// Roll forward 1 second, should still have everything.
ctl.increment(Duration::from_secs(1));
let snapshot = h.snapshot();
assert_eq!(snapshot.len(), 2);
let total: u64 = snapshot.decompress().iter().sum();
assert_eq!(total, 3);
// Roll forward 1 second, should still have everything.
ctl.increment(Duration::from_secs(1));
let snapshot = h.snapshot();
assert_eq!(snapshot.len(), 2);
let total: u64 = snapshot.decompress().iter().sum();
assert_eq!(total, 3);
// Pump in some new values. We should have a total of 5 values now.
h.record(3);
h.record(4);
h.record(5);
let snapshot = h.snapshot();
assert_eq!(snapshot.len(), 5);
let total: u64 = snapshot.decompress().iter().sum();
assert_eq!(total, 15);
// Roll forward 6 seconds, in increments. The first increment rolls over a single bucket
// and clears bucket #0, the first one we wrote to. The next increment brings us right up
// to the edge of the last three values, and the final one clears them out.
ctl.increment(Duration::from_secs(1));
let snapshot = h.snapshot();
assert_eq!(snapshot.len(), 3);
let total: u64 = snapshot.decompress().iter().sum();
assert_eq!(total, 12);
ctl.increment(Duration::from_secs(4));
let snapshot = h.snapshot();
assert_eq!(snapshot.len(), 3);
let total: u64 = snapshot.decompress().iter().sum();
assert_eq!(total, 12);
ctl.increment(Duration::from_secs(1));
let snapshot = h.snapshot();
assert_eq!(snapshot.len(), 0);
// We should also be able to advance by vast periods of time and observe not only old
// values going away but also no weird overflow or index-wrapping issues. This ensures
// that our upkeep code functions not just for under-load single-bucket rollovers but
// also for "been idle for a while and just got a write" scenarios.
h.record(42);
let snapshot = h.snapshot();
assert_eq!(snapshot.len(), 1);
let total: u64 = snapshot.decompress().iter().sum();
assert_eq!(total, 42);
ctl.increment(Duration::from_secs(1000));
let snapshot = h.snapshot();
assert_eq!(snapshot.len(), 0);
}

#[test]
fn test_histogram_write_gauntlet_mt() {
let clock = Clock::new();
let clock2 = clock.clone();
let target = clock.now() + Duration::from_secs(5).as_nanos() as u64;
let h = AtomicWindowedHistogram::new(
Duration::from_secs(20),
Duration::from_millis(500),
clock,
);
thread::scope(|s| {
let t1 = s.spawn(|_| {
let mut total = 0;
while clock2.now() < target {
h.record(42);
total += 1;
}
total
});
let t2 = s.spawn(|_| {
let mut total = 0;
while clock2.now() < target {
h.record(42);
total += 1;
}
total
});
let t3 = s.spawn(|_| {
let mut total = 0;
while clock2.now() < target {
h.record(42);
total += 1;
}
total
});
let t1_total = t1.join().expect("thread 1 panicked during test");
let t2_total = t2.join().expect("thread 2 panicked during test");
let t3_total = t3.join().expect("thread 3 panicked during test");
let total = t1_total + t2_total + t3_total;
let snap = h.snapshot();
assert_eq!(total, snap.len());
})
.unwrap();
}
}