// tower/tower/src/hedge/mod.rs

//! Pre-emptively retry requests which have been outstanding for longer
//! than a given latency percentile.
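//!
//! # Example
//!
//! A rough usage sketch; `my_service` and `MyPolicy` are assumed to be defined
//! elsewhere, with `MyPolicy` implementing the [`Policy`] trait below:
//!
//! ```ignore
//! use std::time::Duration;
//! use tower::hedge::Hedge;
//! use tower::{Service, ServiceExt};
//!
//! // Hedge requests that are slower than the 90th-percentile latency, once
//! // at least 10 latencies have been recorded in a 10-second rotating window.
//! let mut hedged = Hedge::new(
//!     my_service,
//!     MyPolicy,
//!     10,                      // min_data_points
//!     0.9,                     // latency_percentile
//!     Duration::from_secs(10), // period
//! );
//!
//! // `Hedge` is itself a `Service`, so it is driven like any other.
//! let response = hedged.ready().await?.call(request).await?;
//! ```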
#![warn(
    missing_debug_implementations,
    missing_docs,
    rust_2018_idioms,
    unreachable_pub
)]

use crate::filter::Filter;
use futures_util::future;
use pin_project::pin_project;
use std::sync::{Arc, Mutex};
use std::time::Duration;
use std::{
    pin::Pin,
    task::{Context, Poll},
};
use tracing::error;

mod delay;
mod latency;
mod rotating_histogram;
mod select;

use delay::Delay;
use latency::Latency;
use rotating_histogram::RotatingHistogram;
use select::Select;

type Histo = Arc<Mutex<RotatingHistogram>>;
type Service<S, P> = select::Select<
    SelectPolicy<P>,
    Latency<Histo, S>,
    Delay<DelayPolicy, Filter<Latency<Histo, S>, PolicyPredicate<P>>>,
>;

/// A middleware that pre-emptively retries requests which have been outstanding
/// for longer than a given latency percentile. Whichever of the original future
/// or the retry future completes first, that value is used.
#[derive(Debug)]
pub struct Hedge<S, P>(Service<S, P>);

/// The [`Future`] returned by the [`Hedge`] service.
#[pin_project]
#[derive(Debug)]
pub struct Future<S, Request>
where
    S: tower_service::Service<Request>,
{
    #[pin]
    inner: S::Future,
}

type Error = Box<dyn std::error::Error + Send + Sync>;

/// A policy which describes which requests can be cloned, and whether those
/// cloned requests should be retried.
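///
/// # Example
///
/// A minimal sketch of a policy for `String` requests that always hedges and
/// always allows the retry (a real policy would usually be more selective):
///
/// ```
/// use tower::hedge::Policy;
///
/// struct AlwaysHedge;
///
/// impl Policy<String> for AlwaysHedge {
///     fn clone_request(&self, req: &String) -> Option<String> {
///         // `String` is cheap to clone, so every request may be hedged.
///         Some(req.clone())
///     }
///     fn can_retry(&self, _req: &String) -> bool {
///         // Always issue the hedge once the delay has elapsed.
///         true
///     }
/// }
/// ```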
pub trait Policy<Request> {
    /// `clone_request` is called when the request is first received to determine
    /// if the request is retryable.
    fn clone_request(&self, req: &Request) -> Option<Request>;

    /// `can_retry` is called after the hedge timeout to determine if the hedge
    /// retry should be issued.
    fn can_retry(&self, req: &Request) -> bool;
}

// NOTE: these are pub only because they appear inside the public `Future` type.
#[doc(hidden)]
#[derive(Clone, Debug)]
pub struct PolicyPredicate<P>(P);

#[doc(hidden)]
#[derive(Debug)]
pub struct DelayPolicy {
    histo: Histo,
    latency_percentile: f32,
}

#[doc(hidden)]
#[derive(Debug)]
pub struct SelectPolicy<P> {
    policy: P,
    histo: Histo,
    min_data_points: u64,
}

impl<S, P> Hedge<S, P> {
    /// Create a new hedge middleware.
    pub fn new<Request>(
        service: S,
        policy: P,
        min_data_points: u64,
        latency_percentile: f32,
        period: Duration,
    ) -> Hedge<S, P>
    where
        S: tower_service::Service<Request> + Clone,
        S::Error: Into<Error>,
        P: Policy<Request> + Clone,
    {
        let histo = Arc::new(Mutex::new(RotatingHistogram::new(period)));
        Self::new_with_histo(service, policy, min_data_points, latency_percentile, histo)
    }

    /// A hedge middleware with a prepopulated latency histogram. This is useful
    /// for integration tests.
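    ///
    /// # Example
    ///
    /// A sketch, with `my_service` and `MyPolicy` assumed as in the module docs:
    ///
    /// ```ignore
    /// // Seed the histogram with ten 100ms latencies so that hedging is
    /// // active immediately (min_data_points is also 10).
    /// let hedged = Hedge::new_with_mock_latencies(
    ///     my_service,
    ///     MyPolicy,
    ///     10,
    ///     0.9,
    ///     Duration::from_secs(10),
    ///     &[100; 10],
    /// );
    /// ```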
    pub fn new_with_mock_latencies<Request>(
        service: S,
        policy: P,
        min_data_points: u64,
        latency_percentile: f32,
        period: Duration,
        latencies_ms: &[u64],
    ) -> Hedge<S, P>
    where
        S: tower_service::Service<Request> + Clone,
        S::Error: Into<Error>,
        P: Policy<Request> + Clone,
    {
        let histo = Arc::new(Mutex::new(RotatingHistogram::new(period)));
        {
            let mut locked = histo.lock().unwrap();
            for latency in latencies_ms.iter() {
                // Record directly into the read side of the rotating histogram
                // so the mock latencies are visible to readers immediately.
                locked.read().record(*latency).unwrap();
            }
        }
        Self::new_with_histo(service, policy, min_data_points, latency_percentile, histo)
    }

    fn new_with_histo<Request>(
        service: S,
        policy: P,
        min_data_points: u64,
        latency_percentile: f32,
        histo: Histo,
    ) -> Hedge<S, P>
    where
        S: tower_service::Service<Request> + Clone,
        S::Error: Into<Error>,
        P: Policy<Request> + Clone,
    {
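        // A sketch of the stack assembled below, from the outside in (this
        // mirrors the `Service<S, P>` type alias above):
        //
        //   Select<SelectPolicy<P>, A, B>   -- race A against B; first wins
        //     A: Latency<Histo, S>          -- the original request, recorded
        //     B: Delay<DelayPolicy, _>      -- hedge, delayed by a percentile
        //          Filter<Latency<Histo, S>, PolicyPredicate<P>>
        //                                   -- gated on `Policy::can_retry`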
        // Clone the underlying service and wrap both copies in a middleware that
        // records the latencies in a rotating histogram.
        let recorded_a = Latency::new(histo.clone(), service.clone());
        let recorded_b = Latency::new(histo.clone(), service);

        // Check policy to see if the hedge request should be issued.
        let filtered = Filter::new(recorded_b, PolicyPredicate(policy.clone()));

        // Delay the second request by a percentile of the recorded request latency
        // histogram.
        let delay_policy = DelayPolicy {
            histo: histo.clone(),
            latency_percentile,
        };
        let delayed = Delay::new(delay_policy, filtered);

        // If the request is retryable, issue two requests -- the second one delayed
        // by a latency percentile. Use the first result to complete.
        let select_policy = SelectPolicy {
            policy,
            histo,
            min_data_points,
        };
        Hedge(Select::new(select_policy, recorded_a, delayed))
    }
}

impl<S, P, Request> tower_service::Service<Request> for Hedge<S, P>
where
    S: tower_service::Service<Request> + Clone,
    S::Error: Into<Error>,
    P: Policy<Request> + Clone,
{
    type Response = S::Response;
    type Error = Error;
    type Future = Future<Service<S, P>, Request>;

    fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
        self.0.poll_ready(cx)
    }

    fn call(&mut self, request: Request) -> Self::Future {
        Future {
            inner: self.0.call(request),
        }
    }
}

impl<S, Request> std::future::Future for Future<S, Request>
where
    S: tower_service::Service<Request>,
    S::Error: Into<Error>,
{
    type Output = Result<S::Response, Error>;

    fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
        self.project().inner.poll(cx).map_err(Into::into)
    }
}

// Note: unlike the stable `Duration::as_millis`, which truncates, this helper
// rounds fractional milliseconds up.
const NANOS_PER_MILLI: u32 = 1_000_000;
const MILLIS_PER_SEC: u64 = 1_000;

fn millis(duration: Duration) -> u64 {
    // Round up.
    let millis = (duration.subsec_nanos() + NANOS_PER_MILLI - 1) / NANOS_PER_MILLI;
    duration
        .as_secs()
        .saturating_mul(MILLIS_PER_SEC)
        .saturating_add(u64::from(millis))
}
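
// Illustrative sanity checks for the round-up behavior of `millis` above
// (added here as a sketch; not part of the upstream test suite).
#[cfg(test)]
mod millis_tests {
    use super::millis;
    use std::time::Duration;

    #[test]
    fn rounds_fractional_millis_up() {
        // A whole number of milliseconds is unchanged.
        assert_eq!(millis(Duration::from_millis(1)), 1);
        // 1.5ms rounds up to 2ms.
        assert_eq!(millis(Duration::new(0, 1_500_000)), 2);
        // Whole seconds have no fractional part to round.
        assert_eq!(millis(Duration::from_secs(2)), 2_000);
    }
}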

impl latency::Record for Histo {
    fn record(&mut self, latency: Duration) {
        let mut locked = self.lock().unwrap();
        locked.write().record(millis(latency)).unwrap_or_else(|e| {
            error!("Failed to write to hedge histogram: {:?}", e);
        })
    }
}

impl<P, Request> crate::filter::Predicate<Request> for PolicyPredicate<P>
where
    P: Policy<Request>,
{
    type Future = future::Either<
        future::Ready<Result<(), crate::filter::error::Error>>,
        future::Pending<Result<(), crate::filter::error::Error>>,
    >;

    fn check(&mut self, request: &Request) -> Self::Future {
        if self.0.can_retry(request) {
            future::Either::Left(future::ready(Ok(())))
        } else {
            // If the hedge retry should not be issued, we simply want to wait
            // for the result of the original request. Therefore we don't want
            // to return an error here. Instead, we use future::pending to ensure
            // that the original request wins the select.
            future::Either::Right(future::pending())
        }
    }
}

impl<Request> delay::Policy<Request> for DelayPolicy {
    fn delay(&self, _req: &Request) -> Duration {
        // Look up the configured percentile (e.g. 0.9 => the 90th-percentile
        // latency, in milliseconds) in the rotating histogram.
        let mut locked = self.histo.lock().unwrap();
        let millis = locked
            .read()
            .value_at_quantile(self.latency_percentile.into());
        Duration::from_millis(millis)
    }
}

impl<P, Request> select::Policy<Request> for SelectPolicy<P>
where
    P: Policy<Request>,
{
    fn clone_request(&self, req: &Request) -> Option<Request> {
        self.policy.clone_request(req).filter(|_| {
            let mut locked = self.histo.lock().unwrap();
            // Do not attempt a retry if there are insufficiently many data
            // points in the histogram.
            locked.read().len() >= self.min_data_points
        })
    }
}