From 96a1b661f06c6c72bc8999a55497ee9ebcab2d41 Mon Sep 17 00:00:00 2001 From: Alfredo Garcia Date: Wed, 9 Jun 2021 20:39:51 -0300 Subject: [PATCH] Rate limit initial genesis block download retries, Credit: Equilibrium (#2255) * implement and test a rate limit in `request_genesis()` * add `request_genesis_is_rate_limited` test to sync * add ensure_timeouts constraint for GENESIS_TIMEOUT_RETRY * Suppress expected warning logs in zebrad tests Co-authored-by: teor --- zebra-test/src/lib.rs | 1 + zebrad/src/components/sync.rs | 72 +++------- zebrad/src/components/sync/tests.rs | 1 + zebrad/src/components/sync/tests/timing.rs | 148 +++++++++++++++++++++ 4 files changed, 167 insertions(+), 55 deletions(-) create mode 100644 zebrad/src/components/sync/tests.rs create mode 100644 zebrad/src/components/sync/tests/timing.rs diff --git a/zebra-test/src/lib.rs b/zebra-test/src/lib.rs index 5b1bff001..e8ed7b8ed 100644 --- a/zebra-test/src/lib.rs +++ b/zebra-test/src/lib.rs @@ -37,6 +37,7 @@ pub fn init() { EnvFilter::try_new("warn") .unwrap() .add_directive("zebra_consensus=error".parse().unwrap()) + .add_directive("zebrad=error".parse().unwrap()) }); tracing_subscriber::registry() diff --git a/zebrad/src/components/sync.rs b/zebrad/src/components/sync.rs index 159e9e8d2..50c31276f 100644 --- a/zebrad/src/components/sync.rs +++ b/zebrad/src/components/sync.rs @@ -21,6 +21,9 @@ use zebra_state as zs; use crate::{config::ZebradConfig, BoxError}; mod downloads; +#[cfg(test)] +mod tests; + use downloads::{AlwaysHedge, Downloads}; /// Controls the number of peers used for each ObtainTips and ExtendTips request. @@ -130,6 +133,18 @@ pub(super) const BLOCK_VERIFY_TIMEOUT: Duration = Duration::from_secs(180); /// previous sync runs. const SYNC_RESTART_DELAY: Duration = Duration::from_secs(61); +/// Controls how long we wait to retry a failed attempt to download +/// and verify the genesis block. +/// +/// This timeout gives the crawler time to find better peers. +/// +/// ## Security +/// +/// If this timeout is removed (or set too low), Zebra will immediately retry +/// to download and verify the genesis block from its peers. This can cause +/// a denial of service on those peers. +const GENESIS_TIMEOUT_RETRY: Duration = Duration::from_secs(5); + /// Helps work around defects in the bitcoin protocol by checking whether /// the returned hashes actually extend a chain tip. #[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)] @@ -615,7 +630,8 @@ where match self.downloads.next().await.expect("downloads is nonempty") { Ok(hash) => tracing::trace!(?hash, "verified and committed block to state"), Err(e) => { - tracing::warn!(?e, "could not download or verify genesis block, retrying") + tracing::warn!(?e, "could not download or verify genesis block, retrying"); + tokio::time::sleep(GENESIS_TIMEOUT_RETRY).await; } } } @@ -665,57 +681,3 @@ where ); } } - -#[cfg(test)] -mod test { - use zebra_chain::parameters::NetworkUpgrade; - - use super::*; - - /// Make sure the timeout values are consistent with each other. - #[test] - fn ensure_timeouts_consistent() { - zebra_test::init(); - - // This constraint clears the download pipeline during a restart - assert!( - SYNC_RESTART_DELAY.as_secs() > 2 * BLOCK_DOWNLOAD_TIMEOUT.as_secs(), - "Sync restart should allow for pending and buffered requests to complete" - ); - - // This constraint avoids spurious failures due to block retries timing out. - // We multiply by 2, because the Hedge can wait up to BLOCK_DOWNLOAD_TIMEOUT - // seconds before retrying. - const BLOCK_DOWNLOAD_HEDGE_TIMEOUT: u64 = - 2 * BLOCK_DOWNLOAD_RETRY_LIMIT as u64 * BLOCK_DOWNLOAD_TIMEOUT.as_secs(); - assert!( - SYNC_RESTART_DELAY.as_secs() > BLOCK_DOWNLOAD_HEDGE_TIMEOUT, - "Sync restart should allow for block downloads to time out on every retry" - ); - - // This constraint avoids spurious failures due to block download timeouts - assert!( - BLOCK_VERIFY_TIMEOUT.as_secs() - > SYNC_RESTART_DELAY.as_secs() - + BLOCK_DOWNLOAD_HEDGE_TIMEOUT - + BLOCK_DOWNLOAD_TIMEOUT.as_secs(), - "Block verify should allow for a block timeout, a sync restart, and some block fetches" - ); - - // The minimum recommended network speed for Zebra, in bytes per second. - const MIN_NETWORK_SPEED_BYTES_PER_SEC: u64 = 10 * 1024 * 1024 / 8; - - // This constraint avoids spurious failures when restarting large checkpoints - assert!( - BLOCK_VERIFY_TIMEOUT.as_secs() > SYNC_RESTART_DELAY.as_secs() + 2 * zebra_consensus::MAX_CHECKPOINT_BYTE_COUNT / MIN_NETWORK_SPEED_BYTES_PER_SEC, - "Block verify should allow for a full checkpoint download, a sync restart, then a full checkpoint re-download" - ); - - // This constraint avoids spurious failures after checkpointing has finished - assert!( - BLOCK_VERIFY_TIMEOUT.as_secs() - > 2 * NetworkUpgrade::Blossom.target_spacing().num_seconds() as u64, - "Block verify should allow for at least one new block to be generated and distributed" - ); - } -} diff --git a/zebrad/src/components/sync/tests.rs b/zebrad/src/components/sync/tests.rs new file mode 100644 index 000000000..bc99ae6fc --- /dev/null +++ b/zebrad/src/components/sync/tests.rs @@ -0,0 +1 @@ +mod timing; diff --git a/zebrad/src/components/sync/tests/timing.rs b/zebrad/src/components/sync/tests/timing.rs new file mode 100644 index 000000000..eafd57e25 --- /dev/null +++ b/zebrad/src/components/sync/tests/timing.rs @@ -0,0 +1,148 @@ +use futures::future; +use std::sync::{ + atomic::{AtomicU8, Ordering}, + Arc, +}; +use tokio::{ + runtime::Runtime, + time::{timeout, Duration}, +}; + +use super::super::*; +use crate::config::ZebradConfig; + +/// Make sure the timeout values are consistent with each other. +#[test] +fn ensure_timeouts_consistent() { + zebra_test::init(); + + // This constraint clears the download pipeline during a restart + assert!( + SYNC_RESTART_DELAY.as_secs() > 2 * BLOCK_DOWNLOAD_TIMEOUT.as_secs(), + "Sync restart should allow for pending and buffered requests to complete" + ); + + // This constraint avoids spurious failures due to block retries timing out. + // We multiply by 2, because the Hedge can wait up to BLOCK_DOWNLOAD_TIMEOUT + // seconds before retrying. + const BLOCK_DOWNLOAD_HEDGE_TIMEOUT: u64 = + 2 * BLOCK_DOWNLOAD_RETRY_LIMIT as u64 * BLOCK_DOWNLOAD_TIMEOUT.as_secs(); + assert!( + SYNC_RESTART_DELAY.as_secs() > BLOCK_DOWNLOAD_HEDGE_TIMEOUT, + "Sync restart should allow for block downloads to time out on every retry" + ); + + // This constraint avoids spurious failures due to block download timeouts + assert!( + BLOCK_VERIFY_TIMEOUT.as_secs() + > SYNC_RESTART_DELAY.as_secs() + + BLOCK_DOWNLOAD_HEDGE_TIMEOUT + + BLOCK_DOWNLOAD_TIMEOUT.as_secs(), + "Block verify should allow for a block timeout, a sync restart, and some block fetches" + ); + + // The minimum recommended network speed for Zebra, in bytes per second. + const MIN_NETWORK_SPEED_BYTES_PER_SEC: u64 = 10 * 1024 * 1024 / 8; + + // This constraint avoids spurious failures when restarting large checkpoints + assert!( + BLOCK_VERIFY_TIMEOUT.as_secs() > SYNC_RESTART_DELAY.as_secs() + 2 * zebra_consensus::MAX_CHECKPOINT_BYTE_COUNT / MIN_NETWORK_SPEED_BYTES_PER_SEC, + "Block verify should allow for a full checkpoint download, a sync restart, then a full checkpoint re-download" + ); + + // This constraint avoids spurious failures after checkpointing has finished + assert!( + BLOCK_VERIFY_TIMEOUT.as_secs() + > 2 * zebra_chain::parameters::NetworkUpgrade::Blossom + .target_spacing() + .num_seconds() as u64, + "Block verify should allow for at least one new block to be generated and distributed" + ); + + // This constraint makes genesis retries more likely to succeed + assert!( + GENESIS_TIMEOUT_RETRY.as_secs() > zebra_network::constants::HANDSHAKE_TIMEOUT.as_secs() + && GENESIS_TIMEOUT_RETRY.as_secs() < BLOCK_DOWNLOAD_TIMEOUT.as_secs(), + "Genesis retries should wait for new peers, but they shouldn't wait too long" + ); +} + +/// Test that calls to [`ChainSync::request_genesis`] are rate limited. +#[test] +fn request_genesis_is_rate_limited() { + zebra_test::init(); + + // The number of calls to `request_genesis()` we are going to be testing for + const RETRIES_TO_RUN: u8 = 3; + + // create some counters that will be updated inside async blocks + let peer_requests_counter = Arc::new(AtomicU8::new(0)); + let peer_requests_counter_in_service = Arc::clone(&peer_requests_counter); + let state_requests_counter = Arc::new(AtomicU8::new(0)); + let state_requests_counter_in_service = Arc::clone(&state_requests_counter); + + let runtime = Runtime::new().expect("Failed to create Tokio runtime"); + let _guard = runtime.enter(); + + // create a fake peer service that respond with `Error` to `BlocksByHash` or + // panic in any other type of request. + let peer_service = tower::service_fn(move |request| { + match request { + zebra_network::Request::BlocksByHash(_) => { + // Track the call + peer_requests_counter_in_service.fetch_add(1, Ordering::SeqCst); + // Respond with `Error` + future::err("block not found".into()) + } + _ => unreachable!("no other request is allowed"), + } + }); + + // create a state service that respond with `None` to `Depth` or + // panic in any other type of request. + let state_service = tower::service_fn(move |request| { + match request { + zebra_state::Request::Depth(_) => { + // Track the call + state_requests_counter_in_service.fetch_add(1, Ordering::SeqCst); + // Respond with `None` + future::ok(zebra_state::Response::Depth(None)) + } + _ => unreachable!("no other request is allowed"), + } + }); + + // create a verifier service that will always panic as it will never be called + let verifier_service = + tower::service_fn( + move |_| async move { unreachable!("no request to this service is allowed") }, + ); + + // start the sync + let mut chain_sync = ChainSync::new( + &ZebradConfig::default(), + peer_service, + state_service, + verifier_service, + ); + + // run `request_genesis()` with a timeout of 13 seconds + runtime.block_on(async move { + // allow extra wall clock time for tests on CPU-bound machines + let retries_timeout = (RETRIES_TO_RUN - 1) as u64 * GENESIS_TIMEOUT_RETRY.as_secs() + + GENESIS_TIMEOUT_RETRY.as_secs() / 2; + let _ = timeout( + Duration::from_secs(retries_timeout), + chain_sync.request_genesis(), + ) + .await; + }); + + let peer_requests_counter = peer_requests_counter.load(Ordering::SeqCst); + assert!(peer_requests_counter >= RETRIES_TO_RUN); + assert!(peer_requests_counter <= RETRIES_TO_RUN * (BLOCK_DOWNLOAD_RETRY_LIMIT as u8) * 2); + assert_eq!( + state_requests_counter.load(Ordering::SeqCst), + RETRIES_TO_RUN + ); +}