Security: only apply the outbound connection rate-limit to actual connections (#2278)
* Only advance the outbound connection timer when it returns an address

  Previously, we were advancing the timer even when we returned `None`.
  This created large wait times when there were no eligible peers.

* Refactor to avoid overlapping sleep timers

* Add a maximum next peer delay test

  Also refactor peer numbers into constants.

* Make the number of proptests overridable by the standard env var

  Also clean up the test constants.

* Test that skipping peer connections also skips their rate limits

* Allow an extra second after each sleep on loaded machines

  macOS VMs seem to need this extra time to pass their tests.

* Restart test time bounds from the current time

  This change avoids test failures due to cumulative errors.
  Also use a single call to `Instant::now` for each test round.
  And print the times when the tests fail.

* Stop generating invalid outbound peers in proptests

  The candidate set proptests will fail if enough generated peers are
  invalid for outbound connections.
This commit is contained in:
parent
71b41f0206
commit
86f23f7960
|
@@ -0,0 +1,8 @@
|
|||
# Seeds for failure cases proptest has generated in the past. It is
|
||||
# automatically read and these particular cases re-run before any
|
||||
# novel cases are generated.
|
||||
#
|
||||
# It is recommended to check this file in to source control so that
|
||||
# everyone who runs the test benefits from these saved cases.
|
||||
cc e27ea62986a1ace4e16dde4b99d8ae8557a6b1e92e3f9a7c6aba0d7b179eb817 # shrinks to peers = [MetaAddr { addr: 0.0.0.0:0, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 0.0.0.0:0, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 0.0.0.0:1151, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 129.65.108.101:9019, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 95.68.199.21:46887, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 167.206.253.131:43927, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 160.138.155.239:63430, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: [6a4e:a543:8c93:e074:8a2a:5cc4:96f8:ce95]:51071, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }], initial_candidates = 0, extra_candidates = 2
|
||||
cc 788fe2c47a4af559e9a921764d1ce8fcfd0743361c19e6e7a06adc093bc19078 # shrinks to peers = [MetaAddr { addr: [::e9:8050:dbf7:5665]:27188, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 106.27.232.177:51742, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 127.0.0.1:30827, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 0.0.0.0:21948, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 0.0.0.0:38186, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: [4db2:96e6:38c5:fd62:e964:9338:4ce8:bfa0]:32686, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 0.0.0.0:22952, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 114.164.73.233:10203, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }], initial_candidates = 3, extra_candidates = 3
|
|
@@ -5,6 +5,8 @@ use super::{MetaAddr, PeerAddrState, PeerServices};
|
|||
use zebra_chain::serialization::{arbitrary::canonical_socket_addr, DateTime32};
|
||||
|
||||
impl MetaAddr {
|
||||
/// Create a strategy that generates [`MetaAddr`]s in the
|
||||
/// [`PeerAddrState::NeverAttemptedGossiped`] state.
|
||||
pub fn gossiped_strategy() -> BoxedStrategy<Self> {
|
||||
(
|
||||
canonical_socket_addr(),
|
||||
|
@@ -17,9 +19,31 @@ impl MetaAddr {
|
|||
.boxed()
|
||||
}
|
||||
|
||||
pub fn alternate_node_strategy() -> BoxedStrategy<Self> {
|
||||
/// Create a strategy that generates [`MetaAddr`]s in the
|
||||
/// [`PeerAddrState::NeverAttemptedAlternate`] state.
|
||||
pub fn alternate_strategy() -> BoxedStrategy<Self> {
|
||||
(canonical_socket_addr(), any::<PeerServices>())
|
||||
.prop_map(|(socket_addr, services)| MetaAddr::new_alternate(&socket_addr, &services))
|
||||
.boxed()
|
||||
}
|
||||
|
||||
/// Create a strategy that generates [`MetaAddr`]s which are ready for outbound connections.
|
||||
///
|
||||
/// TODO: Generate all [`PeerAddrState`] variants, and give them ready fields.
|
||||
/// (After PR #2276 merges.)
|
||||
pub fn ready_outbound_strategy() -> BoxedStrategy<Self> {
|
||||
canonical_socket_addr()
|
||||
.prop_map(|address| MetaAddr::new_alternate(&address, &PeerServices::NODE_NETWORK))
|
||||
.prop_filter_map("failed MetaAddr::is_valid_for_outbound", |address| {
|
||||
// Alternate nodes use the current time, so they're always ready
|
||||
//
|
||||
// TODO: create a "Zebra supported services" constant
|
||||
let meta_addr = MetaAddr::new_alternate(&address, &PeerServices::NODE_NETWORK);
|
||||
if meta_addr.is_valid_for_outbound() {
|
||||
Some(meta_addr)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.boxed()
|
||||
}
|
||||
}
|
||||
|
|
|
@@ -1,7 +1,7 @@
|
|||
use std::{cmp::min, mem, sync::Arc, time::Duration};
|
||||
|
||||
use futures::stream::{FuturesUnordered, StreamExt};
|
||||
use tokio::time::{sleep, sleep_until, timeout, Instant, Sleep};
|
||||
use tokio::time::{sleep, timeout, Instant, Sleep};
|
||||
use tower::{Service, ServiceExt};
|
||||
|
||||
use zebra_chain::serialization::DateTime32;
|
||||
|
@@ -290,10 +290,6 @@ where
|
|||
/// new peer connections are initiated at least
|
||||
/// [`MIN_PEER_CONNECTION_INTERVAL`][constants::MIN_PEER_CONNECTION_INTERVAL] apart.
|
||||
pub async fn next(&mut self) -> Option<MetaAddr> {
|
||||
let current_deadline = self.wait_next_handshake.deadline().max(Instant::now());
|
||||
let mut sleep = sleep_until(current_deadline + constants::MIN_PEER_CONNECTION_INTERVAL);
|
||||
mem::swap(&mut self.wait_next_handshake, &mut sleep);
|
||||
|
||||
// # Correctness
|
||||
//
|
||||
// In this critical section, we hold the address mutex, blocking the
|
||||
|
@@ -316,8 +312,10 @@ where
|
|||
reconnect
|
||||
};
|
||||
|
||||
// SECURITY: rate-limit new candidate connections
|
||||
sleep.await;
|
||||
// SECURITY: rate-limit new outbound peer connections
|
||||
(&mut self.wait_next_handshake).await;
|
||||
let mut sleep = sleep(constants::MIN_PEER_CONNECTION_INTERVAL);
|
||||
mem::swap(&mut self.wait_next_handshake, &mut sleep);
|
||||
|
||||
Some(reconnect)
|
||||
}
|
||||
|
|
|
@@ -1,8 +1,10 @@
|
|||
use std::{
|
||||
env,
|
||||
sync::{Arc, Mutex},
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
||||
use futures::FutureExt;
|
||||
use proptest::{collection::vec, prelude::*};
|
||||
use tokio::{
|
||||
runtime::Runtime,
|
||||
|
@@ -18,11 +20,25 @@ use crate::{
|
|||
Request, Response,
|
||||
};
|
||||
|
||||
/// The maximum number of candidates for a "next peer" test.
|
||||
const MAX_TEST_CANDIDATES: u32 = 4;
|
||||
|
||||
/// The number of random test addresses for each test.
|
||||
const TEST_ADDRESSES: usize = 2 * MAX_TEST_CANDIDATES as usize;
|
||||
|
||||
/// The default number of proptest cases for each test that includes sleeps.
|
||||
const DEFAULT_SLEEP_TEST_PROPTEST_CASES: u32 = 16;
|
||||
|
||||
/// The largest extra delay we allow after every test sleep.
|
||||
///
|
||||
/// This makes the tests more reliable on machines with high CPU load.
|
||||
const MAX_SLEEP_EXTRA_DELAY: Duration = Duration::from_secs(1);
|
||||
|
||||
proptest! {
|
||||
/// Test that validated gossiped peers never have a `last_seen` time that's in the future.
|
||||
#[test]
|
||||
fn no_last_seen_times_are_in_the_future(
|
||||
gossiped_peers in vec(MetaAddr::gossiped_strategy(), 1..10),
|
||||
gossiped_peers in vec(MetaAddr::gossiped_strategy(), 1..TEST_ADDRESSES),
|
||||
last_seen_limit in any::<DateTime32>(),
|
||||
) {
|
||||
zebra_test::init();
|
||||
|
@@ -33,17 +49,50 @@ proptest! {
|
|||
prop_assert![peer.get_last_seen() <= last_seen_limit];
|
||||
}
|
||||
}
|
||||
|
||||
/// Test that the outbound peer connection rate limit is only applied when
|
||||
/// a peer address is actually returned.
|
||||
///
|
||||
/// TODO: after PR #2275 merges, add a similar proptest,
|
||||
/// using a "not ready for attempt" peer generation strategy
|
||||
#[test]
|
||||
fn skipping_outbound_peer_connection_skips_rate_limit(next_peer_attempts in 0..TEST_ADDRESSES) {
|
||||
zebra_test::init();
|
||||
|
||||
let runtime = Runtime::new().expect("Failed to create Tokio runtime");
|
||||
let _guard = runtime.enter();
|
||||
|
||||
let peer_service = tower::service_fn(|_| async {
|
||||
unreachable!("Mock peer service is never used");
|
||||
});
|
||||
|
||||
// Since the address book is empty, there won't be any available peers
|
||||
let address_book = AddressBook::new(&Config::default(), Span::none());
|
||||
|
||||
let mut candidate_set = CandidateSet::new(Arc::new(Mutex::new(address_book)), peer_service);
|
||||
|
||||
// Make sure that the rate-limit is never triggered, even after multiple calls
|
||||
for _ in 0..next_peer_attempts {
|
||||
// An empty address book immediately returns "no next peer"
|
||||
assert!(matches!(candidate_set.next().now_or_never(), Some(None)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
proptest! {
|
||||
#![proptest_config(ProptestConfig::with_cases(16))]
|
||||
// These tests contain sleeps, so we use a small number of cases by default.
|
||||
// Set the PROPTEST_CASES env var to override this default.
|
||||
#![proptest_config(proptest::test_runner::Config::with_cases(env::var("PROPTEST_CASES")
|
||||
.ok()
|
||||
.and_then(|v| v.parse().ok())
|
||||
.unwrap_or(DEFAULT_SLEEP_TEST_PROPTEST_CASES)))]
|
||||
|
||||
/// Test that new outbound peer connections are rate-limited.
|
||||
#[test]
|
||||
fn new_outbound_peer_connections_are_rate_limited(
|
||||
peers in vec(MetaAddr::alternate_node_strategy(), 10),
|
||||
initial_candidates in 0..4usize,
|
||||
extra_candidates in 0..4usize,
|
||||
peers in vec(MetaAddr::ready_outbound_strategy(), TEST_ADDRESSES),
|
||||
initial_candidates in 0..MAX_TEST_CANDIDATES,
|
||||
extra_candidates in 0..MAX_TEST_CANDIDATES,
|
||||
) {
|
||||
zebra_test::init();
|
||||
|
||||
|
@@ -63,13 +112,18 @@ proptest! {
|
|||
// Check rate limiting for initial peers
|
||||
check_candidates_rate_limiting(&mut candidate_set, initial_candidates).await;
|
||||
// Sleep more than the rate limiting delay
|
||||
sleep(Duration::from_millis(400)).await;
|
||||
sleep(MAX_TEST_CANDIDATES * MIN_PEER_CONNECTION_INTERVAL).await;
|
||||
// Check that the next peers are still respecting the rate limiting, without causing a
|
||||
// burst of reconnections
|
||||
check_candidates_rate_limiting(&mut candidate_set, extra_candidates).await;
|
||||
};
|
||||
|
||||
assert!(runtime.block_on(timeout(Duration::from_secs(10), checks)).is_ok());
|
||||
// Allow enough time for the maximum number of candidates,
|
||||
// plus some extra time for test machines with high CPU load.
|
||||
// (The max delay asserts usually fail before hitting this timeout.)
|
||||
let max_rate_limit_sleep = 3 * MAX_TEST_CANDIDATES * MIN_PEER_CONNECTION_INTERVAL;
|
||||
let max_extra_delay = (2 * MAX_TEST_CANDIDATES + 1) * MAX_SLEEP_EXTRA_DELAY;
|
||||
assert!(runtime.block_on(timeout(max_rate_limit_sleep + max_extra_delay, checks)).is_ok());
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -77,19 +131,41 @@ proptest! {
|
|||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Will panic if a reconnection peer is returned too quickly, or if no reconnection peer is
|
||||
/// returned.
|
||||
async fn check_candidates_rate_limiting<S>(candidate_set: &mut CandidateSet<S>, candidates: usize)
|
||||
/// Will panic if:
|
||||
/// - a connection peer is returned too quickly,
|
||||
/// - a connection peer is returned too slowly, or
|
||||
/// - if no reconnection peer is returned at all.
|
||||
async fn check_candidates_rate_limiting<S>(candidate_set: &mut CandidateSet<S>, candidates: u32)
|
||||
where
|
||||
S: tower::Service<Request, Response = Response, Error = BoxError>,
|
||||
S::Future: Send + 'static,
|
||||
{
|
||||
let mut minimum_reconnect_instant = Instant::now();
|
||||
let mut now = Instant::now();
|
||||
let mut minimum_reconnect_instant = now;
|
||||
// Allow extra time for test machines with high CPU load
|
||||
let mut maximum_reconnect_instant = now + MAX_SLEEP_EXTRA_DELAY;
|
||||
|
||||
for _ in 0..candidates {
|
||||
assert!(candidate_set.next().await.is_some());
|
||||
assert!(Instant::now() >= minimum_reconnect_instant);
|
||||
assert!(
|
||||
candidate_set.next().await.is_some(),
|
||||
"there are enough available candidates"
|
||||
);
|
||||
|
||||
minimum_reconnect_instant += MIN_PEER_CONNECTION_INTERVAL;
|
||||
now = Instant::now();
|
||||
assert!(
|
||||
now >= minimum_reconnect_instant,
|
||||
"all candidates should obey the minimum rate-limit: now: {:?} min: {:?}",
|
||||
now,
|
||||
minimum_reconnect_instant,
|
||||
);
|
||||
assert!(
|
||||
now <= maximum_reconnect_instant,
|
||||
"rate-limited candidates should not be delayed too long: now: {:?} max: {:?}. Hint: is the test machine overloaded?",
|
||||
now,
|
||||
maximum_reconnect_instant,
|
||||
);
|
||||
|
||||
minimum_reconnect_instant = now + MIN_PEER_CONNECTION_INTERVAL;
|
||||
maximum_reconnect_instant = now + MIN_PEER_CONNECTION_INTERVAL + MAX_SLEEP_EXTRA_DELAY;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue