Security: only apply the outbound connection rate-limit to actual connections (#2278)
* Only advance the outbound connection timer when it returns an address Previously, we were advancing the timer even when we returned `None`. This created large wait times when there were no eligible peers. * Refactor to avoid overlapping sleep timers * Add a maximum next peer delay test Also refactor peer numbers into constants. * Make the number of proptests overridable by the standard env var Also cleanup the test constants. * Test that skipping peer connections also skips their rate limits * Allow an extra second after each sleep on loaded machines macOS VMs seem to need this extra time to pass their tests. * Restart test time bounds from the current time This change avoids test failures due to cumulative errors. Also use a single call to `Instant::now` for each test round. And print the times when the tests fail. * Stop generating invalid outbound peers in proptests The candidate set proptests will fail if enough generated peers are invalid for outbound connections.
This commit is contained in:
parent
71b41f0206
commit
86f23f7960
|
@ -0,0 +1,8 @@
|
||||||
|
# Seeds for failure cases proptest has generated in the past. It is
|
||||||
|
# automatically read and these particular cases re-run before any
|
||||||
|
# novel cases are generated.
|
||||||
|
#
|
||||||
|
# It is recommended to check this file in to source control so that
|
||||||
|
# everyone who runs the test benefits from these saved cases.
|
||||||
|
cc e27ea62986a1ace4e16dde4b99d8ae8557a6b1e92e3f9a7c6aba0d7b179eb817 # shrinks to peers = [MetaAddr { addr: 0.0.0.0:0, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 0.0.0.0:0, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 0.0.0.0:1151, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 129.65.108.101:9019, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 95.68.199.21:46887, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 167.206.253.131:43927, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 160.138.155.239:63430, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: [6a4e:a543:8c93:e074:8a2a:5cc4:96f8:ce95]:51071, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }], initial_candidates = 0, extra_candidates = 2
|
||||||
|
cc 788fe2c47a4af559e9a921764d1ce8fcfd0743361c19e6e7a06adc093bc19078 # shrinks to peers = [MetaAddr { addr: [::e9:8050:dbf7:5665]:27188, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 106.27.232.177:51742, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 127.0.0.1:30827, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 0.0.0.0:21948, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 0.0.0.0:38186, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: [4db2:96e6:38c5:fd62:e964:9338:4ce8:bfa0]:32686, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 0.0.0.0:22952, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 114.164.73.233:10203, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }], initial_candidates = 3, extra_candidates = 3
|
|
@ -5,6 +5,8 @@ use super::{MetaAddr, PeerAddrState, PeerServices};
|
||||||
use zebra_chain::serialization::{arbitrary::canonical_socket_addr, DateTime32};
|
use zebra_chain::serialization::{arbitrary::canonical_socket_addr, DateTime32};
|
||||||
|
|
||||||
impl MetaAddr {
|
impl MetaAddr {
|
||||||
|
/// Create a strategy that generates [`MetaAddr`]s in the
|
||||||
|
/// [`PeerAddrState::NeverAttemptedGossiped`] state.
|
||||||
pub fn gossiped_strategy() -> BoxedStrategy<Self> {
|
pub fn gossiped_strategy() -> BoxedStrategy<Self> {
|
||||||
(
|
(
|
||||||
canonical_socket_addr(),
|
canonical_socket_addr(),
|
||||||
|
@ -17,9 +19,31 @@ impl MetaAddr {
|
||||||
.boxed()
|
.boxed()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn alternate_node_strategy() -> BoxedStrategy<Self> {
|
/// Create a strategy that generates [`MetaAddr`]s in the
|
||||||
|
/// [`PeerAddrState::NeverAttemptedAlternate`] state.
|
||||||
|
pub fn alternate_strategy() -> BoxedStrategy<Self> {
|
||||||
|
(canonical_socket_addr(), any::<PeerServices>())
|
||||||
|
.prop_map(|(socket_addr, services)| MetaAddr::new_alternate(&socket_addr, &services))
|
||||||
|
.boxed()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a strategy that generates [`MetaAddr`]s which are ready for outbound connections.
|
||||||
|
///
|
||||||
|
/// TODO: Generate all [`PeerAddrState`] variants, and give them ready fields.
|
||||||
|
/// (After PR #2276 merges.)
|
||||||
|
pub fn ready_outbound_strategy() -> BoxedStrategy<Self> {
|
||||||
canonical_socket_addr()
|
canonical_socket_addr()
|
||||||
.prop_map(|address| MetaAddr::new_alternate(&address, &PeerServices::NODE_NETWORK))
|
.prop_filter_map("failed MetaAddr::is_valid_for_outbound", |address| {
|
||||||
|
// Alternate nodes use the current time, so they're always ready
|
||||||
|
//
|
||||||
|
// TODO: create a "Zebra supported services" constant
|
||||||
|
let meta_addr = MetaAddr::new_alternate(&address, &PeerServices::NODE_NETWORK);
|
||||||
|
if meta_addr.is_valid_for_outbound() {
|
||||||
|
Some(meta_addr)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
})
|
||||||
.boxed()
|
.boxed()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
use std::{cmp::min, mem, sync::Arc, time::Duration};
|
use std::{cmp::min, mem, sync::Arc, time::Duration};
|
||||||
|
|
||||||
use futures::stream::{FuturesUnordered, StreamExt};
|
use futures::stream::{FuturesUnordered, StreamExt};
|
||||||
use tokio::time::{sleep, sleep_until, timeout, Instant, Sleep};
|
use tokio::time::{sleep, timeout, Instant, Sleep};
|
||||||
use tower::{Service, ServiceExt};
|
use tower::{Service, ServiceExt};
|
||||||
|
|
||||||
use zebra_chain::serialization::DateTime32;
|
use zebra_chain::serialization::DateTime32;
|
||||||
|
@ -290,10 +290,6 @@ where
|
||||||
/// new peer connections are initiated at least
|
/// new peer connections are initiated at least
|
||||||
/// [`MIN_PEER_CONNECTION_INTERVAL`][constants::MIN_PEER_CONNECTION_INTERVAL] apart.
|
/// [`MIN_PEER_CONNECTION_INTERVAL`][constants::MIN_PEER_CONNECTION_INTERVAL] apart.
|
||||||
pub async fn next(&mut self) -> Option<MetaAddr> {
|
pub async fn next(&mut self) -> Option<MetaAddr> {
|
||||||
let current_deadline = self.wait_next_handshake.deadline().max(Instant::now());
|
|
||||||
let mut sleep = sleep_until(current_deadline + constants::MIN_PEER_CONNECTION_INTERVAL);
|
|
||||||
mem::swap(&mut self.wait_next_handshake, &mut sleep);
|
|
||||||
|
|
||||||
// # Correctness
|
// # Correctness
|
||||||
//
|
//
|
||||||
// In this critical section, we hold the address mutex, blocking the
|
// In this critical section, we hold the address mutex, blocking the
|
||||||
|
@ -316,8 +312,10 @@ where
|
||||||
reconnect
|
reconnect
|
||||||
};
|
};
|
||||||
|
|
||||||
// SECURITY: rate-limit new candidate connections
|
// SECURITY: rate-limit new outbound peer connections
|
||||||
sleep.await;
|
(&mut self.wait_next_handshake).await;
|
||||||
|
let mut sleep = sleep(constants::MIN_PEER_CONNECTION_INTERVAL);
|
||||||
|
mem::swap(&mut self.wait_next_handshake, &mut sleep);
|
||||||
|
|
||||||
Some(reconnect)
|
Some(reconnect)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +1,10 @@
|
||||||
use std::{
|
use std::{
|
||||||
|
env,
|
||||||
sync::{Arc, Mutex},
|
sync::{Arc, Mutex},
|
||||||
time::{Duration, Instant},
|
time::{Duration, Instant},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use futures::FutureExt;
|
||||||
use proptest::{collection::vec, prelude::*};
|
use proptest::{collection::vec, prelude::*};
|
||||||
use tokio::{
|
use tokio::{
|
||||||
runtime::Runtime,
|
runtime::Runtime,
|
||||||
|
@ -18,11 +20,25 @@ use crate::{
|
||||||
Request, Response,
|
Request, Response,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// The maximum number of candidates for a "next peer" test.
|
||||||
|
const MAX_TEST_CANDIDATES: u32 = 4;
|
||||||
|
|
||||||
|
/// The number of random test addresses for each test.
|
||||||
|
const TEST_ADDRESSES: usize = 2 * MAX_TEST_CANDIDATES as usize;
|
||||||
|
|
||||||
|
/// The default number of proptest cases for each test that includes sleeps.
|
||||||
|
const DEFAULT_SLEEP_TEST_PROPTEST_CASES: u32 = 16;
|
||||||
|
|
||||||
|
/// The largest extra delay we allow after every test sleep.
|
||||||
|
///
|
||||||
|
/// This makes the tests more reliable on machines with high CPU load.
|
||||||
|
const MAX_SLEEP_EXTRA_DELAY: Duration = Duration::from_secs(1);
|
||||||
|
|
||||||
proptest! {
|
proptest! {
|
||||||
/// Test that validated gossiped peers never have a `last_seen` time that's in the future.
|
/// Test that validated gossiped peers never have a `last_seen` time that's in the future.
|
||||||
#[test]
|
#[test]
|
||||||
fn no_last_seen_times_are_in_the_future(
|
fn no_last_seen_times_are_in_the_future(
|
||||||
gossiped_peers in vec(MetaAddr::gossiped_strategy(), 1..10),
|
gossiped_peers in vec(MetaAddr::gossiped_strategy(), 1..TEST_ADDRESSES),
|
||||||
last_seen_limit in any::<DateTime32>(),
|
last_seen_limit in any::<DateTime32>(),
|
||||||
) {
|
) {
|
||||||
zebra_test::init();
|
zebra_test::init();
|
||||||
|
@ -33,17 +49,50 @@ proptest! {
|
||||||
prop_assert![peer.get_last_seen() <= last_seen_limit];
|
prop_assert![peer.get_last_seen() <= last_seen_limit];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Test that the outbound peer connection rate limit is only applied when
|
||||||
|
/// a peer address is actually returned.
|
||||||
|
///
|
||||||
|
/// TODO: after PR #2275 merges, add a similar proptest,
|
||||||
|
/// using a "not ready for attempt" peer generation strategy
|
||||||
|
#[test]
|
||||||
|
fn skipping_outbound_peer_connection_skips_rate_limit(next_peer_attempts in 0..TEST_ADDRESSES) {
|
||||||
|
zebra_test::init();
|
||||||
|
|
||||||
|
let runtime = Runtime::new().expect("Failed to create Tokio runtime");
|
||||||
|
let _guard = runtime.enter();
|
||||||
|
|
||||||
|
let peer_service = tower::service_fn(|_| async {
|
||||||
|
unreachable!("Mock peer service is never used");
|
||||||
|
});
|
||||||
|
|
||||||
|
// Since the address book is empty, there won't be any available peers
|
||||||
|
let address_book = AddressBook::new(&Config::default(), Span::none());
|
||||||
|
|
||||||
|
let mut candidate_set = CandidateSet::new(Arc::new(Mutex::new(address_book)), peer_service);
|
||||||
|
|
||||||
|
// Make sure that the rate-limit is never triggered, even after multiple calls
|
||||||
|
for _ in 0..next_peer_attempts {
|
||||||
|
// An empty address book immediately returns "no next peer"
|
||||||
|
assert!(matches!(candidate_set.next().now_or_never(), Some(None)));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
proptest! {
|
proptest! {
|
||||||
#![proptest_config(ProptestConfig::with_cases(16))]
|
// These tests contain sleeps, so we use a small number of cases by default.
|
||||||
|
// Set the PROPTEST_CASES env var to override this default.
|
||||||
|
#![proptest_config(proptest::test_runner::Config::with_cases(env::var("PROPTEST_CASES")
|
||||||
|
.ok()
|
||||||
|
.and_then(|v| v.parse().ok())
|
||||||
|
.unwrap_or(DEFAULT_SLEEP_TEST_PROPTEST_CASES)))]
|
||||||
|
|
||||||
/// Test that new outbound peer connections are rate-limited.
|
/// Test that new outbound peer connections are rate-limited.
|
||||||
#[test]
|
#[test]
|
||||||
fn new_outbound_peer_connections_are_rate_limited(
|
fn new_outbound_peer_connections_are_rate_limited(
|
||||||
peers in vec(MetaAddr::alternate_node_strategy(), 10),
|
peers in vec(MetaAddr::ready_outbound_strategy(), TEST_ADDRESSES),
|
||||||
initial_candidates in 0..4usize,
|
initial_candidates in 0..MAX_TEST_CANDIDATES,
|
||||||
extra_candidates in 0..4usize,
|
extra_candidates in 0..MAX_TEST_CANDIDATES,
|
||||||
) {
|
) {
|
||||||
zebra_test::init();
|
zebra_test::init();
|
||||||
|
|
||||||
|
@ -63,13 +112,18 @@ proptest! {
|
||||||
// Check rate limiting for initial peers
|
// Check rate limiting for initial peers
|
||||||
check_candidates_rate_limiting(&mut candidate_set, initial_candidates).await;
|
check_candidates_rate_limiting(&mut candidate_set, initial_candidates).await;
|
||||||
// Sleep more than the rate limiting delay
|
// Sleep more than the rate limiting delay
|
||||||
sleep(Duration::from_millis(400)).await;
|
sleep(MAX_TEST_CANDIDATES * MIN_PEER_CONNECTION_INTERVAL).await;
|
||||||
// Check that the next peers are still respecting the rate limiting, without causing a
|
// Check that the next peers are still respecting the rate limiting, without causing a
|
||||||
// burst of reconnections
|
// burst of reconnections
|
||||||
check_candidates_rate_limiting(&mut candidate_set, extra_candidates).await;
|
check_candidates_rate_limiting(&mut candidate_set, extra_candidates).await;
|
||||||
};
|
};
|
||||||
|
|
||||||
assert!(runtime.block_on(timeout(Duration::from_secs(10), checks)).is_ok());
|
// Allow enough time for the maximum number of candidates,
|
||||||
|
// plus some extra time for test machines with high CPU load.
|
||||||
|
// (The max delay asserts usually fail before hitting this timeout.)
|
||||||
|
let max_rate_limit_sleep = 3 * MAX_TEST_CANDIDATES * MIN_PEER_CONNECTION_INTERVAL;
|
||||||
|
let max_extra_delay = (2 * MAX_TEST_CANDIDATES + 1) * MAX_SLEEP_EXTRA_DELAY;
|
||||||
|
assert!(runtime.block_on(timeout(max_rate_limit_sleep + max_extra_delay, checks)).is_ok());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -77,19 +131,41 @@ proptest! {
|
||||||
///
|
///
|
||||||
/// # Panics
|
/// # Panics
|
||||||
///
|
///
|
||||||
/// Will panic if a reconnection peer is returned too quickly, or if no reconnection peer is
|
/// Will panic if:
|
||||||
/// returned.
|
/// - a connection peer is returned too quickly,
|
||||||
async fn check_candidates_rate_limiting<S>(candidate_set: &mut CandidateSet<S>, candidates: usize)
|
/// - a connection peer is returned too slowly, or
|
||||||
|
/// - if no reconnection peer is returned at all.
|
||||||
|
async fn check_candidates_rate_limiting<S>(candidate_set: &mut CandidateSet<S>, candidates: u32)
|
||||||
where
|
where
|
||||||
S: tower::Service<Request, Response = Response, Error = BoxError>,
|
S: tower::Service<Request, Response = Response, Error = BoxError>,
|
||||||
S::Future: Send + 'static,
|
S::Future: Send + 'static,
|
||||||
{
|
{
|
||||||
let mut minimum_reconnect_instant = Instant::now();
|
let mut now = Instant::now();
|
||||||
|
let mut minimum_reconnect_instant = now;
|
||||||
|
// Allow extra time for test machines with high CPU load
|
||||||
|
let mut maximum_reconnect_instant = now + MAX_SLEEP_EXTRA_DELAY;
|
||||||
|
|
||||||
for _ in 0..candidates {
|
for _ in 0..candidates {
|
||||||
assert!(candidate_set.next().await.is_some());
|
assert!(
|
||||||
assert!(Instant::now() >= minimum_reconnect_instant);
|
candidate_set.next().await.is_some(),
|
||||||
|
"there are enough available candidates"
|
||||||
|
);
|
||||||
|
|
||||||
minimum_reconnect_instant += MIN_PEER_CONNECTION_INTERVAL;
|
now = Instant::now();
|
||||||
|
assert!(
|
||||||
|
now >= minimum_reconnect_instant,
|
||||||
|
"all candidates should obey the minimum rate-limit: now: {:?} min: {:?}",
|
||||||
|
now,
|
||||||
|
minimum_reconnect_instant,
|
||||||
|
);
|
||||||
|
assert!(
|
||||||
|
now <= maximum_reconnect_instant,
|
||||||
|
"rate-limited candidates should not be delayed too long: now: {:?} max: {:?}. Hint: is the test machine overloaded?",
|
||||||
|
now,
|
||||||
|
maximum_reconnect_instant,
|
||||||
|
);
|
||||||
|
|
||||||
|
minimum_reconnect_instant = now + MIN_PEER_CONNECTION_INTERVAL;
|
||||||
|
maximum_reconnect_instant = now + MIN_PEER_CONNECTION_INTERVAL + MAX_SLEEP_EXTRA_DELAY;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue