Security: only apply the outbound connection rate-limit to actual connections (#2278)

* Only advance the outbound connection timer when it returns an address

Previously, we were advancing the timer even when we returned `None`.
This created large wait times when there were no eligible peers.

* Refactor to avoid overlapping sleep timers

* Add a maximum next peer delay test

Also refactor peer numbers into constants.

* Make the number of proptests overridable by the standard env var

Also cleanup the test constants.

* Test that skipping peer connections also skips their rate limits

* Allow an extra second after each sleep on loaded machines

macOS VMs seem to need this extra time to pass their tests.

* Restart test time bounds from the current time

This change avoids test failures due to cumulative errors.

Also use a single call to `Instant::now` for each test round.
And print the times when the tests fail.

* Stop generating invalid outbound peers in proptests

The candidate set proptests will fail if enough generated peers are
invalid for outbound connections.
This commit is contained in:
teor 2021-06-15 08:29:17 +10:00 committed by GitHub
parent 71b41f0206
commit 86f23f7960
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 129 additions and 23 deletions

View File

@ -0,0 +1,8 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc e27ea62986a1ace4e16dde4b99d8ae8557a6b1e92e3f9a7c6aba0d7b179eb817 # shrinks to peers = [MetaAddr { addr: 0.0.0.0:0, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 0.0.0.0:0, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 0.0.0.0:1151, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 129.65.108.101:9019, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 95.68.199.21:46887, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 167.206.253.131:43927, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 160.138.155.239:63430, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: [6a4e:a543:8c93:e074:8a2a:5cc4:96f8:ce95]:51071, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }], initial_candidates = 0, extra_candidates = 2
cc 788fe2c47a4af559e9a921764d1ce8fcfd0743361c19e6e7a06adc093bc19078 # shrinks to peers = [MetaAddr { addr: [::e9:8050:dbf7:5665]:27188, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 106.27.232.177:51742, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 127.0.0.1:30827, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 0.0.0.0:21948, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 0.0.0.0:38186, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: [4db2:96e6:38c5:fd62:e964:9338:4ce8:bfa0]:32686, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 0.0.0.0:22952, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 114.164.73.233:10203, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }], initial_candidates = 3, extra_candidates = 3

View File

@ -5,6 +5,8 @@ use super::{MetaAddr, PeerAddrState, PeerServices};
use zebra_chain::serialization::{arbitrary::canonical_socket_addr, DateTime32};
impl MetaAddr {
/// Create a strategy that generates [`MetaAddr`]s in the
/// [`PeerAddrState::NeverAttemptedGossiped`] state.
pub fn gossiped_strategy() -> BoxedStrategy<Self> {
(
canonical_socket_addr(),
@ -17,9 +19,31 @@ impl MetaAddr {
.boxed()
}
pub fn alternate_node_strategy() -> BoxedStrategy<Self> {
/// Create a strategy that generates [`MetaAddr`]s in the
/// [`PeerAddrState::NeverAttemptedAlternate`] state.
pub fn alternate_strategy() -> BoxedStrategy<Self> {
(canonical_socket_addr(), any::<PeerServices>())
.prop_map(|(socket_addr, services)| MetaAddr::new_alternate(&socket_addr, &services))
.boxed()
}
/// Create a strategy that generates [`MetaAddr`]s which are ready for outbound connections.
///
/// TODO: Generate all [`PeerAddrState`] variants, and give them ready fields.
///       (After PR #2276 merges.)
pub fn ready_outbound_strategy() -> BoxedStrategy<Self> {
    canonical_socket_addr()
        // `prop_filter_map` receives the generated address directly:
        // build the `MetaAddr` once here, and discard any candidate that
        // is not valid for an outbound connection.
        .prop_filter_map("failed MetaAddr::is_valid_for_outbound", |address| {
            // Alternate nodes use the current time, so they're always ready
            //
            // TODO: create a "Zebra supported services" constant
            let meta_addr = MetaAddr::new_alternate(&address, &PeerServices::NODE_NETWORK);
            if meta_addr.is_valid_for_outbound() {
                Some(meta_addr)
            } else {
                None
            }
        })
        .boxed()
}
}

View File

@ -1,7 +1,7 @@
use std::{cmp::min, mem, sync::Arc, time::Duration};
use futures::stream::{FuturesUnordered, StreamExt};
use tokio::time::{sleep, sleep_until, timeout, Instant, Sleep};
use tokio::time::{sleep, timeout, Instant, Sleep};
use tower::{Service, ServiceExt};
use zebra_chain::serialization::DateTime32;
@ -290,10 +290,6 @@ where
/// new peer connections are initiated at least
/// [`MIN_PEER_CONNECTION_INTERVAL`][constants::MIN_PEER_CONNECTION_INTERVAL] apart.
pub async fn next(&mut self) -> Option<MetaAddr> {
let current_deadline = self.wait_next_handshake.deadline().max(Instant::now());
let mut sleep = sleep_until(current_deadline + constants::MIN_PEER_CONNECTION_INTERVAL);
mem::swap(&mut self.wait_next_handshake, &mut sleep);
// # Correctness
//
// In this critical section, we hold the address mutex, blocking the
@ -316,8 +312,10 @@ where
reconnect
};
// SECURITY: rate-limit new candidate connections
sleep.await;
// SECURITY: rate-limit new outbound peer connections
(&mut self.wait_next_handshake).await;
let mut sleep = sleep(constants::MIN_PEER_CONNECTION_INTERVAL);
mem::swap(&mut self.wait_next_handshake, &mut sleep);
Some(reconnect)
}

View File

@ -1,8 +1,10 @@
use std::{
env,
sync::{Arc, Mutex},
time::{Duration, Instant},
};
use futures::FutureExt;
use proptest::{collection::vec, prelude::*};
use tokio::{
runtime::Runtime,
@ -18,11 +20,25 @@ use crate::{
Request, Response,
};
/// The maximum number of candidates for a "next peer" test.
const MAX_TEST_CANDIDATES: u32 = 4;

/// The number of random test addresses for each test.
// Twice the candidate limit, so both the initial and the extra candidate
// rounds have enough distinct peers to draw from.
const TEST_ADDRESSES: usize = 2 * MAX_TEST_CANDIDATES as usize;

/// The default number of proptest cases for each test that includes sleeps.
// Kept small because each case can sleep for multiple seconds.
const DEFAULT_SLEEP_TEST_PROPTEST_CASES: u32 = 16;

/// The largest extra delay we allow after every test sleep.
///
/// This makes the tests more reliable on machines with high CPU load.
const MAX_SLEEP_EXTRA_DELAY: Duration = Duration::from_secs(1);
proptest! {
/// Test that validated gossiped peers never have a `last_seen` time that's in the future.
#[test]
fn no_last_seen_times_are_in_the_future(
gossiped_peers in vec(MetaAddr::gossiped_strategy(), 1..10),
gossiped_peers in vec(MetaAddr::gossiped_strategy(), 1..TEST_ADDRESSES),
last_seen_limit in any::<DateTime32>(),
) {
zebra_test::init();
@ -33,17 +49,50 @@ proptest! {
prop_assert![peer.get_last_seen() <= last_seen_limit];
}
}
/// Test that the outbound peer connection rate limit is only applied when
/// a peer address is actually returned.
///
/// TODO: after PR #2275 merges, add a similar proptest,
/// using a "not ready for attempt" peer generation strategy
#[test]
fn skipping_outbound_peer_connection_skips_rate_limit(next_peer_attempts in 0..TEST_ADDRESSES) {
    zebra_test::init();

    let runtime = Runtime::new().expect("Failed to create Tokio runtime");
    // Enter the runtime context so `next()` can create tokio timers
    let _guard = runtime.enter();

    // The peer service should never be called: the candidate set is empty,
    // so `next()` must bail out before making any requests.
    let peer_service = tower::service_fn(|_| async {
        unreachable!("Mock peer service is never used");
    });

    // Since the address book is empty, there won't be any available peers
    let address_book = AddressBook::new(&Config::default(), Span::none());
    let mut candidate_set = CandidateSet::new(Arc::new(Mutex::new(address_book)), peer_service);

    // Make sure that the rate-limit is never triggered, even after multiple calls
    for _ in 0..next_peer_attempts {
        // An empty address book immediately returns "no next peer"
        //
        // `now_or_never` polls the future exactly once: if the rate-limit delay
        // were applied here, the future would be pending and this would be `None`.
        assert!(matches!(candidate_set.next().now_or_never(), Some(None)));
    }
}
}
proptest! {
#![proptest_config(ProptestConfig::with_cases(16))]
// These tests contain sleeps, so we use a small number of cases by default.
// Set the PROPTEST_CASES env var to override this default.
#![proptest_config(proptest::test_runner::Config::with_cases(env::var("PROPTEST_CASES")
.ok()
.and_then(|v| v.parse().ok())
.unwrap_or(DEFAULT_SLEEP_TEST_PROPTEST_CASES)))]
/// Test that new outbound peer connections are rate-limited.
#[test]
fn new_outbound_peer_connections_are_rate_limited(
peers in vec(MetaAddr::alternate_node_strategy(), 10),
initial_candidates in 0..4usize,
extra_candidates in 0..4usize,
peers in vec(MetaAddr::ready_outbound_strategy(), TEST_ADDRESSES),
initial_candidates in 0..MAX_TEST_CANDIDATES,
extra_candidates in 0..MAX_TEST_CANDIDATES,
) {
zebra_test::init();
@ -63,13 +112,18 @@ proptest! {
// Check rate limiting for initial peers
check_candidates_rate_limiting(&mut candidate_set, initial_candidates).await;
// Sleep more than the rate limiting delay
sleep(Duration::from_millis(400)).await;
sleep(MAX_TEST_CANDIDATES * MIN_PEER_CONNECTION_INTERVAL).await;
// Check that the next peers are still respecting the rate limiting, without causing a
// burst of reconnections
check_candidates_rate_limiting(&mut candidate_set, extra_candidates).await;
};
assert!(runtime.block_on(timeout(Duration::from_secs(10), checks)).is_ok());
// Allow enough time for the maximum number of candidates,
// plus some extra time for test machines with high CPU load.
// (The max delay asserts usually fail before hitting this timeout.)
let max_rate_limit_sleep = 3 * MAX_TEST_CANDIDATES * MIN_PEER_CONNECTION_INTERVAL;
let max_extra_delay = (2 * MAX_TEST_CANDIDATES + 1) * MAX_SLEEP_EXTRA_DELAY;
assert!(runtime.block_on(timeout(max_rate_limit_sleep + max_extra_delay, checks)).is_ok());
}
}
@ -77,19 +131,41 @@ proptest! {
///
/// # Panics
///
/// Will panic if a reconnection peer is returned too quickly, or if no reconnection peer is
/// returned.
async fn check_candidates_rate_limiting<S>(candidate_set: &mut CandidateSet<S>, candidates: usize)
/// Will panic if:
/// - a reconnection peer is returned too quickly,
/// - a reconnection peer is returned too slowly, or
/// - no reconnection peer is returned at all.
async fn check_candidates_rate_limiting<S>(candidate_set: &mut CandidateSet<S>, candidates: u32)
where
S: tower::Service<Request, Response = Response, Error = BoxError>,
S::Future: Send + 'static,
{
let mut minimum_reconnect_instant = Instant::now();
let mut now = Instant::now();
let mut minimum_reconnect_instant = now;
// Allow extra time for test machines with high CPU load
let mut maximum_reconnect_instant = now + MAX_SLEEP_EXTRA_DELAY;
for _ in 0..candidates {
assert!(candidate_set.next().await.is_some());
assert!(Instant::now() >= minimum_reconnect_instant);
assert!(
candidate_set.next().await.is_some(),
"there are enough available candidates"
);
minimum_reconnect_instant += MIN_PEER_CONNECTION_INTERVAL;
now = Instant::now();
assert!(
now >= minimum_reconnect_instant,
"all candidates should obey the minimum rate-limit: now: {:?} min: {:?}",
now,
minimum_reconnect_instant,
);
assert!(
now <= maximum_reconnect_instant,
"rate-limited candidates should not be delayed too long: now: {:?} max: {:?}. Hint: is the test machine overloaded?",
now,
maximum_reconnect_instant,
);
minimum_reconnect_instant = now + MIN_PEER_CONNECTION_INTERVAL;
maximum_reconnect_instant = now + MIN_PEER_CONNECTION_INTERVAL + MAX_SLEEP_EXTRA_DELAY;
}
}