zebra/zebra-network/src/address_book.rs

451 lines
15 KiB
Rust
Raw Normal View History

//! The `AddressBook` manages information about what peers exist, when they were
//! seen, and what services they provide.
use std::{
collections::{BTreeSet, HashMap},
iter::Extend,
net::SocketAddr,
time::Instant,
};
use tracing::Span;
use zebra_chain::serialization::canonical_socket_addr;
use crate::{meta_addr::MetaAddrChange, types::MetaAddr, PeerAddrState};
/// A database of peer listener addresses, their advertised services, and
/// information on when they were last seen.
///
/// # Security
///
/// Address book state must be based on outbound connections to peers.
///
/// If the address book is updated incorrectly:
/// - malicious peers can interfere with other peers' `AddressBook` state,
/// or
/// - Zebra can advertise unreachable addresses to its own peers.
///
/// ## Adding Addresses
///
/// The address book should only contain Zcash listener port addresses from peers
/// on the configured network. These addresses can come from:
/// - DNS seeders
/// - addresses gossiped by other peers
/// - the canonical address (`Version.address_from`) provided by each peer,
/// particularly peers on inbound connections.
///
/// The remote addresses of inbound connections must not be added to the address
/// book, because they contain ephemeral outbound ports, not listener ports.
///
/// Isolated connections must not add addresses or update the address book.
///
/// ## Updating Address State
///
/// Updates to address state must be based on outbound connections to peers.
///
/// Updates must not be based on:
/// - the remote addresses of inbound connections, or
/// - the canonical address of any connection.
#[derive(Clone, Debug)]
pub struct AddressBook {
/// Each known peer address has a matching `MetaAddr`.
by_addr: HashMap<SocketAddr, MetaAddr>,
/// The local listener address.
local_listener: SocketAddr,
/// The span for operations on this address book.
span: Span,
/// The last time we logged a message about the address metrics.
last_address_log: Option<Instant>,
}
/// Metrics about the states of the addresses in an [`AddressBook`].
#[derive(Debug)]
pub struct AddressMetrics {
/// The number of addresses in the `Responded` state.
responded: usize,
/// The number of addresses in the `NeverAttemptedGossiped` state.
never_attempted_gossiped: usize,
/// The number of addresses in the `NeverAttemptedAlternate` state.
never_attempted_alternate: usize,
/// The number of addresses in the `Failed` state.
failed: usize,
/// The number of addresses in the `AttemptPending` state.
attempt_pending: usize,
/// The number of `Responded` addresses within the liveness limit.
recently_live: usize,
/// The number of `Responded` addresses outside the liveness limit.
recently_stopped_responding: usize,
}
2020-02-04 22:53:24 -08:00
#[allow(clippy::len_without_is_empty)]
impl AddressBook {
/// Construct an [`AddressBook`] with the given `local_listener` and
/// [`tracing::Span`].
pub fn new(local_listener: SocketAddr, span: Span) -> AddressBook {
let constructor_span = span.clone();
let _guard = constructor_span.enter();
let mut new_book = AddressBook {
by_addr: HashMap::default(),
local_listener: canonical_socket_addr(local_listener),
span,
last_address_log: None,
};
new_book.update_metrics();
new_book
}
/// Construct an [`AddressBook`] with the given `local_listener`,
Security: Limit reconnection rate to individual peers (#2275) * Security: Limit reconnection rate to individual peers Reconnection Rate Limit the reconnection rate to each individual peer by applying the liveness cutoff to the attempt, responded, and failure time fields. If any field is recent, the peer is skipped. The new liveness cutoff skips any peers that have recently been attempted or failed. (Previously, the liveness check was only applied if the peer was in the `Responded` state, which could lead to repeated retries of `Failed` peers, particularly in small address books.) Reconnection Order Zebra prefers more useful peer states, then the earliest attempted, failed, and responded times, then the most recent gossiped last seen times. Before this change, Zebra took the most recent time in all the peer time fields, and used that time for liveness and ordering. This led to confusion between trusted and untrusted data, and success and failure times. Unlike the previous order, the new order: - tries all peers in each state, before re-trying any peer in that state, and - only checks the the gossiped untrusted last seen time if all other times are equal. * Preserve the later time if changes arrive out of order * Update CandidateSet::next documentation * Update CandidateSet state diagram * Fix variant names in comments * Explain why timestamps can be left out of MetaAddrChanges * Add a simple test for the individual peer retry limit * Only generate valid Arbitrary PeerServices values * Add an individual peer retry limit AddressBook and CandidateSet test * Stop deleting recently live addresses from the address book If we delete recently live addresses from the address book, we can get a new entry for them, and reconnect too rapidly. * Rename functions to match similar tokio API * Fix docs for service sorting * Clarify a comment * Cleanup a variable and comments * Remove blank lines in the CandidateSet state diagram * Add a multi-peer proptest that checks outbound attempt fairness * Fix a comment typo Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com> * Simplify time maths in MetaAddr * Create a Duration32 type to simplify calculations and comparisons * Rename variables for clarity * Split a string constant into multiple lines * Make constants match rustdoc order Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
2021-06-18 05:30:44 -07:00
/// [`tracing::Span`], and addresses.
///
/// This constructor can be used to break address book invariants,
/// so it should only be used in tests.
#[cfg(any(test, feature = "proptest-impl"))]
pub fn new_with_addrs(
local_listener: SocketAddr,
Security: Limit reconnection rate to individual peers (#2275) * Security: Limit reconnection rate to individual peers Reconnection Rate Limit the reconnection rate to each individual peer by applying the liveness cutoff to the attempt, responded, and failure time fields. If any field is recent, the peer is skipped. The new liveness cutoff skips any peers that have recently been attempted or failed. (Previously, the liveness check was only applied if the peer was in the `Responded` state, which could lead to repeated retries of `Failed` peers, particularly in small address books.) Reconnection Order Zebra prefers more useful peer states, then the earliest attempted, failed, and responded times, then the most recent gossiped last seen times. Before this change, Zebra took the most recent time in all the peer time fields, and used that time for liveness and ordering. This led to confusion between trusted and untrusted data, and success and failure times. Unlike the previous order, the new order: - tries all peers in each state, before re-trying any peer in that state, and - only checks the the gossiped untrusted last seen time if all other times are equal. * Preserve the later time if changes arrive out of order * Update CandidateSet::next documentation * Update CandidateSet state diagram * Fix variant names in comments * Explain why timestamps can be left out of MetaAddrChanges * Add a simple test for the individual peer retry limit * Only generate valid Arbitrary PeerServices values * Add an individual peer retry limit AddressBook and CandidateSet test * Stop deleting recently live addresses from the address book If we delete recently live addresses from the address book, we can get a new entry for them, and reconnect too rapidly. * Rename functions to match similar tokio API * Fix docs for service sorting * Clarify a comment * Cleanup a variable and comments * Remove blank lines in the CandidateSet state diagram * Add a multi-peer proptest that checks outbound attempt fairness * Fix a comment typo Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com> * Simplify time maths in MetaAddr * Create a Duration32 type to simplify calculations and comparisons * Rename variables for clarity * Split a string constant into multiple lines * Make constants match rustdoc order Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
2021-06-18 05:30:44 -07:00
span: Span,
addrs: impl IntoIterator<Item = MetaAddr>,
) -> AddressBook {
let mut new_book = AddressBook::new(local_listener, span);
Security: Limit reconnection rate to individual peers (#2275) * Security: Limit reconnection rate to individual peers Reconnection Rate Limit the reconnection rate to each individual peer by applying the liveness cutoff to the attempt, responded, and failure time fields. If any field is recent, the peer is skipped. The new liveness cutoff skips any peers that have recently been attempted or failed. (Previously, the liveness check was only applied if the peer was in the `Responded` state, which could lead to repeated retries of `Failed` peers, particularly in small address books.) Reconnection Order Zebra prefers more useful peer states, then the earliest attempted, failed, and responded times, then the most recent gossiped last seen times. Before this change, Zebra took the most recent time in all the peer time fields, and used that time for liveness and ordering. This led to confusion between trusted and untrusted data, and success and failure times. Unlike the previous order, the new order: - tries all peers in each state, before re-trying any peer in that state, and - only checks the the gossiped untrusted last seen time if all other times are equal. * Preserve the later time if changes arrive out of order * Update CandidateSet::next documentation * Update CandidateSet state diagram * Fix variant names in comments * Explain why timestamps can be left out of MetaAddrChanges * Add a simple test for the individual peer retry limit * Only generate valid Arbitrary PeerServices values * Add an individual peer retry limit AddressBook and CandidateSet test * Stop deleting recently live addresses from the address book If we delete recently live addresses from the address book, we can get a new entry for them, and reconnect too rapidly. * Rename functions to match similar tokio API * Fix docs for service sorting * Clarify a comment * Cleanup a variable and comments * Remove blank lines in the CandidateSet state diagram * Add a multi-peer proptest that checks outbound attempt fairness * Fix a comment typo Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com> * Simplify time maths in MetaAddr * Create a Duration32 type to simplify calculations and comparisons * Rename variables for clarity * Split a string constant into multiple lines * Make constants match rustdoc order Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
2021-06-18 05:30:44 -07:00
let addrs = addrs
.into_iter()
.map(|mut meta_addr| {
meta_addr.addr = canonical_socket_addr(meta_addr.addr);
meta_addr
})
.filter(MetaAddr::address_is_valid_for_outbound)
Security: Limit reconnection rate to individual peers (#2275) * Security: Limit reconnection rate to individual peers Reconnection Rate Limit the reconnection rate to each individual peer by applying the liveness cutoff to the attempt, responded, and failure time fields. If any field is recent, the peer is skipped. The new liveness cutoff skips any peers that have recently been attempted or failed. (Previously, the liveness check was only applied if the peer was in the `Responded` state, which could lead to repeated retries of `Failed` peers, particularly in small address books.) Reconnection Order Zebra prefers more useful peer states, then the earliest attempted, failed, and responded times, then the most recent gossiped last seen times. Before this change, Zebra took the most recent time in all the peer time fields, and used that time for liveness and ordering. This led to confusion between trusted and untrusted data, and success and failure times. Unlike the previous order, the new order: - tries all peers in each state, before re-trying any peer in that state, and - only checks the the gossiped untrusted last seen time if all other times are equal. * Preserve the later time if changes arrive out of order * Update CandidateSet::next documentation * Update CandidateSet state diagram * Fix variant names in comments * Explain why timestamps can be left out of MetaAddrChanges * Add a simple test for the individual peer retry limit * Only generate valid Arbitrary PeerServices values * Add an individual peer retry limit AddressBook and CandidateSet test * Stop deleting recently live addresses from the address book If we delete recently live addresses from the address book, we can get a new entry for them, and reconnect too rapidly. * Rename functions to match similar tokio API * Fix docs for service sorting * Clarify a comment * Cleanup a variable and comments * Remove blank lines in the CandidateSet state diagram * Add a multi-peer proptest that checks outbound attempt fairness * Fix a comment typo Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com> * Simplify time maths in MetaAddr * Create a Duration32 type to simplify calculations and comparisons * Rename variables for clarity * Split a string constant into multiple lines * Make constants match rustdoc order Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
2021-06-18 05:30:44 -07:00
.map(|meta_addr| (meta_addr.addr, meta_addr));
new_book.by_addr.extend(addrs);
new_book.update_metrics();
new_book
}
/// Get the local listener address.
///
/// This address contains minimal state, but it is not sanitized.
pub fn local_listener_meta_addr(&self) -> MetaAddr {
MetaAddr::new_local_listener_change(&self.local_listener)
.into_new_meta_addr()
.expect("unexpected invalid new local listener addr")
}
/// Get the contents of `self` in random order with sanitized timestamps.
pub fn sanitized(&self) -> Vec<MetaAddr> {
use rand::seq::SliceRandom;
let _guard = self.span.enter();
let mut peers = self.by_addr.clone();
// Unconditionally add our local listener address to the advertised peers,
// to replace any self-connection failures. The address book and change
// constructors make sure that the SocketAddr is canonical.
let local_listener = self.local_listener_meta_addr();
peers.insert(local_listener.addr, local_listener);
// Then sanitize and shuffle
let mut peers = peers
.values()
.filter_map(MetaAddr::sanitize)
.collect::<Vec<_>>();
peers.shuffle(&mut rand::thread_rng());
peers
}
/// Apply `change` to the address book, returning the updated `MetaAddr`,
/// if the change was valid.
///
/// # Correctness
///
/// All changes should go through `update`, so that the address book
/// only contains valid outbound addresses.
Security: Limit reconnection rate to individual peers (#2275) * Security: Limit reconnection rate to individual peers Reconnection Rate Limit the reconnection rate to each individual peer by applying the liveness cutoff to the attempt, responded, and failure time fields. If any field is recent, the peer is skipped. The new liveness cutoff skips any peers that have recently been attempted or failed. (Previously, the liveness check was only applied if the peer was in the `Responded` state, which could lead to repeated retries of `Failed` peers, particularly in small address books.) Reconnection Order Zebra prefers more useful peer states, then the earliest attempted, failed, and responded times, then the most recent gossiped last seen times. Before this change, Zebra took the most recent time in all the peer time fields, and used that time for liveness and ordering. This led to confusion between trusted and untrusted data, and success and failure times. Unlike the previous order, the new order: - tries all peers in each state, before re-trying any peer in that state, and - only checks the the gossiped untrusted last seen time if all other times are equal. * Preserve the later time if changes arrive out of order * Update CandidateSet::next documentation * Update CandidateSet state diagram * Fix variant names in comments * Explain why timestamps can be left out of MetaAddrChanges * Add a simple test for the individual peer retry limit * Only generate valid Arbitrary PeerServices values * Add an individual peer retry limit AddressBook and CandidateSet test * Stop deleting recently live addresses from the address book If we delete recently live addresses from the address book, we can get a new entry for them, and reconnect too rapidly. * Rename functions to match similar tokio API * Fix docs for service sorting * Clarify a comment * Cleanup a variable and comments * Remove blank lines in the CandidateSet state diagram * Add a multi-peer proptest that checks outbound attempt fairness * Fix a comment typo Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com> * Simplify time maths in MetaAddr * Create a Duration32 type to simplify calculations and comparisons * Rename variables for clarity * Split a string constant into multiple lines * Make constants match rustdoc order Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
2021-06-18 05:30:44 -07:00
///
/// Change addresses must be canonical `SocketAddr`s. This makes sure that
/// each address book entry has a unique IP address.
///
Security: Limit reconnection rate to individual peers (#2275) * Security: Limit reconnection rate to individual peers Reconnection Rate Limit the reconnection rate to each individual peer by applying the liveness cutoff to the attempt, responded, and failure time fields. If any field is recent, the peer is skipped. The new liveness cutoff skips any peers that have recently been attempted or failed. (Previously, the liveness check was only applied if the peer was in the `Responded` state, which could lead to repeated retries of `Failed` peers, particularly in small address books.) Reconnection Order Zebra prefers more useful peer states, then the earliest attempted, failed, and responded times, then the most recent gossiped last seen times. Before this change, Zebra took the most recent time in all the peer time fields, and used that time for liveness and ordering. This led to confusion between trusted and untrusted data, and success and failure times. Unlike the previous order, the new order: - tries all peers in each state, before re-trying any peer in that state, and - only checks the the gossiped untrusted last seen time if all other times are equal. * Preserve the later time if changes arrive out of order * Update CandidateSet::next documentation * Update CandidateSet state diagram * Fix variant names in comments * Explain why timestamps can be left out of MetaAddrChanges * Add a simple test for the individual peer retry limit * Only generate valid Arbitrary PeerServices values * Add an individual peer retry limit AddressBook and CandidateSet test * Stop deleting recently live addresses from the address book If we delete recently live addresses from the address book, we can get a new entry for them, and reconnect too rapidly. * Rename functions to match similar tokio API * Fix docs for service sorting * Clarify a comment * Cleanup a variable and comments * Remove blank lines in the CandidateSet state diagram * Add a multi-peer proptest that checks outbound attempt fairness * Fix a comment typo Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com> * Simplify time maths in MetaAddr * Create a Duration32 type to simplify calculations and comparisons * Rename variables for clarity * Split a string constant into multiple lines * Make constants match rustdoc order Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
2021-06-18 05:30:44 -07:00
/// # Security
///
/// This function must apply every attempted, responded, and failed change
/// to the address book. This prevents rapid reconnections to the same peer.
///
/// As an exception, this function can ignore all changes for specific
/// [`SocketAddr`]s. Ignored addresses will never be used to connect to
/// peers.
pub fn update(&mut self, change: MetaAddrChange) -> Option<MetaAddr> {
let _guard = self.span.enter();
let previous = self.by_addr.get(&change.addr()).cloned();
let updated = change.apply_to_meta_addr(previous);
trace!(
?change,
?updated,
?previous,
total_peers = self.by_addr.len(),
recent_peers = self.recently_live_peers().count(),
);
if let Some(updated) = updated {
Security: Limit reconnection rate to individual peers (#2275) * Security: Limit reconnection rate to individual peers Reconnection Rate Limit the reconnection rate to each individual peer by applying the liveness cutoff to the attempt, responded, and failure time fields. If any field is recent, the peer is skipped. The new liveness cutoff skips any peers that have recently been attempted or failed. (Previously, the liveness check was only applied if the peer was in the `Responded` state, which could lead to repeated retries of `Failed` peers, particularly in small address books.) Reconnection Order Zebra prefers more useful peer states, then the earliest attempted, failed, and responded times, then the most recent gossiped last seen times. Before this change, Zebra took the most recent time in all the peer time fields, and used that time for liveness and ordering. This led to confusion between trusted and untrusted data, and success and failure times. Unlike the previous order, the new order: - tries all peers in each state, before re-trying any peer in that state, and - only checks the the gossiped untrusted last seen time if all other times are equal. * Preserve the later time if changes arrive out of order * Update CandidateSet::next documentation * Update CandidateSet state diagram * Fix variant names in comments * Explain why timestamps can be left out of MetaAddrChanges * Add a simple test for the individual peer retry limit * Only generate valid Arbitrary PeerServices values * Add an individual peer retry limit AddressBook and CandidateSet test * Stop deleting recently live addresses from the address book If we delete recently live addresses from the address book, we can get a new entry for them, and reconnect too rapidly. * Rename functions to match similar tokio API * Fix docs for service sorting * Clarify a comment * Cleanup a variable and comments * Remove blank lines in the CandidateSet state diagram * Add a multi-peer proptest that checks outbound attempt fairness * Fix a comment typo Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com> * Simplify time maths in MetaAddr * Create a Duration32 type to simplify calculations and comparisons * Rename variables for clarity * Split a string constant into multiple lines * Make constants match rustdoc order Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
2021-06-18 05:30:44 -07:00
// Ignore invalid outbound addresses.
// (Inbound connections can be monitored via Zebra's metrics.)
if !updated.address_is_valid_for_outbound() {
return None;
}
Security: Limit reconnection rate to individual peers (#2275) * Security: Limit reconnection rate to individual peers Reconnection Rate Limit the reconnection rate to each individual peer by applying the liveness cutoff to the attempt, responded, and failure time fields. If any field is recent, the peer is skipped. The new liveness cutoff skips any peers that have recently been attempted or failed. (Previously, the liveness check was only applied if the peer was in the `Responded` state, which could lead to repeated retries of `Failed` peers, particularly in small address books.) Reconnection Order Zebra prefers more useful peer states, then the earliest attempted, failed, and responded times, then the most recent gossiped last seen times. Before this change, Zebra took the most recent time in all the peer time fields, and used that time for liveness and ordering. This led to confusion between trusted and untrusted data, and success and failure times. Unlike the previous order, the new order: - tries all peers in each state, before re-trying any peer in that state, and - only checks the the gossiped untrusted last seen time if all other times are equal. * Preserve the later time if changes arrive out of order * Update CandidateSet::next documentation * Update CandidateSet state diagram * Fix variant names in comments * Explain why timestamps can be left out of MetaAddrChanges * Add a simple test for the individual peer retry limit * Only generate valid Arbitrary PeerServices values * Add an individual peer retry limit AddressBook and CandidateSet test * Stop deleting recently live addresses from the address book If we delete recently live addresses from the address book, we can get a new entry for them, and reconnect too rapidly. * Rename functions to match similar tokio API * Fix docs for service sorting * Clarify a comment * Cleanup a variable and comments * Remove blank lines in the CandidateSet state diagram * Add a multi-peer proptest that checks outbound attempt fairness * Fix a comment typo Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com> * Simplify time maths in MetaAddr * Create a Duration32 type to simplify calculations and comparisons * Rename variables for clarity * Split a string constant into multiple lines * Make constants match rustdoc order Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
2021-06-18 05:30:44 -07:00
// Ignore invalid outbound services and other info,
// but only if the peer has never been attempted.
//
Security: Limit reconnection rate to individual peers (#2275) * Security: Limit reconnection rate to individual peers Reconnection Rate Limit the reconnection rate to each individual peer by applying the liveness cutoff to the attempt, responded, and failure time fields. If any field is recent, the peer is skipped. The new liveness cutoff skips any peers that have recently been attempted or failed. (Previously, the liveness check was only applied if the peer was in the `Responded` state, which could lead to repeated retries of `Failed` peers, particularly in small address books.) Reconnection Order Zebra prefers more useful peer states, then the earliest attempted, failed, and responded times, then the most recent gossiped last seen times. Before this change, Zebra took the most recent time in all the peer time fields, and used that time for liveness and ordering. This led to confusion between trusted and untrusted data, and success and failure times. Unlike the previous order, the new order: - tries all peers in each state, before re-trying any peer in that state, and - only checks the the gossiped untrusted last seen time if all other times are equal. * Preserve the later time if changes arrive out of order * Update CandidateSet::next documentation * Update CandidateSet state diagram * Fix variant names in comments * Explain why timestamps can be left out of MetaAddrChanges * Add a simple test for the individual peer retry limit * Only generate valid Arbitrary PeerServices values * Add an individual peer retry limit AddressBook and CandidateSet test * Stop deleting recently live addresses from the address book If we delete recently live addresses from the address book, we can get a new entry for them, and reconnect too rapidly. * Rename functions to match similar tokio API * Fix docs for service sorting * Clarify a comment * Cleanup a variable and comments * Remove blank lines in the CandidateSet state diagram * Add a multi-peer proptest that checks outbound attempt fairness * Fix a comment typo Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com> * Simplify time maths in MetaAddr * Create a Duration32 type to simplify calculations and comparisons * Rename variables for clarity * Split a string constant into multiple lines * Make constants match rustdoc order Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
2021-06-18 05:30:44 -07:00
// Otherwise, if we got the info directly from the peer,
// store it in the address book, so we know not to reconnect.
//
// TODO: delete peers with invalid info when they get too old (#1873)
if !updated.last_known_info_is_valid_for_outbound()
&& updated.last_connection_state.is_never_attempted()
{
return None;
}
self.by_addr.insert(updated.addr, updated);
std::mem::drop(_guard);
self.update_metrics();
}
updated
}
/// Removes the entry with `addr`, returning it if it exists
///
/// # Note
///
/// All address removals should go through `take`, so that the address
/// book metrics are accurate.
#[allow(dead_code)]
fn take(&mut self, removed_addr: SocketAddr) -> Option<MetaAddr> {
let _guard = self.span.enter();
trace!(
?removed_addr,
total_peers = self.by_addr.len(),
recent_peers = self.recently_live_peers().count(),
);
if let Some(entry) = self.by_addr.remove(&removed_addr) {
std::mem::drop(_guard);
self.update_metrics();
Some(entry)
} else {
None
}
}
/// Returns true if the given [`SocketAddr`] has recently sent us a message.
pub fn recently_live_addr(&self, addr: &SocketAddr) -> bool {
let _guard = self.span.enter();
match self.by_addr.get(addr) {
None => false,
// NeverAttempted, Failed, and AttemptPending peers should never be live
Some(peer) => {
Security: Limit reconnection rate to individual peers (#2275) * Security: Limit reconnection rate to individual peers Reconnection Rate Limit the reconnection rate to each individual peer by applying the liveness cutoff to the attempt, responded, and failure time fields. If any field is recent, the peer is skipped. The new liveness cutoff skips any peers that have recently been attempted or failed. (Previously, the liveness check was only applied if the peer was in the `Responded` state, which could lead to repeated retries of `Failed` peers, particularly in small address books.) Reconnection Order Zebra prefers more useful peer states, then the earliest attempted, failed, and responded times, then the most recent gossiped last seen times. Before this change, Zebra took the most recent time in all the peer time fields, and used that time for liveness and ordering. This led to confusion between trusted and untrusted data, and success and failure times. Unlike the previous order, the new order: - tries all peers in each state, before re-trying any peer in that state, and - only checks the the gossiped untrusted last seen time if all other times are equal. * Preserve the later time if changes arrive out of order * Update CandidateSet::next documentation * Update CandidateSet state diagram * Fix variant names in comments * Explain why timestamps can be left out of MetaAddrChanges * Add a simple test for the individual peer retry limit * Only generate valid Arbitrary PeerServices values * Add an individual peer retry limit AddressBook and CandidateSet test * Stop deleting recently live addresses from the address book If we delete recently live addresses from the address book, we can get a new entry for them, and reconnect too rapidly. * Rename functions to match similar tokio API * Fix docs for service sorting * Clarify a comment * Cleanup a variable and comments * Remove blank lines in the CandidateSet state diagram * Add a multi-peer proptest that checks outbound attempt fairness * Fix a comment typo Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com> * Simplify time maths in MetaAddr * Create a Duration32 type to simplify calculations and comparisons * Rename variables for clarity * Split a string constant into multiple lines * Make constants match rustdoc order Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
2021-06-18 05:30:44 -07:00
peer.last_connection_state == PeerAddrState::Responded && peer.was_recently_live()
}
}
}
/// Returns true if the given [`SocketAddr`] is pending a reconnection
/// attempt.
pub fn pending_reconnection_addr(&self, addr: &SocketAddr) -> bool {
let _guard = self.span.enter();
match self.by_addr.get(addr) {
2020-02-04 22:53:24 -08:00
None => false,
Some(peer) => peer.last_connection_state == PeerAddrState::AttemptPending,
}
}
/// Return an iterator over all peers.
///
/// Returns peers in reconnection attempt order, then recently live peers in
/// an arbitrary order.
pub fn peers(&'_ self) -> impl Iterator<Item = MetaAddr> + '_ {
let _guard = self.span.enter();
self.reconnection_peers()
.chain(self.maybe_connected_peers())
}
/// Return an iterator over peers that are due for a reconnection attempt,
/// in reconnection attempt order.
pub fn reconnection_peers(&'_ self) -> impl Iterator<Item = MetaAddr> + '_ {
let _guard = self.span.enter();
// TODO: optimise, if needed, or get rid of older peers
// Skip live peers, and peers pending a reconnect attempt, then sort using BTreeSet
self.by_addr
.values()
Security: Limit reconnection rate to individual peers (#2275) * Security: Limit reconnection rate to individual peers Reconnection Rate Limit the reconnection rate to each individual peer by applying the liveness cutoff to the attempt, responded, and failure time fields. If any field is recent, the peer is skipped. The new liveness cutoff skips any peers that have recently been attempted or failed. (Previously, the liveness check was only applied if the peer was in the `Responded` state, which could lead to repeated retries of `Failed` peers, particularly in small address books.) Reconnection Order Zebra prefers more useful peer states, then the earliest attempted, failed, and responded times, then the most recent gossiped last seen times. Before this change, Zebra took the most recent time in all the peer time fields, and used that time for liveness and ordering. This led to confusion between trusted and untrusted data, and success and failure times. Unlike the previous order, the new order: - tries all peers in each state, before re-trying any peer in that state, and - only checks the the gossiped untrusted last seen time if all other times are equal. * Preserve the later time if changes arrive out of order * Update CandidateSet::next documentation * Update CandidateSet state diagram * Fix variant names in comments * Explain why timestamps can be left out of MetaAddrChanges * Add a simple test for the individual peer retry limit * Only generate valid Arbitrary PeerServices values * Add an individual peer retry limit AddressBook and CandidateSet test * Stop deleting recently live addresses from the address book If we delete recently live addresses from the address book, we can get a new entry for them, and reconnect too rapidly. * Rename functions to match similar tokio API * Fix docs for service sorting * Clarify a comment * Cleanup a variable and comments * Remove blank lines in the CandidateSet state diagram * Add a multi-peer proptest that checks outbound attempt fairness * Fix a comment typo Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com> * Simplify time maths in MetaAddr * Create a Duration32 type to simplify calculations and comparisons * Rename variables for clarity * Split a string constant into multiple lines * Make constants match rustdoc order Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
2021-06-18 05:30:44 -07:00
.filter(|peer| peer.is_ready_for_attempt())
.collect::<BTreeSet<_>>()
.into_iter()
.cloned()
}
/// Return an iterator over all the peers in `state`, in arbitrary order.
pub fn state_peers(&'_ self, state: PeerAddrState) -> impl Iterator<Item = MetaAddr> + '_ {
let _guard = self.span.enter();
self.by_addr
.values()
.filter(move |peer| peer.last_connection_state == state)
.cloned()
}
/// Return an iterator over peers that might be connected, in arbitrary
/// order.
pub fn maybe_connected_peers(&'_ self) -> impl Iterator<Item = MetaAddr> + '_ {
let _guard = self.span.enter();
self.by_addr
.values()
Security: Limit reconnection rate to individual peers (#2275) * Security: Limit reconnection rate to individual peers Reconnection Rate Limit the reconnection rate to each individual peer by applying the liveness cutoff to the attempt, responded, and failure time fields. If any field is recent, the peer is skipped. The new liveness cutoff skips any peers that have recently been attempted or failed. (Previously, the liveness check was only applied if the peer was in the `Responded` state, which could lead to repeated retries of `Failed` peers, particularly in small address books.) Reconnection Order Zebra prefers more useful peer states, then the earliest attempted, failed, and responded times, then the most recent gossiped last seen times. Before this change, Zebra took the most recent time in all the peer time fields, and used that time for liveness and ordering. This led to confusion between trusted and untrusted data, and success and failure times. Unlike the previous order, the new order: - tries all peers in each state, before re-trying any peer in that state, and - only checks the the gossiped untrusted last seen time if all other times are equal. * Preserve the later time if changes arrive out of order * Update CandidateSet::next documentation * Update CandidateSet state diagram * Fix variant names in comments * Explain why timestamps can be left out of MetaAddrChanges * Add a simple test for the individual peer retry limit * Only generate valid Arbitrary PeerServices values * Add an individual peer retry limit AddressBook and CandidateSet test * Stop deleting recently live addresses from the address book If we delete recently live addresses from the address book, we can get a new entry for them, and reconnect too rapidly. * Rename functions to match similar tokio API * Fix docs for service sorting * Clarify a comment * Cleanup a variable and comments * Remove blank lines in the CandidateSet state diagram * Add a multi-peer proptest that checks outbound attempt fairness * Fix a comment typo Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com> * Simplify time maths in MetaAddr * Create a Duration32 type to simplify calculations and comparisons * Rename variables for clarity * Split a string constant into multiple lines * Make constants match rustdoc order Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
2021-06-18 05:30:44 -07:00
.filter(|peer| !peer.is_ready_for_attempt())
.cloned()
}
/// Return an iterator over peers we've seen recently, in arbitrary order.
pub fn recently_live_peers(&'_ self) -> impl Iterator<Item = MetaAddr> + '_ {
let _guard = self.span.enter();
self.by_addr
.values()
.filter(move |peer| self.recently_live_addr(&peer.addr))
.cloned()
}
/// Returns the number of entries in this address book.
pub fn len(&self) -> usize {
self.by_addr.len()
}
/// Returns metrics for the addresses in this address book.
pub fn address_metrics(&self) -> AddressMetrics {
let responded = self.state_peers(PeerAddrState::Responded).count();
let never_attempted_gossiped = self
.state_peers(PeerAddrState::NeverAttemptedGossiped)
.count();
let never_attempted_alternate = self
.state_peers(PeerAddrState::NeverAttemptedAlternate)
.count();
let failed = self.state_peers(PeerAddrState::Failed).count();
let attempt_pending = self.state_peers(PeerAddrState::AttemptPending).count();
let recently_live = self.recently_live_peers().count();
let recently_stopped_responding = responded
.checked_sub(recently_live)
.expect("all recently live peers must have responded");
AddressMetrics {
responded,
never_attempted_gossiped,
never_attempted_alternate,
failed,
attempt_pending,
recently_live,
recently_stopped_responding,
}
}
/// Update the metrics for this address book.
fn update_metrics(&mut self) {
let _guard = self.span.enter();
let m = self.address_metrics();
// TODO: rename to address_book.[state_name]
metrics::gauge!("candidate_set.responded", m.responded as f64);
metrics::gauge!("candidate_set.gossiped", m.never_attempted_gossiped as f64);
metrics::gauge!(
"candidate_set.alternate",
m.never_attempted_alternate as f64
);
metrics::gauge!("candidate_set.failed", m.failed as f64);
metrics::gauge!("candidate_set.pending", m.attempt_pending as f64);
// TODO: rename to address_book.responded.recently_live
metrics::gauge!("candidate_set.recently_live", m.recently_live as f64);
// TODO: rename to address_book.responded.stopped_responding
metrics::gauge!(
"candidate_set.disconnected",
m.recently_stopped_responding as f64
);
std::mem::drop(_guard);
self.log_metrics(&m);
}
/// Log metrics for this address book
fn log_metrics(&mut self, m: &AddressMetrics) {
let _guard = self.span.enter();
trace!(
address_metrics = ?m,
);
if m.responded > 0 {
return;
}
// These logs are designed to be human-readable in a terminal, at the
// default Zebra log level. If you need to know address states for
// every request, use the trace-level logs, or the metrics exporter.
if let Some(last_address_log) = self.last_address_log {
// Avoid duplicate address logs
if Instant::now().duration_since(last_address_log).as_secs() < 60 {
return;
}
} else {
// Suppress initial logs until the peer set has started up.
// There can be multiple address changes before the first peer has
// responded.
self.last_address_log = Some(Instant::now());
return;
}
self.last_address_log = Some(Instant::now());
// if all peers have failed
if m.responded
+ m.attempt_pending
+ m.never_attempted_gossiped
+ m.never_attempted_alternate
== 0
{
warn!(
address_metrics = ?m,
"all peer addresses have failed. Hint: check your network connection"
);
} else {
info!(
address_metrics = ?m,
"no active peer connections: trying gossiped addresses"
);
}
}
}
impl Extend<MetaAddrChange> for AddressBook {
fn extend<T>(&mut self, iter: T)
where
T: IntoIterator<Item = MetaAddrChange>,
{
for change in iter.into_iter() {
self.update(change);
}
}
}