zebra/zebra-network/src/address_book.rs

505 lines
17 KiB
Rust

//! The `AddressBook` manages information about what peers exist, when they were
//! seen, and what services they provide.
use std::{cmp::Reverse, iter::Extend, net::SocketAddr, time::Instant};
use chrono::Utc;
use ordered_map::OrderedMap;
use tracing::Span;
use crate::{
meta_addr::MetaAddrChange, protocol::external::canonical_socket_addr, types::MetaAddr,
PeerAddrState,
};
#[cfg(test)]
mod tests;
/// A database of peer listener addresses, their advertised services, and
/// information on when they were last seen.
///
/// # Security
///
/// Address book state must be based on outbound connections to peers.
///
/// If the address book is updated incorrectly:
/// - malicious peers can interfere with other peers' `AddressBook` state,
/// or
/// - Zebra can advertise unreachable addresses to its own peers.
///
/// ## Adding Addresses
///
/// The address book should only contain Zcash listener port addresses from peers
/// on the configured network. These addresses can come from:
/// - DNS seeders
/// - addresses gossiped by other peers
/// - the canonical address (`Version.address_from`) provided by each peer,
/// particularly peers on inbound connections.
///
/// The remote addresses of inbound connections must not be added to the address
/// book, because they contain ephemeral outbound ports, not listener ports.
///
/// Isolated connections must not add addresses or update the address book.
///
/// ## Updating Address State
///
/// Updates to address state must be based on outbound connections to peers.
///
/// Updates must not be based on:
/// - the remote addresses of inbound connections, or
/// - the canonical address of any connection.
#[derive(Clone, Debug)]
pub struct AddressBook {
/// Peer listener addresses, suitable for outbound connections,
/// in connection attempt order.
///
/// Some peers in this list might have open outbound or inbound connections.
///
/// We reverse the comparison order, because the standard library ([`BTreeMap`])
/// sorts in ascending order, but [`OrderedMap`] sorts in descending order.
by_addr: OrderedMap<SocketAddr, MetaAddr, Reverse<MetaAddr>>,
/// The local listener address.
local_listener: SocketAddr,
/// The span for operations on this address book.
span: Span,
/// The last time we logged a message about the address metrics.
last_address_log: Option<Instant>,
}
/// Metrics about the states of the addresses in an [`AddressBook`].
#[derive(Debug)]
pub struct AddressMetrics {
/// The number of addresses in the `Responded` state.
responded: usize,
/// The number of addresses in the `NeverAttemptedGossiped` state.
never_attempted_gossiped: usize,
/// The number of addresses in the `NeverAttemptedAlternate` state.
never_attempted_alternate: usize,
/// The number of addresses in the `Failed` state.
failed: usize,
/// The number of addresses in the `AttemptPending` state.
attempt_pending: usize,
/// The number of `Responded` addresses within the liveness limit.
recently_live: usize,
/// The number of `Responded` addresses outside the liveness limit.
recently_stopped_responding: usize,
}
#[allow(clippy::len_without_is_empty)]
impl AddressBook {
/// Construct an [`AddressBook`] with the given `local_listener` and
/// [`tracing::Span`].
pub fn new(local_listener: SocketAddr, span: Span) -> AddressBook {
let constructor_span = span.clone();
let _guard = constructor_span.enter();
let instant_now = Instant::now();
let chrono_now = Utc::now();
let mut new_book = AddressBook {
by_addr: OrderedMap::new(|meta_addr| Reverse(*meta_addr)),
local_listener: canonical_socket_addr(local_listener),
span,
last_address_log: None,
};
new_book.update_metrics(instant_now, chrono_now);
new_book
}
/// Construct an [`AddressBook`] with the given `local_listener`,
/// [`tracing::Span`], and addresses.
///
/// If there are multiple [`MetaAddr`]s with the same address,
/// an arbitrary address is inserted into the address book,
/// and the rest are dropped.
///
/// This constructor can be used to break address book invariants,
/// so it should only be used in tests.
#[cfg(any(test, feature = "proptest-impl"))]
pub fn new_with_addrs(
local_listener: SocketAddr,
span: Span,
addrs: impl IntoIterator<Item = MetaAddr>,
) -> AddressBook {
let constructor_span = span.clone();
let _guard = constructor_span.enter();
let instant_now = Instant::now();
let chrono_now = Utc::now();
let mut new_book = AddressBook::new(local_listener, span);
let addrs = addrs
.into_iter()
.map(|mut meta_addr| {
meta_addr.addr = canonical_socket_addr(meta_addr.addr);
meta_addr
})
.filter(MetaAddr::address_is_valid_for_outbound)
.map(|meta_addr| (meta_addr.addr, meta_addr));
for (socket_addr, meta_addr) in addrs {
// overwrite any duplicate addresses
new_book.by_addr.insert(socket_addr, meta_addr);
}
new_book.update_metrics(instant_now, chrono_now);
new_book
}
/// Get the local listener address.
///
/// This address contains minimal state, but it is not sanitized.
pub fn local_listener_meta_addr(&self) -> MetaAddr {
MetaAddr::new_local_listener_change(&self.local_listener)
.into_new_meta_addr()
.expect("unexpected invalid new local listener addr")
}
/// Get the contents of `self` in random order with sanitized timestamps.
pub fn sanitized(&self, now: chrono::DateTime<Utc>) -> Vec<MetaAddr> {
use rand::seq::SliceRandom;
let _guard = self.span.enter();
let mut peers = self.by_addr.clone();
// Unconditionally add our local listener address to the advertised peers,
// to replace any self-connection failures. The address book and change
// constructors make sure that the SocketAddr is canonical.
let local_listener = self.local_listener_meta_addr();
peers.insert(local_listener.addr, local_listener);
// Then sanitize and shuffle
let mut peers = peers
.descending_values()
.filter_map(MetaAddr::sanitize)
// Security: remove peers that:
// - last responded more than three hours ago, or
// - haven't responded yet but were reported last seen more than three hours ago
//
// This prevents Zebra from gossiping nodes that are likely unreachable. Gossiping such
// nodes impacts the network health, because connection attempts end up being wasted on
// peers that are less likely to respond.
.filter(|addr| addr.is_active_for_gossip(now))
.collect::<Vec<_>>();
peers.shuffle(&mut rand::thread_rng());
peers
}
/// Look up `addr` in the address book, and return its [`MetaAddr`].
///
/// Converts `addr` to a canonical address before looking it up.
pub fn get(&mut self, addr: &SocketAddr) -> Option<MetaAddr> {
let addr = canonical_socket_addr(*addr);
// Unfortunately, `OrderedMap` doesn't implement `get`.
let meta_addr = self.by_addr.remove(&addr);
if let Some(meta_addr) = meta_addr {
self.by_addr.insert(addr, meta_addr);
}
meta_addr
}
/// Apply `change` to the address book, returning the updated `MetaAddr`,
/// if the change was valid.
///
/// # Correctness
///
/// All changes should go through `update`, so that the address book
/// only contains valid outbound addresses.
///
/// Change addresses must be canonical `SocketAddr`s. This makes sure that
/// each address book entry has a unique IP address.
///
/// # Security
///
/// This function must apply every attempted, responded, and failed change
/// to the address book. This prevents rapid reconnections to the same peer.
///
/// As an exception, this function can ignore all changes for specific
/// [`SocketAddr`]s. Ignored addresses will never be used to connect to
/// peers.
pub fn update(&mut self, change: MetaAddrChange) -> Option<MetaAddr> {
let previous = self.get(&change.addr());
let _guard = self.span.enter();
let instant_now = Instant::now();
let chrono_now = Utc::now();
let updated = change.apply_to_meta_addr(previous);
trace!(
?change,
?updated,
?previous,
total_peers = self.by_addr.len(),
recent_peers = self.recently_live_peers(chrono_now).count(),
);
if let Some(updated) = updated {
// Ignore invalid outbound addresses.
// (Inbound connections can be monitored via Zebra's metrics.)
if !updated.address_is_valid_for_outbound() {
return None;
}
// Ignore invalid outbound services and other info,
// but only if the peer has never been attempted.
//
// Otherwise, if we got the info directly from the peer,
// store it in the address book, so we know not to reconnect.
//
// TODO: delete peers with invalid info when they get too old (#1873)
if !updated.last_known_info_is_valid_for_outbound()
&& updated.last_connection_state.is_never_attempted()
{
return None;
}
self.by_addr.insert(updated.addr, updated);
std::mem::drop(_guard);
self.update_metrics(instant_now, chrono_now);
}
updated
}
/// Removes the entry with `addr`, returning it if it exists
///
/// # Note
///
/// All address removals should go through `take`, so that the address
/// book metrics are accurate.
#[allow(dead_code)]
fn take(&mut self, removed_addr: SocketAddr) -> Option<MetaAddr> {
let _guard = self.span.enter();
let instant_now = Instant::now();
let chrono_now = Utc::now();
trace!(
?removed_addr,
total_peers = self.by_addr.len(),
recent_peers = self.recently_live_peers(chrono_now).count(),
);
if let Some(entry) = self.by_addr.remove(&removed_addr) {
std::mem::drop(_guard);
self.update_metrics(instant_now, chrono_now);
Some(entry)
} else {
None
}
}
/// Returns true if the given [`SocketAddr`] is pending a reconnection
/// attempt.
pub fn pending_reconnection_addr(&mut self, addr: &SocketAddr) -> bool {
let meta_addr = self.get(addr);
let _guard = self.span.enter();
match meta_addr {
None => false,
Some(peer) => peer.last_connection_state == PeerAddrState::AttemptPending,
}
}
/// Return an iterator over all peers.
///
/// Returns peers in reconnection attempt order, including recently connected peers.
pub fn peers(&'_ self) -> impl Iterator<Item = MetaAddr> + '_ {
let _guard = self.span.enter();
self.by_addr.descending_values().cloned()
}
/// Return an iterator over peers that are due for a reconnection attempt,
/// in reconnection attempt order.
pub fn reconnection_peers(
&'_ self,
instant_now: Instant,
chrono_now: chrono::DateTime<Utc>,
) -> impl Iterator<Item = MetaAddr> + '_ {
let _guard = self.span.enter();
// Skip live peers, and peers pending a reconnect attempt.
// The peers are already stored in sorted order.
self.by_addr
.descending_values()
.filter(move |peer| peer.is_ready_for_connection_attempt(instant_now, chrono_now))
.cloned()
}
/// Return an iterator over all the peers in `state`,
/// in reconnection attempt order, including recently connected peers.
pub fn state_peers(&'_ self, state: PeerAddrState) -> impl Iterator<Item = MetaAddr> + '_ {
let _guard = self.span.enter();
self.by_addr
.descending_values()
.filter(move |peer| peer.last_connection_state == state)
.cloned()
}
/// Return an iterator over peers that might be connected,
/// in reconnection attempt order.
pub fn maybe_connected_peers(
&'_ self,
instant_now: Instant,
chrono_now: chrono::DateTime<Utc>,
) -> impl Iterator<Item = MetaAddr> + '_ {
let _guard = self.span.enter();
self.by_addr
.descending_values()
.filter(move |peer| !peer.is_ready_for_connection_attempt(instant_now, chrono_now))
.cloned()
}
/// Return an iterator over peers we've seen recently,
/// in reconnection attempt order.
pub fn recently_live_peers(
&'_ self,
now: chrono::DateTime<Utc>,
) -> impl Iterator<Item = MetaAddr> + '_ {
let _guard = self.span.enter();
self.by_addr
.descending_values()
.filter(move |peer| peer.was_recently_live(now))
.cloned()
}
/// Returns the number of entries in this address book.
pub fn len(&self) -> usize {
self.by_addr.len()
}
/// Returns metrics for the addresses in this address book.
pub fn address_metrics(&self, now: chrono::DateTime<Utc>) -> AddressMetrics {
let responded = self.state_peers(PeerAddrState::Responded).count();
let never_attempted_gossiped = self
.state_peers(PeerAddrState::NeverAttemptedGossiped)
.count();
let never_attempted_alternate = self
.state_peers(PeerAddrState::NeverAttemptedAlternate)
.count();
let failed = self.state_peers(PeerAddrState::Failed).count();
let attempt_pending = self.state_peers(PeerAddrState::AttemptPending).count();
let recently_live = self.recently_live_peers(now).count();
let recently_stopped_responding = responded
.checked_sub(recently_live)
.expect("all recently live peers must have responded");
AddressMetrics {
responded,
never_attempted_gossiped,
never_attempted_alternate,
failed,
attempt_pending,
recently_live,
recently_stopped_responding,
}
}
/// Update the metrics for this address book.
fn update_metrics(&mut self, instant_now: Instant, chrono_now: chrono::DateTime<Utc>) {
let _guard = self.span.enter();
let m = self.address_metrics(chrono_now);
// TODO: rename to address_book.[state_name]
metrics::gauge!("candidate_set.responded", m.responded as f64);
metrics::gauge!("candidate_set.gossiped", m.never_attempted_gossiped as f64);
metrics::gauge!(
"candidate_set.alternate",
m.never_attempted_alternate as f64
);
metrics::gauge!("candidate_set.failed", m.failed as f64);
metrics::gauge!("candidate_set.pending", m.attempt_pending as f64);
// TODO: rename to address_book.responded.recently_live
metrics::gauge!("candidate_set.recently_live", m.recently_live as f64);
// TODO: rename to address_book.responded.stopped_responding
metrics::gauge!(
"candidate_set.disconnected",
m.recently_stopped_responding as f64
);
std::mem::drop(_guard);
self.log_metrics(&m, instant_now);
}
/// Log metrics for this address book
fn log_metrics(&mut self, m: &AddressMetrics, now: Instant) {
let _guard = self.span.enter();
trace!(
address_metrics = ?m,
);
if m.responded > 0 {
return;
}
// These logs are designed to be human-readable in a terminal, at the
// default Zebra log level. If you need to know address states for
// every request, use the trace-level logs, or the metrics exporter.
if let Some(last_address_log) = self.last_address_log {
// Avoid duplicate address logs
if now.saturating_duration_since(last_address_log).as_secs() < 60 {
return;
}
} else {
// Suppress initial logs until the peer set has started up.
// There can be multiple address changes before the first peer has
// responded.
self.last_address_log = Some(now);
return;
}
self.last_address_log = Some(now);
// if all peers have failed
if m.responded
+ m.attempt_pending
+ m.never_attempted_gossiped
+ m.never_attempted_alternate
== 0
{
warn!(
address_metrics = ?m,
"all peer addresses have failed. Hint: check your network connection"
);
} else {
info!(
address_metrics = ?m,
"no active peer connections: trying gossiped addresses"
);
}
}
}
impl Extend<MetaAddrChange> for AddressBook {
fn extend<T>(&mut self, iter: T)
where
T: IntoIterator<Item = MetaAddrChange>,
{
for change in iter.into_iter() {
self.update(change);
}
}
}