//! The `AddressBook` manages information about what peers exist, when they were //! seen, and what services they provide. use std::{ cmp::Reverse, iter::Extend, net::SocketAddr, sync::{Arc, Mutex}, time::Instant, }; use chrono::Utc; use ordered_map::OrderedMap; use tokio::sync::watch; use tracing::Span; use zebra_chain::{parameters::Network, serialization::DateTime32}; use crate::{ constants, meta_addr::MetaAddrChange, protocol::external::{canonical_peer_addr, canonical_socket_addr}, types::MetaAddr, AddressBookPeers, PeerAddrState, PeerSocketAddr, }; #[cfg(test)] mod tests; /// A database of peer listener addresses, their advertised services, and /// information on when they were last seen. /// /// # Security /// /// Address book state must be based on outbound connections to peers. /// /// If the address book is updated incorrectly: /// - malicious peers can interfere with other peers' `AddressBook` state, /// or /// - Zebra can advertise unreachable addresses to its own peers. /// /// ## Adding Addresses /// /// The address book should only contain Zcash listener port addresses from peers /// on the configured network. These addresses can come from: /// - DNS seeders /// - addresses gossiped by other peers /// - the canonical address (`Version.address_from`) provided by each peer, /// particularly peers on inbound connections. /// /// The remote addresses of inbound connections must not be added to the address /// book, because they contain ephemeral outbound ports, not listener ports. /// /// Isolated connections must not add addresses or update the address book. /// /// ## Updating Address State /// /// Updates to address state must be based on outbound connections to peers. /// /// Updates must not be based on: /// - the remote addresses of inbound connections, or /// - the canonical address of any connection. #[derive(Debug)] pub struct AddressBook { /// Peer listener addresses, suitable for outbound connections, /// in connection attempt order. /// /// Some peers in this list might have open outbound or inbound connections. /// /// We reverse the comparison order, because the standard library /// ([`BTreeMap`](std::collections::BTreeMap)) sorts in ascending order, but /// [`OrderedMap`] sorts in descending order. by_addr: OrderedMap>, /// The local listener address. local_listener: SocketAddr, /// The configured Zcash network. network: Network, /// The maximum number of addresses in the address book. /// /// Always set to [`MAX_ADDRS_IN_ADDRESS_BOOK`](constants::MAX_ADDRS_IN_ADDRESS_BOOK), /// in release builds. Lower values are used during testing. addr_limit: usize, /// The span for operations on this address book. span: Span, /// A channel used to send the latest address book metrics. address_metrics_tx: watch::Sender, /// The last time we logged a message about the address metrics. last_address_log: Option, } /// Metrics about the states of the addresses in an [`AddressBook`]. #[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Hash)] pub struct AddressMetrics { /// The number of addresses in the `Responded` state. pub responded: usize, /// The number of addresses in the `NeverAttemptedGossiped` state. pub never_attempted_gossiped: usize, /// The number of addresses in the `NeverAttemptedAlternate` state. pub never_attempted_alternate: usize, /// The number of addresses in the `Failed` state. pub failed: usize, /// The number of addresses in the `AttemptPending` state. pub attempt_pending: usize, /// The number of `Responded` addresses within the liveness limit. pub recently_live: usize, /// The number of `Responded` addresses outside the liveness limit. pub recently_stopped_responding: usize, /// The number of addresses in the address book, regardless of their states. pub num_addresses: usize, /// The maximum number of addresses in the address book. pub address_limit: usize, } #[allow(clippy::len_without_is_empty)] impl AddressBook { /// Construct an [`AddressBook`] with the given `local_listener` on `network`. /// /// Uses the supplied [`tracing::Span`] for address book operations. pub fn new(local_listener: SocketAddr, network: Network, span: Span) -> AddressBook { let constructor_span = span.clone(); let _guard = constructor_span.enter(); let instant_now = Instant::now(); let chrono_now = Utc::now(); // The default value is correct for an empty address book, // and it gets replaced by `update_metrics` anyway. let (address_metrics_tx, _address_metrics_rx) = watch::channel(AddressMetrics::default()); let mut new_book = AddressBook { by_addr: OrderedMap::new(|meta_addr| Reverse(*meta_addr)), local_listener: canonical_socket_addr(local_listener), network, addr_limit: constants::MAX_ADDRS_IN_ADDRESS_BOOK, span, address_metrics_tx, last_address_log: None, }; new_book.update_metrics(instant_now, chrono_now); new_book } /// Construct an [`AddressBook`] with the given `local_listener`, `network`, /// `addr_limit`, [`tracing::Span`], and addresses. /// /// `addr_limit` is enforced by this method, and by [`AddressBook::update`]. /// /// If there are multiple [`MetaAddr`]s with the same address, /// an arbitrary address is inserted into the address book, /// and the rest are dropped. /// /// This constructor can be used to break address book invariants, /// so it should only be used in tests. #[cfg(any(test, feature = "proptest-impl"))] pub fn new_with_addrs( local_listener: SocketAddr, network: Network, addr_limit: usize, span: Span, addrs: impl IntoIterator, ) -> AddressBook { let constructor_span = span.clone(); let _guard = constructor_span.enter(); let instant_now = Instant::now(); let chrono_now = Utc::now(); // The maximum number of addresses should be always greater than 0 assert!(addr_limit > 0); let mut new_book = AddressBook::new(local_listener, network, span); new_book.addr_limit = addr_limit; let addrs = addrs .into_iter() .map(|mut meta_addr| { meta_addr.addr = canonical_peer_addr(meta_addr.addr); meta_addr }) .filter(|meta_addr| meta_addr.address_is_valid_for_outbound(network)) .map(|meta_addr| (meta_addr.addr, meta_addr)); for (socket_addr, meta_addr) in addrs { // overwrite any duplicate addresses new_book.by_addr.insert(socket_addr, meta_addr); // exit as soon as we get enough addresses if new_book.by_addr.len() >= addr_limit { break; } } new_book.update_metrics(instant_now, chrono_now); new_book } /// Return a watch channel for the address book metrics. /// /// The metrics in the watch channel are only updated when the address book updates, /// so they can be significantly outdated if Zebra is disconnected or hung. /// /// The current metrics value is marked as seen. /// So `Receiver::changed` will only return after the next address book update. pub fn address_metrics_watcher(&self) -> watch::Receiver { self.address_metrics_tx.subscribe() } /// Set the local listener address. Only for use in tests. #[cfg(any(test, feature = "proptest-impl"))] pub fn set_local_listener(&mut self, addr: SocketAddr) { self.local_listener = addr; } /// Get the local listener address. /// /// This address contains minimal state, but it is not sanitized. pub fn local_listener_meta_addr(&self, now: chrono::DateTime) -> MetaAddr { let now: DateTime32 = now.try_into().expect("will succeed until 2038"); MetaAddr::new_local_listener_change(self.local_listener) .local_listener_into_new_meta_addr(now) } /// Get the local listener [`SocketAddr`]. pub fn local_listener_socket_addr(&self) -> SocketAddr { self.local_listener } /// Get the active addresses in `self` in random order with sanitized timestamps, /// including our local listener address. pub fn sanitized(&self, now: chrono::DateTime) -> Vec { use rand::seq::SliceRandom; let _guard = self.span.enter(); let mut peers = self.by_addr.clone(); // Unconditionally add our local listener address to the advertised peers, // to replace any self-connection failures. The address book and change // constructors make sure that the SocketAddr is canonical. let local_listener = self.local_listener_meta_addr(now); peers.insert(local_listener.addr, local_listener); // Then sanitize and shuffle let mut peers: Vec = peers .descending_values() .filter_map(|meta_addr| meta_addr.sanitize(self.network)) // # Security // // Remove peers that: // - last responded more than three hours ago, or // - haven't responded yet but were reported last seen more than three hours ago // // This prevents Zebra from gossiping nodes that are likely unreachable. Gossiping such // nodes impacts the network health, because connection attempts end up being wasted on // peers that are less likely to respond. .filter(|addr| addr.is_active_for_gossip(now)) .collect(); peers.shuffle(&mut rand::thread_rng()); peers } /// Get the active addresses in `self`, in preferred caching order, /// excluding our local listener address. pub fn cacheable(&self, now: chrono::DateTime) -> Vec { let _guard = self.span.enter(); let peers = self.by_addr.clone(); // Get peers in preferred order, then keep the recently active ones peers .descending_values() // # Security // // Remove peers that: // - last responded more than three hours ago, or // - haven't responded yet but were reported last seen more than three hours ago // // This prevents Zebra from caching nodes that are likely unreachable, // which improves startup time and reliability. .filter(|addr| addr.is_active_for_gossip(now)) .cloned() .collect() } /// Look up `addr` in the address book, and return its [`MetaAddr`]. /// /// Converts `addr` to a canonical address before looking it up. pub fn get(&mut self, addr: PeerSocketAddr) -> Option { let addr = canonical_peer_addr(*addr); // Unfortunately, `OrderedMap` doesn't implement `get`. let meta_addr = self.by_addr.remove(&addr); if let Some(meta_addr) = meta_addr { self.by_addr.insert(addr, meta_addr); } meta_addr } /// Apply `change` to the address book, returning the updated `MetaAddr`, /// if the change was valid. /// /// # Correctness /// /// All changes should go through `update`, so that the address book /// only contains valid outbound addresses. /// /// Change addresses must be canonical `PeerSocketAddr`s. This makes sure that /// each address book entry has a unique IP address. /// /// # Security /// /// This function must apply every attempted, responded, and failed change /// to the address book. This prevents rapid reconnections to the same peer. /// /// As an exception, this function can ignore all changes for specific /// [`PeerSocketAddr`]s. Ignored addresses will never be used to connect to /// peers. #[allow(clippy::unwrap_in_result)] pub fn update(&mut self, change: MetaAddrChange) -> Option { let previous = self.get(change.addr()); let _guard = self.span.enter(); let instant_now = Instant::now(); let chrono_now = Utc::now(); let updated = change.apply_to_meta_addr(previous, instant_now, chrono_now); trace!( ?change, ?updated, ?previous, total_peers = self.by_addr.len(), recent_peers = self.recently_live_peers(chrono_now).len(), "calculated updated address book entry", ); if let Some(updated) = updated { // Ignore invalid outbound addresses. // (Inbound connections can be monitored via Zebra's metrics.) if !updated.address_is_valid_for_outbound(self.network) { return None; } // Ignore invalid outbound services and other info, // but only if the peer has never been attempted. // // Otherwise, if we got the info directly from the peer, // store it in the address book, so we know not to reconnect. if !updated.last_known_info_is_valid_for_outbound(self.network) && updated.last_connection_state.is_never_attempted() { return None; } self.by_addr.insert(updated.addr, updated); debug!( ?change, ?updated, ?previous, total_peers = self.by_addr.len(), recent_peers = self.recently_live_peers(chrono_now).len(), "updated address book entry", ); // Security: Limit the number of peers in the address book. // // We only delete outdated peers when we have too many peers. // If we deleted them as soon as they became too old, // then other peers could re-insert them into the address book. // And we would start connecting to those outdated peers again, // ignoring the age limit in [`MetaAddr::is_probably_reachable`]. while self.by_addr.len() > self.addr_limit { let surplus_peer = self .peers() .next_back() .expect("just checked there is at least one peer"); self.by_addr.remove(&surplus_peer.addr); debug!( surplus = ?surplus_peer, ?updated, total_peers = self.by_addr.len(), recent_peers = self.recently_live_peers(chrono_now).len(), "removed surplus address book entry", ); } assert!(self.len() <= self.addr_limit); std::mem::drop(_guard); self.update_metrics(instant_now, chrono_now); } updated } /// Removes the entry with `addr`, returning it if it exists /// /// # Note /// /// All address removals should go through `take`, so that the address /// book metrics are accurate. #[allow(dead_code)] fn take(&mut self, removed_addr: PeerSocketAddr) -> Option { let _guard = self.span.enter(); let instant_now = Instant::now(); let chrono_now = Utc::now(); trace!( ?removed_addr, total_peers = self.by_addr.len(), recent_peers = self.recently_live_peers(chrono_now).len(), ); if let Some(entry) = self.by_addr.remove(&removed_addr) { std::mem::drop(_guard); self.update_metrics(instant_now, chrono_now); Some(entry) } else { None } } /// Returns true if the given [`PeerSocketAddr`] is pending a reconnection /// attempt. pub fn pending_reconnection_addr(&mut self, addr: PeerSocketAddr) -> bool { let meta_addr = self.get(addr); let _guard = self.span.enter(); match meta_addr { None => false, Some(peer) => peer.last_connection_state == PeerAddrState::AttemptPending, } } /// Return an iterator over all peers. /// /// Returns peers in reconnection attempt order, including recently connected peers. pub fn peers(&'_ self) -> impl Iterator + DoubleEndedIterator + '_ { let _guard = self.span.enter(); self.by_addr.descending_values().cloned() } /// Return an iterator over peers that are due for a reconnection attempt, /// in reconnection attempt order. pub fn reconnection_peers( &'_ self, instant_now: Instant, chrono_now: chrono::DateTime, ) -> impl Iterator + DoubleEndedIterator + '_ { let _guard = self.span.enter(); // Skip live peers, and peers pending a reconnect attempt. // The peers are already stored in sorted order. self.by_addr .descending_values() .filter(move |peer| { peer.is_ready_for_connection_attempt(instant_now, chrono_now, self.network) }) .cloned() } /// Return an iterator over all the peers in `state`, /// in reconnection attempt order, including recently connected peers. pub fn state_peers( &'_ self, state: PeerAddrState, ) -> impl Iterator + DoubleEndedIterator + '_ { let _guard = self.span.enter(); self.by_addr .descending_values() .filter(move |peer| peer.last_connection_state == state) .cloned() } /// Return an iterator over peers that might be connected, /// in reconnection attempt order. pub fn maybe_connected_peers( &'_ self, instant_now: Instant, chrono_now: chrono::DateTime, ) -> impl Iterator + DoubleEndedIterator + '_ { let _guard = self.span.enter(); self.by_addr .descending_values() .filter(move |peer| { !peer.is_ready_for_connection_attempt(instant_now, chrono_now, self.network) }) .cloned() } /// Returns the number of entries in this address book. pub fn len(&self) -> usize { self.by_addr.len() } /// Returns metrics for the addresses in this address book. /// Only for use in tests. /// /// # Correctness /// /// Use [`AddressBook::address_metrics_watcher().borrow()`] in production code, /// to avoid deadlocks. #[cfg(test)] pub fn address_metrics(&self, now: chrono::DateTime) -> AddressMetrics { self.address_metrics_internal(now) } /// Returns metrics for the addresses in this address book. /// /// # Correctness /// /// External callers should use [`AddressBook::address_metrics_watcher().borrow()`] /// in production code, to avoid deadlocks. /// (Using the watch channel receiver does not lock the address book mutex.) fn address_metrics_internal(&self, now: chrono::DateTime) -> AddressMetrics { let responded = self.state_peers(PeerAddrState::Responded).count(); let never_attempted_gossiped = self .state_peers(PeerAddrState::NeverAttemptedGossiped) .count(); let never_attempted_alternate = self .state_peers(PeerAddrState::NeverAttemptedAlternate) .count(); let failed = self.state_peers(PeerAddrState::Failed).count(); let attempt_pending = self.state_peers(PeerAddrState::AttemptPending).count(); let recently_live = self.recently_live_peers(now).len(); let recently_stopped_responding = responded .checked_sub(recently_live) .expect("all recently live peers must have responded"); let num_addresses = self.len(); AddressMetrics { responded, never_attempted_gossiped, never_attempted_alternate, failed, attempt_pending, recently_live, recently_stopped_responding, num_addresses, address_limit: self.addr_limit, } } /// Update the metrics for this address book. fn update_metrics(&mut self, instant_now: Instant, chrono_now: chrono::DateTime) { let _guard = self.span.enter(); let m = self.address_metrics_internal(chrono_now); // Ignore errors: we don't care if any receivers are listening. let _ = self.address_metrics_tx.send(m); // TODO: rename to address_book.[state_name] metrics::gauge!("candidate_set.responded", m.responded as f64); metrics::gauge!("candidate_set.gossiped", m.never_attempted_gossiped as f64); metrics::gauge!( "candidate_set.alternate", m.never_attempted_alternate as f64, ); metrics::gauge!("candidate_set.failed", m.failed as f64); metrics::gauge!("candidate_set.pending", m.attempt_pending as f64); // TODO: rename to address_book.responded.recently_live metrics::gauge!("candidate_set.recently_live", m.recently_live as f64); // TODO: rename to address_book.responded.stopped_responding metrics::gauge!( "candidate_set.disconnected", m.recently_stopped_responding as f64, ); std::mem::drop(_guard); self.log_metrics(&m, instant_now); } /// Log metrics for this address book fn log_metrics(&mut self, m: &AddressMetrics, now: Instant) { let _guard = self.span.enter(); trace!( address_metrics = ?m, ); if m.responded > 0 { return; } // These logs are designed to be human-readable in a terminal, at the // default Zebra log level. If you need to know address states for // every request, use the trace-level logs, or the metrics exporter. if let Some(last_address_log) = self.last_address_log { // Avoid duplicate address logs if now.saturating_duration_since(last_address_log).as_secs() < 60 { return; } } else { // Suppress initial logs until the peer set has started up. // There can be multiple address changes before the first peer has // responded. self.last_address_log = Some(now); return; } self.last_address_log = Some(now); // if all peers have failed if m.responded + m.attempt_pending + m.never_attempted_gossiped + m.never_attempted_alternate == 0 { warn!( address_metrics = ?m, "all peer addresses have failed. Hint: check your network connection" ); } else { info!( address_metrics = ?m, "no active peer connections: trying gossiped addresses" ); } } } impl AddressBookPeers for AddressBook { fn recently_live_peers(&self, now: chrono::DateTime) -> Vec { let _guard = self.span.enter(); self.by_addr .descending_values() .filter(|peer| peer.was_recently_live(now)) .cloned() .collect() } } impl AddressBookPeers for Arc> { fn recently_live_peers(&self, now: chrono::DateTime) -> Vec { self.lock() .expect("panic in a previous thread that was holding the mutex") .recently_live_peers(now) } } impl Extend for AddressBook { fn extend(&mut self, iter: T) where T: IntoIterator, { for change in iter.into_iter() { self.update(change); } } } impl Clone for AddressBook { /// Clone the addresses, address limit, local listener address, and span. /// /// Cloned address books have a separate metrics struct watch channel, and an empty last address log. /// /// All address books update the same prometheus metrics. fn clone(&self) -> AddressBook { // The existing metrics might be outdated, but we avoid calling `update_metrics`, // so we don't overwrite the prometheus metrics from the main address book. let (address_metrics_tx, _address_metrics_rx) = watch::channel(*self.address_metrics_tx.borrow()); AddressBook { by_addr: self.by_addr.clone(), local_listener: self.local_listener, network: self.network, addr_limit: self.addr_limit, span: self.span.clone(), address_metrics_tx, last_address_log: None, } } }