2021-05-06 17:50:04 -07:00
|
|
|
//! The `AddressBook` manages information about what peers exist, when they were
|
2019-10-17 15:42:19 -07:00
|
|
|
//! seen, and what services they provide.
|
|
|
|
|
2023-01-16 23:09:07 -08:00
|
|
|
use std::{
|
|
|
|
cmp::Reverse,
|
2023-07-05 22:54:10 -07:00
|
|
|
collections::HashMap,
|
2023-01-16 23:09:07 -08:00
|
|
|
iter::Extend,
|
2023-07-05 22:54:10 -07:00
|
|
|
net::{IpAddr, SocketAddr},
|
2023-01-16 23:09:07 -08:00
|
|
|
sync::{Arc, Mutex},
|
|
|
|
time::Instant,
|
|
|
|
};
|
2019-10-17 15:42:19 -07:00
|
|
|
|
2021-12-03 10:09:43 -08:00
|
|
|
use chrono::Utc;
|
|
|
|
use ordered_map::OrderedMap;
|
2021-12-19 16:44:43 -08:00
|
|
|
use tokio::sync::watch;
|
2019-10-21 15:56:16 -07:00
|
|
|
use tracing::Span;
|
2019-10-17 15:42:19 -07:00
|
|
|
|
2023-05-24 16:53:53 -07:00
|
|
|
use zebra_chain::{parameters::Network, serialization::DateTime32};
|
2022-06-13 21:58:37 -07:00
|
|
|
|
2021-11-09 12:47:50 -08:00
|
|
|
use crate::{
|
2023-11-20 19:32:23 -08:00
|
|
|
constants::{self, ADDR_RESPONSE_LIMIT_DENOMINATOR, MAX_ADDRS_IN_MESSAGE},
|
2023-05-14 08:06:07 -07:00
|
|
|
meta_addr::MetaAddrChange,
|
|
|
|
protocol::external::{canonical_peer_addr, canonical_socket_addr},
|
|
|
|
types::MetaAddr,
|
|
|
|
AddressBookPeers, PeerAddrState, PeerSocketAddr,
|
2021-11-09 12:47:50 -08:00
|
|
|
};
|
2023-05-14 08:06:07 -07:00
|
|
|
|
2021-06-28 22:12:27 -07:00
|
|
|
#[cfg(test)]
|
|
|
|
mod tests;
|
|
|
|
|
2021-05-06 17:50:04 -07:00
|
|
|
/// A database of peer listener addresses, their advertised services, and
/// information on when they were last seen.
///
/// # Security
///
/// Address book state must be based on outbound connections to peers.
///
/// If the address book is updated incorrectly:
/// - malicious peers can interfere with other peers' `AddressBook` state,
///   or
/// - Zebra can advertise unreachable addresses to its own peers.
///
/// ## Adding Addresses
///
/// The address book should only contain Zcash listener port addresses from peers
/// on the configured network. These addresses can come from:
/// - DNS seeders
/// - addresses gossiped by other peers
/// - the canonical address (`Version.address_from`) provided by each peer,
///   particularly peers on inbound connections.
///
/// The remote addresses of inbound connections must not be added to the address
/// book, because they contain ephemeral outbound ports, not listener ports.
///
/// Isolated connections must not add addresses or update the address book.
///
/// ## Updating Address State
///
/// Updates to address state must be based on outbound connections to peers.
///
/// Updates must not be based on:
/// - the remote addresses of inbound connections, or
/// - the canonical address of any connection.
#[derive(Debug)]
pub struct AddressBook {
    /// Peer listener addresses, suitable for outbound connections,
    /// in connection attempt order.
    ///
    /// Some peers in this list might have open outbound or inbound connections.
    ///
    /// We reverse the comparison order, because the standard library
    /// ([`BTreeMap`](std::collections::BTreeMap)) sorts in ascending order, but
    /// [`OrderedMap`] sorts in descending order.
    by_addr: OrderedMap<PeerSocketAddr, MetaAddr, Reverse<MetaAddr>>,

    /// The address with a last_connection_state of [`PeerAddrState::Responded`] and
    /// the most recent `last_response` time by IP.
    ///
    /// This is used to avoid initiating outbound connections past
    /// [`Config::max_connections_per_ip`](crate::config::Config), and
    /// currently only supports a `max_connections_per_ip` of 1, and must be
    /// `None` when used with a greater `max_connections_per_ip`.
    // TODO: Replace with `by_ip: HashMap<IpAddr, BTreeMap<DateTime32, MetaAddr>>` to
    //       support configured `max_connections_per_ip` greater than 1
    most_recent_by_ip: Option<HashMap<IpAddr, MetaAddr>>,

    /// The local listener address.
    local_listener: SocketAddr,

    /// The configured Zcash network.
    network: Network,

    /// The maximum number of addresses in the address book.
    ///
    /// Always set to [`MAX_ADDRS_IN_ADDRESS_BOOK`](constants::MAX_ADDRS_IN_ADDRESS_BOOK),
    /// in release builds. Lower values are used during testing.
    addr_limit: usize,

    /// The span for operations on this address book.
    span: Span,

    /// A channel used to send the latest address book metrics.
    address_metrics_tx: watch::Sender<AddressMetrics>,

    /// The last time we logged a message about the address metrics.
    last_address_log: Option<Instant>,
}
|
|
|
|
|
2021-03-15 04:52:58 -07:00
|
|
|
/// Metrics about the states of the addresses in an [`AddressBook`].
#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Hash)]
pub struct AddressMetrics {
    /// The number of addresses in the `Responded` state.
    pub responded: usize,

    /// The number of addresses in the `NeverAttemptedGossiped` state.
    pub never_attempted_gossiped: usize,

    /// The number of addresses in the `Failed` state.
    pub failed: usize,

    /// The number of addresses in the `AttemptPending` state.
    pub attempt_pending: usize,

    /// The number of `Responded` addresses within the liveness limit.
    pub recently_live: usize,

    /// The number of `Responded` addresses outside the liveness limit.
    pub recently_stopped_responding: usize,

    /// The number of addresses in the address book, regardless of their states.
    pub num_addresses: usize,

    /// The maximum number of addresses in the address book.
    pub address_limit: usize,
}
|
|
|
|
|
2020-02-04 22:53:24 -08:00
|
|
|
#[allow(clippy::len_without_is_empty)]
|
2019-10-17 15:42:19 -07:00
|
|
|
impl AddressBook {
|
2022-06-13 21:58:37 -07:00
|
|
|
/// Construct an [`AddressBook`] with the given `local_listener` on `network`.
|
|
|
|
///
|
|
|
|
/// Uses the supplied [`tracing::Span`] for address book operations.
|
2023-07-05 22:54:10 -07:00
|
|
|
pub fn new(
|
|
|
|
local_listener: SocketAddr,
|
2024-03-19 13:45:27 -07:00
|
|
|
network: &Network,
|
2023-07-05 22:54:10 -07:00
|
|
|
max_connections_per_ip: usize,
|
|
|
|
span: Span,
|
|
|
|
) -> AddressBook {
|
2021-02-17 17:18:32 -08:00
|
|
|
let constructor_span = span.clone();
|
|
|
|
let _guard = constructor_span.enter();
|
|
|
|
|
2021-12-03 10:09:43 -08:00
|
|
|
let instant_now = Instant::now();
|
|
|
|
let chrono_now = Utc::now();
|
|
|
|
|
2021-12-19 16:44:43 -08:00
|
|
|
// The default value is correct for an empty address book,
|
|
|
|
// and it gets replaced by `update_metrics` anyway.
|
|
|
|
let (address_metrics_tx, _address_metrics_rx) = watch::channel(AddressMetrics::default());
|
|
|
|
|
2023-07-05 22:54:10 -07:00
|
|
|
// Avoid initiating outbound handshakes when max_connections_per_ip is 1.
|
|
|
|
let should_limit_outbound_conns_per_ip = max_connections_per_ip == 1;
|
2021-03-15 05:31:25 -07:00
|
|
|
let mut new_book = AddressBook {
|
2021-12-03 10:09:43 -08:00
|
|
|
by_addr: OrderedMap::new(|meta_addr| Reverse(*meta_addr)),
|
2021-06-22 14:59:06 -07:00
|
|
|
local_listener: canonical_socket_addr(local_listener),
|
2024-03-19 13:45:27 -07:00
|
|
|
network: network.clone(),
|
2022-06-13 21:58:37 -07:00
|
|
|
addr_limit: constants::MAX_ADDRS_IN_ADDRESS_BOOK,
|
2019-10-21 15:56:16 -07:00
|
|
|
span,
|
2021-12-19 16:44:43 -08:00
|
|
|
address_metrics_tx,
|
2021-03-15 05:31:25 -07:00
|
|
|
last_address_log: None,
|
2023-07-05 22:54:10 -07:00
|
|
|
most_recent_by_ip: should_limit_outbound_conns_per_ip.then(HashMap::new),
|
2021-02-17 17:18:32 -08:00
|
|
|
};
|
|
|
|
|
2021-12-03 10:09:43 -08:00
|
|
|
new_book.update_metrics(instant_now, chrono_now);
|
2021-02-17 17:18:32 -08:00
|
|
|
new_book
|
2019-10-21 15:56:16 -07:00
|
|
|
}
|
|
|
|
|
2022-06-13 21:58:37 -07:00
|
|
|
/// Construct an [`AddressBook`] with the given `local_listener`, `network`,
|
2021-12-06 11:09:10 -08:00
|
|
|
/// `addr_limit`, [`tracing::Span`], and addresses.
|
|
|
|
///
|
|
|
|
/// `addr_limit` is enforced by this method, and by [`AddressBook::update`].
|
Security: Limit reconnection rate to individual peers (#2275)
* Security: Limit reconnection rate to individual peers
Reconnection Rate
Limit the reconnection rate to each individual peer by applying the
liveness cutoff to the attempt, responded, and failure time fields.
If any field is recent, the peer is skipped.
The new liveness cutoff skips any peers that have recently been attempted
or failed. (Previously, the liveness check was only applied if the peer
was in the `Responded` state, which could lead to repeated retries of
`Failed` peers, particularly in small address books.)
Reconnection Order
Zebra prefers more useful peer states, then the earliest attempted,
failed, and responded times, then the most recent gossiped last seen
times.
Before this change, Zebra took the most recent time in all the peer time
fields, and used that time for liveness and ordering. This led to
confusion between trusted and untrusted data, and success and failure
times.
Unlike the previous order, the new order:
- tries all peers in each state, before re-trying any peer in that state,
and
- only checks the the gossiped untrusted last seen time
if all other times are equal.
* Preserve the later time if changes arrive out of order
* Update CandidateSet::next documentation
* Update CandidateSet state diagram
* Fix variant names in comments
* Explain why timestamps can be left out of MetaAddrChanges
* Add a simple test for the individual peer retry limit
* Only generate valid Arbitrary PeerServices values
* Add an individual peer retry limit AddressBook and CandidateSet test
* Stop deleting recently live addresses from the address book
If we delete recently live addresses from the address book, we can get a
new entry for them, and reconnect too rapidly.
* Rename functions to match similar tokio API
* Fix docs for service sorting
* Clarify a comment
* Cleanup a variable and comments
* Remove blank lines in the CandidateSet state diagram
* Add a multi-peer proptest that checks outbound attempt fairness
* Fix a comment typo
Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
* Simplify time maths in MetaAddr
* Create a Duration32 type to simplify calculations and comparisons
* Rename variables for clarity
* Split a string constant into multiple lines
* Make constants match rustdoc order
Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
2021-06-18 05:30:44 -07:00
|
|
|
///
|
2021-12-03 10:09:43 -08:00
|
|
|
/// If there are multiple [`MetaAddr`]s with the same address,
|
|
|
|
/// an arbitrary address is inserted into the address book,
|
|
|
|
/// and the rest are dropped.
|
|
|
|
///
|
Security: Limit reconnection rate to individual peers (#2275)
* Security: Limit reconnection rate to individual peers
Reconnection Rate
Limit the reconnection rate to each individual peer by applying the
liveness cutoff to the attempt, responded, and failure time fields.
If any field is recent, the peer is skipped.
The new liveness cutoff skips any peers that have recently been attempted
or failed. (Previously, the liveness check was only applied if the peer
was in the `Responded` state, which could lead to repeated retries of
`Failed` peers, particularly in small address books.)
Reconnection Order
Zebra prefers more useful peer states, then the earliest attempted,
failed, and responded times, then the most recent gossiped last seen
times.
Before this change, Zebra took the most recent time in all the peer time
fields, and used that time for liveness and ordering. This led to
confusion between trusted and untrusted data, and success and failure
times.
Unlike the previous order, the new order:
- tries all peers in each state, before re-trying any peer in that state,
and
- only checks the the gossiped untrusted last seen time
if all other times are equal.
* Preserve the later time if changes arrive out of order
* Update CandidateSet::next documentation
* Update CandidateSet state diagram
* Fix variant names in comments
* Explain why timestamps can be left out of MetaAddrChanges
* Add a simple test for the individual peer retry limit
* Only generate valid Arbitrary PeerServices values
* Add an individual peer retry limit AddressBook and CandidateSet test
* Stop deleting recently live addresses from the address book
If we delete recently live addresses from the address book, we can get a
new entry for them, and reconnect too rapidly.
* Rename functions to match similar tokio API
* Fix docs for service sorting
* Clarify a comment
* Cleanup a variable and comments
* Remove blank lines in the CandidateSet state diagram
* Add a multi-peer proptest that checks outbound attempt fairness
* Fix a comment typo
Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
* Simplify time maths in MetaAddr
* Create a Duration32 type to simplify calculations and comparisons
* Rename variables for clarity
* Split a string constant into multiple lines
* Make constants match rustdoc order
Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
2021-06-18 05:30:44 -07:00
|
|
|
/// This constructor can be used to break address book invariants,
|
|
|
|
/// so it should only be used in tests.
|
|
|
|
#[cfg(any(test, feature = "proptest-impl"))]
|
|
|
|
pub fn new_with_addrs(
|
2021-06-22 14:59:06 -07:00
|
|
|
local_listener: SocketAddr,
|
2024-03-19 13:45:27 -07:00
|
|
|
network: &Network,
|
2023-07-05 22:54:10 -07:00
|
|
|
max_connections_per_ip: usize,
|
2021-12-06 11:09:10 -08:00
|
|
|
addr_limit: usize,
|
Security: Limit reconnection rate to individual peers (#2275)
* Security: Limit reconnection rate to individual peers
Reconnection Rate
Limit the reconnection rate to each individual peer by applying the
liveness cutoff to the attempt, responded, and failure time fields.
If any field is recent, the peer is skipped.
The new liveness cutoff skips any peers that have recently been attempted
or failed. (Previously, the liveness check was only applied if the peer
was in the `Responded` state, which could lead to repeated retries of
`Failed` peers, particularly in small address books.)
Reconnection Order
Zebra prefers more useful peer states, then the earliest attempted,
failed, and responded times, then the most recent gossiped last seen
times.
Before this change, Zebra took the most recent time in all the peer time
fields, and used that time for liveness and ordering. This led to
confusion between trusted and untrusted data, and success and failure
times.
Unlike the previous order, the new order:
- tries all peers in each state, before re-trying any peer in that state,
and
- only checks the the gossiped untrusted last seen time
if all other times are equal.
* Preserve the later time if changes arrive out of order
* Update CandidateSet::next documentation
* Update CandidateSet state diagram
* Fix variant names in comments
* Explain why timestamps can be left out of MetaAddrChanges
* Add a simple test for the individual peer retry limit
* Only generate valid Arbitrary PeerServices values
* Add an individual peer retry limit AddressBook and CandidateSet test
* Stop deleting recently live addresses from the address book
If we delete recently live addresses from the address book, we can get a
new entry for them, and reconnect too rapidly.
* Rename functions to match similar tokio API
* Fix docs for service sorting
* Clarify a comment
* Cleanup a variable and comments
* Remove blank lines in the CandidateSet state diagram
* Add a multi-peer proptest that checks outbound attempt fairness
* Fix a comment typo
Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
* Simplify time maths in MetaAddr
* Create a Duration32 type to simplify calculations and comparisons
* Rename variables for clarity
* Split a string constant into multiple lines
* Make constants match rustdoc order
Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
2021-06-18 05:30:44 -07:00
|
|
|
span: Span,
|
|
|
|
addrs: impl IntoIterator<Item = MetaAddr>,
|
|
|
|
) -> AddressBook {
|
2021-12-03 10:09:43 -08:00
|
|
|
let constructor_span = span.clone();
|
|
|
|
let _guard = constructor_span.enter();
|
|
|
|
|
|
|
|
let instant_now = Instant::now();
|
|
|
|
let chrono_now = Utc::now();
|
|
|
|
|
2023-05-19 01:36:09 -07:00
|
|
|
// The maximum number of addresses should be always greater than 0
|
|
|
|
assert!(addr_limit > 0);
|
|
|
|
|
2023-07-05 22:54:10 -07:00
|
|
|
let mut new_book = AddressBook::new(local_listener, network, max_connections_per_ip, span);
|
2021-12-06 11:09:10 -08:00
|
|
|
new_book.addr_limit = addr_limit;
|
Security: Limit reconnection rate to individual peers (#2275)
* Security: Limit reconnection rate to individual peers
Reconnection Rate
Limit the reconnection rate to each individual peer by applying the
liveness cutoff to the attempt, responded, and failure time fields.
If any field is recent, the peer is skipped.
The new liveness cutoff skips any peers that have recently been attempted
or failed. (Previously, the liveness check was only applied if the peer
was in the `Responded` state, which could lead to repeated retries of
`Failed` peers, particularly in small address books.)
Reconnection Order
Zebra prefers more useful peer states, then the earliest attempted,
failed, and responded times, then the most recent gossiped last seen
times.
Before this change, Zebra took the most recent time in all the peer time
fields, and used that time for liveness and ordering. This led to
confusion between trusted and untrusted data, and success and failure
times.
Unlike the previous order, the new order:
- tries all peers in each state, before re-trying any peer in that state,
and
- only checks the the gossiped untrusted last seen time
if all other times are equal.
* Preserve the later time if changes arrive out of order
* Update CandidateSet::next documentation
* Update CandidateSet state diagram
* Fix variant names in comments
* Explain why timestamps can be left out of MetaAddrChanges
* Add a simple test for the individual peer retry limit
* Only generate valid Arbitrary PeerServices values
* Add an individual peer retry limit AddressBook and CandidateSet test
* Stop deleting recently live addresses from the address book
If we delete recently live addresses from the address book, we can get a
new entry for them, and reconnect too rapidly.
* Rename functions to match similar tokio API
* Fix docs for service sorting
* Clarify a comment
* Cleanup a variable and comments
* Remove blank lines in the CandidateSet state diagram
* Add a multi-peer proptest that checks outbound attempt fairness
* Fix a comment typo
Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
* Simplify time maths in MetaAddr
* Create a Duration32 type to simplify calculations and comparisons
* Rename variables for clarity
* Split a string constant into multiple lines
* Make constants match rustdoc order
Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
2021-06-18 05:30:44 -07:00
|
|
|
|
|
|
|
let addrs = addrs
|
|
|
|
.into_iter()
|
2021-06-21 19:16:59 -07:00
|
|
|
.map(|mut meta_addr| {
|
2023-05-14 08:06:07 -07:00
|
|
|
meta_addr.addr = canonical_peer_addr(meta_addr.addr);
|
2021-06-21 19:16:59 -07:00
|
|
|
meta_addr
|
|
|
|
})
|
2022-06-13 21:58:37 -07:00
|
|
|
.filter(|meta_addr| meta_addr.address_is_valid_for_outbound(network))
|
Security: Limit reconnection rate to individual peers (#2275)
* Security: Limit reconnection rate to individual peers
Reconnection Rate
Limit the reconnection rate to each individual peer by applying the
liveness cutoff to the attempt, responded, and failure time fields.
If any field is recent, the peer is skipped.
The new liveness cutoff skips any peers that have recently been attempted
or failed. (Previously, the liveness check was only applied if the peer
was in the `Responded` state, which could lead to repeated retries of
`Failed` peers, particularly in small address books.)
Reconnection Order
Zebra prefers more useful peer states, then the earliest attempted,
failed, and responded times, then the most recent gossiped last seen
times.
Before this change, Zebra took the most recent time in all the peer time
fields, and used that time for liveness and ordering. This led to
confusion between trusted and untrusted data, and success and failure
times.
Unlike the previous order, the new order:
- tries all peers in each state, before re-trying any peer in that state,
and
- only checks the the gossiped untrusted last seen time
if all other times are equal.
* Preserve the later time if changes arrive out of order
* Update CandidateSet::next documentation
* Update CandidateSet state diagram
* Fix variant names in comments
* Explain why timestamps can be left out of MetaAddrChanges
* Add a simple test for the individual peer retry limit
* Only generate valid Arbitrary PeerServices values
* Add an individual peer retry limit AddressBook and CandidateSet test
* Stop deleting recently live addresses from the address book
If we delete recently live addresses from the address book, we can get a
new entry for them, and reconnect too rapidly.
* Rename functions to match similar tokio API
* Fix docs for service sorting
* Clarify a comment
* Cleanup a variable and comments
* Remove blank lines in the CandidateSet state diagram
* Add a multi-peer proptest that checks outbound attempt fairness
* Fix a comment typo
Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
* Simplify time maths in MetaAddr
* Create a Duration32 type to simplify calculations and comparisons
* Rename variables for clarity
* Split a string constant into multiple lines
* Make constants match rustdoc order
Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
2021-06-18 05:30:44 -07:00
|
|
|
.map(|meta_addr| (meta_addr.addr, meta_addr));
|
|
|
|
|
2021-12-03 10:09:43 -08:00
|
|
|
for (socket_addr, meta_addr) in addrs {
|
|
|
|
// overwrite any duplicate addresses
|
|
|
|
new_book.by_addr.insert(socket_addr, meta_addr);
|
2023-07-05 22:54:10 -07:00
|
|
|
// Add the address to `most_recent_by_ip` if it has responded
|
|
|
|
if new_book.should_update_most_recent_by_ip(meta_addr) {
|
|
|
|
new_book
|
|
|
|
.most_recent_by_ip
|
|
|
|
.as_mut()
|
|
|
|
.expect("should be some when should_update_most_recent_by_ip is true")
|
|
|
|
.insert(socket_addr.ip(), meta_addr);
|
|
|
|
}
|
2023-05-19 01:36:09 -07:00
|
|
|
// exit as soon as we get enough addresses
|
|
|
|
if new_book.by_addr.len() >= addr_limit {
|
|
|
|
break;
|
|
|
|
}
|
2021-12-03 10:09:43 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
new_book.update_metrics(instant_now, chrono_now);
|
Security: Limit reconnection rate to individual peers (#2275)
* Security: Limit reconnection rate to individual peers
Reconnection Rate
Limit the reconnection rate to each individual peer by applying the
liveness cutoff to the attempt, responded, and failure time fields.
If any field is recent, the peer is skipped.
The new liveness cutoff skips any peers that have recently been attempted
or failed. (Previously, the liveness check was only applied if the peer
was in the `Responded` state, which could lead to repeated retries of
`Failed` peers, particularly in small address books.)
Reconnection Order
Zebra prefers more useful peer states, then the earliest attempted,
failed, and responded times, then the most recent gossiped last seen
times.
Before this change, Zebra took the most recent time in all the peer time
fields, and used that time for liveness and ordering. This led to
confusion between trusted and untrusted data, and success and failure
times.
Unlike the previous order, the new order:
- tries all peers in each state, before re-trying any peer in that state,
and
- only checks the the gossiped untrusted last seen time
if all other times are equal.
* Preserve the later time if changes arrive out of order
* Update CandidateSet::next documentation
* Update CandidateSet state diagram
* Fix variant names in comments
* Explain why timestamps can be left out of MetaAddrChanges
* Add a simple test for the individual peer retry limit
* Only generate valid Arbitrary PeerServices values
* Add an individual peer retry limit AddressBook and CandidateSet test
* Stop deleting recently live addresses from the address book
If we delete recently live addresses from the address book, we can get a
new entry for them, and reconnect too rapidly.
* Rename functions to match similar tokio API
* Fix docs for service sorting
* Clarify a comment
* Cleanup a variable and comments
* Remove blank lines in the CandidateSet state diagram
* Add a multi-peer proptest that checks outbound attempt fairness
* Fix a comment typo
Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
* Simplify time maths in MetaAddr
* Create a Duration32 type to simplify calculations and comparisons
* Rename variables for clarity
* Split a string constant into multiple lines
* Make constants match rustdoc order
Co-authored-by: Janito Vaqueiro Ferreira Filho <janito.vff@gmail.com>
2021-06-18 05:30:44 -07:00
|
|
|
new_book
|
|
|
|
}
|
|
|
|
|
2021-12-19 16:44:43 -08:00
|
|
|
    /// Return a watch channel for the address book metrics.
    ///
    /// The metrics in the watch channel are only updated when the address book updates,
    /// so they can be significantly outdated if Zebra is disconnected or hung.
    ///
    /// The current metrics value is marked as seen.
    /// So `Receiver::changed` will only return after the next address book update.
    pub fn address_metrics_watcher(&self) -> watch::Receiver<AddressMetrics> {
        self.address_metrics_tx.subscribe()
    }
|
|
|
|
|
2023-04-25 05:50:38 -07:00
|
|
|
    /// Set the local listener address. Only for use in tests.
    ///
    /// NOTE(review): unlike the constructors, this setter stores `addr` as-is,
    /// without passing it through `canonical_socket_addr` — confirm tests rely
    /// on that before changing it.
    #[cfg(any(test, feature = "proptest-impl"))]
    pub fn set_local_listener(&mut self, addr: SocketAddr) {
        self.local_listener = addr;
    }
|
|
|
|
|
2021-05-06 18:08:06 -07:00
|
|
|
/// Get the local listener address.
|
2021-06-21 19:16:59 -07:00
|
|
|
///
|
|
|
|
/// This address contains minimal state, but it is not sanitized.
|
2023-05-24 16:53:53 -07:00
|
|
|
pub fn local_listener_meta_addr(&self, now: chrono::DateTime<Utc>) -> MetaAddr {
|
|
|
|
let now: DateTime32 = now.try_into().expect("will succeed until 2038");
|
|
|
|
|
2023-05-14 08:06:07 -07:00
|
|
|
MetaAddr::new_local_listener_change(self.local_listener)
|
2023-05-24 16:53:53 -07:00
|
|
|
.local_listener_into_new_meta_addr(now)
|
2021-05-06 18:08:06 -07:00
|
|
|
}
|
|
|
|
|
2023-05-14 08:06:07 -07:00
|
|
|
    /// Get the local listener [`SocketAddr`].
    ///
    /// Returns the stored address directly, without canonicalization.
    pub fn local_listener_socket_addr(&self) -> SocketAddr {
        self.local_listener
    }
|
|
|
|
|
2023-06-06 01:28:14 -07:00
|
|
|
/// Get the active addresses in `self` in random order with sanitized timestamps,
|
|
|
|
/// including our local listener address.
|
2023-11-20 19:32:23 -08:00
|
|
|
///
|
|
|
|
/// Limited to a the number of peer addresses Zebra should give out per `GetAddr` request.
|
|
|
|
pub fn fresh_get_addr_response(&self) -> Vec<MetaAddr> {
|
|
|
|
let now = Utc::now();
|
|
|
|
let mut peers = self.sanitized(now);
|
|
|
|
let address_limit = peers.len().div_ceil(ADDR_RESPONSE_LIMIT_DENOMINATOR);
|
|
|
|
peers.truncate(MAX_ADDRS_IN_MESSAGE.min(address_limit));
|
|
|
|
|
|
|
|
peers
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Get the active addresses in `self` in random order with sanitized timestamps,
|
|
|
|
/// including our local listener address.
|
|
|
|
pub(crate) fn sanitized(&self, now: chrono::DateTime<Utc>) -> Vec<MetaAddr> {
|
2020-08-11 13:07:44 -07:00
|
|
|
use rand::seq::SliceRandom;
|
2021-02-17 17:18:32 -08:00
|
|
|
let _guard = self.span.enter();
|
2021-06-21 19:16:59 -07:00
|
|
|
|
|
|
|
let mut peers = self.by_addr.clone();
|
|
|
|
|
|
|
|
// Unconditionally add our local listener address to the advertised peers,
|
|
|
|
// to replace any self-connection failures. The address book and change
|
|
|
|
// constructors make sure that the SocketAddr is canonical.
|
2023-05-24 16:53:53 -07:00
|
|
|
let local_listener = self.local_listener_meta_addr(now);
|
2021-06-21 19:16:59 -07:00
|
|
|
peers.insert(local_listener.addr, local_listener);
|
|
|
|
|
|
|
|
// Then sanitize and shuffle
|
2023-06-06 01:28:14 -07:00
|
|
|
let mut peers: Vec<MetaAddr> = peers
|
2021-12-03 10:09:43 -08:00
|
|
|
.descending_values()
|
2024-03-19 13:45:27 -07:00
|
|
|
.filter_map(|meta_addr| meta_addr.sanitize(&self.network))
|
2023-06-06 01:28:14 -07:00
|
|
|
// # Security
|
|
|
|
//
|
|
|
|
// Remove peers that:
|
2021-06-28 22:12:27 -07:00
|
|
|
// - last responded more than three hours ago, or
|
|
|
|
// - haven't responded yet but were reported last seen more than three hours ago
|
|
|
|
//
|
|
|
|
// This prevents Zebra from gossiping nodes that are likely unreachable. Gossiping such
|
|
|
|
// nodes impacts the network health, because connection attempts end up being wasted on
|
|
|
|
// peers that are less likely to respond.
|
2021-12-03 10:09:43 -08:00
|
|
|
.filter(|addr| addr.is_active_for_gossip(now))
|
2023-06-06 01:28:14 -07:00
|
|
|
.collect();
|
|
|
|
|
2020-08-11 13:07:44 -07:00
|
|
|
peers.shuffle(&mut rand::thread_rng());
|
2023-06-06 01:28:14 -07:00
|
|
|
|
2020-08-11 13:07:44 -07:00
|
|
|
peers
|
|
|
|
}
|
|
|
|
|
2023-06-06 01:28:14 -07:00
|
|
|
/// Get the active addresses in `self`, in preferred caching order,
|
|
|
|
/// excluding our local listener address.
|
|
|
|
pub fn cacheable(&self, now: chrono::DateTime<Utc>) -> Vec<MetaAddr> {
|
|
|
|
let _guard = self.span.enter();
|
|
|
|
|
|
|
|
let peers = self.by_addr.clone();
|
|
|
|
|
|
|
|
// Get peers in preferred order, then keep the recently active ones
|
|
|
|
peers
|
|
|
|
.descending_values()
|
|
|
|
// # Security
|
|
|
|
//
|
|
|
|
// Remove peers that:
|
|
|
|
// - last responded more than three hours ago, or
|
|
|
|
// - haven't responded yet but were reported last seen more than three hours ago
|
|
|
|
//
|
|
|
|
// This prevents Zebra from caching nodes that are likely unreachable,
|
|
|
|
// which improves startup time and reliability.
|
|
|
|
.filter(|addr| addr.is_active_for_gossip(now))
|
|
|
|
.cloned()
|
|
|
|
.collect()
|
|
|
|
}
|
|
|
|
|
2021-12-03 10:09:43 -08:00
|
|
|
/// Look up `addr` in the address book, and return its [`MetaAddr`].
|
|
|
|
///
|
|
|
|
/// Converts `addr` to a canonical address before looking it up.
|
2023-05-14 08:06:07 -07:00
|
|
|
pub fn get(&mut self, addr: PeerSocketAddr) -> Option<MetaAddr> {
|
|
|
|
let addr = canonical_peer_addr(*addr);
|
2021-12-03 10:09:43 -08:00
|
|
|
|
|
|
|
// Unfortunately, `OrderedMap` doesn't implement `get`.
|
|
|
|
let meta_addr = self.by_addr.remove(&addr);
|
|
|
|
|
|
|
|
if let Some(meta_addr) = meta_addr {
|
|
|
|
self.by_addr.insert(addr, meta_addr);
|
|
|
|
}
|
|
|
|
|
|
|
|
meta_addr
|
|
|
|
}
|
|
|
|
|
2023-07-05 22:54:10 -07:00
|
|
|
/// Returns true if `updated` needs to be applied to the recent outbound peer connection IP cache.
|
|
|
|
///
|
|
|
|
/// Checks if there are no existing entries in the address book with this IP,
|
|
|
|
/// or if `updated` has a more recent `last_response` requiring the outbound connector to wait
|
|
|
|
/// longer before initiating handshakes with peers at this IP.
|
|
|
|
///
|
|
|
|
/// This code only needs to check a single cache entry, rather than the entire address book,
|
|
|
|
/// because other code maintains these invariants:
|
|
|
|
/// - `last_response` times for an entry can only increase.
|
|
|
|
/// - this is the only field checked by `has_connection_recently_responded()`
|
|
|
|
///
|
|
|
|
/// See [`AddressBook::is_ready_for_connection_attempt_with_ip`] for more details.
|
|
|
|
fn should_update_most_recent_by_ip(&self, updated: MetaAddr) -> bool {
|
|
|
|
let Some(most_recent_by_ip) = self.most_recent_by_ip.as_ref() else {
|
2023-08-24 21:08:13 -07:00
|
|
|
return false;
|
2023-07-05 22:54:10 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
if let Some(previous) = most_recent_by_ip.get(&updated.addr.ip()) {
|
|
|
|
updated.last_connection_state == PeerAddrState::Responded
|
|
|
|
&& updated.last_response() > previous.last_response()
|
|
|
|
} else {
|
|
|
|
updated.last_connection_state == PeerAddrState::Responded
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns true if `addr` is the latest entry for its IP, which is stored in `most_recent_by_ip`.
|
|
|
|
/// The entry is checked for an exact match to the IP and port of `addr`.
|
|
|
|
fn should_remove_most_recent_by_ip(&self, addr: PeerSocketAddr) -> bool {
|
|
|
|
let Some(most_recent_by_ip) = self.most_recent_by_ip.as_ref() else {
|
2023-08-24 21:08:13 -07:00
|
|
|
return false;
|
2023-07-05 22:54:10 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
if let Some(previous) = most_recent_by_ip.get(&addr.ip()) {
|
|
|
|
previous.addr == addr
|
|
|
|
} else {
|
|
|
|
false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-06-14 20:31:16 -07:00
|
|
|
    /// Apply `change` to the address book, returning the updated `MetaAddr`,
    /// if the change was valid.
    ///
    /// # Correctness
    ///
    /// All changes should go through `update`, so that the address book
    /// only contains valid outbound addresses.
    ///
    /// Change addresses must be canonical `PeerSocketAddr`s. This makes sure that
    /// each address book entry has a unique IP address.
    ///
    /// # Security
    ///
    /// This function must apply every attempted, responded, and failed change
    /// to the address book. This prevents rapid reconnections to the same peer.
    ///
    /// As an exception, this function can ignore all changes for specific
    /// [`PeerSocketAddr`]s. Ignored addresses will never be used to connect to
    /// peers.
    #[allow(clippy::unwrap_in_result)]
    pub fn update(&mut self, change: MetaAddrChange) -> Option<MetaAddr> {
        // Look up the existing entry (if any) before entering the span,
        // because `get` enters the span itself.
        let previous = self.get(change.addr());

        let _guard = self.span.enter();

        let instant_now = Instant::now();
        let chrono_now = Utc::now();

        // Merge `change` into `previous`; `None` means the change was rejected.
        let updated = change.apply_to_meta_addr(previous, instant_now, chrono_now);

        trace!(
            ?change,
            ?updated,
            ?previous,
            total_peers = self.by_addr.len(),
            recent_peers = self.recently_live_peers(chrono_now).len(),
            "calculated updated address book entry",
        );

        if let Some(updated) = updated {
            // Ignore invalid outbound addresses.
            // (Inbound connections can be monitored via Zebra's metrics.)
            if !updated.address_is_valid_for_outbound(&self.network) {
                return None;
            }

            // Ignore invalid outbound services and other info,
            // but only if the peer has never been attempted.
            //
            // Otherwise, if we got the info directly from the peer,
            // store it in the address book, so we know not to reconnect.
            if !updated.last_known_info_is_valid_for_outbound(&self.network)
                && updated.last_connection_state.is_never_attempted()
            {
                return None;
            }

            self.by_addr.insert(updated.addr, updated);

            // Add the address to `most_recent_by_ip` if it sent the most recent
            // response Zebra has received from this IP.
            if self.should_update_most_recent_by_ip(updated) {
                self.most_recent_by_ip
                    .as_mut()
                    .expect("should be some when should_update_most_recent_by_ip is true")
                    .insert(updated.addr.ip(), updated);
            }

            debug!(
                ?change,
                ?updated,
                ?previous,
                total_peers = self.by_addr.len(),
                recent_peers = self.recently_live_peers(chrono_now).len(),
                "updated address book entry",
            );

            // Security: Limit the number of peers in the address book.
            //
            // We only delete outdated peers when we have too many peers.
            // If we deleted them as soon as they became too old,
            // then other peers could re-insert them into the address book.
            // And we would start connecting to those outdated peers again,
            // ignoring the age limit in [`MetaAddr::is_probably_reachable`].
            while self.by_addr.len() > self.addr_limit {
                // `peers()` yields entries in reconnection attempt order,
                // so `next_back()` is the least preferred peer.
                let surplus_peer = self
                    .peers()
                    .next_back()
                    .expect("just checked there is at least one peer");

                self.by_addr.remove(&surplus_peer.addr);

                // If the evicted peer is the exact entry cached in
                // `most_recent_by_ip` for its IP, remove it there as well.
                if self.should_remove_most_recent_by_ip(surplus_peer.addr) {
                    self.most_recent_by_ip
                        .as_mut()
                        .expect("should be some when should_remove_most_recent_by_ip is true")
                        .remove(&surplus_peer.addr.ip());
                }

                debug!(
                    surplus = ?surplus_peer,
                    ?updated,
                    total_peers = self.by_addr.len(),
                    recent_peers = self.recently_live_peers(chrono_now).len(),
                    "removed surplus address book entry",
                );
            }

            assert!(self.len() <= self.addr_limit);

            // Drop the span guard before updating metrics,
            // presumably because `update_metrics` manages the span itself —
            // NOTE(review): confirm against `update_metrics`' definition.
            std::mem::drop(_guard);
            self.update_metrics(instant_now, chrono_now);
        }

        updated
    }
|
|
|
|
|
|
|
|
/// Removes the entry with `addr`, returning it if it exists
|
|
|
|
///
|
2021-05-06 17:50:04 -07:00
|
|
|
/// # Note
|
2021-02-17 17:18:32 -08:00
|
|
|
///
|
2021-05-06 17:50:04 -07:00
|
|
|
/// All address removals should go through `take`, so that the address
|
|
|
|
/// book metrics are accurate.
|
2021-06-21 19:16:59 -07:00
|
|
|
#[allow(dead_code)]
|
2023-05-14 08:06:07 -07:00
|
|
|
fn take(&mut self, removed_addr: PeerSocketAddr) -> Option<MetaAddr> {
|
2021-02-17 17:18:32 -08:00
|
|
|
let _guard = self.span.enter();
|
2021-12-03 10:09:43 -08:00
|
|
|
|
|
|
|
let instant_now = Instant::now();
|
|
|
|
let chrono_now = Utc::now();
|
|
|
|
|
2021-02-17 17:18:32 -08:00
|
|
|
trace!(
|
|
|
|
?removed_addr,
|
|
|
|
total_peers = self.by_addr.len(),
|
2023-01-16 23:09:07 -08:00
|
|
|
recent_peers = self.recently_live_peers(chrono_now).len(),
|
2021-02-17 17:18:32 -08:00
|
|
|
);
|
|
|
|
|
|
|
|
if let Some(entry) = self.by_addr.remove(&removed_addr) {
|
2023-07-05 22:54:10 -07:00
|
|
|
// Check if this surplus peer's addr matches that in `most_recent_by_ip`
|
|
|
|
// for this the surplus peer's ip to remove it there as well.
|
|
|
|
if self.should_remove_most_recent_by_ip(entry.addr) {
|
|
|
|
if let Some(most_recent_by_ip) = self.most_recent_by_ip.as_mut() {
|
|
|
|
most_recent_by_ip.remove(&entry.addr.ip());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-03-15 05:31:25 -07:00
|
|
|
std::mem::drop(_guard);
|
2021-12-03 10:09:43 -08:00
|
|
|
self.update_metrics(instant_now, chrono_now);
|
2021-02-17 17:18:32 -08:00
|
|
|
Some(entry)
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
2019-10-17 15:42:19 -07:00
|
|
|
}
|
2019-10-17 16:25:24 -07:00
|
|
|
|
2023-05-14 08:06:07 -07:00
|
|
|
/// Returns true if the given [`PeerSocketAddr`] is pending a reconnection
|
2021-02-17 17:18:32 -08:00
|
|
|
/// attempt.
|
2023-05-14 08:06:07 -07:00
|
|
|
pub fn pending_reconnection_addr(&mut self, addr: PeerSocketAddr) -> bool {
|
2021-12-03 10:09:43 -08:00
|
|
|
let meta_addr = self.get(addr);
|
|
|
|
|
2019-10-21 15:56:16 -07:00
|
|
|
let _guard = self.span.enter();
|
2021-12-03 10:09:43 -08:00
|
|
|
match meta_addr {
|
2020-02-04 22:53:24 -08:00
|
|
|
None => false,
|
2021-02-17 17:18:32 -08:00
|
|
|
Some(peer) => peer.last_connection_state == PeerAddrState::AttemptPending,
|
2019-10-18 09:54:34 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-17 17:18:32 -08:00
|
|
|
/// Return an iterator over all peers.
|
|
|
|
///
|
2021-12-03 10:09:43 -08:00
|
|
|
/// Returns peers in reconnection attempt order, including recently connected peers.
|
2023-12-04 20:06:42 -08:00
|
|
|
pub fn peers(&'_ self) -> impl DoubleEndedIterator<Item = MetaAddr> + '_ {
|
2019-10-21 15:56:16 -07:00
|
|
|
let _guard = self.span.enter();
|
2021-12-03 10:09:43 -08:00
|
|
|
self.by_addr.descending_values().cloned()
|
2019-10-17 16:25:24 -07:00
|
|
|
}
|
|
|
|
|
2023-07-05 22:54:10 -07:00
|
|
|
/// Is this IP ready for a new outbound connection attempt?
|
|
|
|
/// Checks if the outbound connection with the most recent response at this IP has recently responded.
|
|
|
|
///
|
|
|
|
/// Note: last_response times may remain live for a long time if the local clock is changed to an earlier time.
|
|
|
|
fn is_ready_for_connection_attempt_with_ip(
|
|
|
|
&self,
|
|
|
|
ip: &IpAddr,
|
|
|
|
chrono_now: chrono::DateTime<Utc>,
|
|
|
|
) -> bool {
|
|
|
|
let Some(most_recent_by_ip) = self.most_recent_by_ip.as_ref() else {
|
|
|
|
// if we're not checking IPs, any connection is allowed
|
|
|
|
return true;
|
|
|
|
};
|
|
|
|
let Some(same_ip_peer) = most_recent_by_ip.get(ip) else {
|
|
|
|
// If there's no entry for this IP, any connection is allowed
|
|
|
|
return true;
|
|
|
|
};
|
|
|
|
!same_ip_peer.has_connection_recently_responded(chrono_now)
|
|
|
|
}
|
|
|
|
|
2021-02-17 17:18:32 -08:00
|
|
|
/// Return an iterator over peers that are due for a reconnection attempt,
|
|
|
|
/// in reconnection attempt order.
|
2021-12-03 10:09:43 -08:00
|
|
|
pub fn reconnection_peers(
|
|
|
|
&'_ self,
|
|
|
|
instant_now: Instant,
|
|
|
|
chrono_now: chrono::DateTime<Utc>,
|
2023-12-04 20:06:42 -08:00
|
|
|
) -> impl DoubleEndedIterator<Item = MetaAddr> + '_ {
|
2019-10-21 15:56:16 -07:00
|
|
|
let _guard = self.span.enter();
|
2019-10-17 16:25:24 -07:00
|
|
|
|
2021-12-03 10:09:43 -08:00
|
|
|
// Skip live peers, and peers pending a reconnect attempt.
|
|
|
|
// The peers are already stored in sorted order.
|
2021-02-17 17:18:32 -08:00
|
|
|
self.by_addr
|
2021-12-03 10:09:43 -08:00
|
|
|
.descending_values()
|
2022-06-13 21:58:37 -07:00
|
|
|
.filter(move |peer| {
|
2024-03-19 13:45:27 -07:00
|
|
|
peer.is_ready_for_connection_attempt(instant_now, chrono_now, &self.network)
|
2023-07-05 22:54:10 -07:00
|
|
|
&& self.is_ready_for_connection_attempt_with_ip(&peer.addr.ip(), chrono_now)
|
2022-06-13 21:58:37 -07:00
|
|
|
})
|
2020-09-03 10:09:25 -07:00
|
|
|
.cloned()
|
|
|
|
}
|
|
|
|
|
2021-12-03 10:09:43 -08:00
|
|
|
/// Return an iterator over all the peers in `state`,
|
|
|
|
/// in reconnection attempt order, including recently connected peers.
|
2021-12-06 11:09:10 -08:00
|
|
|
pub fn state_peers(
|
|
|
|
&'_ self,
|
|
|
|
state: PeerAddrState,
|
2023-12-04 20:06:42 -08:00
|
|
|
) -> impl DoubleEndedIterator<Item = MetaAddr> + '_ {
|
2020-09-03 10:09:25 -07:00
|
|
|
let _guard = self.span.enter();
|
|
|
|
|
2021-02-17 17:18:32 -08:00
|
|
|
self.by_addr
|
2021-12-03 10:09:43 -08:00
|
|
|
.descending_values()
|
2021-02-17 17:18:32 -08:00
|
|
|
.filter(move |peer| peer.last_connection_state == state)
|
2019-10-17 21:19:23 -07:00
|
|
|
.cloned()
|
2019-10-17 16:25:24 -07:00
|
|
|
}
|
2019-10-17 17:54:08 -07:00
|
|
|
|
2021-12-03 10:09:43 -08:00
|
|
|
/// Return an iterator over peers that might be connected,
|
|
|
|
/// in reconnection attempt order.
|
|
|
|
pub fn maybe_connected_peers(
|
|
|
|
&'_ self,
|
|
|
|
instant_now: Instant,
|
|
|
|
chrono_now: chrono::DateTime<Utc>,
|
2023-12-04 20:06:42 -08:00
|
|
|
) -> impl DoubleEndedIterator<Item = MetaAddr> + '_ {
|
2021-02-17 17:18:32 -08:00
|
|
|
let _guard = self.span.enter();
|
|
|
|
|
|
|
|
self.by_addr
|
2021-12-03 10:09:43 -08:00
|
|
|
.descending_values()
|
2022-06-13 21:58:37 -07:00
|
|
|
.filter(move |peer| {
|
2024-03-19 13:45:27 -07:00
|
|
|
!peer.is_ready_for_connection_attempt(instant_now, chrono_now, &self.network)
|
2022-06-13 21:58:37 -07:00
|
|
|
})
|
2021-02-17 17:18:32 -08:00
|
|
|
.cloned()
|
2019-10-18 09:27:28 -07:00
|
|
|
}
|
|
|
|
|
2019-10-21 15:24:17 -07:00
|
|
|
    /// Returns the number of entries in this address book.
    pub fn len(&self) -> usize {
        self.by_addr.len()
    }
|
|
|
|
|
2021-03-15 04:52:58 -07:00
|
|
|
    /// Returns metrics for the addresses in this address book.
    /// Only for use in tests.
    ///
    /// # Correctness
    ///
    /// Use [`AddressBook::address_metrics_watcher().borrow()`] in production code,
    /// to avoid deadlocks.
    #[cfg(test)]
    pub fn address_metrics(&self, now: chrono::DateTime<Utc>) -> AddressMetrics {
        // Thin test-only wrapper around the internal implementation.
        self.address_metrics_internal(now)
    }
|
|
|
|
|
|
|
|
/// Returns metrics for the addresses in this address book.
|
|
|
|
///
|
|
|
|
/// # Correctness
|
|
|
|
///
|
|
|
|
/// External callers should use [`AddressBook::address_metrics_watcher().borrow()`]
|
|
|
|
/// in production code, to avoid deadlocks.
|
|
|
|
/// (Using the watch channel receiver does not lock the address book mutex.)
|
|
|
|
fn address_metrics_internal(&self, now: chrono::DateTime<Utc>) -> AddressMetrics {
|
2021-02-17 17:18:32 -08:00
|
|
|
let responded = self.state_peers(PeerAddrState::Responded).count();
|
2021-05-06 17:50:04 -07:00
|
|
|
let never_attempted_gossiped = self
|
|
|
|
.state_peers(PeerAddrState::NeverAttemptedGossiped)
|
|
|
|
.count();
|
2021-02-17 17:18:32 -08:00
|
|
|
let failed = self.state_peers(PeerAddrState::Failed).count();
|
2021-03-15 04:52:58 -07:00
|
|
|
let attempt_pending = self.state_peers(PeerAddrState::AttemptPending).count();
|
2021-02-17 17:18:32 -08:00
|
|
|
|
2023-01-16 23:09:07 -08:00
|
|
|
let recently_live = self.recently_live_peers(now).len();
|
2021-02-17 17:18:32 -08:00
|
|
|
let recently_stopped_responding = responded
|
|
|
|
.checked_sub(recently_live)
|
|
|
|
.expect("all recently live peers must have responded");
|
|
|
|
|
2023-04-13 01:42:17 -07:00
|
|
|
let num_addresses = self.len();
|
|
|
|
|
2021-03-15 04:52:58 -07:00
|
|
|
AddressMetrics {
|
|
|
|
responded,
|
2021-05-06 17:50:04 -07:00
|
|
|
never_attempted_gossiped,
|
2021-03-15 04:52:58 -07:00
|
|
|
failed,
|
|
|
|
attempt_pending,
|
|
|
|
recently_live,
|
|
|
|
recently_stopped_responding,
|
2023-04-13 01:42:17 -07:00
|
|
|
num_addresses,
|
|
|
|
address_limit: self.addr_limit,
|
2021-03-15 04:52:58 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Update the metrics for this address book.
|
2021-12-03 10:09:43 -08:00
|
|
|
fn update_metrics(&mut self, instant_now: Instant, chrono_now: chrono::DateTime<Utc>) {
|
2021-03-15 04:52:58 -07:00
|
|
|
let _guard = self.span.enter();
|
|
|
|
|
2021-12-19 16:44:43 -08:00
|
|
|
let m = self.address_metrics_internal(chrono_now);
|
|
|
|
|
|
|
|
// Ignore errors: we don't care if any receivers are listening.
|
|
|
|
let _ = self.address_metrics_tx.send(m);
|
2021-03-15 04:52:58 -07:00
|
|
|
|
|
|
|
// TODO: rename to address_book.[state_name]
|
2024-01-01 17:26:54 -08:00
|
|
|
metrics::gauge!("candidate_set.responded").set(m.responded as f64);
|
|
|
|
metrics::gauge!("candidate_set.gossiped").set(m.never_attempted_gossiped as f64);
|
|
|
|
metrics::gauge!("candidate_set.failed").set(m.failed as f64);
|
|
|
|
metrics::gauge!("candidate_set.pending").set(m.attempt_pending as f64);
|
2021-03-15 04:52:58 -07:00
|
|
|
|
2021-02-17 17:18:32 -08:00
|
|
|
// TODO: rename to address_book.responded.recently_live
|
2024-01-01 17:26:54 -08:00
|
|
|
metrics::gauge!("candidate_set.recently_live").set(m.recently_live as f64);
|
2021-02-17 17:18:32 -08:00
|
|
|
// TODO: rename to address_book.responded.stopped_responding
|
2024-01-01 17:26:54 -08:00
|
|
|
metrics::gauge!("candidate_set.disconnected").set(m.recently_stopped_responding as f64);
|
2021-02-17 17:18:32 -08:00
|
|
|
|
2021-03-15 05:31:25 -07:00
|
|
|
std::mem::drop(_guard);
|
2021-12-03 10:09:43 -08:00
|
|
|
self.log_metrics(&m, instant_now);
|
2021-03-15 05:31:25 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Log metrics for this address book
|
2021-12-03 10:09:43 -08:00
|
|
|
fn log_metrics(&mut self, m: &AddressMetrics, now: Instant) {
|
2021-03-15 05:31:25 -07:00
|
|
|
let _guard = self.span.enter();
|
|
|
|
|
|
|
|
trace!(
|
2021-03-15 04:52:58 -07:00
|
|
|
address_metrics = ?m,
|
2021-02-17 17:18:32 -08:00
|
|
|
);
|
2021-03-15 05:31:25 -07:00
|
|
|
|
|
|
|
if m.responded > 0 {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// These logs are designed to be human-readable in a terminal, at the
|
|
|
|
// default Zebra log level. If you need to know address states for
|
|
|
|
// every request, use the trace-level logs, or the metrics exporter.
|
|
|
|
if let Some(last_address_log) = self.last_address_log {
|
|
|
|
// Avoid duplicate address logs
|
2021-12-03 10:09:43 -08:00
|
|
|
if now.saturating_duration_since(last_address_log).as_secs() < 60 {
|
2021-03-15 05:31:25 -07:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// Suppress initial logs until the peer set has started up.
|
|
|
|
// There can be multiple address changes before the first peer has
|
|
|
|
// responded.
|
2021-12-03 10:09:43 -08:00
|
|
|
self.last_address_log = Some(now);
|
2021-03-15 05:31:25 -07:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2021-12-03 10:09:43 -08:00
|
|
|
self.last_address_log = Some(now);
|
2021-03-15 05:31:25 -07:00
|
|
|
// if all peers have failed
|
2023-11-27 16:30:13 -08:00
|
|
|
if m.responded + m.attempt_pending + m.never_attempted_gossiped == 0 {
|
2021-03-15 05:31:25 -07:00
|
|
|
warn!(
|
|
|
|
address_metrics = ?m,
|
|
|
|
"all peer addresses have failed. Hint: check your network connection"
|
|
|
|
);
|
|
|
|
} else {
|
|
|
|
info!(
|
|
|
|
address_metrics = ?m,
|
|
|
|
"no active peer connections: trying gossiped addresses"
|
|
|
|
);
|
|
|
|
}
|
2019-10-21 15:24:17 -07:00
|
|
|
}
|
2019-10-17 16:25:24 -07:00
|
|
|
}
|
|
|
|
|
2023-01-16 23:09:07 -08:00
|
|
|
impl AddressBookPeers for AddressBook {
|
|
|
|
fn recently_live_peers(&self, now: chrono::DateTime<Utc>) -> Vec<MetaAddr> {
|
|
|
|
let _guard = self.span.enter();
|
|
|
|
|
|
|
|
self.by_addr
|
|
|
|
.descending_values()
|
|
|
|
.filter(|peer| peer.was_recently_live(now))
|
|
|
|
.cloned()
|
|
|
|
.collect()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl AddressBookPeers for Arc<Mutex<AddressBook>> {
|
|
|
|
fn recently_live_peers(&self, now: chrono::DateTime<Utc>) -> Vec<MetaAddr> {
|
|
|
|
self.lock()
|
|
|
|
.expect("panic in a previous thread that was holding the mutex")
|
|
|
|
.recently_live_peers(now)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-06-14 20:31:16 -07:00
|
|
|
impl Extend<MetaAddrChange> for AddressBook {
    fn extend<T>(&mut self, iter: T)
    where
        T: IntoIterator<Item = MetaAddrChange>,
    {
        // Apply each change through `update`, which enforces the book's
        // security and limit invariants.
        iter.into_iter().for_each(|change| {
            self.update(change);
        });
    }
}
|
2021-12-19 16:44:43 -08:00
|
|
|
|
|
|
|
impl Clone for AddressBook {
|
|
|
|
/// Clone the addresses, address limit, local listener address, and span.
|
|
|
|
///
|
|
|
|
/// Cloned address books have a separate metrics struct watch channel, and an empty last address log.
|
|
|
|
///
|
|
|
|
/// All address books update the same prometheus metrics.
|
|
|
|
fn clone(&self) -> AddressBook {
|
|
|
|
// The existing metrics might be outdated, but we avoid calling `update_metrics`,
|
|
|
|
// so we don't overwrite the prometheus metrics from the main address book.
|
|
|
|
let (address_metrics_tx, _address_metrics_rx) =
|
|
|
|
watch::channel(*self.address_metrics_tx.borrow());
|
|
|
|
|
|
|
|
AddressBook {
|
|
|
|
by_addr: self.by_addr.clone(),
|
|
|
|
local_listener: self.local_listener,
|
2024-03-19 13:45:27 -07:00
|
|
|
network: self.network.clone(),
|
2022-06-13 21:58:37 -07:00
|
|
|
addr_limit: self.addr_limit,
|
2021-12-19 16:44:43 -08:00
|
|
|
span: self.span.clone(),
|
|
|
|
address_metrics_tx,
|
|
|
|
last_address_log: None,
|
2023-07-05 22:54:10 -07:00
|
|
|
most_recent_by_ip: self.most_recent_by_ip.clone(),
|
2021-12-19 16:44:43 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|