// zebra/zebra-network/src/peer_set/candidate_set.rs

use std::{cmp::min, mem, sync::Arc, time::Duration};
use futures::stream::{FuturesUnordered, StreamExt};
use tokio::time::{sleep, sleep_until, timeout, Instant, Sleep};
use tower::{Service, ServiceExt};
use zebra_chain::serialization::DateTime32;
use crate::{constants, types::MetaAddr, AddressBook, BoxError, Request, Response};
#[cfg(test)]
mod tests;
/// The `CandidateSet` manages the `PeerSet`'s peer reconnection attempts.
///
/// It divides the set of all possible candidate peers into disjoint subsets,
/// using the `PeerAddrState`:
///
/// 1. `Responded` peers, which we previously had inbound or outbound connections
/// to. If we have not received any messages from a `Responded` peer within a
/// cutoff time, we assume that it has disconnected or hung, and attempt
/// reconnection;
/// 2. `NeverAttempted` peers, which we learned about from other peers or a DNS
/// seeder, but have never connected to;
/// 3. `Failed` peers, to whom we attempted to connect but were unable to;
/// 4. `AttemptPending` peers, which we've recently queued for reconnection.
///
/// ```ascii,no_run
/// ┌──────────────────┐
/// │ PeerSet │
/// │GetPeers Responses│
/// └──────────────────┘
/// │
/// │
/// │
/// │
/// ▼
/// filter by Λ
/// !contains_addr
/// ┌────────────────────────────▶▕ ▏
/// │ ╲
/// │ V
/// │ │
/// │ │
/// │ │
/// │ ┌──────────────────┐ │
/// │ │ Inbound │ │
/// │ │ Peer Connections │ │
/// │ └──────────────────┘ │
/// │ │ │
/// ├──────────┼────────────────────┼───────────────────────────────┐
/// │ PeerSet ▼ AddressBook ▼ │
/// │ ┌─────────────┐ ┌────────────────┐ ┌─────────────┐ │
/// │ │ Possibly │ │`NeverAttempted`│ │ `Failed` │ │
/// │ │Disconnected │ │ Peers │ │ Peers │◀┼┐
/// │ │ `Responded` │ │ │ │ │ ││
/// │ │ Peers │ │ │ │ │ ││
/// │ └─────────────┘ └────────────────┘ └─────────────┘ ││
/// │ │ │ │ ││
/// │ #1 oldest_first #2 newest_first #3 oldest_first ││
/// │ │ │ │ ││
/// │ ├──────────────────────┴──────────────────────┘ ││
/// │ │ disjoint `PeerAddrState`s ││
/// ├────────┼──────────────────────────────────────────────────────┘│
/// │ ▼ │
/// │ Λ │
/// │ ╲ filter by │
/// └─────▶▕ ▏!is_potentially_connected │
/// ╲ to remove live │
/// V `Responded` peers │
/// │ │
/// │ Try outbound connection │
/// ▼ │
/// ┌────────────────┐ │
/// │`AttemptPending`│ │
/// │ Peers │ │
/// │ │ │
/// └────────────────┘ │
/// │ │
/// │ │
/// ▼ │
/// Λ │
/// ╲ │
/// ▕ ▏─────────────────────────────────────────────────────┘
/// ╲ connection failed, update last_seen to now()
/// V
/// │
/// │
/// ▼
/// ┌────────────┐
/// │ send │
/// │peer::Client│
/// │to Discover │
/// └────────────┘
/// │
/// │
/// ▼
/// ┌───────────────────────────────────────┐
/// │ every time we receive a peer message: │
/// │ * update state to `Responded` │
/// │ * update last_seen to now() │
/// └───────────────────────────────────────┘
///
/// ```
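///
/// ## Usage sketch
///
/// A rough sketch of how a crawler task might drive a [`CandidateSet`]
/// (not compiled as a doctest; `address_book`, `peer_service`, and
/// `initial_peer_count` are assumed to be supplied by the caller):
///
/// ```ignore
/// let mut candidates = CandidateSet::new(address_book, peer_service);
///
/// // Seed the candidate set once, with a fanout matching the initial peer count.
/// candidates.update_initial(initial_peer_count).await?;
///
/// loop {
///     // Ask existing peers for more addresses (rate-limited internally).
///     candidates.update().await?;
///
///     // Take the next reconnection candidate, if one is ready.
///     if let Some(candidate) = candidates.next().await {
///         // ... attempt an outbound handshake to `candidate.addr`, and call
///         // `report_failed` if the connection fails ...
///     }
/// }
/// ```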
// TODO:
// * draw arrow from the "peer message" box into the `Responded` state box
// * make the "disjoint states" box include `AttemptPending`
pub(super) struct CandidateSet<S> {
pub(super) address_book: Arc<std::sync::Mutex<AddressBook>>,
pub(super) peer_service: S,
wait_next_handshake: Sleep,
min_next_crawl: Instant,
}
impl<S> CandidateSet<S>
where
S: Service<Request, Response = Response, Error = BoxError>,
S::Future: Send + 'static,
{
/// Uses `address_book` and `peer_service` to manage a [`CandidateSet`] of peers.
pub fn new(
address_book: Arc<std::sync::Mutex<AddressBook>>,
peer_service: S,
) -> CandidateSet<S> {
CandidateSet {
address_book,
peer_service,
wait_next_handshake: sleep(Duration::from_secs(0)),
min_next_crawl: Instant::now(),
}
}
/// Update the peer set from the network, using the default fanout limit.
///
/// See [`update_initial`][Self::update_initial] for details.
pub async fn update(&mut self) -> Result<(), BoxError> {
self.update_timeout(None).await
}
/// Update the peer set from the network, limiting the fanout to
/// `fanout_limit`.
///
/// - Ask a few live [`Responded`] peers to send us more peers.
/// - Process all completed peer responses, adding new peers in the
/// [`NeverAttemptedGossiped`] state.
///
/// ## Correctness
///
/// Pass the initial peer set size as `fanout_limit` during initialization,
/// so that Zebra does not send duplicate requests to the same peer.
///
/// The crawler exits when update returns an error, so it must only return
/// errors on permanent failures.
///
/// The handshaker sets up the peer message receiver so it also sends a
/// [`Responded`] peer address update.
///
/// [`report_failed`][Self::report_failed] puts peers into the [`Failed`] state.
///
/// [`next`][Self::next] puts peers into the [`AttemptPending`] state.
///
/// ## Security
///
/// This call is rate-limited to prevent sending a burst of repeated requests for new peer
/// addresses. Each call will only update the [`CandidateSet`] if more time
/// than [`MIN_PEER_GET_ADDR_INTERVAL`][constants::MIN_PEER_GET_ADDR_INTERVAL] has passed since
/// the last call. Otherwise, the update is skipped.
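///
/// For example (a sketch, not compiled as a doctest; `candidates` and
/// `fanout_limit` are assumed to be supplied by the caller):
///
/// ```ignore
/// // Sends up to `fanout_limit` concurrent GetPeers requests.
/// candidates.update_initial(fanout_limit).await?;
///
/// // Called within MIN_PEER_GET_ADDR_INTERVAL of the previous update, so the
/// // fanout is skipped and this returns Ok(()) without sending any requests.
/// candidates.update().await?;
/// ```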
///
/// [`Responded`]: crate::PeerAddrState::Responded
/// [`NeverAttemptedGossiped`]: crate::PeerAddrState::NeverAttemptedGossiped
/// [`Failed`]: crate::PeerAddrState::Failed
/// [`AttemptPending`]: crate::PeerAddrState::AttemptPending
pub async fn update_initial(&mut self, fanout_limit: usize) -> Result<(), BoxError> {
self.update_timeout(Some(fanout_limit)).await
}
/// Update the peer set from the network, limiting the fanout to
/// `fanout_limit`, and imposing a timeout on the entire fanout.
///
/// See [`update_initial`][Self::update_initial] for details.
async fn update_timeout(&mut self, fanout_limit: Option<usize>) -> Result<(), BoxError> {
// SECURITY
//
// Rate limit sending `GetAddr` messages to peers.
if self.min_next_crawl <= Instant::now() {
// CORRECTNESS
//
// Use a timeout to avoid deadlocks when there are no connected
// peers, and:
// - we're waiting on a handshake to complete so there are peers, or
// - another task that handles or adds peers is waiting on this task
// to complete.
if let Ok(fanout_result) =
timeout(constants::REQUEST_TIMEOUT, self.update_fanout(fanout_limit)).await
{
fanout_result?;
} else {
// update must only return an error for permanent failures
info!("timeout waiting for the peer service to become ready");
}
self.min_next_crawl = Instant::now() + constants::MIN_PEER_GET_ADDR_INTERVAL;
}
Ok(())
}
/// Update the peer set from the network, limiting the fanout to
/// `fanout_limit`.
///
/// See [`update_initial`][Self::update_initial] for details.
///
/// # Correctness
///
/// This function does not have a timeout.
/// Use [`update_timeout`][Self::update_timeout] instead.
async fn update_fanout(&mut self, fanout_limit: Option<usize>) -> Result<(), BoxError> {
// Opportunistically crawl the network on every update call to ensure
// we're actively fetching peers. Continue independently of whether we
// actually receive any peers, but always ask the network for more.
//
// Because requests are load-balanced across existing peers, we can make
// multiple requests concurrently, which will be randomly assigned to
// existing peers, but we don't make too many because update may be
// called while the peer set is already loaded.
let mut responses = FuturesUnordered::new();
let fanout_limit = fanout_limit
.map(|fanout_limit| min(fanout_limit, constants::GET_ADDR_FANOUT))
.unwrap_or(constants::GET_ADDR_FANOUT);
debug!(?fanout_limit, "sending GetPeers requests");
// TODO: launch each fanout in its own task (might require tokio 1.6)
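// Wait until the peer service has capacity, then queue one GetPeers request
// per fanout slot; the responses are collected concurrently below.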
for _ in 0..fanout_limit {
let peer_service = self.peer_service.ready_and().await?;
responses.push(peer_service.call(Request::Peers));
}
while let Some(rsp) = responses.next().await {
match rsp {
Ok(Response::Peers(addrs)) => {
trace!(
addr_count = ?addrs.len(),
?addrs,
"got response to GetPeers"
);
let addrs = validate_addrs(addrs, DateTime32::now());
self.send_addrs(addrs);
}
Err(e) => {
// since we do a fanout, and new updates are triggered by
// each demand, we can ignore errors in individual responses
trace!(?e, "got error in GetPeers request");
}
Ok(_) => unreachable!("Peers requests always return Peers responses"),
}
}
Ok(())
}
/// Add new `addrs` to the address book.
fn send_addrs(&self, addrs: impl IntoIterator<Item = MetaAddr>) {
// # Correctness
//
// Briefly hold the address book threaded mutex, to extend
// the address list.
//
// Extend handles duplicate addresses internally.
self.address_book.lock().unwrap().extend(addrs);
}
/// Returns the next candidate for a connection attempt, if any are available.
///
/// Returns peers in this order:
/// - oldest `Responded` that are not live
/// - newest `NeverAttempted`
/// - oldest `Failed`
///
/// Skips `AttemptPending` peers and live `Responded` peers.
///
/// ## Correctness
///
/// `AttemptPending` peers will become `Responded` if they respond, or
/// become `Failed` if they time out or provide a bad response.
///
/// Live `Responded` peers will stay live if they keep responding, or
/// become a reconnection candidate if they stop responding.
///
/// ## Security
///
/// Zebra resists distributed denial of service attacks by making sure that
/// new peer connections are initiated at least
/// [`MIN_PEER_CONNECTION_INTERVAL`][constants::MIN_PEER_CONNECTION_INTERVAL] apart.
pub async fn next(&mut self) -> Option<MetaAddr> {
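// Rate-limit new outbound connections: store a new deadline, one
// MIN_PEER_CONNECTION_INTERVAL after the previous one (or after now, if the
// previous deadline has already passed), and keep the old `Sleep` in `sleep`,
// so this call waits out the previous deadline before yielding an address.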
let current_deadline = self.wait_next_handshake.deadline().max(Instant::now());
let mut sleep = sleep_until(current_deadline + constants::MIN_PEER_CONNECTION_INTERVAL);
mem::swap(&mut self.wait_next_handshake, &mut sleep);
// # Correctness
//
// In this critical section, we hold the address mutex, blocking the
// current thread, and all async tasks scheduled on that thread.
//
// To avoid deadlocks, the critical section:
// - must not acquire any other locks
// - must not await any futures
//
// To avoid hangs, any computation in the critical section should
// be kept to a minimum.
let reconnect = {
let mut guard = self.address_book.lock().unwrap();
// It's okay to return without sleeping here, because we're returning
// `None`. We only need to sleep before yielding an address.
let reconnect = guard.reconnection_peers().next()?;
let reconnect = MetaAddr::new_reconnect(&reconnect.addr, &reconnect.services);
guard.update(reconnect);
reconnect
};
// SECURITY: rate-limit new candidate connections
sleep.await;
Some(reconnect)
}
/// Mark `addr` as a failed peer.
pub fn report_failed(&mut self, addr: &MetaAddr) {
let addr = MetaAddr::new_errored(&addr.addr, &addr.services);
// # Correctness
//
// Briefly hold the address book threaded mutex, to update the state for
// a single address.
self.address_book.lock().unwrap().update(addr);
}
}
/// Check new `addrs` before adding them to the address book.
///
/// `last_seen_limit` is the maximum permitted last seen time, typically
/// [`DateTime32::now`].
///
/// If the data in an address is invalid, this function can:
/// - modify the address data, or
/// - delete the address.
///
/// # Security
///
/// Adjusts untrusted last seen times so they are not in the future. This stops
/// malicious peers keeping all their addresses at the front of the connection
/// queue. Honest peers with future clock skew also get adjusted.
///
/// Rejects all addresses if any calculated times overflow or underflow.
fn validate_addrs(
addrs: impl IntoIterator<Item = MetaAddr>,
last_seen_limit: DateTime32,
) -> impl Iterator<Item = MetaAddr> {
// Note: The address book handles duplicate addresses internally,
// so we don't need to de-duplicate addresses here.
// TODO:
// We should eventually implement these checks in this function:
// - Zebra should ignore peers that are older than 3 weeks (part of #1865)
// - Zebra should count back 3 weeks from the newest peer timestamp sent
// by the other peer, to compensate for clock skew
// - Zebra should limit the number of addresses it uses from a single Addrs
// response (#1869)
let mut addrs: Vec<_> = addrs.into_iter().collect();
limit_last_seen_times(&mut addrs, last_seen_limit);
addrs.into_iter()
}
/// Ensure all reported `last_seen` times are less than or equal to `last_seen_limit`.
///
/// If offsetting the `last_seen` times to be at or before the limit would
/// underflow the `u32` timestamp, all the addresses are considered invalid
/// and discarded.
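///
/// For example (illustrative values only): if `last_seen_limit` is `t` and the
/// reported times are `t - 70s`, `t - 10s`, and `t + 40s`, the newest time is
/// 40 seconds in the future, so every time is shifted back by 40 seconds,
/// giving `t - 110s`, `t - 50s`, and `t`. If shifting the oldest time back
/// would underflow the `u32` timestamp, all the addresses are rejected
/// instead.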
fn limit_last_seen_times(addrs: &mut Vec<MetaAddr>, last_seen_limit: DateTime32) {
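// Find the oldest and newest `last_seen` times reported for these addresses.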
let (oldest_reported_seen_timestamp, newest_reported_seen_timestamp) =
addrs
.iter()
.fold((u32::MAX, u32::MIN), |(oldest, newest), addr| {
let last_seen = addr.get_last_seen().timestamp();
(oldest.min(last_seen), newest.max(last_seen))
});
// If any time is in the future, adjust all times, to compensate for clock skew on honest peers
if newest_reported_seen_timestamp > last_seen_limit.timestamp() {
let offset = newest_reported_seen_timestamp - last_seen_limit.timestamp();
// Apply offset to oldest timestamp to check for underflow
let oldest_resulting_timestamp = oldest_reported_seen_timestamp as i64 - offset as i64;
if oldest_resulting_timestamp >= 0 {
// No underflow is possible, so apply offset to all addresses
for addr in addrs {
let old_last_seen = addr.get_last_seen().timestamp();
let new_last_seen = old_last_seen - offset;
addr.set_last_seen(new_last_seen.into());
}
} else {
// An underflow will occur, so reject all gossiped peers
addrs.clear();
}
}
}