2021-02-02 18:20:26 -08:00
|
|
|
use std::{collections::HashSet, net::SocketAddr, string::String, time::Duration};
|
2019-10-16 15:16:29 -07:00
|
|
|
|
2020-08-15 15:45:37 -07:00
|
|
|
use zebra_chain::parameters::Network;
|
2019-10-16 15:16:29 -07:00
|
|
|
|
2021-02-25 15:06:27 -08:00
|
|
|
use crate::BoxError;
|
|
|
|
|
|
|
|
/// The number of times Zebra will retry each initial peer, before checking if
/// any other initial peers have returned addresses.
///
/// Used by `Config::resolve_peers` as the per-host attempt budget passed to
/// `Config::resolve_host`.
const MAX_SINGLE_PEER_RETRIES: usize = 2;
|
|
|
|
|
2019-10-08 13:57:24 -07:00
|
|
|
/// Configuration for networking code.
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(deny_unknown_fields, default)]
pub struct Config {
    /// The address on which this node should listen for connections.
    pub listen_addr: SocketAddr,

    /// The network to connect to.
    pub network: Network,

    /// A list of initial peers for the peerset when operating on
    /// mainnet.
    ///
    /// Entries are host:port strings; DNS names are resolved by
    /// `Config::initial_peers`. Stored as a `HashSet` so duplicate
    /// entries are impossible (duplicates previously caused a panic,
    /// see the fix for the seed subcommand in #401).
    pub initial_mainnet_peers: HashSet<String>,

    /// A list of initial peers for the peerset when operating on
    /// testnet.
    ///
    /// Same format and uniqueness guarantee as `initial_mainnet_peers`.
    pub initial_testnet_peers: HashSet<String>,

    /// The initial target size for the peer set.
    ///
    /// If you have a slow network connection, and Zebra is having trouble
    /// syncing, try reducing the peer set size. You can also reduce the peer
    /// set size to reduce Zebra's bandwidth usage.
    pub peerset_initial_target_size: usize,

    /// How frequently we attempt to crawl the network to discover new peer
    /// connections.
    ///
    /// This duration only pertains to the rate at which zebra crawls for new
    /// peers, not the rate zebra connects to new peers, which is restricted to
    /// CandidateSet::PEER_CONNECTION_INTERVAL
    ///
    /// The serde alias keeps configs written for the old `new_peer_interval`
    /// field name working.
    #[serde(alias = "new_peer_interval")]
    pub crawl_new_peer_interval: Duration,
}
|
|
|
|
|
2019-10-24 13:28:42 -07:00
|
|
|
impl Config {
|
2021-02-25 15:06:27 -08:00
|
|
|
/// Concurrently resolves `peers` into zero or more IP addresses, with a
|
|
|
|
/// timeout of a few seconds on each DNS request.
|
2021-02-02 18:20:26 -08:00
|
|
|
///
|
2021-02-25 15:06:27 -08:00
|
|
|
/// If DNS resolution fails or times out for all peers, continues retrying
|
|
|
|
/// until at least one peer is found.
|
|
|
|
async fn resolve_peers(peers: &HashSet<String>) -> HashSet<SocketAddr> {
|
2021-02-02 18:20:26 -08:00
|
|
|
use futures::stream::StreamExt;
|
|
|
|
|
2021-02-17 13:09:02 -08:00
|
|
|
loop {
|
2021-02-25 15:06:27 -08:00
|
|
|
// We retry each peer individually, as well as retrying if there are
|
|
|
|
// no peers in the combined list. DNS failures are correlated, so all
|
|
|
|
// peers can fail DNS, leaving Zebra with a small list of custom IP
|
|
|
|
// address peers. Individual retries avoid this issue.
|
2021-02-17 13:09:02 -08:00
|
|
|
let peer_addresses = peers
|
|
|
|
.iter()
|
2021-02-25 15:06:27 -08:00
|
|
|
.map(|s| Config::resolve_host(s, MAX_SINGLE_PEER_RETRIES))
|
2021-02-17 13:09:02 -08:00
|
|
|
.collect::<futures::stream::FuturesUnordered<_>>()
|
|
|
|
.concat()
|
|
|
|
.await;
|
|
|
|
|
|
|
|
if peer_addresses.is_empty() {
|
|
|
|
tracing::info!(
|
|
|
|
?peers,
|
|
|
|
?peer_addresses,
|
|
|
|
"empty peer list after DNS resolution, retrying after {} seconds",
|
|
|
|
crate::constants::DNS_LOOKUP_TIMEOUT.as_secs()
|
|
|
|
);
|
|
|
|
tokio::time::sleep(crate::constants::DNS_LOOKUP_TIMEOUT).await;
|
|
|
|
} else {
|
|
|
|
return peer_addresses;
|
|
|
|
}
|
|
|
|
}
|
2019-10-24 13:28:42 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Get the initial seed peers based on the configured network.
|
2021-02-02 18:20:26 -08:00
|
|
|
pub async fn initial_peers(&self) -> HashSet<SocketAddr> {
|
2019-10-24 13:28:42 -07:00
|
|
|
match self.network {
|
2021-02-25 15:06:27 -08:00
|
|
|
Network::Mainnet => Config::resolve_peers(&self.initial_mainnet_peers).await,
|
|
|
|
Network::Testnet => Config::resolve_peers(&self.initial_testnet_peers).await,
|
2021-02-02 18:20:26 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-25 15:06:27 -08:00
|
|
|
/// Resolves `host` into zero or more IP addresses, retrying up to
|
|
|
|
/// `max_retries` times.
|
|
|
|
///
|
|
|
|
/// If DNS continues to fail, returns an empty list of addresses.
|
|
|
|
async fn resolve_host(host: &str, max_retries: usize) -> HashSet<SocketAddr> {
|
|
|
|
for retry_count in 1..=max_retries {
|
|
|
|
match Config::resolve_host_once(host).await {
|
|
|
|
Ok(addresses) => return addresses,
|
|
|
|
Err(_) => tracing::info!(?host, ?retry_count, "Retrying peer DNS resolution"),
|
|
|
|
};
|
|
|
|
tokio::time::sleep(crate::constants::DNS_LOOKUP_TIMEOUT).await;
|
|
|
|
}
|
|
|
|
|
|
|
|
HashSet::new()
|
|
|
|
}
|
|
|
|
|
2021-02-02 18:20:26 -08:00
|
|
|
/// Resolves `host` into zero or more IP addresses.
|
|
|
|
///
|
|
|
|
/// If `host` is a DNS name, performs DNS resolution with a timeout of a few seconds.
|
2021-02-25 15:06:27 -08:00
|
|
|
/// If DNS resolution fails or times out, returns an error.
|
|
|
|
async fn resolve_host_once(host: &str) -> Result<HashSet<SocketAddr>, BoxError> {
|
2021-02-02 18:20:26 -08:00
|
|
|
let fut = tokio::net::lookup_host(host);
|
|
|
|
let fut = tokio::time::timeout(crate::constants::DNS_LOOKUP_TIMEOUT, fut);
|
|
|
|
|
|
|
|
match fut.await {
|
2021-02-25 15:06:27 -08:00
|
|
|
Ok(Ok(ips)) => Ok(ips.collect()),
|
2021-02-02 18:20:26 -08:00
|
|
|
Ok(Err(e)) => {
|
|
|
|
tracing::info!(?host, ?e, "DNS error resolving peer IP address");
|
2021-02-25 15:06:27 -08:00
|
|
|
Err(e.into())
|
2021-02-02 18:20:26 -08:00
|
|
|
}
|
|
|
|
Err(e) => {
|
|
|
|
tracing::info!(?host, ?e, "DNS timeout resolving peer IP address");
|
2021-02-25 15:06:27 -08:00
|
|
|
Err(e.into())
|
2021-02-02 18:20:26 -08:00
|
|
|
}
|
2019-10-24 13:28:42 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-10-08 13:57:24 -07:00
|
|
|
impl Default for Config {
|
|
|
|
fn default() -> Config {
|
2019-10-25 20:54:44 -07:00
|
|
|
let mainnet_peers = [
|
|
|
|
"dnsseed.z.cash:8233",
|
|
|
|
"dnsseed.str4d.xyz:8233",
|
2020-06-10 14:08:09 -07:00
|
|
|
"mainnet.seeder.zfnd.org:8233",
|
2020-06-10 18:20:50 -07:00
|
|
|
"mainnet.is.yolo.money:8233",
|
2019-10-25 20:54:44 -07:00
|
|
|
]
|
|
|
|
.iter()
|
|
|
|
.map(|&s| String::from(s))
|
|
|
|
.collect();
|
|
|
|
|
2020-06-10 14:08:09 -07:00
|
|
|
let testnet_peers = [
|
|
|
|
"dnsseed.testnet.z.cash:18233",
|
|
|
|
"testnet.seeder.zfnd.org:18233",
|
2020-06-10 18:20:50 -07:00
|
|
|
"testnet.is.yolo.money:18233",
|
2020-06-10 14:08:09 -07:00
|
|
|
]
|
|
|
|
.iter()
|
|
|
|
.map(|&s| String::from(s))
|
|
|
|
.collect();
|
2019-10-25 20:54:44 -07:00
|
|
|
|
2019-10-08 13:57:24 -07:00
|
|
|
Config {
|
2020-06-18 22:44:59 -07:00
|
|
|
listen_addr: "0.0.0.0:8233"
|
2019-10-16 18:29:45 -07:00
|
|
|
.parse()
|
|
|
|
.expect("Hardcoded address should be parseable"),
|
2019-10-16 15:16:29 -07:00
|
|
|
network: Network::Mainnet,
|
2019-10-25 20:54:44 -07:00
|
|
|
initial_mainnet_peers: mainnet_peers,
|
|
|
|
initial_testnet_peers: testnet_peers,
|
2021-03-09 17:36:05 -08:00
|
|
|
crawl_new_peer_interval: Duration::from_secs(60),
|
2020-09-08 03:04:01 -07:00
|
|
|
|
|
|
|
// The default peerset target size should be large enough to ensure
|
|
|
|
// nodes have a reliable set of peers. But it should also be limited
|
|
|
|
// to a reasonable size, to avoid queueing too many in-flight block
|
|
|
|
// downloads. A large queue of in-flight block downloads can choke a
|
|
|
|
// constrained local network connection.
|
|
|
|
//
|
|
|
|
// We assume that Zebra nodes have at least 10 Mbps bandwidth.
|
|
|
|
// Therefore, a maximum-sized block can take up to 2 seconds to
|
|
|
|
// download. So a full default peer set adds up to 100 seconds worth
|
|
|
|
// of blocks to the queue.
|
|
|
|
//
|
|
|
|
// But the peer set for slow nodes is typically much smaller, due to
|
|
|
|
// the handshake RTT timeout.
|
2020-02-09 20:34:53 -08:00
|
|
|
peerset_initial_target_size: 50,
|
2019-10-08 13:57:24 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|