2021-04-21 16:14:29 -07:00
|
|
|
use std::{
|
|
|
|
collections::HashSet,
|
|
|
|
net::{IpAddr, SocketAddr},
|
|
|
|
string::String,
|
|
|
|
time::Duration,
|
|
|
|
};
|
|
|
|
|
|
|
|
use serde::{de, Deserialize, Deserializer};
|
2019-10-16 15:16:29 -07:00
|
|
|
|
2021-06-21 19:16:59 -07:00
|
|
|
use zebra_chain::{parameters::Network, serialization::canonical_socket_addr};
|
2019-10-16 15:16:29 -07:00
|
|
|
|
2021-02-25 15:06:27 -08:00
|
|
|
use crate::BoxError;
|
|
|
|
|
2021-06-23 04:10:21 -07:00
|
|
|
/// The number of times Zebra will retry each initial peer's DNS resolution,
/// before checking if any other initial peers have returned addresses.
///
/// Once these attempts are exhausted, `Config::resolve_host` returns an
/// empty address set for that peer.
const MAX_SINGLE_PEER_RETRIES: usize = 2;
|
|
|
|
|
2019-10-08 13:57:24 -07:00
|
|
|
/// Configuration for networking code.
#[derive(Clone, Debug, Serialize)]
#[serde(deny_unknown_fields, default)]
pub struct Config {
    /// The address on which this node should listen for connections.
    ///
    /// Can be `address:port` or just `address`. If there is no configured
    /// port, Zebra will use the default port for the configured `network`.
    ///
    /// `address` can be an IP address or a DNS name. DNS names are
    /// only resolved once, when Zebra starts up.
    ///
    /// If a specific listener address is configured, Zebra will advertise
    /// it to other nodes. But by default, Zebra uses an unspecified address
    /// ("0.0.0.0" or "[::]"), which is not advertised to other nodes.
    ///
    /// Zebra does not currently support:
    /// - [Advertising a different external IP address #1890](https://github.com/ZcashFoundation/zebra/issues/1890), or
    /// - [Auto-discovering its own external IP address #1893](https://github.com/ZcashFoundation/zebra/issues/1893).
    ///
    /// However, other Zebra instances compensate for unspecified or incorrect
    /// listener addresses by adding the external IP addresses of peers to
    /// their address books.
    pub listen_addr: SocketAddr,

    /// The network to connect to.
    pub network: Network,

    /// A list of initial peers for the peerset when operating on
    /// mainnet.
    ///
    /// Each entry can be an IP address with a port, or a DNS name
    /// (see `Config::resolve_peers`).
    pub initial_mainnet_peers: HashSet<String>,

    /// A list of initial peers for the peerset when operating on
    /// testnet.
    ///
    /// Each entry can be an IP address with a port, or a DNS name
    /// (see `Config::resolve_peers`).
    pub initial_testnet_peers: HashSet<String>,

    /// The initial target size for the peer set.
    ///
    /// If you have a slow network connection, and Zebra is having trouble
    /// syncing, try reducing the peer set size. You can also reduce the peer
    /// set size to reduce Zebra's bandwidth usage.
    pub peerset_initial_target_size: usize,

    /// How frequently we attempt to crawl the network to discover new peer
    /// addresses.
    ///
    /// Zebra asks its connected peers for more peer addresses:
    /// - regularly, every time `crawl_new_peer_interval` elapses, and
    /// - if the peer set is busy, and there aren't any peer addresses for the
    ///   next connection attempt.
    //
    // Note: Durations become a TOML table, so they must be the final item in the config
    // We'll replace them with a more user-friendly format in #2847
    pub crawl_new_peer_interval: Duration,
}
|
|
|
|
|
2019-10-24 13:28:42 -07:00
|
|
|
impl Config {
|
2021-02-25 15:06:27 -08:00
|
|
|
/// Concurrently resolves `peers` into zero or more IP addresses, with a
|
|
|
|
/// timeout of a few seconds on each DNS request.
|
2021-02-02 18:20:26 -08:00
|
|
|
///
|
2021-02-25 15:06:27 -08:00
|
|
|
/// If DNS resolution fails or times out for all peers, continues retrying
|
|
|
|
/// until at least one peer is found.
|
|
|
|
async fn resolve_peers(peers: &HashSet<String>) -> HashSet<SocketAddr> {
|
2021-02-02 18:20:26 -08:00
|
|
|
use futures::stream::StreamExt;
|
|
|
|
|
2021-05-13 18:46:02 -07:00
|
|
|
if peers.is_empty() {
|
2021-06-22 14:59:06 -07:00
|
|
|
warn!(
|
|
|
|
"no initial peers in the network config. \
|
|
|
|
Hint: you must configure at least one peer IP or DNS seeder to run Zebra, \
|
|
|
|
or make sure Zebra's listener port gets inbound connections."
|
|
|
|
);
|
2021-05-13 18:46:02 -07:00
|
|
|
return HashSet::new();
|
|
|
|
}
|
|
|
|
|
2021-02-17 13:09:02 -08:00
|
|
|
loop {
|
2021-02-25 15:06:27 -08:00
|
|
|
// We retry each peer individually, as well as retrying if there are
|
|
|
|
// no peers in the combined list. DNS failures are correlated, so all
|
|
|
|
// peers can fail DNS, leaving Zebra with a small list of custom IP
|
|
|
|
// address peers. Individual retries avoid this issue.
|
2021-02-17 13:09:02 -08:00
|
|
|
let peer_addresses = peers
|
|
|
|
.iter()
|
2021-02-25 15:06:27 -08:00
|
|
|
.map(|s| Config::resolve_host(s, MAX_SINGLE_PEER_RETRIES))
|
2021-02-17 13:09:02 -08:00
|
|
|
.collect::<futures::stream::FuturesUnordered<_>>()
|
|
|
|
.concat()
|
|
|
|
.await;
|
|
|
|
|
|
|
|
if peer_addresses.is_empty() {
|
|
|
|
tracing::info!(
|
|
|
|
?peers,
|
|
|
|
?peer_addresses,
|
|
|
|
"empty peer list after DNS resolution, retrying after {} seconds",
|
|
|
|
crate::constants::DNS_LOOKUP_TIMEOUT.as_secs()
|
|
|
|
);
|
|
|
|
tokio::time::sleep(crate::constants::DNS_LOOKUP_TIMEOUT).await;
|
|
|
|
} else {
|
|
|
|
return peer_addresses;
|
|
|
|
}
|
|
|
|
}
|
2019-10-24 13:28:42 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Get the initial seed peers based on the configured network.
|
2021-02-02 18:20:26 -08:00
|
|
|
pub async fn initial_peers(&self) -> HashSet<SocketAddr> {
|
2019-10-24 13:28:42 -07:00
|
|
|
match self.network {
|
2021-02-25 15:06:27 -08:00
|
|
|
Network::Mainnet => Config::resolve_peers(&self.initial_mainnet_peers).await,
|
|
|
|
Network::Testnet => Config::resolve_peers(&self.initial_testnet_peers).await,
|
2021-02-02 18:20:26 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-02-25 15:06:27 -08:00
|
|
|
/// Resolves `host` into zero or more IP addresses, retrying up to
|
|
|
|
/// `max_retries` times.
|
|
|
|
///
|
|
|
|
/// If DNS continues to fail, returns an empty list of addresses.
|
|
|
|
async fn resolve_host(host: &str, max_retries: usize) -> HashSet<SocketAddr> {
|
|
|
|
for retry_count in 1..=max_retries {
|
|
|
|
match Config::resolve_host_once(host).await {
|
|
|
|
Ok(addresses) => return addresses,
|
|
|
|
Err(_) => tracing::info!(?host, ?retry_count, "Retrying peer DNS resolution"),
|
|
|
|
};
|
|
|
|
tokio::time::sleep(crate::constants::DNS_LOOKUP_TIMEOUT).await;
|
|
|
|
}
|
|
|
|
|
|
|
|
HashSet::new()
|
|
|
|
}
|
|
|
|
|
2021-02-02 18:20:26 -08:00
|
|
|
/// Resolves `host` into zero or more IP addresses.
|
|
|
|
///
|
|
|
|
/// If `host` is a DNS name, performs DNS resolution with a timeout of a few seconds.
|
2021-02-25 15:06:27 -08:00
|
|
|
/// If DNS resolution fails or times out, returns an error.
|
|
|
|
async fn resolve_host_once(host: &str) -> Result<HashSet<SocketAddr>, BoxError> {
|
2021-02-02 18:20:26 -08:00
|
|
|
let fut = tokio::net::lookup_host(host);
|
|
|
|
let fut = tokio::time::timeout(crate::constants::DNS_LOOKUP_TIMEOUT, fut);
|
|
|
|
|
|
|
|
match fut.await {
|
2021-09-29 11:08:20 -07:00
|
|
|
Ok(Ok(ip_addrs)) => {
|
|
|
|
let ip_addrs: Vec<SocketAddr> = ip_addrs.map(canonical_socket_addr).collect();
|
|
|
|
|
|
|
|
// if we're logging at debug level,
|
|
|
|
// the full list of IP addresses will be shown in the log message
|
|
|
|
let debug_span = debug_span!("", remote_ip_addrs = ?ip_addrs);
|
|
|
|
let _span_guard = debug_span.enter();
|
|
|
|
info!(seed = ?host, remote_ip_count = ?ip_addrs.len(), "resolved seed peer IP addresses");
|
|
|
|
|
|
|
|
for ip in &ip_addrs {
|
|
|
|
// Count each initial peer, recording the seed config and resolved IP address.
|
|
|
|
//
|
|
|
|
// If an IP is returned by multiple seeds,
|
|
|
|
// each duplicate adds 1 to the initial peer count.
|
|
|
|
// (But we only make one initial connection attempt to each IP.)
|
|
|
|
metrics::counter!(
|
|
|
|
"zcash.net.peers.initial",
|
|
|
|
1,
|
|
|
|
"seed" => host.to_string(),
|
|
|
|
"remote_ip" => ip.to_string()
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(ip_addrs.into_iter().collect())
|
|
|
|
}
|
2021-02-02 18:20:26 -08:00
|
|
|
Ok(Err(e)) => {
|
2021-09-29 11:08:20 -07:00
|
|
|
tracing::info!(?host, ?e, "DNS error resolving peer IP addresses");
|
2021-02-25 15:06:27 -08:00
|
|
|
Err(e.into())
|
2021-02-02 18:20:26 -08:00
|
|
|
}
|
|
|
|
Err(e) => {
|
2021-09-29 11:08:20 -07:00
|
|
|
tracing::info!(?host, ?e, "DNS timeout resolving peer IP addresses");
|
2021-02-25 15:06:27 -08:00
|
|
|
Err(e.into())
|
2021-02-02 18:20:26 -08:00
|
|
|
}
|
2019-10-24 13:28:42 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-10-08 13:57:24 -07:00
|
|
|
impl Default for Config {
|
|
|
|
fn default() -> Config {
|
2019-10-25 20:54:44 -07:00
|
|
|
let mainnet_peers = [
|
|
|
|
"dnsseed.z.cash:8233",
|
|
|
|
"dnsseed.str4d.xyz:8233",
|
2020-06-10 14:08:09 -07:00
|
|
|
"mainnet.seeder.zfnd.org:8233",
|
2020-06-10 18:20:50 -07:00
|
|
|
"mainnet.is.yolo.money:8233",
|
2019-10-25 20:54:44 -07:00
|
|
|
]
|
|
|
|
.iter()
|
|
|
|
.map(|&s| String::from(s))
|
|
|
|
.collect();
|
|
|
|
|
2020-06-10 14:08:09 -07:00
|
|
|
let testnet_peers = [
|
|
|
|
"dnsseed.testnet.z.cash:18233",
|
|
|
|
"testnet.seeder.zfnd.org:18233",
|
2020-06-10 18:20:50 -07:00
|
|
|
"testnet.is.yolo.money:18233",
|
2020-06-10 14:08:09 -07:00
|
|
|
]
|
|
|
|
.iter()
|
|
|
|
.map(|&s| String::from(s))
|
|
|
|
.collect();
|
2019-10-25 20:54:44 -07:00
|
|
|
|
2019-10-08 13:57:24 -07:00
|
|
|
Config {
|
2020-06-18 22:44:59 -07:00
|
|
|
listen_addr: "0.0.0.0:8233"
|
2019-10-16 18:29:45 -07:00
|
|
|
.parse()
|
|
|
|
.expect("Hardcoded address should be parseable"),
|
2019-10-16 15:16:29 -07:00
|
|
|
network: Network::Mainnet,
|
2019-10-25 20:54:44 -07:00
|
|
|
initial_mainnet_peers: mainnet_peers,
|
|
|
|
initial_testnet_peers: testnet_peers,
|
2021-03-09 17:36:05 -08:00
|
|
|
crawl_new_peer_interval: Duration::from_secs(60),
|
2020-09-08 03:04:01 -07:00
|
|
|
|
|
|
|
// The default peerset target size should be large enough to ensure
|
|
|
|
// nodes have a reliable set of peers. But it should also be limited
|
|
|
|
// to a reasonable size, to avoid queueing too many in-flight block
|
|
|
|
// downloads. A large queue of in-flight block downloads can choke a
|
|
|
|
// constrained local network connection.
|
|
|
|
//
|
|
|
|
// We assume that Zebra nodes have at least 10 Mbps bandwidth.
|
|
|
|
// Therefore, a maximum-sized block can take up to 2 seconds to
|
|
|
|
// download. So a full default peer set adds up to 100 seconds worth
|
|
|
|
// of blocks to the queue.
|
|
|
|
//
|
|
|
|
// But the peer set for slow nodes is typically much smaller, due to
|
|
|
|
// the handshake RTT timeout.
|
2020-02-09 20:34:53 -08:00
|
|
|
peerset_initial_target_size: 50,
|
2019-10-08 13:57:24 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-04-21 16:14:29 -07:00
|
|
|
|
|
|
|
impl<'de> Deserialize<'de> for Config {
|
|
|
|
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
|
|
|
where
|
|
|
|
D: Deserializer<'de>,
|
|
|
|
{
|
|
|
|
#[derive(Deserialize)]
|
|
|
|
#[serde(deny_unknown_fields, default)]
|
|
|
|
struct DConfig {
|
|
|
|
listen_addr: String,
|
|
|
|
network: Network,
|
|
|
|
initial_mainnet_peers: HashSet<String>,
|
|
|
|
initial_testnet_peers: HashSet<String>,
|
|
|
|
peerset_initial_target_size: usize,
|
|
|
|
#[serde(alias = "new_peer_interval")]
|
|
|
|
crawl_new_peer_interval: Duration,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Default for DConfig {
|
|
|
|
fn default() -> Self {
|
|
|
|
let config = Config::default();
|
|
|
|
Self {
|
|
|
|
listen_addr: config.listen_addr.to_string(),
|
|
|
|
network: config.network,
|
|
|
|
initial_mainnet_peers: config.initial_mainnet_peers,
|
|
|
|
initial_testnet_peers: config.initial_testnet_peers,
|
|
|
|
peerset_initial_target_size: config.peerset_initial_target_size,
|
|
|
|
crawl_new_peer_interval: config.crawl_new_peer_interval,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
let config = DConfig::deserialize(deserializer)?;
|
|
|
|
// TODO: perform listener DNS lookups asynchronously with a timeout (#1631)
|
|
|
|
let listen_addr = match config.listen_addr.parse::<SocketAddr>() {
|
|
|
|
Ok(socket) => Ok(socket),
|
|
|
|
Err(_) => match config.listen_addr.parse::<IpAddr>() {
|
|
|
|
Ok(ip) => Ok(SocketAddr::new(ip, config.network.default_port())),
|
|
|
|
Err(err) => Err(de::Error::custom(format!(
|
|
|
|
"{}; Hint: addresses can be a IPv4, IPv6 (with brackets), or a DNS name, the port is optional",
|
|
|
|
err
|
|
|
|
))),
|
|
|
|
},
|
|
|
|
}?;
|
|
|
|
|
|
|
|
Ok(Config {
|
2021-06-21 19:16:59 -07:00
|
|
|
listen_addr: canonical_socket_addr(listen_addr),
|
2021-04-21 16:14:29 -07:00
|
|
|
network: config.network,
|
|
|
|
initial_mainnet_peers: config.initial_mainnet_peers,
|
|
|
|
initial_testnet_peers: config.initial_testnet_peers,
|
|
|
|
peerset_initial_target_size: config.peerset_initial_target_size,
|
|
|
|
crawl_new_peer_interval: config.crawl_new_peer_interval,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
|
|
|
|
mod tests;
|