fix(network): allow more inbound than outbound connections (#3527)
* fix(network): allow more inbound than outbound connections * refactor(network): access constants using consistent paths * fixup! fix(network): allow more inbound than outbound connections * fixup! fixup! fix(network): allow more inbound than outbound connections * refactor(network): convert to standard test module layout Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
This commit is contained in:
parent
aa0fa2fd88
commit
1ffb7a5cd0
|
@ -5623,6 +5623,7 @@ dependencies = [
|
|||
"rand 0.8.4",
|
||||
"regex",
|
||||
"serde",
|
||||
"static_assertions",
|
||||
"thiserror",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
|
|
|
@ -45,6 +45,7 @@ zebra-chain = { path = "../zebra-chain" }
|
|||
[dev-dependencies]
|
||||
proptest = "0.10"
|
||||
proptest-derive = "0.3"
|
||||
static_assertions = "1.1.0"
|
||||
tokio = { version = "1.16.1", features = ["test-util"] }
|
||||
toml = "0.5"
|
||||
|
||||
|
|
|
@ -9,7 +9,14 @@ use serde::{de, Deserialize, Deserializer};
|
|||
|
||||
use zebra_chain::parameters::Network;
|
||||
|
||||
use crate::{constants, protocol::external::canonical_socket_addr, BoxError};
|
||||
use crate::{
|
||||
constants::{
|
||||
DEFAULT_CRAWL_NEW_PEER_INTERVAL, DNS_LOOKUP_TIMEOUT, INBOUND_PEER_LIMIT_MULTIPLIER,
|
||||
OUTBOUND_PEER_LIMIT_MULTIPLIER,
|
||||
},
|
||||
protocol::external::canonical_socket_addr,
|
||||
BoxError,
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
@ -82,21 +89,41 @@ impl Config {
|
|||
///
|
||||
/// # Security
|
||||
///
|
||||
/// This is larger than the inbound connection limit,
|
||||
/// so Zebra is more likely to be connected to peers that it has selected.
|
||||
/// See the note at [`INBOUND_PEER_LIMIT_MULTIPLIER`].
|
||||
///
|
||||
/// # Performance
|
||||
///
|
||||
/// Zebra's peer set should be limited to a reasonable size,
|
||||
/// to avoid queueing too many in-flight block downloads.
|
||||
/// A large queue of in-flight block downloads can choke a
|
||||
/// constrained local network connection.
|
||||
///
|
||||
/// We assume that Zebra nodes have at least 10 Mbps bandwidth.
|
||||
/// Therefore, a maximum-sized block can take up to 2 seconds to
|
||||
/// download. So the initial outbound peer set adds up to 100 seconds worth
|
||||
/// of blocks to the queue. If Zebra has reached its outbound peer limit,
|
||||
/// that adds an extra 200 seconds of queued blocks.
|
||||
///
|
||||
/// But the peer set for slow nodes is typically much smaller, due to
|
||||
/// the handshake RTT timeout. And Zebra responds to inbound request
|
||||
/// overloads by dropping peer connections.
|
||||
pub fn peerset_outbound_connection_limit(&self) -> usize {
|
||||
let inbound_limit = self.peerset_inbound_connection_limit();
|
||||
|
||||
inbound_limit + inbound_limit / constants::OUTBOUND_PEER_BIAS_DENOMINATOR
|
||||
self.peerset_initial_target_size * OUTBOUND_PEER_LIMIT_MULTIPLIER
|
||||
}
|
||||
|
||||
/// The maximum number of inbound connections that Zebra will accept at the same time.
|
||||
/// When this limit is reached, Zebra drops new inbound connections without handshaking on them.
|
||||
/// When this limit is reached, Zebra drops new inbound connections,
|
||||
/// without handshaking on them.
|
||||
///
|
||||
/// # Security
|
||||
///
|
||||
/// See the note at [`INBOUND_PEER_LIMIT_MULTIPLIER`].
|
||||
pub fn peerset_inbound_connection_limit(&self) -> usize {
|
||||
self.peerset_initial_target_size
|
||||
self.peerset_initial_target_size * INBOUND_PEER_LIMIT_MULTIPLIER
|
||||
}
|
||||
|
||||
/// The maximum number of inbound and outbound connections that Zebra will have at the same time.
|
||||
/// The maximum number of inbound and outbound connections that Zebra will have
|
||||
/// at the same time.
|
||||
pub fn peerset_total_connection_limit(&self) -> usize {
|
||||
self.peerset_outbound_connection_limit() + self.peerset_inbound_connection_limit()
|
||||
}
|
||||
|
@ -148,9 +175,9 @@ impl Config {
|
|||
?peers,
|
||||
?peer_addresses,
|
||||
"empty peer list after DNS resolution, retrying after {} seconds",
|
||||
crate::constants::DNS_LOOKUP_TIMEOUT.as_secs()
|
||||
DNS_LOOKUP_TIMEOUT.as_secs(),
|
||||
);
|
||||
tokio::time::sleep(crate::constants::DNS_LOOKUP_TIMEOUT).await;
|
||||
tokio::time::sleep(DNS_LOOKUP_TIMEOUT).await;
|
||||
} else {
|
||||
return peer_addresses;
|
||||
}
|
||||
|
@ -167,7 +194,7 @@ impl Config {
|
|||
Ok(addresses) => return addresses,
|
||||
Err(_) => tracing::info!(?host, ?retry_count, "Retrying peer DNS resolution"),
|
||||
};
|
||||
tokio::time::sleep(crate::constants::DNS_LOOKUP_TIMEOUT).await;
|
||||
tokio::time::sleep(DNS_LOOKUP_TIMEOUT).await;
|
||||
}
|
||||
|
||||
HashSet::new()
|
||||
|
@ -179,7 +206,7 @@ impl Config {
|
|||
/// If DNS resolution fails or times out, returns an error.
|
||||
async fn resolve_host_once(host: &str) -> Result<HashSet<SocketAddr>, BoxError> {
|
||||
let fut = tokio::net::lookup_host(host);
|
||||
let fut = tokio::time::timeout(crate::constants::DNS_LOOKUP_TIMEOUT, fut);
|
||||
let fut = tokio::time::timeout(DNS_LOOKUP_TIMEOUT, fut);
|
||||
|
||||
match fut.await {
|
||||
Ok(Ok(ip_addrs)) => {
|
||||
|
@ -247,22 +274,16 @@ impl Default for Config {
|
|||
network: Network::Mainnet,
|
||||
initial_mainnet_peers: mainnet_peers,
|
||||
initial_testnet_peers: testnet_peers,
|
||||
crawl_new_peer_interval: constants::DEFAULT_CRAWL_NEW_PEER_INTERVAL,
|
||||
crawl_new_peer_interval: DEFAULT_CRAWL_NEW_PEER_INTERVAL,
|
||||
|
||||
// # Security
|
||||
//
|
||||
// The default peerset target size should be large enough to ensure
|
||||
// nodes have a reliable set of peers. But it should also be limited
|
||||
// to a reasonable size, to avoid queueing too many in-flight block
|
||||
// downloads. A large queue of in-flight block downloads can choke a
|
||||
// constrained local network connection.
|
||||
// nodes have a reliable set of peers.
|
||||
//
|
||||
// We assume that Zebra nodes have at least 10 Mbps bandwidth.
|
||||
// Therefore, a maximum-sized block can take up to 2 seconds to
|
||||
// download. So a full default peer set adds up to 100 seconds worth
|
||||
// of blocks to the queue.
|
||||
//
|
||||
// But the peer set for slow nodes is typically much smaller, due to
|
||||
// the handshake RTT timeout.
|
||||
peerset_initial_target_size: 50,
|
||||
// But Zebra should only make a small number of initial outbound connections,
|
||||
// so that idle peers don't use too many connection slots.
|
||||
peerset_initial_target_size: 25,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,22 +1,3 @@
|
|||
use super::Config;
|
||||
//! Tests for zebra-network configs.
|
||||
|
||||
#[test]
|
||||
fn parse_config_listen_addr() {
|
||||
let fixtures = vec![
|
||||
("listen_addr = '0.0.0.0'", "0.0.0.0:8233"),
|
||||
("listen_addr = '0.0.0.0:9999'", "0.0.0.0:9999"),
|
||||
(
|
||||
"listen_addr = '0.0.0.0'\nnetwork = 'Testnet'",
|
||||
"0.0.0.0:18233",
|
||||
),
|
||||
(
|
||||
"listen_addr = '0.0.0.0:8233'\nnetwork = 'Testnet'",
|
||||
"0.0.0.0:8233",
|
||||
),
|
||||
];
|
||||
|
||||
for (config, value) in fixtures {
|
||||
let config: Config = toml::from_str(config).unwrap();
|
||||
assert_eq!(config.listen_addr.to_string(), value);
|
||||
}
|
||||
}
|
||||
mod vectors;
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
//! Fixed test vectors for zebra-network configuration.
|
||||
|
||||
use static_assertions::const_assert;
|
||||
|
||||
use crate::{
|
||||
constants::{INBOUND_PEER_LIMIT_MULTIPLIER, OUTBOUND_PEER_LIMIT_MULTIPLIER},
|
||||
Config,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn parse_config_listen_addr() {
|
||||
zebra_test::init();
|
||||
|
||||
let fixtures = vec![
|
||||
("listen_addr = '0.0.0.0'", "0.0.0.0:8233"),
|
||||
("listen_addr = '0.0.0.0:9999'", "0.0.0.0:9999"),
|
||||
(
|
||||
"listen_addr = '0.0.0.0'\nnetwork = 'Testnet'",
|
||||
"0.0.0.0:18233",
|
||||
),
|
||||
(
|
||||
"listen_addr = '0.0.0.0:8233'\nnetwork = 'Testnet'",
|
||||
"0.0.0.0:8233",
|
||||
),
|
||||
];
|
||||
|
||||
for (config, value) in fixtures {
|
||||
let config: Config = toml::from_str(config).unwrap();
|
||||
assert_eq!(config.listen_addr.to_string(), value);
|
||||
}
|
||||
}
|
||||
|
||||
/// Make sure the peer connection limits are consistent with each other.
|
||||
#[test]
|
||||
fn ensure_peer_connection_limits_consistent() {
|
||||
zebra_test::init();
|
||||
|
||||
// Zebra should allow more inbound connections, to avoid connection exhaustion
|
||||
const_assert!(INBOUND_PEER_LIMIT_MULTIPLIER > OUTBOUND_PEER_LIMIT_MULTIPLIER);
|
||||
|
||||
let config = Config::default();
|
||||
|
||||
assert!(
|
||||
config.peerset_inbound_connection_limit() - config.peerset_outbound_connection_limit()
|
||||
>= 50,
|
||||
"default config should allow more inbound connections, to avoid connection exhaustion",
|
||||
);
|
||||
}
|
|
@ -13,23 +13,51 @@ use zebra_chain::{
|
|||
serialization::Duration32,
|
||||
};
|
||||
|
||||
/// The fractional bias towards outbound peers in the peer set,
|
||||
/// if connection limits have been reached.
|
||||
/// A multiplier used to calculate the inbound connection limit for the peer set,
|
||||
///
|
||||
/// Inbound and outbound connections are limited based on
|
||||
/// [`Config.peerset_initial_target_size`].
|
||||
/// When it starts up, Zebra opens [`Config.peerset_initial_target_size`]
|
||||
/// outbound connections.
|
||||
///
|
||||
/// The outbound limit is larger than the inbound limit by:
|
||||
/// `Config.peerset_initial_target_size / OUTBOUND_PEER_BIAS_DENOMINATOR`.
|
||||
/// Then it opens additional outbound connections as needed for network requests,
|
||||
/// and accepts inbound connections initiated by other peers.
|
||||
///
|
||||
/// The inbound and outbound connection limits are calculated from:
|
||||
///
|
||||
/// The inbound limit is:
|
||||
/// `Config.peerset_initial_target_size * INBOUND_PEER_LIMIT_MULTIPLIER`.
|
||||
/// (This is similar to `zcashd`'s default inbound limit.)
|
||||
///
|
||||
/// The outbound limit is:
|
||||
/// `Config.peerset_initial_target_size * OUTBOUND_PEER_LIMIT_MULTIPLIER`.
|
||||
/// (This is a bit larger than `zcashd`'s default outbound limit.)
|
||||
///
|
||||
/// # Security
|
||||
///
|
||||
/// This bias helps make sure that Zebra is connected to a majority of peers
|
||||
/// that it has chosen from its [`AddressBook`].
|
||||
/// Each connection requires one inbound slot and one outbound slot, on two different peers.
|
||||
/// But some peers only make outbound connections, because they are behind a firewall,
|
||||
/// or their lister port address is misconfigured.
|
||||
///
|
||||
/// Zebra allows extra inbound connection slots,
|
||||
/// to prevent accidental connection slot exhaustion.
|
||||
/// (`zcashd` also allows a large number of extra inbound slots.)
|
||||
///
|
||||
/// ## Security Tradeoff
|
||||
///
|
||||
/// Since the inbound peer limit is higher than the outbound peer limit,
|
||||
/// Zebra can be connected to a majority of peers
|
||||
/// that it has *not* chosen from its [`AddressBook`].
|
||||
///
|
||||
/// Inbound peer connections are initiated by the remote peer,
|
||||
/// so inbound peer selection is not controlled by the local node.
|
||||
pub const OUTBOUND_PEER_BIAS_DENOMINATOR: usize = 2;
|
||||
/// This means that an attacker can easily become a majority of a node's peers.
|
||||
///
|
||||
/// However, connection exhaustion is a higher priority.
|
||||
pub const INBOUND_PEER_LIMIT_MULTIPLIER: usize = 5;
|
||||
|
||||
/// A multiplier used to calculate the outbound connection limit for the peer set,
|
||||
///
|
||||
/// See [`INBOUND_PEER_LIMIT_MULTIPLIER`] for details.
|
||||
pub const OUTBOUND_PEER_LIMIT_MULTIPLIER: usize = 3;
|
||||
|
||||
/// The buffer size for the peer set.
|
||||
///
|
||||
|
|
Loading…
Reference in New Issue