Add a Setup enum to manage Inbound network setup internal state
This change encodes a bunch of invariants in the type system, and adds explicit failure states for: * a closed oneshot, * bugs in the initialization code.
This commit is contained in:
parent
32b032204a
commit
eac4fd181a
|
@ -1,5 +1,6 @@
|
||||||
use std::{
|
use std::{
|
||||||
future::Future,
|
future::Future,
|
||||||
|
mem,
|
||||||
pin::Pin,
|
pin::Pin,
|
||||||
sync::{Arc, Mutex},
|
sync::{Arc, Mutex},
|
||||||
task::{Context, Poll},
|
task::{Context, Poll},
|
||||||
|
@ -28,8 +29,53 @@ use downloads::Downloads;
|
||||||
type Outbound = Buffer<BoxService<zn::Request, zn::Response, zn::BoxError>, zn::Request>;
|
type Outbound = Buffer<BoxService<zn::Request, zn::Response, zn::BoxError>, zn::Request>;
|
||||||
type State = Buffer<BoxService<zs::Request, zs::Response, zs::BoxError>, zs::Request>;
|
type State = Buffer<BoxService<zs::Request, zs::Response, zs::BoxError>, zs::Request>;
|
||||||
type Verifier = Buffer<BoxService<Arc<Block>, block::Hash, VerifyChainError>, Arc<Block>>;
|
type Verifier = Buffer<BoxService<Arc<Block>, block::Hash, VerifyChainError>, Arc<Block>>;
|
||||||
|
type InboundDownloads = Downloads<Timeout<Outbound>, Timeout<Verifier>, State>;
|
||||||
|
|
||||||
pub type SetupData = (Outbound, Arc<Mutex<AddressBook>>);
|
pub type NetworkSetupData = (Outbound, Arc<Mutex<AddressBook>>);
|
||||||
|
|
||||||
|
/// Tracks the internal state of the [`Inbound`] service during network setup.
|
||||||
|
pub enum Setup {
|
||||||
|
/// Waiting for network setup to complete.
|
||||||
|
///
|
||||||
|
/// Requests that depend on Zebra's internal network setup are ignored.
|
||||||
|
/// Other requests are answered.
|
||||||
|
AwaitingNetwork {
|
||||||
|
/// A oneshot channel used to receive the address_book and outbound services
|
||||||
|
/// after the network is set up.
|
||||||
|
network_setup: oneshot::Receiver<NetworkSetupData>,
|
||||||
|
|
||||||
|
/// A service that verifies downloaded blocks. Given to `downloads`
|
||||||
|
/// after the network is set up.
|
||||||
|
verifier: Verifier,
|
||||||
|
},
|
||||||
|
|
||||||
|
/// Network setup is complete.
|
||||||
|
///
|
||||||
|
/// All requests are answered.
|
||||||
|
Initialized {
|
||||||
|
/// A shared list of peer addresses.
|
||||||
|
address_book: Arc<Mutex<zn::AddressBook>>,
|
||||||
|
|
||||||
|
/// A `futures::Stream` that downloads and verifies gossipped blocks.
|
||||||
|
downloads: Pin<Box<InboundDownloads>>,
|
||||||
|
},
|
||||||
|
|
||||||
|
/// Temporary state used in the service's internal network initialization
|
||||||
|
/// code.
|
||||||
|
///
|
||||||
|
/// If this state occurs outside the service initialization code, the
|
||||||
|
/// service panics.
|
||||||
|
FailedInit,
|
||||||
|
|
||||||
|
/// Network setup failed, because the setup channel permanently failed.
|
||||||
|
/// The service keeps returning readiness errors for every request.
|
||||||
|
///
|
||||||
|
/// We keep hold of the closed oneshot, so we can use it to create a
|
||||||
|
/// new error for each `poll_ready` call.
|
||||||
|
FailedRecv {
|
||||||
|
failed_setup: oneshot::Receiver<NetworkSetupData>,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
/// Uses the node state to respond to inbound peer requests.
|
/// Uses the node state to respond to inbound peer requests.
|
||||||
///
|
///
|
||||||
|
@ -53,37 +99,10 @@ pub type SetupData = (Outbound, Arc<Mutex<AddressBook>>);
|
||||||
/// responding to block gossip by attempting to download and validate advertised
|
/// responding to block gossip by attempting to download and validate advertised
|
||||||
/// blocks.
|
/// blocks.
|
||||||
pub struct Inbound {
|
pub struct Inbound {
|
||||||
// invariants:
|
/// Provides network-dependent services, if they are available.
|
||||||
// * Before setup: address_book and downloads are None, and the *_setup members are Some
|
|
||||||
// * After setup: address_book and downloads are Some, and the *_setup members are None
|
|
||||||
//
|
|
||||||
// why not use an enum for the inbound state? because it would mean
|
|
||||||
// match-wrapping the body of Service::call rather than just expect()ing
|
|
||||||
// some Options.
|
|
||||||
|
|
||||||
// Setup
|
|
||||||
/// A oneshot channel used to receive the address_book and outbound services
|
|
||||||
/// after the network is set up.
|
|
||||||
///
|
///
|
||||||
/// `None` after the network is set up.
|
/// Some services are unavailable until Zebra has completed network setup.
|
||||||
network_setup: Option<oneshot::Receiver<SetupData>>,
|
network: Setup,
|
||||||
|
|
||||||
/// A service that verifies downloaded blocks. Given to `downloads`
|
|
||||||
/// after the network is set up.
|
|
||||||
///
|
|
||||||
/// `None` after the network is set up and `downloads` is created.
|
|
||||||
verifier_setup: Option<Verifier>,
|
|
||||||
|
|
||||||
// Services and Data Stores
|
|
||||||
/// A shared list of peer addresses.
|
|
||||||
///
|
|
||||||
/// `None` until the network is set up.
|
|
||||||
address_book: Option<Arc<Mutex<zn::AddressBook>>>,
|
|
||||||
|
|
||||||
/// A stream that downloads and verifies gossipped blocks.
|
|
||||||
///
|
|
||||||
/// `None` until the network is set up.
|
|
||||||
downloads: Option<Pin<Box<Downloads<Timeout<Outbound>, Timeout<Verifier>, State>>>>,
|
|
||||||
|
|
||||||
/// A service that manages cached blockchain state.
|
/// A service that manages cached blockchain state.
|
||||||
state: State,
|
state: State,
|
||||||
|
@ -91,15 +110,15 @@ pub struct Inbound {
|
||||||
|
|
||||||
impl Inbound {
|
impl Inbound {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
network_setup: oneshot::Receiver<SetupData>,
|
network_setup: oneshot::Receiver<NetworkSetupData>,
|
||||||
state: State,
|
state: State,
|
||||||
verifier: Verifier,
|
verifier: Verifier,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
network_setup: Some(network_setup),
|
network: Setup::AwaitingNetwork {
|
||||||
verifier_setup: Some(verifier),
|
network_setup,
|
||||||
address_book: None,
|
verifier,
|
||||||
downloads: None,
|
},
|
||||||
state,
|
state,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -112,44 +131,75 @@ impl Service<zn::Request> for Inbound {
|
||||||
Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send + 'static>>;
|
Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send + 'static>>;
|
||||||
|
|
||||||
fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
|
fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
|
||||||
|
use oneshot::error::TryRecvError;
|
||||||
|
|
||||||
// Check whether the network setup is finished, but don't wait for it to
|
// Check whether the network setup is finished, but don't wait for it to
|
||||||
// become ready before reporting readiness. We expect to get it "soon",
|
// become ready before reporting readiness. We expect to get it "soon",
|
||||||
// and reporting unreadiness might cause unwanted load-shedding, since
|
// and reporting unreadiness might cause unwanted load-shedding, since
|
||||||
// the load-shed middleware is unable to distinguish being unready due
|
// the load-shed middleware is unable to distinguish being unready due
|
||||||
// to load from being unready while waiting on setup.
|
// to load from being unready while waiting on setup.
|
||||||
if let Some(mut rx) = self.network_setup.take() {
|
if matches!(self.network, Setup::AwaitingNetwork { .. }) {
|
||||||
use oneshot::error::TryRecvError;
|
// Unfortunately, we can't match, swap, and destructure at the same time
|
||||||
match rx.try_recv() {
|
let mut awaiting_state = Setup::FailedInit;
|
||||||
|
mem::swap(&mut self.network, &mut awaiting_state);
|
||||||
|
if let Setup::AwaitingNetwork {
|
||||||
|
mut network_setup,
|
||||||
|
verifier,
|
||||||
|
} = awaiting_state
|
||||||
|
{
|
||||||
|
match network_setup.try_recv() {
|
||||||
Ok((outbound, address_book)) => {
|
Ok((outbound, address_book)) => {
|
||||||
let verifier = self
|
let downloads = Box::pin(Downloads::new(
|
||||||
.verifier_setup
|
|
||||||
.take()
|
|
||||||
.expect("unexpected missing verifier during inbound network setup");
|
|
||||||
|
|
||||||
self.address_book = Some(address_book);
|
|
||||||
self.downloads = Some(Box::pin(Downloads::new(
|
|
||||||
Timeout::new(outbound, BLOCK_DOWNLOAD_TIMEOUT),
|
Timeout::new(outbound, BLOCK_DOWNLOAD_TIMEOUT),
|
||||||
Timeout::new(verifier, BLOCK_VERIFY_TIMEOUT),
|
Timeout::new(verifier, BLOCK_VERIFY_TIMEOUT),
|
||||||
self.state.clone(),
|
self.state.clone(),
|
||||||
)));
|
));
|
||||||
|
self.network = Setup::Initialized {
|
||||||
self.network_setup = None;
|
address_book,
|
||||||
|
downloads,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
Err(TryRecvError::Empty) => {
|
Err(TryRecvError::Empty) => {
|
||||||
self.network_setup = Some(rx);
|
// There's no setup data yet, so keep waiting for it
|
||||||
}
|
self.network = Setup::AwaitingNetwork {
|
||||||
Err(e @ TryRecvError::Closed) => {
|
network_setup,
|
||||||
// In this case, report that the service failed, and put the
|
verifier,
|
||||||
// failed oneshot back so we'll fail again in case
|
|
||||||
// poll_ready is called after failure.
|
|
||||||
self.network_setup = Some(rx);
|
|
||||||
return Poll::Ready(Err(e.into()));
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
Err(error @ TryRecvError::Closed) => {
|
||||||
|
// Mark the service as failed, because network setup failed
|
||||||
|
error!(?error, "inbound network setup failed");
|
||||||
|
self.network = Setup::FailedRecv {
|
||||||
|
failed_setup: network_setup,
|
||||||
|
};
|
||||||
|
return Poll::Ready(Err(error.into()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unfortunately, we can't combine these matches into an exhaustive match statement,
|
||||||
|
// because they use mutable references, or they depend on the state we've just modified.
|
||||||
|
|
||||||
|
// Make sure we left the network setup in a valid state
|
||||||
|
if matches!(self.network, Setup::FailedInit) {
|
||||||
|
unreachable!("incomplete Inbound initialization");
|
||||||
|
}
|
||||||
|
|
||||||
|
// If network setup failed, report service failure
|
||||||
|
if let Setup::FailedRecv { failed_setup } = &mut self.network {
|
||||||
|
// TryRecvError is not cloneable, so we have to generate a new error from the oneshot,
|
||||||
|
// rather than re-using a clone of the original error
|
||||||
|
let failed_response = failed_setup.try_recv();
|
||||||
|
if let Err(error @ TryRecvError::Closed) = failed_response {
|
||||||
|
return Poll::Ready(Err(error.into()));
|
||||||
|
} else {
|
||||||
|
unreachable!("unexpected response from failed Inbound network setup oneshot");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Clean up completed download tasks, ignoring their results
|
// Clean up completed download tasks, ignoring their results
|
||||||
if let Some(downloads) = self.downloads.as_mut() {
|
if let Setup::Initialized { downloads, .. } = &mut self.network {
|
||||||
while let Poll::Ready(Some(_)) = downloads.as_mut().poll_next(cx) {}
|
while let Poll::Ready(Some(_)) = downloads.as_mut().poll_next(cx) {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -168,22 +218,21 @@ impl Service<zn::Request> for Inbound {
|
||||||
#[instrument(name = "inbound", skip(self, req))]
|
#[instrument(name = "inbound", skip(self, req))]
|
||||||
fn call(&mut self, req: zn::Request) -> Self::Future {
|
fn call(&mut self, req: zn::Request) -> Self::Future {
|
||||||
match req {
|
match req {
|
||||||
zn::Request::Peers => match self.address_book.as_ref() {
|
zn::Request::Peers => {
|
||||||
Some(addrs) => {
|
if let Setup::Initialized { address_book, .. } = &self.network {
|
||||||
// We could truncate the list to try to not reveal our entire
|
// We could truncate the list to try to not reveal our entire
|
||||||
// peer set. But because we don't monitor repeated requests,
|
// peer set. But because we don't monitor repeated requests,
|
||||||
// this wouldn't actually achieve anything, because a crawler
|
// this wouldn't actually achieve anything, because a crawler
|
||||||
// could just repeatedly query it.
|
// could just repeatedly query it.
|
||||||
let mut peers = addrs.lock().unwrap().sanitized();
|
let mut peers = address_book.lock().unwrap().sanitized();
|
||||||
const MAX_ADDR: usize = 1000; // bitcoin protocol constant
|
const MAX_ADDR: usize = 1000; // bitcoin protocol constant
|
||||||
peers.truncate(MAX_ADDR);
|
peers.truncate(MAX_ADDR);
|
||||||
async { Ok(zn::Response::Peers(peers)) }.boxed()
|
async { Ok(zn::Response::Peers(peers)) }.boxed()
|
||||||
}
|
} else {
|
||||||
None => {
|
|
||||||
info!("ignoring `Peers` request from remote peer during network setup");
|
info!("ignoring `Peers` request from remote peer during network setup");
|
||||||
async { Ok(zn::Response::Nil) }.boxed()
|
async { Ok(zn::Response::Nil) }.boxed()
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
zn::Request::BlocksByHash(hashes) => {
|
zn::Request::BlocksByHash(hashes) => {
|
||||||
// Correctness:
|
// Correctness:
|
||||||
//
|
//
|
||||||
|
@ -246,7 +295,7 @@ impl Service<zn::Request> for Inbound {
|
||||||
async { Ok(zn::Response::Nil) }.boxed()
|
async { Ok(zn::Response::Nil) }.boxed()
|
||||||
}
|
}
|
||||||
zn::Request::AdvertiseBlock(hash) => {
|
zn::Request::AdvertiseBlock(hash) => {
|
||||||
if let Some(downloads) = self.downloads.as_mut() {
|
if let Setup::Initialized { downloads, .. } = &mut self.network {
|
||||||
downloads.download_and_verify(hash);
|
downloads.download_and_verify(hash);
|
||||||
} else {
|
} else {
|
||||||
info!(
|
info!(
|
||||||
|
|
Loading…
Reference in New Issue