Add a Setup enum to manage Inbound network setup internal state

This change encodes several Inbound setup invariants in the type system,
and adds explicit failure states for:
* a closed oneshot,
* bugs in the initialization code.
This commit is contained in:
teor 2021-01-25 16:11:19 +10:00
parent 32b032204a
commit eac4fd181a
1 changed files with 120 additions and 71 deletions

View File

@ -1,5 +1,6 @@
use std::{
future::Future,
mem,
pin::Pin,
sync::{Arc, Mutex},
task::{Context, Poll},
@ -28,8 +29,53 @@ use downloads::Downloads;
type Outbound = Buffer<BoxService<zn::Request, zn::Response, zn::BoxError>, zn::Request>;
type State = Buffer<BoxService<zs::Request, zs::Response, zs::BoxError>, zs::Request>;
type Verifier = Buffer<BoxService<Arc<Block>, block::Hash, VerifyChainError>, Arc<Block>>;
type InboundDownloads = Downloads<Timeout<Outbound>, Timeout<Verifier>, State>;
pub type SetupData = (Outbound, Arc<Mutex<AddressBook>>);
pub type NetworkSetupData = (Outbound, Arc<Mutex<AddressBook>>);
/// Tracks the internal state of the [`Inbound`] service during network setup.
///
/// The service starts in `AwaitingNetwork`. `poll_ready` checks the setup
/// channel without waiting on it, and moves to `Initialized` when the setup
/// data arrives, or to `FailedRecv` when the channel is closed.
pub enum Setup {
/// Waiting for network setup to complete.
///
/// Requests that depend on Zebra's internal network setup are ignored.
/// Other requests are answered.
AwaitingNetwork {
/// A oneshot channel used to receive the address_book and outbound services
/// after the network is set up.
network_setup: oneshot::Receiver<NetworkSetupData>,
/// A service that verifies downloaded blocks. Given to `downloads`
/// after the network is set up.
verifier: Verifier,
},
/// Network setup is complete.
///
/// All requests are answered.
Initialized {
/// A shared list of peer addresses.
address_book: Arc<Mutex<zn::AddressBook>>,
/// A `futures::Stream` that downloads and verifies gossiped blocks.
downloads: Pin<Box<InboundDownloads>>,
},
/// Temporary state used in the service's internal network initialization
/// code.
///
/// `poll_ready` swaps this placeholder into the service so it can take
/// ownership of the `AwaitingNetwork` fields, then overwrites it with the
/// next real state before continuing.
///
/// If this state occurs outside the service initialization code, the
/// service panics.
FailedInit,
/// Network setup failed, because the setup channel permanently failed.
/// The service keeps returning readiness errors for every request.
///
/// We keep hold of the closed oneshot, so we can use it to create a
/// new error for each `poll_ready` call.
FailedRecv {
/// The closed oneshot receiver. `poll_ready` calls `try_recv` on it to
/// produce a fresh `TryRecvError::Closed` each time it reports failure.
failed_setup: oneshot::Receiver<NetworkSetupData>,
},
}
/// Uses the node state to respond to inbound peer requests.
///
@ -53,37 +99,10 @@ pub type SetupData = (Outbound, Arc<Mutex<AddressBook>>);
/// responding to block gossip by attempting to download and validate advertised
/// blocks.
pub struct Inbound {
// invariants:
// * Before setup: address_book and downloads are None, and the *_setup members are Some
// * After setup: address_book and downloads are Some, and the *_setup members are None
//
// why not use an enum for the inbound state? because it would mean
// match-wrapping the body of Service::call rather than just expect()ing
// some Options.
// Setup
/// A oneshot channel used to receive the address_book and outbound services
/// after the network is set up.
/// Provides network-dependent services, if they are available.
///
/// `None` after the network is set up.
network_setup: Option<oneshot::Receiver<SetupData>>,
/// A service that verifies downloaded blocks. Given to `downloads`
/// after the network is set up.
///
/// `None` after the network is set up and `downloads` is created.
verifier_setup: Option<Verifier>,
// Services and Data Stores
/// A shared list of peer addresses.
///
/// `None` until the network is set up.
address_book: Option<Arc<Mutex<zn::AddressBook>>>,
/// A stream that downloads and verifies gossipped blocks.
///
/// `None` until the network is set up.
downloads: Option<Pin<Box<Downloads<Timeout<Outbound>, Timeout<Verifier>, State>>>>,
/// Some services are unavailable until Zebra has completed network setup.
network: Setup,
/// A service that manages cached blockchain state.
state: State,
@ -91,15 +110,15 @@ pub struct Inbound {
impl Inbound {
pub fn new(
network_setup: oneshot::Receiver<SetupData>,
network_setup: oneshot::Receiver<NetworkSetupData>,
state: State,
verifier: Verifier,
) -> Self {
Self {
network_setup: Some(network_setup),
verifier_setup: Some(verifier),
address_book: None,
downloads: None,
network: Setup::AwaitingNetwork {
network_setup,
verifier,
},
state,
}
}
@ -112,44 +131,75 @@ impl Service<zn::Request> for Inbound {
Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send + 'static>>;
fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
use oneshot::error::TryRecvError;
// Check whether the network setup is finished, but don't wait for it to
// become ready before reporting readiness. We expect to get it "soon",
// and reporting unreadiness might cause unwanted load-shedding, since
// the load-shed middleware is unable to distinguish being unready due
// to load from being unready while waiting on setup.
if let Some(mut rx) = self.network_setup.take() {
use oneshot::error::TryRecvError;
match rx.try_recv() {
Ok((outbound, address_book)) => {
let verifier = self
.verifier_setup
.take()
.expect("unexpected missing verifier during inbound network setup");
if matches!(self.network, Setup::AwaitingNetwork { .. }) {
// Unfortunately, we can't match, swap, and destructure at the same time
let mut awaiting_state = Setup::FailedInit;
mem::swap(&mut self.network, &mut awaiting_state);
if let Setup::AwaitingNetwork {
mut network_setup,
verifier,
} = awaiting_state
{
match network_setup.try_recv() {
Ok((outbound, address_book)) => {
let downloads = Box::pin(Downloads::new(
Timeout::new(outbound, BLOCK_DOWNLOAD_TIMEOUT),
Timeout::new(verifier, BLOCK_VERIFY_TIMEOUT),
self.state.clone(),
));
self.network = Setup::Initialized {
address_book,
downloads,
};
}
Err(TryRecvError::Empty) => {
// There's no setup data yet, so keep waiting for it
self.network = Setup::AwaitingNetwork {
network_setup,
verifier,
};
}
Err(error @ TryRecvError::Closed) => {
// Mark the service as failed, because network setup failed
error!(?error, "inbound network setup failed");
self.network = Setup::FailedRecv {
failed_setup: network_setup,
};
return Poll::Ready(Err(error.into()));
}
}
}
}
self.address_book = Some(address_book);
self.downloads = Some(Box::pin(Downloads::new(
Timeout::new(outbound, BLOCK_DOWNLOAD_TIMEOUT),
Timeout::new(verifier, BLOCK_VERIFY_TIMEOUT),
self.state.clone(),
)));
// Unfortunately, we can't combine these matches into an exhaustive match statement,
// because they use mutable references, or they depend on the state we've just modified.
self.network_setup = None;
}
Err(TryRecvError::Empty) => {
self.network_setup = Some(rx);
}
Err(e @ TryRecvError::Closed) => {
// In this case, report that the service failed, and put the
// failed oneshot back so we'll fail again in case
// poll_ready is called after failure.
self.network_setup = Some(rx);
return Poll::Ready(Err(e.into()));
}
};
// Make sure we left the network setup in a valid state
if matches!(self.network, Setup::FailedInit) {
unreachable!("incomplete Inbound initialization");
}
// If network setup failed, report service failure
if let Setup::FailedRecv { failed_setup } = &mut self.network {
// TryRecvError is not cloneable, so we have to generate a new error from the oneshot,
// rather than re-using a clone of the original error
let failed_response = failed_setup.try_recv();
if let Err(error @ TryRecvError::Closed) = failed_response {
return Poll::Ready(Err(error.into()));
} else {
unreachable!("unexpected response from failed Inbound network setup oneshot");
}
}
// Clean up completed download tasks, ignoring their results
if let Some(downloads) = self.downloads.as_mut() {
if let Setup::Initialized { downloads, .. } = &mut self.network {
while let Poll::Ready(Some(_)) = downloads.as_mut().poll_next(cx) {}
}
@ -168,22 +218,21 @@ impl Service<zn::Request> for Inbound {
#[instrument(name = "inbound", skip(self, req))]
fn call(&mut self, req: zn::Request) -> Self::Future {
match req {
zn::Request::Peers => match self.address_book.as_ref() {
Some(addrs) => {
zn::Request::Peers => {
if let Setup::Initialized { address_book, .. } = &self.network {
// We could truncate the list to try to not reveal our entire
// peer set. But because we don't monitor repeated requests,
// this wouldn't actually achieve anything, because a crawler
// could just repeatedly query it.
let mut peers = addrs.lock().unwrap().sanitized();
let mut peers = address_book.lock().unwrap().sanitized();
const MAX_ADDR: usize = 1000; // bitcoin protocol constant
peers.truncate(MAX_ADDR);
async { Ok(zn::Response::Peers(peers)) }.boxed()
}
None => {
} else {
info!("ignoring `Peers` request from remote peer during network setup");
async { Ok(zn::Response::Nil) }.boxed()
}
},
}
zn::Request::BlocksByHash(hashes) => {
// Correctness:
//
@ -246,7 +295,7 @@ impl Service<zn::Request> for Inbound {
async { Ok(zn::Response::Nil) }.boxed()
}
zn::Request::AdvertiseBlock(hash) => {
if let Some(downloads) = self.downloads.as_mut() {
if let Setup::Initialized { downloads, .. } = &mut self.network {
downloads.download_and_verify(hash);
} else {
info!(