Add a Setup enum to manage Inbound network setup internal state

This change encodes a bunch of invariants in the type system,
and adds explicit failure states for:
* a closed oneshot,
* bugs in the initialization code.
This commit is contained in:
teor 2021-01-25 16:11:19 +10:00
parent 32b032204a
commit eac4fd181a
1 changed files with 120 additions and 71 deletions

View File

@ -1,5 +1,6 @@
use std::{ use std::{
future::Future, future::Future,
mem,
pin::Pin, pin::Pin,
sync::{Arc, Mutex}, sync::{Arc, Mutex},
task::{Context, Poll}, task::{Context, Poll},
@ -28,8 +29,53 @@ use downloads::Downloads;
type Outbound = Buffer<BoxService<zn::Request, zn::Response, zn::BoxError>, zn::Request>; type Outbound = Buffer<BoxService<zn::Request, zn::Response, zn::BoxError>, zn::Request>;
type State = Buffer<BoxService<zs::Request, zs::Response, zs::BoxError>, zs::Request>; type State = Buffer<BoxService<zs::Request, zs::Response, zs::BoxError>, zs::Request>;
type Verifier = Buffer<BoxService<Arc<Block>, block::Hash, VerifyChainError>, Arc<Block>>; type Verifier = Buffer<BoxService<Arc<Block>, block::Hash, VerifyChainError>, Arc<Block>>;
type InboundDownloads = Downloads<Timeout<Outbound>, Timeout<Verifier>, State>;
pub type SetupData = (Outbound, Arc<Mutex<AddressBook>>); pub type NetworkSetupData = (Outbound, Arc<Mutex<AddressBook>>);
/// Tracks the internal state of the [`Inbound`] service during network setup.
pub enum Setup {
/// Waiting for network setup to complete.
///
/// Requests that depend on Zebra's internal network setup are ignored.
/// Other requests are answered.
AwaitingNetwork {
/// A oneshot channel used to receive the address_book and outbound services
/// after the network is set up.
network_setup: oneshot::Receiver<NetworkSetupData>,
/// A service that verifies downloaded blocks. Given to `downloads`
/// after the network is set up.
verifier: Verifier,
},
/// Network setup is complete.
///
/// All requests are answered.
Initialized {
/// A shared list of peer addresses.
address_book: Arc<Mutex<zn::AddressBook>>,
/// A `futures::Stream` that downloads and verifies gossipped blocks.
downloads: Pin<Box<InboundDownloads>>,
},
/// Temporary state used in the service's internal network initialization
/// code.
///
/// If this state occurs outside the service initialization code, the
/// service panics.
FailedInit,
/// Network setup failed, because the setup channel permanently failed.
/// The service keeps returning readiness errors for every request.
///
/// We keep hold of the closed oneshot, so we can use it to create a
/// new error for each `poll_ready` call.
FailedRecv {
failed_setup: oneshot::Receiver<NetworkSetupData>,
},
}
/// Uses the node state to respond to inbound peer requests. /// Uses the node state to respond to inbound peer requests.
/// ///
@ -53,37 +99,10 @@ pub type SetupData = (Outbound, Arc<Mutex<AddressBook>>);
/// responding to block gossip by attempting to download and validate advertised /// responding to block gossip by attempting to download and validate advertised
/// blocks. /// blocks.
pub struct Inbound { pub struct Inbound {
// invariants: /// Provides network-dependent services, if they are available.
// * Before setup: address_book and downloads are None, and the *_setup members are Some
// * After setup: address_book and downloads are Some, and the *_setup members are None
//
// why not use an enum for the inbound state? because it would mean
// match-wrapping the body of Service::call rather than just expect()ing
// some Options.
// Setup
/// A oneshot channel used to receive the address_book and outbound services
/// after the network is set up.
/// ///
/// `None` after the network is set up. /// Some services are unavailable until Zebra has completed network setup.
network_setup: Option<oneshot::Receiver<SetupData>>, network: Setup,
/// A service that verifies downloaded blocks. Given to `downloads`
/// after the network is set up.
///
/// `None` after the network is set up and `downloads` is created.
verifier_setup: Option<Verifier>,
// Services and Data Stores
/// A shared list of peer addresses.
///
/// `None` until the network is set up.
address_book: Option<Arc<Mutex<zn::AddressBook>>>,
/// A stream that downloads and verifies gossipped blocks.
///
/// `None` until the network is set up.
downloads: Option<Pin<Box<Downloads<Timeout<Outbound>, Timeout<Verifier>, State>>>>,
/// A service that manages cached blockchain state. /// A service that manages cached blockchain state.
state: State, state: State,
@ -91,15 +110,15 @@ pub struct Inbound {
impl Inbound { impl Inbound {
pub fn new( pub fn new(
network_setup: oneshot::Receiver<SetupData>, network_setup: oneshot::Receiver<NetworkSetupData>,
state: State, state: State,
verifier: Verifier, verifier: Verifier,
) -> Self { ) -> Self {
Self { Self {
network_setup: Some(network_setup), network: Setup::AwaitingNetwork {
verifier_setup: Some(verifier), network_setup,
address_book: None, verifier,
downloads: None, },
state, state,
} }
} }
@ -112,44 +131,75 @@ impl Service<zn::Request> for Inbound {
Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send + 'static>>; Pin<Box<dyn Future<Output = Result<Self::Response, Self::Error>> + Send + 'static>>;
fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> { fn poll_ready(&mut self, cx: &mut Context<'_>) -> Poll<Result<(), Self::Error>> {
use oneshot::error::TryRecvError;
// Check whether the network setup is finished, but don't wait for it to // Check whether the network setup is finished, but don't wait for it to
// become ready before reporting readiness. We expect to get it "soon", // become ready before reporting readiness. We expect to get it "soon",
// and reporting unreadiness might cause unwanted load-shedding, since // and reporting unreadiness might cause unwanted load-shedding, since
// the load-shed middleware is unable to distinguish being unready due // the load-shed middleware is unable to distinguish being unready due
// to load from being unready while waiting on setup. // to load from being unready while waiting on setup.
if let Some(mut rx) = self.network_setup.take() { if matches!(self.network, Setup::AwaitingNetwork { .. }) {
use oneshot::error::TryRecvError; // Unfortunately, we can't match, swap, and destructure at the same time
match rx.try_recv() { let mut awaiting_state = Setup::FailedInit;
Ok((outbound, address_book)) => { mem::swap(&mut self.network, &mut awaiting_state);
let verifier = self if let Setup::AwaitingNetwork {
.verifier_setup mut network_setup,
.take() verifier,
.expect("unexpected missing verifier during inbound network setup"); } = awaiting_state
{
match network_setup.try_recv() {
Ok((outbound, address_book)) => {
let downloads = Box::pin(Downloads::new(
Timeout::new(outbound, BLOCK_DOWNLOAD_TIMEOUT),
Timeout::new(verifier, BLOCK_VERIFY_TIMEOUT),
self.state.clone(),
));
self.network = Setup::Initialized {
address_book,
downloads,
};
}
Err(TryRecvError::Empty) => {
// There's no setup data yet, so keep waiting for it
self.network = Setup::AwaitingNetwork {
network_setup,
verifier,
};
}
Err(error @ TryRecvError::Closed) => {
// Mark the service as failed, because network setup failed
error!(?error, "inbound network setup failed");
self.network = Setup::FailedRecv {
failed_setup: network_setup,
};
return Poll::Ready(Err(error.into()));
}
}
}
}
self.address_book = Some(address_book); // Unfortunately, we can't combine these matches into an exhaustive match statement,
self.downloads = Some(Box::pin(Downloads::new( // because they use mutable references, or they depend on the state we've just modified.
Timeout::new(outbound, BLOCK_DOWNLOAD_TIMEOUT),
Timeout::new(verifier, BLOCK_VERIFY_TIMEOUT),
self.state.clone(),
)));
self.network_setup = None; // Make sure we left the network setup in a valid state
} if matches!(self.network, Setup::FailedInit) {
Err(TryRecvError::Empty) => { unreachable!("incomplete Inbound initialization");
self.network_setup = Some(rx); }
}
Err(e @ TryRecvError::Closed) => { // If network setup failed, report service failure
// In this case, report that the service failed, and put the if let Setup::FailedRecv { failed_setup } = &mut self.network {
// failed oneshot back so we'll fail again in case // TryRecvError is not cloneable, so we have to generate a new error from the oneshot,
// poll_ready is called after failure. // rather than re-using a clone of the original error
self.network_setup = Some(rx); let failed_response = failed_setup.try_recv();
return Poll::Ready(Err(e.into())); if let Err(error @ TryRecvError::Closed) = failed_response {
} return Poll::Ready(Err(error.into()));
}; } else {
unreachable!("unexpected response from failed Inbound network setup oneshot");
}
} }
// Clean up completed download tasks, ignoring their results // Clean up completed download tasks, ignoring their results
if let Some(downloads) = self.downloads.as_mut() { if let Setup::Initialized { downloads, .. } = &mut self.network {
while let Poll::Ready(Some(_)) = downloads.as_mut().poll_next(cx) {} while let Poll::Ready(Some(_)) = downloads.as_mut().poll_next(cx) {}
} }
@ -168,22 +218,21 @@ impl Service<zn::Request> for Inbound {
#[instrument(name = "inbound", skip(self, req))] #[instrument(name = "inbound", skip(self, req))]
fn call(&mut self, req: zn::Request) -> Self::Future { fn call(&mut self, req: zn::Request) -> Self::Future {
match req { match req {
zn::Request::Peers => match self.address_book.as_ref() { zn::Request::Peers => {
Some(addrs) => { if let Setup::Initialized { address_book, .. } = &self.network {
// We could truncate the list to try to not reveal our entire // We could truncate the list to try to not reveal our entire
// peer set. But because we don't monitor repeated requests, // peer set. But because we don't monitor repeated requests,
// this wouldn't actually achieve anything, because a crawler // this wouldn't actually achieve anything, because a crawler
// could just repeatedly query it. // could just repeatedly query it.
let mut peers = addrs.lock().unwrap().sanitized(); let mut peers = address_book.lock().unwrap().sanitized();
const MAX_ADDR: usize = 1000; // bitcoin protocol constant const MAX_ADDR: usize = 1000; // bitcoin protocol constant
peers.truncate(MAX_ADDR); peers.truncate(MAX_ADDR);
async { Ok(zn::Response::Peers(peers)) }.boxed() async { Ok(zn::Response::Peers(peers)) }.boxed()
} } else {
None => {
info!("ignoring `Peers` request from remote peer during network setup"); info!("ignoring `Peers` request from remote peer during network setup");
async { Ok(zn::Response::Nil) }.boxed() async { Ok(zn::Response::Nil) }.boxed()
} }
}, }
zn::Request::BlocksByHash(hashes) => { zn::Request::BlocksByHash(hashes) => {
// Correctness: // Correctness:
// //
@ -246,7 +295,7 @@ impl Service<zn::Request> for Inbound {
async { Ok(zn::Response::Nil) }.boxed() async { Ok(zn::Response::Nil) }.boxed()
} }
zn::Request::AdvertiseBlock(hash) => { zn::Request::AdvertiseBlock(hash) => {
if let Some(downloads) = self.downloads.as_mut() { if let Setup::Initialized { downloads, .. } = &mut self.network {
downloads.download_and_verify(hash); downloads.download_and_verify(hash);
} else { } else {
info!( info!(