Add height and hash info to syncer errors (#4287)

This commit is contained in:
teor 2022-05-11 16:51:06 +10:00 committed by GitHub
parent 17c5e9ad6f
commit fee10ae014
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 119 additions and 79 deletions

View File

@ -866,28 +866,31 @@ where
fn should_restart_sync(e: &BlockDownloadVerifyError) -> bool { fn should_restart_sync(e: &BlockDownloadVerifyError) -> bool {
match e { match e {
// Structural matches: downcasts // Structural matches: downcasts
BlockDownloadVerifyError::Invalid(VerifyChainError::Checkpoint( BlockDownloadVerifyError::Invalid {
VerifyCheckpointError::AlreadyVerified { .. }, error: VerifyChainError::Checkpoint(VerifyCheckpointError::AlreadyVerified { .. }),
)) => { ..
} => {
debug!(error = ?e, "block was already verified, possibly from a previous sync run, continuing"); debug!(error = ?e, "block was already verified, possibly from a previous sync run, continuing");
false false
} }
BlockDownloadVerifyError::Invalid(VerifyChainError::Block( BlockDownloadVerifyError::Invalid {
VerifyBlockError::Block { error:
source: BlockError::AlreadyInChain(_, _), VerifyChainError::Block(VerifyBlockError::Block {
}, source: BlockError::AlreadyInChain(_, _),
)) => { }),
..
} => {
debug!(error = ?e, "block is already in chain, possibly from a previous sync run, continuing"); debug!(error = ?e, "block is already in chain, possibly from a previous sync run, continuing");
false false
} }
// Structural matches: direct // Structural matches: direct
BlockDownloadVerifyError::CancelledDuringDownload BlockDownloadVerifyError::CancelledDuringDownload { .. }
| BlockDownloadVerifyError::CancelledDuringVerification => { | BlockDownloadVerifyError::CancelledDuringVerification { .. } => {
debug!(error = ?e, "block verification was cancelled, continuing"); debug!(error = ?e, "block verification was cancelled, continuing");
false false
} }
BlockDownloadVerifyError::BehindTipHeightLimit => { BlockDownloadVerifyError::BehindTipHeightLimit { .. } => {
debug!( debug!(
error = ?e, error = ?e,
"block height is behind the current state tip, \ "block height is behind the current state tip, \
@ -905,15 +908,16 @@ where
} }
// String matches // String matches
BlockDownloadVerifyError::Invalid(VerifyChainError::Block( BlockDownloadVerifyError::Invalid {
VerifyBlockError::Commit(ref source), error: VerifyChainError::Block(VerifyBlockError::Commit(ref source)),
)) if format!("{:?}", source).contains("block is already committed to the state") => { ..
} if format!("{:?}", source).contains("block is already committed to the state") => {
// TODO: improve this by checking the type (#2908) // TODO: improve this by checking the type (#2908)
debug!(error = ?e, "block is already committed, possibly from a previous sync run, continuing"); debug!(error = ?e, "block is already committed, possibly from a previous sync run, continuing");
false false
} }
BlockDownloadVerifyError::DownloadFailed(ref source) BlockDownloadVerifyError::DownloadFailed { ref error, .. }
if format!("{:?}", source).contains("NotFound") => if format!("{:?}", error).contains("NotFound") =>
{ {
// Covers these errors: // Covers these errors:
// - NotFoundResponse // - NotFoundResponse

View File

@ -62,35 +62,69 @@ impl<Request: Clone> hedge::Policy<Request> for AlwaysHedge {
#[derive(Error, Debug)] #[derive(Error, Debug)]
#[allow(dead_code)] #[allow(dead_code)]
pub enum BlockDownloadVerifyError { pub enum BlockDownloadVerifyError {
#[error("error from the network service")] #[error("permanent readiness error from the network service: {error:?}")]
NetworkError(#[source] BoxError), NetworkServiceError {
#[source]
error: BoxError,
},
#[error("error from the verifier service")] #[error("permanent readiness error from the verifier service: {error:?}")]
VerifierError(#[source] BoxError), VerifierServiceError {
#[source]
error: BoxError,
},
#[error("duplicate block hash queued for download: {hash:?}")] #[error("duplicate block hash queued for download: {hash:?}")]
DuplicateBlockQueuedForDownload { hash: block::Hash }, DuplicateBlockQueuedForDownload { hash: block::Hash },
#[error("error downloading block")] #[error("error downloading block: {error:?} {hash:?}")]
DownloadFailed(#[source] BoxError), DownloadFailed {
#[source]
error: BoxError,
hash: block::Hash,
},
#[error("downloaded block was too far ahead of the chain tip")] #[error("downloaded block was too far ahead of the chain tip: {height:?} {hash:?}")]
AboveLookaheadHeightLimit, AboveLookaheadHeightLimit {
height: block::Height,
hash: block::Hash,
},
#[error("downloaded block was too far behind the chain tip")] #[error("downloaded block was too far behind the chain tip: {height:?} {hash:?}")]
BehindTipHeightLimit, BehindTipHeightLimit {
height: block::Height,
hash: block::Hash,
},
#[error("downloaded block had an invalid height")] #[error("downloaded block had an invalid height: {hash:?}")]
InvalidHeight, InvalidHeight { hash: block::Hash },
#[error("block did not pass consensus validation")] #[error("block failed consensus validation: {error:?} {height:?} {hash:?}")]
Invalid(#[from] zebra_consensus::chain::VerifyChainError), Invalid {
#[source]
error: zebra_consensus::chain::VerifyChainError,
height: block::Height,
hash: block::Hash,
},
#[error("block download / verification was cancelled during download")] #[error("block validation request failed: {error:?} {height:?} {hash:?}")]
CancelledDuringDownload, ValidationRequestError {
#[source]
error: BoxError,
height: block::Height,
hash: block::Hash,
},
#[error("block download / verification was cancelled during verification")] #[error("block download & verification was cancelled during download: {hash:?}")]
CancelledDuringVerification, CancelledDuringDownload { hash: block::Hash },
#[error(
"block download & verification was cancelled during verification: {height:?} {hash:?}"
)]
CancelledDuringVerification {
height: block::Height,
hash: block::Hash,
},
} }
/// Represents a [`Stream`] of download and verification tasks during chain sync. /// Represents a [`Stream`] of download and verification tasks during chain sync.
@ -236,7 +270,7 @@ where
.network .network
.ready() .ready()
.await .await
.map_err(BlockDownloadVerifyError::NetworkError)? .map_err(|error| BlockDownloadVerifyError::NetworkServiceError { error })?
.call(zn::Request::BlocksByHash(std::iter::once(hash).collect())); .call(zn::Request::BlocksByHash(std::iter::once(hash).collect()));
// This oneshot is used to signal cancellation to the download task. // This oneshot is used to signal cancellation to the download task.
@ -254,9 +288,9 @@ where
_ = &mut cancel_rx => { _ = &mut cancel_rx => {
trace!("task cancelled prior to download completion"); trace!("task cancelled prior to download completion");
metrics::counter!("sync.cancelled.download.count", 1); metrics::counter!("sync.cancelled.download.count", 1);
return Err(BlockDownloadVerifyError::CancelledDuringDownload) return Err(BlockDownloadVerifyError::CancelledDuringDownload { hash })
} }
rsp = block_req => rsp.map_err(BlockDownloadVerifyError::DownloadFailed)?, rsp = block_req => rsp.map_err(|error| BlockDownloadVerifyError::DownloadFailed { error, hash})?,
}; };
let block = if let zn::Response::Blocks(blocks) = rsp { let block = if let zn::Response::Blocks(blocks) = rsp {
@ -310,43 +344,8 @@ where
}) })
.unwrap_or(block::Height(0)); .unwrap_or(block::Height(0));
if let Some(block_height) = block.coinbase_height() { let block_height = if let Some(block_height) = block.coinbase_height() {
if block_height > max_lookahead_height { block_height
info!(
?hash,
?block_height,
?tip_height,
?max_lookahead_height,
lookahead_limit = ?lookahead_limit,
"synced block height too far ahead of the tip: dropped downloaded block. \
Hint: Try increasing the value of the lookahead_limit field \
in the sync section of the configuration file."
);
metrics::counter!("sync.max.height.limit.dropped.block.count", 1);
// This error should be very rare during normal operation.
//
// We need to reset the syncer on this error,
// to allow the verifier and state to catch up,
// or prevent it following a bad chain.
//
// If we don't reset the syncer on this error,
// it will continue downloading blocks from a bad chain,
// (or blocks far ahead of the current state tip).
Err(BlockDownloadVerifyError::AboveLookaheadHeightLimit)?;
} else if block_height < min_accepted_height {
debug!(
?hash,
?block_height,
?tip_height,
?min_accepted_height,
behind_tip_limit = ?zs::MAX_BLOCK_REORG_HEIGHT,
"synced block height behind the finalized tip: dropped downloaded block"
);
metrics::counter!("gossip.min.height.limit.dropped.block.count", 1);
Err(BlockDownloadVerifyError::BehindTipHeightLimit)?;
}
} else { } else {
debug!( debug!(
?hash, ?hash,
@ -354,13 +353,50 @@ where
); );
metrics::counter!("sync.no.height.dropped.block.count", 1); metrics::counter!("sync.no.height.dropped.block.count", 1);
Err(BlockDownloadVerifyError::InvalidHeight)?; return Err(BlockDownloadVerifyError::InvalidHeight { hash });
};
if block_height > max_lookahead_height {
info!(
?hash,
?block_height,
?tip_height,
?max_lookahead_height,
lookahead_limit = ?lookahead_limit,
"synced block height too far ahead of the tip: dropped downloaded block. \
Hint: Try increasing the value of the lookahead_limit field \
in the sync section of the configuration file."
);
metrics::counter!("sync.max.height.limit.dropped.block.count", 1);
// This error should be very rare during normal operation.
//
// We need to reset the syncer on this error,
// to allow the verifier and state to catch up,
// or prevent it following a bad chain.
//
// If we don't reset the syncer on this error,
// it will continue downloading blocks from a bad chain,
// (or blocks far ahead of the current state tip).
Err(BlockDownloadVerifyError::AboveLookaheadHeightLimit { height: block_height, hash })?;
} else if block_height < min_accepted_height {
debug!(
?hash,
?block_height,
?tip_height,
?min_accepted_height,
behind_tip_limit = ?zs::MAX_BLOCK_REORG_HEIGHT,
"synced block height behind the finalized tip: dropped downloaded block"
);
metrics::counter!("gossip.min.height.limit.dropped.block.count", 1);
Err(BlockDownloadVerifyError::BehindTipHeightLimit { height: block_height, hash })?;
} }
let rsp = verifier let rsp = verifier
.ready() .ready()
.await .await
.map_err(BlockDownloadVerifyError::VerifierError)? .map_err(|error| BlockDownloadVerifyError::VerifierServiceError { error })?
.call(block); .call(block);
// Prefer the cancel handle if both are ready. // Prefer the cancel handle if both are ready.
let verification = tokio::select! { let verification = tokio::select! {
@ -368,7 +404,7 @@ where
_ = &mut cancel_rx => { _ = &mut cancel_rx => {
trace!("task cancelled prior to verification"); trace!("task cancelled prior to verification");
metrics::counter!("sync.cancelled.verify.count", 1); metrics::counter!("sync.cancelled.verify.count", 1);
return Err(BlockDownloadVerifyError::CancelledDuringVerification) return Err(BlockDownloadVerifyError::CancelledDuringVerification { height: block_height, hash })
} }
verification = rsp => verification, verification = rsp => verification,
}; };
@ -378,8 +414,8 @@ where
verification.map_err(|err| { verification.map_err(|err| {
match err.downcast::<zebra_consensus::chain::VerifyChainError>() { match err.downcast::<zebra_consensus::chain::VerifyChainError>() {
Ok(e) => BlockDownloadVerifyError::Invalid(*e), Ok(error) => BlockDownloadVerifyError::Invalid { error: *error, height: block_height, hash },
Err(e) => BlockDownloadVerifyError::VerifierError(e), Err(error) => BlockDownloadVerifyError::ValidationRequestError { error, height: block_height, hash },
} }
}) })
} }