fix(network): Reconnect with peers after brief network interruption (#7853)

* Fixes bug where Zebra won't reconnect to peers after brief loss of network connectivity

* Only dial on a timer crawl when there's a new address or zero active outbound connections
This commit is contained in:
Arya 2023-10-27 02:13:16 -04:00 committed by GitHub
parent 0a3790b73e
commit 5367ccbc5c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 6 additions and 4 deletions

View File

@ -896,7 +896,7 @@ where
// There weren't any peers, so try to get more peers.
debug!("demand for peers but no available candidates");
crawl(candidates, demand_tx).await?;
crawl(candidates, demand_tx, false).await?;
Ok(DemandCrawlFinished)
}
@ -910,6 +910,7 @@ where
Ok(TimerCrawl { tick }) => {
let candidates = candidates.clone();
let demand_tx = demand_tx.clone();
let should_always_dial = active_outbound_connections.update_count() == 0;
let crawl_handle = tokio::spawn(
async move {
@ -918,7 +919,7 @@ where
"crawling for more peers in response to the crawl timer"
);
crawl(candidates, demand_tx).await?;
crawl(candidates, demand_tx, should_always_dial).await?;
Ok(TimerCrawlFinished)
}
@ -957,11 +958,12 @@ where
}
/// Try to get more peers using `candidates`, then queue a connection attempt using `demand_tx`.
/// If there were no new peers, the connection attempt is skipped.
/// If there were no new peers and `should_always_dial` is false, the connection attempt is skipped.
#[instrument(skip(candidates, demand_tx))]
async fn crawl<S>(
candidates: Arc<futures::lock::Mutex<CandidateSet<S>>>,
mut demand_tx: futures::channel::mpsc::Sender<MorePeers>,
should_always_dial: bool,
) -> Result<(), BoxError>
where
S: Service<Request, Response = Response, Error = BoxError> + Send + Sync + 'static,
@ -976,7 +978,7 @@ where
result
};
let more_peers = match result {
Ok(more_peers) => more_peers,
Ok(more_peers) => more_peers.or_else(|| should_always_dial.then_some(MorePeers)),
Err(e) => {
info!(
?e,