fix(hermes): reconnect on wh connection termination (#1488)
* fix(hermes): reconnect on wh connection termination `tokio::select` disables the branch that runs the wh connection if it returns OK and it never gets checked again. This change changes the `run` return to never return OK. * refactor(hermes): use Result<!> in pythnet network listener thread
This commit is contained in:
parent
1b13bf651a
commit
f9292177e9
|
@ -1796,7 +1796,7 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "hermes"
|
name = "hermes"
|
||||||
version = "0.5.5"
|
version = "0.5.6"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anyhow",
|
"anyhow",
|
||||||
"async-trait",
|
"async-trait",
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "hermes"
|
name = "hermes"
|
||||||
version = "0.5.5"
|
version = "0.5.6"
|
||||||
description = "Hermes is an agent that provides Verified Prices from the Pythnet Pyth Oracle."
|
description = "Hermes is an agent that provides Verified Prices from the Pythnet Pyth Oracle."
|
||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
|
|
|
@ -139,7 +139,7 @@ async fn fetch_bridge_data(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn run(store: Arc<State>, pythnet_ws_endpoint: String) -> Result<()> {
|
pub async fn run(store: Arc<State>, pythnet_ws_endpoint: String) -> Result<!> {
|
||||||
let client = PubsubClient::new(pythnet_ws_endpoint.as_ref()).await?;
|
let client = PubsubClient::new(pythnet_ws_endpoint.as_ref()).await?;
|
||||||
|
|
||||||
let config = RpcProgramAccountsConfig {
|
let config = RpcProgramAccountsConfig {
|
||||||
|
@ -160,59 +160,54 @@ pub async fn run(store: Arc<State>, pythnet_ws_endpoint: String) -> Result<()> {
|
||||||
.program_subscribe(&system_program::id(), Some(config))
|
.program_subscribe(&system_program::id(), Some(config))
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
loop {
|
while let Some(update) = notif.next().await {
|
||||||
match notif.next().await {
|
let account: Account = match update.value.account.decode() {
|
||||||
Some(update) => {
|
Some(account) => account,
|
||||||
let account: Account = match update.value.account.decode() {
|
|
||||||
Some(account) => account,
|
|
||||||
None => {
|
|
||||||
tracing::error!(?update, "Failed to decode account from update.");
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let accumulator_messages = AccumulatorMessages::try_from_slice(&account.data);
|
|
||||||
match accumulator_messages {
|
|
||||||
Ok(accumulator_messages) => {
|
|
||||||
let (candidate, _) = Pubkey::find_program_address(
|
|
||||||
&[
|
|
||||||
b"AccumulatorState",
|
|
||||||
&accumulator_messages.ring_index().to_be_bytes(),
|
|
||||||
],
|
|
||||||
&system_program::id(),
|
|
||||||
);
|
|
||||||
|
|
||||||
if candidate.to_string() == update.value.pubkey {
|
|
||||||
let store = store.clone();
|
|
||||||
tokio::spawn(async move {
|
|
||||||
if let Err(err) = Aggregates::store_update(
|
|
||||||
&*store,
|
|
||||||
Update::AccumulatorMessages(accumulator_messages),
|
|
||||||
)
|
|
||||||
.await
|
|
||||||
{
|
|
||||||
tracing::error!(error = ?err, "Failed to store accumulator messages.");
|
|
||||||
}
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
tracing::error!(
|
|
||||||
?candidate,
|
|
||||||
?update.value.pubkey,
|
|
||||||
"Failed to verify message public keys.",
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Err(err) => {
|
|
||||||
tracing::error!(error = ?err, "Failed to parse AccumulatorMessages.");
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
None => {
|
None => {
|
||||||
return Err(anyhow!("Pythnet network listener terminated"));
|
tracing::error!(?update, "Failed to decode account from update.");
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
|
|
||||||
|
let accumulator_messages = AccumulatorMessages::try_from_slice(&account.data);
|
||||||
|
match accumulator_messages {
|
||||||
|
Ok(accumulator_messages) => {
|
||||||
|
let (candidate, _) = Pubkey::find_program_address(
|
||||||
|
&[
|
||||||
|
b"AccumulatorState",
|
||||||
|
&accumulator_messages.ring_index().to_be_bytes(),
|
||||||
|
],
|
||||||
|
&system_program::id(),
|
||||||
|
);
|
||||||
|
|
||||||
|
if candidate.to_string() == update.value.pubkey {
|
||||||
|
let store = store.clone();
|
||||||
|
tokio::spawn(async move {
|
||||||
|
if let Err(err) = Aggregates::store_update(
|
||||||
|
&*store,
|
||||||
|
Update::AccumulatorMessages(accumulator_messages),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
tracing::error!(error = ?err, "Failed to store accumulator messages.");
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
tracing::error!(
|
||||||
|
?candidate,
|
||||||
|
?update.value.pubkey,
|
||||||
|
"Failed to verify message public keys.",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(err) => {
|
||||||
|
tracing::error!(error = ?err, "Failed to parse AccumulatorMessages.");
|
||||||
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Err(anyhow!("Pythnet network listener connection terminated"))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Fetch existing GuardianSet accounts from Wormhole.
|
/// Fetch existing GuardianSet accounts from Wormhole.
|
||||||
|
|
|
@ -49,7 +49,11 @@ use {
|
||||||
Digest,
|
Digest,
|
||||||
Keccak256,
|
Keccak256,
|
||||||
},
|
},
|
||||||
std::sync::Arc,
|
std::{
|
||||||
|
sync::Arc,
|
||||||
|
time::Duration,
|
||||||
|
},
|
||||||
|
tokio::time::Instant,
|
||||||
tonic::Request,
|
tonic::Request,
|
||||||
wormhole_sdk::{
|
wormhole_sdk::{
|
||||||
vaa::{
|
vaa::{
|
||||||
|
@ -158,10 +162,16 @@ mod proto {
|
||||||
pub async fn spawn(opts: RunOptions, state: Arc<State>) -> Result<()> {
|
pub async fn spawn(opts: RunOptions, state: Arc<State>) -> Result<()> {
|
||||||
let mut exit = crate::EXIT.subscribe();
|
let mut exit = crate::EXIT.subscribe();
|
||||||
loop {
|
loop {
|
||||||
|
let current_time = Instant::now();
|
||||||
tokio::select! {
|
tokio::select! {
|
||||||
_ = exit.changed() => break,
|
_ = exit.changed() => break,
|
||||||
Err(err) = run(opts.clone(), state.clone()) => {
|
Err(err) = run(opts.clone(), state.clone()) => {
|
||||||
tracing::error!(error = ?err, "Wormhole gRPC service failed.");
|
tracing::error!(error = ?err, "Wormhole gRPC service failed.");
|
||||||
|
|
||||||
|
if current_time.elapsed() < Duration::from_secs(30) {
|
||||||
|
tracing::error!("Wormhole listener restarting too quickly. Sleep 1s.");
|
||||||
|
tokio::time::sleep(Duration::from_secs(1)).await;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -170,7 +180,7 @@ pub async fn spawn(opts: RunOptions, state: Arc<State>) -> Result<()> {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tracing::instrument(skip(opts, state))]
|
#[tracing::instrument(skip(opts, state))]
|
||||||
async fn run(opts: RunOptions, state: Arc<State>) -> Result<()> {
|
async fn run(opts: RunOptions, state: Arc<State>) -> Result<!> {
|
||||||
let mut client = SpyRpcServiceClient::connect(opts.wormhole.spy_rpc_addr).await?;
|
let mut client = SpyRpcServiceClient::connect(opts.wormhole.spy_rpc_addr).await?;
|
||||||
let mut stream = client
|
let mut stream = client
|
||||||
.subscribe_signed_vaa(Request::new(SubscribeSignedVaaRequest {
|
.subscribe_signed_vaa(Request::new(SubscribeSignedVaaRequest {
|
||||||
|
@ -190,7 +200,7 @@ async fn run(opts: RunOptions, state: Arc<State>) -> Result<()> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Err(anyhow!("Wormhole gRPC stream terminated."))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Process a message received via a Wormhole gRPC connection.
|
/// Process a message received via a Wormhole gRPC connection.
|
||||||
|
|
Loading…
Reference in New Issue