Serge/liquidator split tcs and liquidation (#914)

liquidator: split TCS triggering and liquidation job

Concurrent execution of candidate lookup and tx building/sending
- Also added a health assertion IX to protect the liqor in multi-liquidation scenarios
- And a timeout for Jupiter v6 queries (to avoid blocking liquidation because of slow TCS)
Serge Farny 2024-03-20 15:25:52 +01:00 committed by GitHub
parent 769f940a66
commit f54bb6f0b0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 1046 additions and 393 deletions
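The refactor boils down to a scanner/worker split: scanning jobs record candidates in shared state and send a wake-up signal over a channel, while a small pool of workers claims candidates and builds/sends the transactions. Below is a minimal, self-contained sketch of that pattern using plain `u64` candidates and hypothetical names, not the crate's actual types.

```rust
// Minimal sketch of the scanner/worker split (hypothetical names, not the
// crate's actual types): a scanner inserts candidates into shared state and
// sends a wake-up signal; each worker claims one candidate per signal.
use std::collections::HashSet;
use std::sync::{Arc, RwLock};
use std::time::Duration;

#[tokio::main]
async fn main() {
    let candidates = Arc::new(RwLock::new(HashSet::<u64>::new()));
    let (trigger_tx, trigger_rx) = async_channel::unbounded::<()>();

    // Worker pool (the real code sizes this with --max-parallel-operations).
    for worker_id in 0..2u64 {
        let candidates = candidates.clone();
        let trigger_rx = trigger_rx.clone();
        tokio::spawn(async move {
            while trigger_rx.recv().await.is_ok() {
                // Claim one candidate under the lock, then work on it outside.
                let claimed = {
                    let mut set = candidates.write().unwrap();
                    let first = set.iter().next().copied();
                    if let Some(c) = first {
                        set.remove(&c);
                    }
                    first
                };
                if let Some(c) = claimed {
                    println!("worker {worker_id} processing candidate {c}");
                }
            }
        });
    }

    // Scanner side: record a candidate and wake one worker.
    if candidates.write().unwrap().insert(42) {
        trigger_tx.send(()).await.unwrap();
    }
    tokio::time::sleep(Duration::from_millis(50)).await;
}
```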

Cargo.lock generated
View File

@@ -3540,6 +3540,7 @@ dependencies = [
 "futures-core",
 "futures-util",
 "hdrhistogram",
+"indexmap 2.0.0",
 "itertools",
 "jemallocator",
 "jsonrpc-core 18.0.0 (registry+https://github.com/rust-lang/crates.io-index)",

View File

@@ -49,4 +49,5 @@ tokio-stream = { version = "0.1.9"}
 tokio-tungstenite = "0.16.1"
 tracing = "0.1"
 regex = "1.9.5"
 hdrhistogram = "7.5.4"
+indexmap = "2.0.0"

View File

@@ -136,6 +136,12 @@ pub struct Cli {
     #[clap(long, env, value_enum, default_value = "true")]
     pub(crate) take_tcs: BoolArg,

+    #[clap(long, env, default_value = "30")]
+    pub(crate) tcs_refresh_timeout_secs: u64,
+
+    #[clap(long, env, default_value = "1000")]
+    pub(crate) tcs_check_interval_ms: u64,
+
     /// profit margin at which to take tcs orders
     #[clap(long, env, default_value = "0.0005")]
     pub(crate) tcs_profit_fraction: f64,
@@ -178,6 +184,10 @@ pub struct Cli {
     #[clap(long, env, default_value = "https://quote-api.jup.ag/v6")]
     pub(crate) jupiter_v6_url: String,

+    /// override the jupiter http request timeout
+    #[clap(long, env, default_value = "30")]
+    pub(crate) jupiter_timeout_secs: u64,
+
     /// provide a jupiter token, currently only for jup v6
     #[clap(long, env, default_value = "")]
     pub(crate) jupiter_token: String,
@@ -191,6 +201,12 @@ pub struct Cli {
     #[clap(long, env, value_enum, default_value = "true")]
     pub(crate) telemetry: BoolArg,

+    /// if liquidation is enabled
+    ///
+    /// might be used to run an instance of liquidator dedicated to TCS and another one for liquidation
+    #[clap(long, env, value_enum, default_value = "true")]
+    pub(crate) liquidation_enabled: BoolArg,
+
     /// liquidation refresh timeout in secs
     #[clap(long, env, default_value = "30")]
     pub(crate) liquidation_refresh_timeout_secs: u8,
@@ -216,4 +232,8 @@ pub struct Cli {
     /// how long should it wait before logging an oracle error again (for the same token)
     #[clap(long, env, default_value = "30")]
     pub(crate) skip_oracle_error_in_logs_duration_secs: u64,
+
+    /// max number of liquidation/tcs to do concurrently
+    #[clap(long, env, default_value = "5")]
+    pub(crate) max_parallel_operations: u64,
 }
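For reference, a stripped-down sketch of how flags like these behave with clap's derive API (a simplified field set; assumes clap with the `derive` and `env` features and omits the crate's `BoolArg` wrapper):

```rust
// Simplified sketch (not the real Cli struct): each flag can be set on the
// command line or via an environment variable, with the shown default.
use clap::Parser;

#[derive(Parser, Debug)]
struct Cli {
    /// how often the TCS scanner runs, in milliseconds
    #[clap(long, env, default_value = "1000")]
    tcs_check_interval_ms: u64,

    /// override the jupiter http request timeout, in seconds
    #[clap(long, env, default_value = "30")]
    jupiter_timeout_secs: u64,

    /// max number of liquidation/tcs operations to run concurrently
    #[clap(long, env, default_value = "5")]
    max_parallel_operations: u64,
}

fn main() {
    // e.g. `MAX_PARALLEL_OPERATIONS=8 liquidator --tcs-check-interval-ms 500`
    let cli = Cli::parse();
    println!("{cli:?}");
}
```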

View File

@@ -9,6 +9,7 @@ use mango_v4_client::{chain_data, MangoClient, PreparedInstructions};
 use solana_sdk::signature::Signature;

 use futures::{stream, StreamExt, TryStreamExt};
+use mango_v4::accounts_ix::HealthCheckKind::MaintRatio;
 use rand::seq::SliceRandom;
 use tracing::*;
 use {anyhow::Context, fixed::types::I80F48, solana_sdk::pubkey::Pubkey};
@@ -260,7 +261,22 @@ impl<'a> LiquidateHelper<'a> {
             )
             .await
             .context("creating perp_liq_base_or_positive_pnl_instruction")?;
         liq_ixs.cu = liq_ixs.cu.max(self.config.compute_limit_for_liq_ix);
+
+        let liqor = &self.client.mango_account().await?;
+        liq_ixs.append(
+            self.client
+                .health_check_instruction(
+                    liqor,
+                    self.config.min_health_ratio,
+                    vec![],
+                    vec![*perp_market_index],
+                    MaintRatio,
+                )
+                .await?,
+        );
+
         let txsig = self
             .client
             .send_and_confirm_owner_tx(liq_ixs.to_instructions())
@@ -501,6 +517,20 @@ impl<'a> LiquidateHelper<'a> {
             .await
             .context("creating liq_token_with_token ix")?;
         liq_ixs.cu = liq_ixs.cu.max(self.config.compute_limit_for_liq_ix);
+
+        let liqor = self.client.mango_account().await?;
+        liq_ixs.append(
+            self.client
+                .health_check_instruction(
+                    &liqor,
+                    self.config.min_health_ratio,
+                    vec![asset_token_index, liab_token_index],
+                    vec![],
+                    MaintRatio,
+                )
+                .await?,
+        );
+
         let txsig = self
             .client
             .send_and_confirm_owner_tx(liq_ixs.to_instructions())
@@ -651,14 +681,11 @@ impl<'a> LiquidateHelper<'a> {
 }

 #[allow(clippy::too_many_arguments)]
-pub async fn maybe_liquidate_account(
+pub async fn can_liquidate_account(
     mango_client: &MangoClient,
     account_fetcher: &chain_data::AccountFetcher,
     pubkey: &Pubkey,
-    config: &Config,
 ) -> anyhow::Result<bool> {
-    let liqor_min_health_ratio = I80F48::from_num(config.min_health_ratio);
-
     let account = account_fetcher.fetch_mango_account(pubkey)?;
     let health_cache = mango_client
         .health_cache(&account)
@@ -675,6 +702,18 @@ pub async fn maybe_liquidate_account(
         "possible candidate",
     );

+    Ok(true)
+}
+
+#[allow(clippy::too_many_arguments)]
+pub async fn maybe_liquidate_account(
+    mango_client: &MangoClient,
+    account_fetcher: &chain_data::AccountFetcher,
+    pubkey: &Pubkey,
+    config: &Config,
+) -> anyhow::Result<bool> {
+    let liqor_min_health_ratio = I80F48::from_num(config.min_health_ratio);
+
     // Fetch a fresh account and re-compute
     // This is -- unfortunately -- needed because the websocket streams seem to not
     // be great at providing timely updates to the account data.
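The appended HealthCheck instruction is what makes concurrent liquidation safe for the liqor: if another worker's transaction has already consumed the liqor's spare health, this transaction aborts instead of dropping the liqor below its configured minimum. An illustrative, off-chain simplification of the assertion it enforces:

```rust
// Illustrative only: mirrors the intent of the on-chain HealthCheck ix with
// HealthCheckKind::MaintRatio -- abort when the liqor's maintenance health
// ratio would end up below the configured minimum.
fn assert_min_maint_ratio(maint_ratio_after: f64, min_health_ratio: f64) -> Result<(), String> {
    if maint_ratio_after < min_health_ratio {
        Err(format!(
            "health check failed: maint ratio {maint_ratio_after:.2} < min {min_health_ratio:.2}"
        ))
    } else {
        Ok(())
    }
}

fn main() {
    // Two workers liquidate concurrently with min_health_ratio = 50:
    // the first tx leaves enough margin, the second would not and fails.
    assert!(assert_min_maint_ratio(62.0, 50.0).is_ok());
    assert!(assert_min_maint_ratio(41.5, 50.0).is_err());
}
```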

View File

@ -0,0 +1,238 @@
use crate::cli_args::Cli;
use crate::metrics::Metrics;
use crate::unwrappable_oracle_error::UnwrappableOracleError;
use crate::{liquidate, LiqErrorType, SharedState};
use anchor_lang::prelude::Pubkey;
use itertools::Itertools;
use mango_v4::state::TokenIndex;
use mango_v4_client::error_tracking::ErrorTracking;
use mango_v4_client::{chain_data, MangoClient, MangoClientError};
use std::sync::{Arc, RwLock};
use std::time::{Duration, Instant};
use tokio::task::JoinHandle;
use tracing::{error, trace, warn};
#[derive(Clone)]
pub struct LiquidationState {
pub mango_client: Arc<MangoClient>,
pub account_fetcher: Arc<chain_data::AccountFetcher>,
pub liquidation_config: liquidate::Config,
pub errors: Arc<RwLock<ErrorTracking<Pubkey, LiqErrorType>>>,
pub oracle_errors: Arc<RwLock<ErrorTracking<TokenIndex, LiqErrorType>>>,
}
impl LiquidationState {
async fn find_candidates(
&mut self,
accounts_iter: impl Iterator<Item = &Pubkey>,
action: impl Fn(Pubkey) -> anyhow::Result<()>,
) -> anyhow::Result<u64> {
let mut found_counter = 0u64;
use rand::seq::SliceRandom;
let mut accounts = accounts_iter.collect::<Vec<&Pubkey>>();
{
let mut rng = rand::thread_rng();
accounts.shuffle(&mut rng);
}
for pubkey in accounts {
if self.should_skip_execution(pubkey) {
continue;
}
let result =
liquidate::can_liquidate_account(&self.mango_client, &self.account_fetcher, pubkey)
.await;
self.log_or_ignore_error(&result, pubkey);
if result.unwrap_or(false) {
action(*pubkey)?;
found_counter = found_counter + 1;
}
}
Ok(found_counter)
}
fn should_skip_execution(&mut self, pubkey: &Pubkey) -> bool {
let now = Instant::now();
let error_tracking = &mut self.errors;
// Skip a pubkey if there've been too many errors recently
if let Some(error_entry) =
error_tracking
.read()
.unwrap()
.had_too_many_errors(LiqErrorType::Liq, pubkey, now)
{
trace!(
%pubkey,
error_entry.count,
"skip checking account for liquidation, had errors recently",
);
return true;
}
false
}
fn log_or_ignore_error<T>(&mut self, result: &anyhow::Result<T>, pubkey: &Pubkey) {
let error_tracking = &mut self.errors;
if let Err(err) = result.as_ref() {
if let Some((ti, ti_name)) = err.try_unwrap_oracle_error() {
if self
.oracle_errors
.read()
.unwrap()
.had_too_many_errors(LiqErrorType::Liq, &ti, Instant::now())
.is_none()
{
warn!(
"{:?} recording oracle error for token {} {}",
chrono::offset::Utc::now(),
ti_name,
ti
);
}
self.oracle_errors
.write()
.unwrap()
.record(LiqErrorType::Liq, &ti, err.to_string());
return;
}
// Keep track of pubkeys that had errors
error_tracking
.write()
.unwrap()
.record(LiqErrorType::Liq, pubkey, err.to_string());
// Not all errors need to be raised to the user's attention.
let mut is_error = true;
// Simulation errors due to liqee precondition failures on the liquidation instructions
// will commonly happen if our liquidator is late or if there are chain forks.
match err.downcast_ref::<MangoClientError>() {
Some(MangoClientError::SendTransactionPreflightFailure { logs, .. }) => {
if logs.iter().any(|line| {
line.contains("HealthMustBeNegative") || line.contains("IsNotBankrupt")
}) {
is_error = false;
}
}
_ => {}
};
if is_error {
error!("liquidating account {}: {:?}", pubkey, err);
} else {
trace!("liquidating account {}: {:?}", pubkey, err);
}
} else {
error_tracking
.write()
.unwrap()
.clear(LiqErrorType::Liq, pubkey);
}
}
pub async fn maybe_liquidate_and_log_error(&mut self, pubkey: &Pubkey) -> anyhow::Result<bool> {
if self.should_skip_execution(pubkey) {
return Ok(false);
}
let result = liquidate::maybe_liquidate_account(
&self.mango_client,
&self.account_fetcher,
pubkey,
&self.liquidation_config,
)
.await;
self.log_or_ignore_error(&result, pubkey);
return result;
}
}
pub fn spawn_liquidation_job(
cli: &Cli,
shared_state: &Arc<RwLock<SharedState>>,
tx_trigger_sender: async_channel::Sender<()>,
mut liquidation: Box<LiquidationState>,
metrics: &Metrics,
) -> JoinHandle<()> {
tokio::spawn({
let mut interval =
mango_v4_client::delay_interval(Duration::from_millis(cli.check_interval_ms));
let mut metric_liquidation_check = metrics.register_latency("liquidation_check".into());
let mut metric_liquidation_start_end =
metrics.register_latency("liquidation_start_end".into());
let mut liquidation_start_time = None;
let shared_state = shared_state.clone();
async move {
loop {
interval.tick().await;
let account_addresses = {
let mut state = shared_state.write().unwrap();
if !state.one_snapshot_done {
// discard first latency info as it will skew data too much
state.oldest_chain_event_reception_time = None;
continue;
}
if state.oldest_chain_event_reception_time.is_none()
&& liquidation_start_time.is_none()
{
// no new update, skip computing
continue;
}
state.mango_accounts.iter().cloned().collect_vec()
};
liquidation.errors.write().unwrap().update();
liquidation.oracle_errors.write().unwrap().update();
if liquidation_start_time.is_none() {
liquidation_start_time = Some(Instant::now());
}
let found_candidates = liquidation
.find_candidates(account_addresses.iter(), |p| {
if shared_state
.write()
.unwrap()
.liquidation_candidates_accounts
.insert(p)
{
tx_trigger_sender.try_send(())?;
}
Ok(())
})
.await
.unwrap();
if found_candidates > 0 {
tracing::debug!("found {} candidates for liquidation", found_candidates);
}
let mut state = shared_state.write().unwrap();
let reception_time = state.oldest_chain_event_reception_time.unwrap();
let current_time = Instant::now();
state.oldest_chain_event_reception_time = None;
metric_liquidation_check.push(current_time - reception_time);
metric_liquidation_start_end.push(current_time - liquidation_start_time.unwrap());
liquidation_start_time = None;
}
}
})
}
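The `should_skip_execution` / `log_or_ignore_error` pair above leans on the shared `ErrorTracking` type from `mango_v4_client`. A rough sketch of the idea behind it (a hypothetical simplified type, not the real API):

```rust
// Hypothetical, simplified version of the "skip noisy accounts" bookkeeping:
// after `skip_threshold` errors, an account is ignored for `skip_duration`.
use std::collections::HashMap;
use std::time::{Duration, Instant};

struct SimpleErrorTracking {
    skip_threshold: u64,
    skip_duration: Duration,
    // key -> (error count, time of last error)
    errors: HashMap<String, (u64, Instant)>,
}

impl SimpleErrorTracking {
    fn record(&mut self, key: &str) {
        let entry = self.errors.entry(key.to_string()).or_insert((0, Instant::now()));
        entry.0 += 1;
        entry.1 = Instant::now();
    }

    fn had_too_many_errors(&self, key: &str, now: Instant) -> bool {
        self.errors.get(key).map_or(false, |(count, last)| {
            *count >= self.skip_threshold && now.duration_since(*last) < self.skip_duration
        })
    }

    fn clear(&mut self, key: &str) {
        self.errors.remove(key);
    }
}

fn main() {
    let mut tracking = SimpleErrorTracking {
        skip_threshold: 2,
        skip_duration: Duration::from_secs(120),
        errors: HashMap::new(),
    };
    tracking.record("account-A");
    tracking.record("account-A");
    assert!(tracking.had_too_many_errors("account-A", Instant::now()));
    tracking.clear("account-A");
    assert!(!tracking.had_too_many_errors("account-A", Instant::now()));
}
```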

View File

@@ -4,33 +4,40 @@ use std::sync::{Arc, RwLock};
 use std::time::{Duration, Instant};

 use anchor_client::Cluster;
+use anyhow::Context;
 use clap::Parser;
+use futures_util::StreamExt;
 use mango_v4::state::{PerpMarketIndex, TokenIndex};
+use mango_v4_client::AsyncChannelSendUnlessFull;
 use mango_v4_client::{
     account_update_stream, chain_data, error_tracking::ErrorTracking, keypair_from_cli,
-    snapshot_source, websocket_source, Client, MangoClient, MangoClientError, MangoGroupContext,
+    snapshot_source, websocket_source, Client, MangoClient, MangoGroupContext,
     TransactionBuilderConfig,
 };

+use crate::cli_args::{BoolArg, Cli, CliDotenv};
+use crate::liquidation_state::LiquidationState;
+use crate::rebalance::Rebalancer;
+use crate::tcs_state::TcsState;
+use crate::token_swap_info::TokenSwapInfoUpdater;
 use itertools::Itertools;
 use solana_sdk::commitment_config::CommitmentConfig;
 use solana_sdk::pubkey::Pubkey;
 use solana_sdk::signer::Signer;
+use tokio::task::JoinHandle;
 use tracing::*;

 pub mod cli_args;
 pub mod liquidate;
+mod liquidation_state;
 pub mod metrics;
 pub mod rebalance;
+mod tcs_state;
 pub mod telemetry;
 pub mod token_swap_info;
 pub mod trigger_tcs;
+mod tx_sender;
 mod unwrappable_oracle_error;
 pub mod util;

-use crate::unwrappable_oracle_error::UnwrappableOracleError;
 use crate::util::{is_mango_account, is_mint_info, is_perp_market};
// jemalloc seems to be better at keeping the memory footprint reasonable over // jemalloc seems to be better at keeping the memory footprint reasonable over
@ -69,7 +76,7 @@ async fn main() -> anyhow::Result<()> {
// Client setup // Client setup
// //
let liqor_owner = Arc::new(keypair_from_cli(&cli.liqor_owner)); let liqor_owner = Arc::new(keypair_from_cli(&cli.liqor_owner));
let rpc_url = cli.rpc_url; let rpc_url = cli.rpc_url.clone();
let ws_url = rpc_url.replace("https", "wss"); let ws_url = rpc_url.replace("https", "wss");
let rpc_timeout = Duration::from_secs(10); let rpc_timeout = Duration::from_secs(10);
let cluster = Cluster::Custom(rpc_url.clone(), ws_url.clone()); let cluster = Cluster::Custom(rpc_url.clone(), ws_url.clone());
@ -79,8 +86,9 @@ async fn main() -> anyhow::Result<()> {
.commitment(commitment) .commitment(commitment)
.fee_payer(Some(liqor_owner.clone())) .fee_payer(Some(liqor_owner.clone()))
.timeout(rpc_timeout) .timeout(rpc_timeout)
.jupiter_v6_url(cli.jupiter_v6_url) .jupiter_timeout(Duration::from_secs(cli.jupiter_timeout_secs))
.jupiter_token(cli.jupiter_token) .jupiter_v6_url(cli.jupiter_v6_url.clone())
.jupiter_token(cli.jupiter_token.clone())
.transaction_builder_config( .transaction_builder_config(
TransactionBuilderConfig::builder() TransactionBuilderConfig::builder()
.priority_fee_provider(prio_provider) .priority_fee_provider(prio_provider)
@ -89,7 +97,7 @@ async fn main() -> anyhow::Result<()> {
.build() .build()
.unwrap(), .unwrap(),
) )
.override_send_transaction_urls(cli.override_send_transaction_url) .override_send_transaction_urls(cli.override_send_transaction_url.clone())
.build() .build()
.unwrap(); .unwrap();
@ -207,17 +215,18 @@ async fn main() -> anyhow::Result<()> {
compute_limit_for_liq_ix: cli.compute_limit_for_liquidation, compute_limit_for_liq_ix: cli.compute_limit_for_liquidation,
max_cu_per_transaction: 1_000_000, max_cu_per_transaction: 1_000_000,
refresh_timeout: Duration::from_secs(cli.liquidation_refresh_timeout_secs as u64), refresh_timeout: Duration::from_secs(cli.liquidation_refresh_timeout_secs as u64),
only_allowed_tokens: cli_args::cli_to_hashset::<TokenIndex>(cli.only_allow_tokens), only_allowed_tokens: cli_args::cli_to_hashset::<TokenIndex>(cli.only_allow_tokens.clone()),
forbidden_tokens: cli_args::cli_to_hashset::<TokenIndex>(cli.forbidden_tokens), forbidden_tokens: cli_args::cli_to_hashset::<TokenIndex>(cli.forbidden_tokens.clone()),
only_allowed_perp_markets: cli_args::cli_to_hashset::<PerpMarketIndex>( only_allowed_perp_markets: cli_args::cli_to_hashset::<PerpMarketIndex>(
cli.liquidation_only_allow_perp_markets, cli.liquidation_only_allow_perp_markets.clone(),
), ),
forbidden_perp_markets: cli_args::cli_to_hashset::<PerpMarketIndex>( forbidden_perp_markets: cli_args::cli_to_hashset::<PerpMarketIndex>(
cli.liquidation_forbidden_perp_markets, cli.liquidation_forbidden_perp_markets.clone(),
), ),
}; };
let tcs_config = trigger_tcs::Config { let tcs_config = trigger_tcs::Config {
refresh_timeout: Duration::from_secs(cli.tcs_refresh_timeout_secs),
min_health_ratio: cli.min_health_ratio, min_health_ratio: cli.min_health_ratio,
max_trigger_quote_amount: (cli.tcs_max_trigger_amount * 1e6) as u64, max_trigger_quote_amount: (cli.tcs_max_trigger_amount * 1e6) as u64,
compute_limit_for_trigger: cli.compute_limit_for_tcs, compute_limit_for_trigger: cli.compute_limit_for_tcs,
@ -234,17 +243,19 @@ async fn main() -> anyhow::Result<()> {
forbidden_tokens: liq_config.forbidden_tokens.clone(), forbidden_tokens: liq_config.forbidden_tokens.clone(),
}; };
let mut rebalance_interval = tokio::time::interval(Duration::from_secs(30));
let (rebalance_trigger_sender, rebalance_trigger_receiver) = async_channel::bounded::<()>(1); let (rebalance_trigger_sender, rebalance_trigger_receiver) = async_channel::bounded::<()>(1);
let (tx_tcs_trigger_sender, tx_tcs_trigger_receiver) = async_channel::unbounded::<()>();
let (tx_liq_trigger_sender, tx_liq_trigger_receiver) = async_channel::unbounded::<()>();
let rebalance_config = rebalance::Config { let rebalance_config = rebalance::Config {
enabled: cli.rebalance == BoolArg::True, enabled: cli.rebalance == BoolArg::True,
slippage_bps: cli.rebalance_slippage_bps, slippage_bps: cli.rebalance_slippage_bps,
borrow_settle_excess: (1f64 + cli.rebalance_borrow_settle_excess).max(1f64), borrow_settle_excess: (1f64 + cli.rebalance_borrow_settle_excess).max(1f64),
refresh_timeout: Duration::from_secs(cli.rebalance_refresh_timeout_secs), refresh_timeout: Duration::from_secs(cli.rebalance_refresh_timeout_secs),
jupiter_version: cli.jupiter_version.into(), jupiter_version: cli.jupiter_version.into(),
skip_tokens: cli.rebalance_skip_tokens.unwrap_or_default(), skip_tokens: cli.rebalance_skip_tokens.clone().unwrap_or(Vec::new()),
alternate_jupiter_route_tokens: cli alternate_jupiter_route_tokens: cli
.rebalance_alternate_jupiter_route_tokens .rebalance_alternate_jupiter_route_tokens
.clone()
.unwrap_or_default(), .unwrap_or_default(),
allow_withdraws: signer_is_owner, allow_withdraws: signer_is_owner,
}; };
@ -257,23 +268,39 @@ async fn main() -> anyhow::Result<()> {
config: rebalance_config, config: rebalance_config,
}); });
let mut liquidation = Box::new(LiquidationState { let liquidation = Box::new(LiquidationState {
mango_client: mango_client.clone(),
account_fetcher: account_fetcher.clone(),
liquidation_config: liq_config,
errors: Arc::new(RwLock::new(
ErrorTracking::builder()
.skip_threshold(2)
.skip_threshold_for_type(LiqErrorType::Liq, 5)
.skip_duration(Duration::from_secs(120))
.build()?,
)),
oracle_errors: Arc::new(RwLock::new(
ErrorTracking::builder()
.skip_threshold(1)
.skip_duration(Duration::from_secs(
cli.skip_oracle_error_in_logs_duration_secs,
))
.build()?,
)),
});
let tcs = Box::new(TcsState {
mango_client: mango_client.clone(), mango_client: mango_client.clone(),
account_fetcher, account_fetcher,
liquidation_config: liq_config,
trigger_tcs_config: tcs_config, trigger_tcs_config: tcs_config,
token_swap_info: token_swap_info_updater.clone(), token_swap_info: token_swap_info_updater.clone(),
errors: ErrorTracking::builder() errors: Arc::new(RwLock::new(
.skip_threshold(2) ErrorTracking::builder()
.skip_threshold_for_type(LiqErrorType::Liq, 5) .skip_threshold(2)
.skip_duration(Duration::from_secs(120)) .skip_threshold_for_type(LiqErrorType::Liq, 5)
.build()?, .skip_duration(Duration::from_secs(120))
oracle_errors: ErrorTracking::builder() .build()?,
.skip_threshold(1) )),
.skip_duration(Duration::from_secs(
cli.skip_oracle_error_in_logs_duration_secs,
))
.build()?,
}); });
info!("main loop"); info!("main loop");
@ -374,126 +401,83 @@ async fn main() -> anyhow::Result<()> {
} }
}); });
let mut optional_jobs = vec![];
// Could be refactored to only start the below jobs when the first snapshot is done. // Could be refactored to only start the below jobs when the first snapshot is done.
// But need to take care to abort if the above job aborts beforehand. // But need to take care to abort if the above job aborts beforehand.
if cli.rebalance == BoolArg::True {
let rebalance_job =
spawn_rebalance_job(&shared_state, rebalance_trigger_receiver, rebalancer);
optional_jobs.push(rebalance_job);
}
let rebalance_job = tokio::spawn({ if cli.liquidation_enabled == BoolArg::True {
let shared_state = shared_state.clone(); let liquidation_job = liquidation_state::spawn_liquidation_job(
async move { &cli,
loop { &shared_state,
tokio::select! { tx_liq_trigger_sender.clone(),
_ = rebalance_interval.tick() => {} liquidation.clone(),
_ = rebalance_trigger_receiver.recv() => {} &metrics,
} );
if !shared_state.read().unwrap().one_snapshot_done { optional_jobs.push(liquidation_job);
continue; }
}
if let Err(err) = rebalancer.zero_all_non_quote().await {
error!("failed to rebalance liqor: {:?}", err);
// Workaround: We really need a sequence enforcer in the liquidator since we don't want to if cli.take_tcs == BoolArg::True {
// accidentally send a similar tx again when we incorrectly believe an earlier one got forked let tcs_job = tcs_state::spawn_tcs_job(
// off. For now, hard sleep on error to avoid the most frequent error cases. &cli,
tokio::time::sleep(Duration::from_secs(10)).await; &shared_state,
} tx_tcs_trigger_sender.clone(),
} tcs.clone(),
} &metrics,
}); );
optional_jobs.push(tcs_job);
}
let liquidation_job = tokio::spawn({ if cli.liquidation_enabled == BoolArg::True || cli.take_tcs == BoolArg::True {
let mut interval = let mut tx_sender_jobs = tx_sender::spawn_tx_senders_job(
mango_v4_client::delay_interval(Duration::from_millis(cli.check_interval_ms)); cli.max_parallel_operations,
let mut metric_liquidation_check = metrics.register_latency("liquidation_check".into()); cli.liquidation_enabled == BoolArg::True,
let mut metric_liquidation_start_end = tx_liq_trigger_receiver,
metrics.register_latency("liquidation_start_end".into()); tx_tcs_trigger_receiver,
tx_tcs_trigger_sender,
rebalance_trigger_sender,
shared_state.clone(),
liquidation,
tcs,
);
optional_jobs.append(&mut tx_sender_jobs);
}
let mut liquidation_start_time = None; if cli.telemetry == BoolArg::True {
let mut tcs_start_time = None; optional_jobs.push(spawn_telemetry_job(&cli, mango_client.clone()));
}
let shared_state = shared_state.clone(); let token_swap_info_job =
async move { spawn_token_swap_refresh_job(&cli, shared_state, token_swap_info_updater);
loop { let check_changes_for_abort_job = spawn_context_change_watchdog_job(mango_client.clone());
interval.tick().await;
let account_addresses = { let mut jobs: futures::stream::FuturesUnordered<_> =
let mut state = shared_state.write().unwrap(); vec![data_job, token_swap_info_job, check_changes_for_abort_job]
if !state.one_snapshot_done { .into_iter()
// discard first latency info as it will skew data too much .chain(optional_jobs)
state.oldest_chain_event_reception_time = None; .chain(prio_jobs.into_iter())
continue; .collect();
} jobs.next().await;
if state.oldest_chain_event_reception_time.is_none()
&& liquidation_start_time.is_none()
{
// no new update, skip computing
continue;
}
state.mango_accounts.iter().cloned().collect_vec() error!("a critical job aborted, exiting");
}; Ok(())
}
liquidation.errors.update(); fn spawn_token_swap_refresh_job(
liquidation.oracle_errors.update(); cli: &Cli,
shared_state: Arc<RwLock<SharedState>>,
if liquidation_start_time.is_none() { token_swap_info_updater: Arc<TokenSwapInfoUpdater>,
liquidation_start_time = Some(Instant::now()); ) -> JoinHandle<()> {
} tokio::spawn({
let liquidated = liquidation
.maybe_liquidate_one(account_addresses.iter())
.await;
if !liquidated {
// This will be incorrect if we liquidate the last checked account
// (We will wait for next full run, skewing latency metrics)
// Probability is very low, might not need to be fixed
let mut state = shared_state.write().unwrap();
let reception_time = state.oldest_chain_event_reception_time.unwrap();
let current_time = Instant::now();
state.oldest_chain_event_reception_time = None;
metric_liquidation_check.push(current_time - reception_time);
metric_liquidation_start_end
.push(current_time - liquidation_start_time.unwrap());
liquidation_start_time = None;
}
let mut took_tcs = false;
if !liquidated && cli.take_tcs == BoolArg::True {
tcs_start_time = Some(tcs_start_time.unwrap_or(Instant::now()));
took_tcs = liquidation
.maybe_take_token_conditional_swap(account_addresses.iter())
.await
.unwrap_or_else(|err| {
error!("error during maybe_take_token_conditional_swap: {err}");
false
});
if !took_tcs {
let current_time = Instant::now();
let mut metric_tcs_start_end =
metrics.register_latency("tcs_start_end".into());
metric_tcs_start_end.push(current_time - tcs_start_time.unwrap());
tcs_start_time = None;
}
}
if liquidated || took_tcs {
rebalance_trigger_sender.send_unless_full(()).unwrap();
}
}
}
});
let token_swap_info_job = tokio::spawn({
let mut interval = mango_v4_client::delay_interval(Duration::from_secs( let mut interval = mango_v4_client::delay_interval(Duration::from_secs(
cli.token_swap_refresh_interval_secs, cli.token_swap_refresh_interval_secs,
)); ));
let mut startup_wait = mango_v4_client::delay_interval(Duration::from_secs(1)); let mut startup_wait = mango_v4_client::delay_interval(Duration::from_secs(1));
let shared_state = shared_state.clone();
async move { async move {
loop { loop {
if !shared_state.read().unwrap().one_snapshot_done { if !shared_state.read().unwrap().one_snapshot_done {
@ -517,41 +501,56 @@ async fn main() -> anyhow::Result<()> {
token_swap_info_updater.log_all(); token_swap_info_updater.log_all();
} }
} }
}); })
}
let check_changes_for_abort_job = fn spawn_context_change_watchdog_job(mango_client: Arc<MangoClient>) -> JoinHandle<()> {
tokio::spawn(MangoClient::loop_check_for_context_changes_and_abort( tokio::spawn(MangoClient::loop_check_for_context_changes_and_abort(
mango_client.clone(), mango_client,
Duration::from_secs(300), Duration::from_secs(300),
)); ))
}
if cli.telemetry == BoolArg::True { fn spawn_telemetry_job(cli: &Cli, mango_client: Arc<MangoClient>) -> JoinHandle<()> {
tokio::spawn(telemetry::report_regularly( tokio::spawn(telemetry::report_regularly(
mango_client, mango_client,
cli.min_health_ratio, cli.min_health_ratio,
)); ))
} }
use cli_args::{BoolArg, Cli, CliDotenv}; fn spawn_rebalance_job(
use futures::StreamExt; shared_state: &Arc<RwLock<SharedState>>,
let mut jobs: futures::stream::FuturesUnordered<_> = vec![ rebalance_trigger_receiver: async_channel::Receiver<()>,
data_job, rebalancer: Arc<Rebalancer>,
rebalance_job, ) -> JoinHandle<()> {
liquidation_job, let mut rebalance_interval = tokio::time::interval(Duration::from_secs(30));
token_swap_info_job,
check_changes_for_abort_job,
]
.into_iter()
.chain(prio_jobs.into_iter())
.collect();
jobs.next().await;
error!("a critical job aborted, exiting"); tokio::spawn({
Ok(()) let shared_state = shared_state.clone();
async move {
loop {
tokio::select! {
_ = rebalance_interval.tick() => {}
_ = rebalance_trigger_receiver.recv() => {}
}
if !shared_state.read().unwrap().one_snapshot_done {
continue;
}
if let Err(err) = rebalancer.zero_all_non_quote().await {
error!("failed to rebalance liqor: {:?}", err);
// Workaround: We really need a sequence enforcer in the liquidator since we don't want to
// accidentally send a similar tx again when we incorrectly believe an earlier one got forked
// off. For now, hard sleep on error to avoid the most frequent error cases.
tokio::time::sleep(Duration::from_secs(10)).await;
}
}
}
})
} }
 #[derive(Default)]
-struct SharedState {
+pub struct SharedState {
     /// Addresses of the MangoAccounts belonging to the mango program.
     /// Needed to check health of them all when the cache updates.
     mango_accounts: HashSet<Pubkey>,
@@ -561,6 +560,18 @@ struct SharedState {
     /// Oldest chain event not processed yet
     oldest_chain_event_reception_time: Option<Instant>,

+    /// Liquidation candidates (locally identified as liquidatable)
+    liquidation_candidates_accounts: indexmap::set::IndexSet<Pubkey>,
+
+    /// Interesting TCS that should be triggered
+    interesting_tcs: indexmap::set::IndexSet<(Pubkey, u64, u64)>,
+
+    /// Liquidation currently being processed by a worker
+    processing_liquidation: HashSet<Pubkey>,
+
+    // TCS currently being processed by a worker
+    processing_tcs: HashSet<(Pubkey, u64, u64)>,
 }
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
@ -584,218 +595,6 @@ impl std::fmt::Display for LiqErrorType {
} }
} }
struct LiquidationState {
mango_client: Arc<MangoClient>,
account_fetcher: Arc<chain_data::AccountFetcher>,
token_swap_info: Arc<token_swap_info::TokenSwapInfoUpdater>,
liquidation_config: liquidate::Config,
trigger_tcs_config: trigger_tcs::Config,
errors: ErrorTracking<Pubkey, LiqErrorType>,
oracle_errors: ErrorTracking<TokenIndex, LiqErrorType>,
}
impl LiquidationState {
async fn maybe_liquidate_one<'b>(
&mut self,
accounts_iter: impl Iterator<Item = &'b Pubkey>,
) -> bool {
use rand::seq::SliceRandom;
let mut accounts = accounts_iter.collect::<Vec<&Pubkey>>();
{
let mut rng = rand::thread_rng();
accounts.shuffle(&mut rng);
}
for pubkey in accounts {
if self
.maybe_liquidate_and_log_error(pubkey)
.await
.unwrap_or(false)
{
return true;
}
}
false
}
async fn maybe_liquidate_and_log_error(&mut self, pubkey: &Pubkey) -> anyhow::Result<bool> {
let now = Instant::now();
let error_tracking = &mut self.errors;
// Skip a pubkey if there've been too many errors recently
if let Some(error_entry) =
error_tracking.had_too_many_errors(LiqErrorType::Liq, pubkey, now)
{
trace!(
%pubkey,
error_entry.count,
"skip checking account for liquidation, had errors recently",
);
return Ok(false);
}
let result = liquidate::maybe_liquidate_account(
&self.mango_client,
&self.account_fetcher,
pubkey,
&self.liquidation_config,
)
.await;
if let Err(err) = result.as_ref() {
if let Some((ti, ti_name)) = err.try_unwrap_oracle_error() {
if self
.oracle_errors
.had_too_many_errors(LiqErrorType::Liq, &ti, Instant::now())
.is_none()
{
warn!(
"{:?} recording oracle error for token {} {}",
chrono::offset::Utc::now(),
ti_name,
ti
);
}
self.oracle_errors
.record(LiqErrorType::Liq, &ti, err.to_string());
return result;
}
// Keep track of pubkeys that had errors
error_tracking.record(LiqErrorType::Liq, pubkey, err.to_string());
// Not all errors need to be raised to the user's attention.
let mut is_error = true;
// Simulation errors due to liqee precondition failures on the liquidation instructions
// will commonly happen if our liquidator is late or if there are chain forks.
match err.downcast_ref::<MangoClientError>() {
Some(MangoClientError::SendTransactionPreflightFailure { logs, .. }) => {
if logs.iter().any(|line| {
line.contains("HealthMustBeNegative") || line.contains("IsNotBankrupt")
}) {
is_error = false;
}
}
_ => {}
};
if is_error {
error!("liquidating account {}: {:?}", pubkey, err);
} else {
trace!("liquidating account {}: {:?}", pubkey, err);
}
} else {
error_tracking.clear(LiqErrorType::Liq, pubkey);
}
result
}
async fn maybe_take_token_conditional_swap(
&mut self,
accounts_iter: impl Iterator<Item = &Pubkey>,
) -> anyhow::Result<bool> {
let accounts = accounts_iter.collect::<Vec<&Pubkey>>();
let now = Instant::now();
let now_ts: u64 = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)?
.as_secs();
let tcs_context = trigger_tcs::Context {
mango_client: self.mango_client.clone(),
account_fetcher: self.account_fetcher.clone(),
token_swap_info: self.token_swap_info.clone(),
config: self.trigger_tcs_config.clone(),
jupiter_quote_cache: Arc::new(trigger_tcs::JupiterQuoteCache::default()),
now_ts,
};
// Find interesting (pubkey, tcsid, volume)
let mut interesting_tcs = Vec::with_capacity(accounts.len());
for pubkey in accounts.iter() {
if let Some(error_entry) =
self.errors
.had_too_many_errors(LiqErrorType::TcsCollectionHard, pubkey, now)
{
trace!(
%pubkey,
error_entry.count,
"skip checking account for tcs, had errors recently",
);
continue;
}
match tcs_context.find_interesting_tcs_for_account(pubkey) {
Ok(v) => {
self.errors.clear(LiqErrorType::TcsCollectionHard, pubkey);
if v.is_empty() {
self.errors
.clear(LiqErrorType::TcsCollectionPartial, pubkey);
self.errors.clear(LiqErrorType::TcsExecution, pubkey);
} else if v.iter().all(|it| it.is_ok()) {
self.errors
.clear(LiqErrorType::TcsCollectionPartial, pubkey);
} else {
for it in v.iter() {
if let Err(e) = it {
self.errors.record(
LiqErrorType::TcsCollectionPartial,
pubkey,
e.to_string(),
);
}
}
}
interesting_tcs.extend(v.iter().filter_map(|it| it.as_ref().ok()));
}
Err(e) => {
self.errors
.record(LiqErrorType::TcsCollectionHard, pubkey, e.to_string());
}
}
}
if interesting_tcs.is_empty() {
return Ok(false);
}
let (txsigs, mut changed_pubkeys) = tcs_context
.execute_tcs(&mut interesting_tcs, &mut self.errors)
.await?;
for pubkey in changed_pubkeys.iter() {
self.errors.clear(LiqErrorType::TcsExecution, pubkey);
}
if txsigs.is_empty() {
return Ok(false);
}
changed_pubkeys.push(self.mango_client.mango_account_address);
// Force a refresh of affected accounts
let slot = self
.account_fetcher
.transaction_max_slot(&txsigs)
.await
.context("transaction_max_slot")?;
if let Err(e) = self
.account_fetcher
.refresh_accounts_via_rpc_until_slot(
&changed_pubkeys,
slot,
self.liquidation_config.refresh_timeout,
)
.await
{
info!(slot, "could not refresh after tcs execution: {}", e);
}
Ok(true)
}
}
fn start_chain_data_metrics(chain: Arc<RwLock<chain_data::ChainData>>, metrics: &metrics::Metrics) { fn start_chain_data_metrics(chain: Arc<RwLock<chain_data::ChainData>>, metrics: &metrics::Metrics) {
let mut interval = mango_v4_client::delay_interval(Duration::from_secs(600)); let mut interval = mango_v4_client::delay_interval(Duration::from_secs(600));

View File

@ -0,0 +1,218 @@
use crate::cli_args::Cli;
use crate::metrics::Metrics;
use crate::token_swap_info::TokenSwapInfoUpdater;
use crate::{trigger_tcs, LiqErrorType, SharedState};
use anchor_lang::prelude::Pubkey;
use anyhow::Context;
use itertools::Itertools;
use mango_v4_client::error_tracking::ErrorTracking;
use mango_v4_client::{chain_data, MangoClient};
use std::sync::{Arc, RwLock};
use std::time::{Duration, Instant};
use tokio::task::JoinHandle;
use tracing::{error, info, trace};
pub fn spawn_tcs_job(
cli: &Cli,
shared_state: &Arc<RwLock<SharedState>>,
tx_trigger_sender: async_channel::Sender<()>,
mut tcs: Box<TcsState>,
metrics: &Metrics,
) -> JoinHandle<()> {
tokio::spawn({
let mut interval =
mango_v4_client::delay_interval(Duration::from_millis(cli.tcs_check_interval_ms));
let mut tcs_start_time = None;
let mut metric_tcs_start_end = metrics.register_latency("tcs_start_end".into());
let shared_state = shared_state.clone();
async move {
loop {
interval.tick().await;
let account_addresses = {
let state = shared_state.write().unwrap();
if !state.one_snapshot_done {
continue;
}
state.mango_accounts.iter().cloned().collect_vec()
};
tcs.errors.write().unwrap().update();
tcs_start_time = Some(tcs_start_time.unwrap_or(Instant::now()));
let found_candidates = tcs
.find_candidates(account_addresses.iter(), |candidate| {
if shared_state
.write()
.unwrap()
.interesting_tcs
.insert(candidate)
{
tx_trigger_sender.try_send(())?;
}
Ok(())
})
.await
.unwrap_or_else(|err| {
error!("error during find_candidate: {err}");
0
});
if found_candidates > 0 {
tracing::debug!("found {} candidates for triggering", found_candidates);
}
let current_time = Instant::now();
metric_tcs_start_end.push(current_time - tcs_start_time.unwrap());
tcs_start_time = None;
}
}
})
}
#[derive(Clone)]
pub struct TcsState {
pub mango_client: Arc<MangoClient>,
pub account_fetcher: Arc<chain_data::AccountFetcher>,
pub token_swap_info: Arc<TokenSwapInfoUpdater>,
pub trigger_tcs_config: trigger_tcs::Config,
pub errors: Arc<RwLock<ErrorTracking<Pubkey, LiqErrorType>>>,
}
impl TcsState {
async fn find_candidates(
&mut self,
accounts_iter: impl Iterator<Item = &Pubkey>,
action: impl Fn((Pubkey, u64, u64)) -> anyhow::Result<()>,
) -> anyhow::Result<usize> {
let accounts = accounts_iter.collect::<Vec<&Pubkey>>();
let now = Instant::now();
let now_ts: u64 = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)?
.as_secs();
let tcs_context = trigger_tcs::Context {
mango_client: self.mango_client.clone(),
account_fetcher: self.account_fetcher.clone(),
token_swap_info: self.token_swap_info.clone(),
config: self.trigger_tcs_config.clone(),
jupiter_quote_cache: Arc::new(trigger_tcs::JupiterQuoteCache::default()),
now_ts,
};
let mut found_counter = 0;
// Find interesting (pubkey, tcsid, volume)
for pubkey in accounts.iter() {
if let Some(error_entry) = self.errors.read().unwrap().had_too_many_errors(
LiqErrorType::TcsCollectionHard,
pubkey,
now,
) {
trace!(
%pubkey,
error_entry.count,
"skip checking account for tcs, had errors recently",
);
continue;
}
let candidates = tcs_context.find_interesting_tcs_for_account(pubkey);
let mut error_guard = self.errors.write().unwrap();
match candidates {
Ok(v) => {
error_guard.clear(LiqErrorType::TcsCollectionHard, pubkey);
if v.is_empty() {
error_guard.clear(LiqErrorType::TcsCollectionPartial, pubkey);
error_guard.clear(LiqErrorType::TcsExecution, pubkey);
} else if v.iter().all(|it| it.is_ok()) {
error_guard.clear(LiqErrorType::TcsCollectionPartial, pubkey);
} else {
for it in v.iter() {
if let Err(e) = it {
error_guard.record(
LiqErrorType::TcsCollectionPartial,
pubkey,
e.to_string(),
);
}
}
}
for interesting_candidate_res in v.iter() {
if let Ok(interesting_candidate) = interesting_candidate_res {
action(*interesting_candidate).expect("failed to send TCS candidate");
found_counter += 1;
}
}
}
Err(e) => {
error_guard.record(LiqErrorType::TcsCollectionHard, pubkey, e.to_string());
}
}
}
return Ok(found_counter);
}
pub async fn maybe_take_token_conditional_swap(
&mut self,
mut interesting_tcs: Vec<(Pubkey, u64, u64)>,
) -> anyhow::Result<bool> {
let now_ts: u64 = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)?
.as_secs();
let tcs_context = trigger_tcs::Context {
mango_client: self.mango_client.clone(),
account_fetcher: self.account_fetcher.clone(),
token_swap_info: self.token_swap_info.clone(),
config: self.trigger_tcs_config.clone(),
jupiter_quote_cache: Arc::new(trigger_tcs::JupiterQuoteCache::default()),
now_ts,
};
if interesting_tcs.is_empty() {
return Ok(false);
}
let (txsigs, mut changed_pubkeys) = tcs_context
.execute_tcs(&mut interesting_tcs, self.errors.clone())
.await?;
for pubkey in changed_pubkeys.iter() {
self.errors
.write()
.unwrap()
.clear(LiqErrorType::TcsExecution, pubkey);
}
if txsigs.is_empty() {
return Ok(false);
}
changed_pubkeys.push(self.mango_client.mango_account_address);
// Force a refresh of affected accounts
let slot = self
.account_fetcher
.transaction_max_slot(&txsigs)
.await
.context("transaction_max_slot")?;
if let Err(e) = self
.account_fetcher
.refresh_accounts_via_rpc_until_slot(
&changed_pubkeys,
slot,
self.trigger_tcs_config.refresh_timeout,
)
.await
{
info!(slot, "could not refresh after tcs execution: {}", e);
}
Ok(true)
}
}

View File

@@ -1,4 +1,5 @@
 use std::collections::HashSet;
+use std::time::Duration;
 use std::{
     collections::HashMap,
     pin::Pin,
@@ -15,6 +16,7 @@ use mango_v4::{
 use mango_v4_client::{chain_data, jupiter, MangoClient, TransactionBuilder};

 use anyhow::Context as AnyhowContext;
+use mango_v4::accounts_ix::HealthCheckKind::MaintRatio;
 use solana_sdk::signature::Signature;
 use tracing::*;
 use {fixed::types::I80F48, solana_sdk::pubkey::Pubkey};
@@ -55,6 +57,7 @@ pub enum Mode {

 #[derive(Clone)]
 pub struct Config {
+    pub refresh_timeout: Duration,
     pub min_health_ratio: f64,
     pub max_trigger_quote_amount: u64,
     pub compute_limit_for_trigger: u32,
@@ -1000,7 +1003,7 @@ impl Context {
     pub async fn execute_tcs(
         &self,
         tcs: &mut [(Pubkey, u64, u64)],
-        error_tracking: &mut ErrorTracking<Pubkey, LiqErrorType>,
+        error_tracking: Arc<RwLock<ErrorTracking<Pubkey, LiqErrorType>>>,
     ) -> anyhow::Result<(Vec<Signature>, Vec<Pubkey>)> {
         use rand::distributions::{Distribution, WeightedError, WeightedIndex};
@@ -1049,7 +1052,7 @@ impl Context {
                     }
                     Err(e) => {
                         trace!(%result.pubkey, "preparation error {:?}", e);
-                        error_tracking.record(
+                        error_tracking.write().unwrap().record(
                             LiqErrorType::TcsExecution,
                             &result.pubkey,
                             e.to_string(),
@@ -1093,7 +1096,7 @@ impl Context {
             };

             // start the new one
-            if let Some(job) = self.prepare_job(&pubkey, tcs_id, volume, error_tracking) {
+            if let Some(job) = self.prepare_job(&pubkey, tcs_id, volume, error_tracking.clone()) {
                 pending_volume += volume;
                 pending.push(job);
             }
@@ -1130,7 +1133,11 @@ impl Context {
                 Ok(v) => Some((pubkey, v)),
                 Err(err) => {
                     trace!(%pubkey, "execution error {:?}", err);
-                    error_tracking.record(LiqErrorType::TcsExecution, &pubkey, err.to_string());
+                    error_tracking.write().unwrap().record(
+                        LiqErrorType::TcsExecution,
+                        &pubkey,
+                        err.to_string(),
+                    );
                     None
                 }
             });
@@ -1145,12 +1152,14 @@ impl Context {
         pubkey: &Pubkey,
         tcs_id: u64,
         volume: u64,
-        error_tracking: &ErrorTracking<Pubkey, LiqErrorType>,
+        error_tracking: Arc<RwLock<ErrorTracking<Pubkey, LiqErrorType>>>,
     ) -> Option<Pin<Box<dyn Future<Output = PreparationResult> + Send>>> {
         // Skip a pubkey if there've been too many errors recently
-        if let Some(error_entry) =
-            error_tracking.had_too_many_errors(LiqErrorType::TcsExecution, pubkey, Instant::now())
-        {
+        if let Some(error_entry) = error_tracking.read().unwrap().had_too_many_errors(
+            LiqErrorType::TcsExecution,
+            pubkey,
+            Instant::now(),
+        ) {
             trace!(
                 "skip checking for tcs on account {pubkey}, had {} errors recently",
                 error_entry.count
@@ -1225,6 +1234,27 @@ impl Context {
             .instructions
             .append(&mut trigger_ixs.instructions);

+        let (_, tcs) = liqee.token_conditional_swap_by_id(pending.tcs_id)?;
+        let affected_tokens = allowed_tokens
+            .iter()
+            .chain(&[tcs.buy_token_index, tcs.sell_token_index])
+            .copied()
+            .collect_vec();
+        let liqor = &self.mango_client.mango_account().await?;
+        tx_builder.instructions.append(
+            &mut self
+                .mango_client
+                .health_check_instruction(
+                    liqor,
+                    self.config.min_health_ratio,
+                    affected_tokens,
+                    vec![],
+                    MaintRatio,
+                )
+                .await?
+                .instructions,
+        );
+
         let txsig = tx_builder
             .send_and_confirm(&self.mango_client.client)
             .await?;

View File

@ -0,0 +1,241 @@
use crate::liquidation_state::LiquidationState;
use crate::tcs_state::TcsState;
use crate::SharedState;
use anchor_lang::prelude::Pubkey;
use async_channel::{Receiver, Sender};
use mango_v4_client::AsyncChannelSendUnlessFull;
use std::sync::{Arc, RwLock};
use tokio::task::JoinHandle;
use tracing::{debug, error, trace};
enum WorkerTask {
Liquidation(Pubkey),
Tcs(Vec<(Pubkey, u64, u64)>),
// Given two workers: #0=LIQ_only, #1=LIQ+TCS
// If both are busy and the scanning jobs find a new TCS and a new LIQ candidate and enqueue them in the channels,
// then if #1 wakes up first, it will consume the LIQ candidate (LIQ always has priority).
// When #0 wakes up, it will not find any LIQ candidate and would do nothing (it won't take a TCS).
// But if we did nothing, #1 would never wake up again (no new task in the channel),
// so #0 handles this with `GiveUpTcs`: it queues a new signal into the channel, waking #1 again.
GiveUpTcs,
// Can happen if TCS are batched (2 TCS enqueued, 2 workers woken, but the first one takes both tasks)
NoWork,
}
pub fn spawn_tx_senders_job(
max_parallel_operations: u64,
enable_liquidation: bool,
tx_liq_trigger_receiver: Receiver<()>,
tx_tcs_trigger_receiver: Receiver<()>,
tx_tcs_trigger_sender: Sender<()>,
rebalance_trigger_sender: Sender<()>,
shared_state: Arc<RwLock<SharedState>>,
liquidation: Box<LiquidationState>,
tcs: Box<TcsState>,
) -> Vec<JoinHandle<()>> {
if max_parallel_operations < 1 {
error!("max_parallel_operations must be >= 1");
std::process::exit(1)
}
let reserve_one_worker_for_liquidation = max_parallel_operations > 1 && enable_liquidation;
let workers: Vec<JoinHandle<()>> = (0..max_parallel_operations)
.map(|worker_id| {
tokio::spawn({
let shared_state = shared_state.clone();
let receiver_liq = tx_liq_trigger_receiver.clone();
let receiver_tcs = tx_tcs_trigger_receiver.clone();
let sender_tcs = tx_tcs_trigger_sender.clone();
let rebalance_trigger_sender = rebalance_trigger_sender.clone();
let liquidation = liquidation.clone();
let tcs = tcs.clone();
async move {
worker_loop(
shared_state,
receiver_liq,
receiver_tcs,
sender_tcs,
rebalance_trigger_sender,
liquidation,
tcs,
worker_id,
reserve_one_worker_for_liquidation && worker_id == 0,
)
.await;
}
})
})
.collect();
workers
}
async fn worker_loop(
shared_state: Arc<RwLock<SharedState>>,
liq_receiver: Receiver<()>,
tcs_receiver: Receiver<()>,
tcs_sender: Sender<()>,
rebalance_trigger_sender: Sender<()>,
mut liquidation: Box<LiquidationState>,
mut tcs: Box<TcsState>,
id: u64,
only_liquidation: bool,
) {
loop {
debug!(
"Worker #{} waiting for task (only_liq={})",
id, only_liquidation
);
let _ = if only_liquidation {
liq_receiver.recv().await.expect("receive failed")
} else {
tokio::select!(
_ = liq_receiver.recv() => {},
_ = tcs_receiver.recv() => {},
)
};
// a task must be available to process
// find it in global shared state, and mark it as processing
let task = worker_pull_task(&shared_state, id, only_liquidation)
.expect("Worker woke up but has nothing to do");
// execute the task
let need_rebalancing = match &task {
WorkerTask::Liquidation(l) => worker_execute_liquidation(&mut liquidation, *l).await,
WorkerTask::Tcs(t) => worker_execute_tcs(&mut tcs, t.clone()).await,
WorkerTask::GiveUpTcs => worker_give_up_tcs(&tcs_sender).await,
WorkerTask::NoWork => false,
};
if need_rebalancing {
rebalance_trigger_sender.send_unless_full(()).unwrap();
}
// remove from shared state
worker_finalize_task(&shared_state, id, task, need_rebalancing);
}
}
async fn worker_give_up_tcs(sender: &Sender<()>) -> bool {
sender.send(()).await.expect("sending task failed");
false
}
async fn worker_execute_tcs(tcs: &mut Box<TcsState>, candidates: Vec<(Pubkey, u64, u64)>) -> bool {
tcs.maybe_take_token_conditional_swap(candidates)
.await
.unwrap_or(false)
}
async fn worker_execute_liquidation(
liquidation: &mut Box<LiquidationState>,
candidate: Pubkey,
) -> bool {
liquidation
.maybe_liquidate_and_log_error(&candidate)
.await
.unwrap_or(false)
}
fn worker_pull_task(
shared_state: &Arc<RwLock<SharedState>>,
id: u64,
only_liquidation: bool,
) -> anyhow::Result<WorkerTask> {
let mut writer = shared_state.write().unwrap();
// print out list of all task for debugging
for x in &writer.liquidation_candidates_accounts {
if !writer.processing_liquidation.contains(x) {
trace!(" - LIQ {:?}", x);
}
}
// next liq task to execute
if let Some(liq_candidate) = writer
.liquidation_candidates_accounts
.iter()
.find(|x| !writer.processing_liquidation.contains(x))
.copied()
{
debug!("worker #{} got a liq candidate -> {}", id, liq_candidate);
writer.processing_liquidation.insert(liq_candidate);
return Ok(WorkerTask::Liquidation(liq_candidate));
}
let tcs_todo = writer.interesting_tcs.len() - writer.processing_tcs.len();
if only_liquidation {
debug!("worker #{} giving up TCS (todo count: {})", id, tcs_todo);
return Ok(WorkerTask::GiveUpTcs);
}
for x in &writer.interesting_tcs {
if !writer.processing_tcs.contains(x) {
trace!(" - TCS {:?}", x);
}
}
// next tcs task to execute
let max_tcs_batch_size = 20;
let tcs_candidates: Vec<(Pubkey, u64, u64)> = writer
.interesting_tcs
.iter()
.filter(|x| !writer.processing_tcs.contains(x))
.take(max_tcs_batch_size)
.copied()
.collect();
for tcs_candidate in &tcs_candidates {
debug!(
"worker #{} got a tcs candidate -> {:?} (out of {})",
id,
tcs_candidate,
writer.interesting_tcs.len()
);
writer.processing_tcs.insert(tcs_candidate.clone());
}
if tcs_candidates.len() > 0 {
Ok(WorkerTask::Tcs(tcs_candidates))
} else {
debug!("worker #{} got nothing", id);
Ok(WorkerTask::NoWork)
}
}
fn worker_finalize_task(
shared_state: &Arc<RwLock<SharedState>>,
id: u64,
task: WorkerTask,
done: bool,
) {
let mut writer = shared_state.write().unwrap();
match task {
WorkerTask::Liquidation(liq) => {
debug!(
"worker #{} - checked liq {:?} with success ? {}",
id, liq, done
);
writer.liquidation_candidates_accounts.shift_remove(&liq);
writer.processing_liquidation.remove(&liq);
}
WorkerTask::Tcs(tcs_list) => {
for tcs in tcs_list {
debug!(
"worker #{} - checked tcs {:?} with success ? {}",
id, tcs, done
);
writer.interesting_tcs.shift_remove(&tcs);
writer.processing_tcs.remove(&tcs);
}
}
WorkerTask::GiveUpTcs => {}
WorkerTask::NoWork => {}
}
}
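The task selection above always prefers liquidation candidates over TCS and batches up to 20 TCS per worker. The sketch below condenses that priority logic with standalone types (it assumes the `indexmap` crate, like the real SharedState, and uses `u64` in place of `Pubkey`):

```rust
// Condensed version of the worker_pull_task priority: take a liquidation
// candidate if any is unclaimed, otherwise (unless this worker is reserved
// for liquidations) take a batch of up to 20 unclaimed TCS candidates.
use indexmap::IndexSet;
use std::collections::HashSet;

type TcsCandidate = (u64, u64, u64); // stand-in for (Pubkey, tcs_id, volume)

enum Task {
    Liquidation(u64),
    Tcs(Vec<TcsCandidate>),
    GiveUpTcs,
    NoWork,
}

fn pull_task(
    liq_candidates: &IndexSet<u64>,
    processing_liq: &mut HashSet<u64>,
    tcs_candidates: &IndexSet<TcsCandidate>,
    processing_tcs: &mut HashSet<TcsCandidate>,
    only_liquidation: bool,
) -> Task {
    if let Some(liq) = liq_candidates
        .iter()
        .find(|c| !processing_liq.contains(c))
        .copied()
    {
        processing_liq.insert(liq);
        return Task::Liquidation(liq);
    }
    if only_liquidation {
        // Hand the signal back so a TCS-capable worker can pick it up.
        return Task::GiveUpTcs;
    }
    let batch: Vec<TcsCandidate> = tcs_candidates
        .iter()
        .filter(|c| !processing_tcs.contains(c))
        .take(20)
        .copied()
        .collect();
    if batch.is_empty() {
        Task::NoWork
    } else {
        processing_tcs.extend(batch.iter().copied());
        Task::Tcs(batch)
    }
}

fn main() {
    let liq: IndexSet<u64> = [7u64].into_iter().collect();
    let tcs: IndexSet<TcsCandidate> = [(1, 2, 3)].into_iter().collect();
    let (mut pl, mut pt) = (HashSet::new(), HashSet::new());
    assert!(matches!(pull_task(&liq, &mut pl, &tcs, &mut pt, false), Task::Liquidation(7)));
    assert!(matches!(pull_task(&liq, &mut pl, &tcs, &mut pt, false), Task::Tcs(_)));
}
```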

View File

@@ -17,7 +17,9 @@ use futures::{stream, StreamExt, TryFutureExt, TryStreamExt};
 use itertools::Itertools;
 use tracing::*;

-use mango_v4::accounts_ix::{Serum3OrderType, Serum3SelfTradeBehavior, Serum3Side};
+use mango_v4::accounts_ix::{
+    HealthCheckKind, Serum3OrderType, Serum3SelfTradeBehavior, Serum3Side,
+};
 use mango_v4::accounts_zerocopy::KeyedAccountSharedData;
 use mango_v4::health::HealthCache;
 use mango_v4::state::{
@@ -80,6 +82,12 @@ pub struct ClientConfig {
     #[builder(default = "Duration::from_secs(60)")]
     pub timeout: Duration,

+    /// Jupiter Timeout, defaults to 30s
+    ///
+    /// This timeout applies to jupiter requests.
+    #[builder(default = "Duration::from_secs(30)")]
+    pub jupiter_timeout: Duration,
+
     #[builder(default)]
     pub transaction_builder_config: TransactionBuilderConfig,
@@ -560,6 +568,48 @@ impl MangoClient {
         self.send_and_confirm_owner_tx(ixs.to_instructions()).await
     }

+    /// Assert that health of account is > N
+    pub async fn health_check_instruction(
+        &self,
+        account: &MangoAccountValue,
+        min_health_value: f64,
+        affected_tokens: Vec<TokenIndex>,
+        affected_perp_markets: Vec<PerpMarketIndex>,
+        check_kind: HealthCheckKind,
+    ) -> anyhow::Result<PreparedInstructions> {
+        let (health_check_metas, health_cu) = self
+            .derive_health_check_remaining_account_metas(
+                account,
+                affected_tokens,
+                vec![],
+                affected_perp_markets,
+            )
+            .await?;
+
+        let ixs = PreparedInstructions::from_vec(
+            vec![Instruction {
+                program_id: mango_v4::id(),
+                accounts: {
+                    let mut ams = anchor_lang::ToAccountMetas::to_account_metas(
+                        &mango_v4::accounts::HealthCheck {
+                            group: self.group(),
+                            account: self.mango_account_address,
+                        },
+                        None,
+                    );
+                    ams.extend(health_check_metas.into_iter());
+                    ams
+                },
+                data: anchor_lang::InstructionData::data(&mango_v4::instruction::HealthCheck {
+                    min_health_value,
+                    check_kind,
+                }),
+            }],
+            self.instruction_cu(health_cu),
+        );
+
+        Ok(ixs)
+    }
+
     /// Creates token withdraw instructions for the MangoClient's account/owner.
     /// The `account` state is passed in separately so changes during the tx can be
     /// accounted for when deriving health accounts.
@@ -2094,7 +2144,10 @@ impl MangoClient {
     // jupiter
     pub fn jupiter_v6(&self) -> jupiter::v6::JupiterV6 {
-        jupiter::v6::JupiterV6 { mango_client: self }
+        jupiter::v6::JupiterV6 {
+            mango_client: self,
+            timeout_duration: self.client.config.jupiter_timeout,
+        }
     }

     pub fn jupiter(&self) -> jupiter::Jupiter {
View File

@@ -1,4 +1,5 @@
 use std::str::FromStr;
+use std::time::Duration;

 use anchor_lang::prelude::Pubkey;
 use serde::{Deserialize, Serialize};
@@ -139,6 +140,7 @@ impl TryFrom<&AccountMeta> for solana_sdk::instruction::AccountMeta {

 pub struct JupiterV6<'a> {
     pub mango_client: &'a MangoClient,
+    pub timeout_duration: Duration,
 }

 impl<'a> JupiterV6<'a> {
@@ -204,6 +206,7 @@ impl<'a> JupiterV6<'a> {
             .http_client
             .get(format!("{}/quote", config.jupiter_v6_url))
             .query(&query_args)
+            .timeout(self.timeout_duration)
             .send()
             .await
             .context("quote request to jupiter")?;
@@ -290,6 +293,7 @@ impl<'a> JupiterV6<'a> {
                 destination_token_account: None, // default to user ata
                 quote_response: quote.clone(),
             })
+            .timeout(self.timeout_duration)
             .send()
             .await
             .context("swap transaction request to jupiter")?;

View File

@@ -20,19 +20,29 @@ impl<T, E: std::fmt::Debug> AnyhowWrap for Result<T, E> {
 /// Push to an async_channel::Sender and ignore if the channel is full
 pub trait AsyncChannelSendUnlessFull<T> {
     /// Send a message if the channel isn't full
-    fn send_unless_full(&self, msg: T) -> Result<(), async_channel::SendError<T>>;
+    fn send_unless_full(&self, msg: T) -> anyhow::Result<()>;
 }

 impl<T> AsyncChannelSendUnlessFull<T> for async_channel::Sender<T> {
-    fn send_unless_full(&self, msg: T) -> Result<(), async_channel::SendError<T>> {
+    fn send_unless_full(&self, msg: T) -> anyhow::Result<()> {
         use async_channel::*;
         match self.try_send(msg) {
             Ok(()) => Ok(()),
-            Err(TrySendError::Closed(msg)) => Err(async_channel::SendError(msg)),
+            Err(TrySendError::Closed(_)) => Err(anyhow::format_err!("channel is closed")),
             Err(TrySendError::Full(_)) => Ok(()),
         }
     }
 }
+
+impl<T> AsyncChannelSendUnlessFull<T> for tokio::sync::mpsc::Sender<T> {
+    fn send_unless_full(&self, msg: T) -> anyhow::Result<()> {
+        use tokio::sync::mpsc::*;
+        match self.try_send(msg) {
+            Ok(()) => Ok(()),
+            Err(error::TrySendError::Closed(_)) => Err(anyhow::format_err!("channel is closed")),
+            Err(error::TrySendError::Full(_)) => Ok(()),
+        }
+    }
+}
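Usage-wise, `send_unless_full` keeps trigger channels from turning into a backlog: with a bounded(1) channel, a second trigger sent while one is already pending is simply dropped. A small usage sketch (assumes the `mango_v4_client` crate is in scope):

```rust
// Small usage sketch: duplicate triggers are dropped instead of queuing up.
use mango_v4_client::AsyncChannelSendUnlessFull;

fn main() {
    let (tx, rx) = async_channel::bounded::<()>(1);
    tx.send_unless_full(()).unwrap(); // queued
    tx.send_unless_full(()).unwrap(); // channel full -> silently ignored, still Ok(())
    assert!(rx.try_recv().is_ok()); // exactly one trigger was delivered
    assert!(rx.try_recv().is_err());
}
```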
 /// Like tokio::time::interval(), but with Delay as default MissedTickBehavior
 ///

View File

@@ -7,8 +7,6 @@ use mango_v4::accounts_ix::{HealthCheck, HealthCheckKind};
 use mango_v4::error::MangoError;
 use solana_sdk::transport::TransportError;

-// TODO FAS
-
 #[tokio::test]
 async fn test_health_check() -> Result<(), TransportError> {
     let context = TestContext::new().await;

View File

@@ -51,6 +51,7 @@ This creates a bunch of to-be-liquidated accounts as well as a LIQOR account.
 Run the liquidator on the group with the liqor account.
 Since devnet doesn't have any jupiter, run with
 ```
 JUPITER_VERSION=mock
 TCS_MODE=borrow-buy