use log::{debug, error, info, warn};
use solana_lite_rpc_blockstore::block_stores::postgres::postgres_block_store_query::PostgresQueryBlockStore;
use solana_lite_rpc_blockstore::block_stores::postgres::postgres_block_store_writer::PostgresBlockStore;
use solana_lite_rpc_blockstore::block_stores::postgres::PostgresSessionConfig;
use solana_lite_rpc_cluster_endpoints::geyser_grpc_connector::{
    GrpcConnectionTimeouts, GrpcSourceConfig,
};
use solana_lite_rpc_cluster_endpoints::grpc_multiplex::{
    create_grpc_multiplex_blocks_subscription, create_grpc_multiplex_processed_slots_subscription,
};
use solana_lite_rpc_core::structures::epoch::{EpochCache, EpochRef};
use solana_lite_rpc_core::structures::produced_block::ProducedBlock;
use solana_lite_rpc_core::structures::slot_notification::SlotNotification;
use solana_lite_rpc_core::types::{BlockStream, SlotStream};
use solana_rpc_client::nonblocking::rpc_client::RpcClient;
use solana_sdk::commitment_config::CommitmentConfig;
use std::sync::Arc;
use std::time::{Duration, Instant};
use std::{env, process};
use tokio::sync::broadcast::error::RecvError;
use tokio::sync::broadcast::Receiver;
use tokio::task::JoinHandle;
use tokio::time::sleep;
use tokio_util::sync::CancellationToken;
use tracing_subscriber::EnvFilter;

const CHANNEL_SIZE_WARNING_THRESHOLD: usize = 5;
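
// How to run (a sketch; the test target name is taken from the panic-hook
// example near the bottom of this file - adjust it if your layout differs):
//
//   RPC_URL=... GRPC_ADDR=... [GRPC_X_TOKEN=...] [PG_CONFIG=...] [SECONDS_TO_RUN=20] \
//   RUST_LOG=info,storage_integration_tests=debug,solana_lite_rpc_blockstore=trace \
//   cargo test --test blockstore_integration_tests -- --ignored storage_test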

#[ignore = "need to enable postgres"]
#[tokio::test]
async fn storage_test() {
    // RUST_LOG=info,storage_integration_tests=debug,solana_lite_rpc_blockstore=trace
    tracing_subscriber::fmt()
        .with_env_filter(EnvFilter::from_default_env())
        .init();
    configure_panic_hook();

    let no_pgconfig = env::var("PG_CONFIG").is_err();

    let pg_session_config = if no_pgconfig {
        info!("No PG_CONFIG env - using hardcoded defaults for integration test");
        PostgresSessionConfig::new_for_tests()
    } else {
        info!("PG_CONFIG env defined");
        // new_from_env() appears to return a Result<Option<_>>, hence the double unwrap
        PostgresSessionConfig::new_from_env().unwrap().unwrap()
    };

    let rpc_url = std::env::var("RPC_URL").expect("env var RPC_URL is mandatory");

    let rpc_client = Arc::new(RpcClient::new(rpc_url));

    let grpc_addr = env::var("GRPC_ADDR").expect("need grpc url for green");
    let grpc_x_token = env::var("GRPC_X_TOKEN").ok();

    info!(
        "Using grpc source on {} (x-token? {})",
        grpc_addr,
        grpc_x_token.is_some()
    );

    let timeouts = GrpcConnectionTimeouts {
        connect_timeout: Duration::from_secs(5),
        request_timeout: Duration::from_secs(5),
        subscribe_timeout: Duration::from_secs(5),
        receive_timeout: Duration::from_secs(5),
    };

    let grpc_config = GrpcSourceConfig::new(grpc_addr, grpc_x_token, None, timeouts.clone());
    let grpc_sources = vec![grpc_config];
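
    // note: the multiplex subscriptions below presumably accept several gRPC
    // sources and merge them into one stream; this test feeds in a single one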
    let (slot_notifier, _jh_multiplex_slotstream) =
        create_grpc_multiplex_processed_slots_subscription(grpc_sources.clone());

    let (blocks_notifier, _jh_multiplex_blockstream) =
        create_grpc_multiplex_blocks_subscription(grpc_sources);

    let (epoch_cache, _) = EpochCache::bootstrap_epoch(&rpc_client).await.unwrap();

    let block_storage_query = Arc::new(
        PostgresQueryBlockStore::new(epoch_cache.clone(), pg_session_config.clone()).await,
    );

    let block_storage = Arc::new(PostgresBlockStore::new(epoch_cache, pg_session_config).await);

    // start with a clean slate: drop the schemas for the current and next epoch
    let current_epoch = rpc_client.get_epoch_info().await.unwrap().epoch;
    block_storage
        .drop_epoch_schema(EpochRef::new(current_epoch))
        .await
        .unwrap();
    block_storage
        .drop_epoch_schema(EpochRef::new(current_epoch).get_next_epoch())
        .await
        .unwrap();

    let (jh1_1, first_init) =
        storage_prepare_epoch_schema(slot_notifier.resubscribe(), block_storage.clone());
    // coordinate initial epoch schema creation
    first_init.cancelled().await;
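    // note: CancellationToken is used as a one-shot latch here - the schema
    // preparation task calls .cancel() once the first epoch schema exists,
    // which releases the .cancelled().await above before any block is saved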

    let jh1_2 = storage_listen(blocks_notifier.resubscribe(), block_storage.clone());
    let jh2 = block_debug_listen(blocks_notifier.resubscribe());
    let jh3 =
        spawn_client_to_blockstorage(block_storage_query.clone(), blocks_notifier.resubscribe());
    drop(blocks_notifier);

    let seconds_to_run = env::var("SECONDS_TO_RUN")
        .map(|s| s.parse::<u64>().expect("a number"))
        .unwrap_or(20);
    info!("Running tests for {} seconds ...", seconds_to_run);
    sleep(Duration::from_secs(seconds_to_run)).await;

    jh1_1.abort();
    jh1_2.abort();
    jh2.abort();
    jh3.abort();

    info!("Tests aborted forcefully by design.");
}

// TODO: this is a race condition, as .save() might be called before the schema is prepared
fn storage_prepare_epoch_schema(
    slot_notifier: SlotStream,
    postgres_storage: Arc<PostgresBlockStore>,
) -> (JoinHandle<()>, CancellationToken) {
    let mut debounce_slot = 0;
    let building_epoch_schema = CancellationToken::new();
    let first_run_signal = building_epoch_schema.clone();
    let join_handle = tokio::spawn(async move {
        let mut slot_notifier = slot_notifier;
        loop {
            match slot_notifier.recv().await {
                Ok(SlotNotification { processed_slot, .. }) => {
                    if processed_slot >= debounce_slot {
                        let created = postgres_storage
                            .prepare_epoch_schema(processed_slot)
                            .await
                            .unwrap();
                        first_run_signal.cancel();
                        debounce_slot = processed_slot + 64; // wait a bit before hammering the DB again
                        if created {
                            debug!("Async job prepared schema at slot {}", processed_slot);
                        } else {
                            debug!(
                                "Async job for preparing schema at slot {} was a noop",
                                processed_slot
                            );
                        }
                    }
                }
                _ => {
                    warn!("Error receiving slot - continuing");
                }
            }
        }
    });
    (join_handle, building_epoch_schema)
}

/// run the optimizer at least every n slots
const OPTIMIZE_EVERY_N_SLOTS: u64 = 10;
/// wait at least n slots before running the optimizer again
const OPTIMIZE_DEBOUNCE_SLOTS: u64 = 4;

// note: the consumer lags far behind the ingress of blocks and transactions
fn storage_listen(
    block_notifier: BlockStream,
    block_storage: Arc<PostgresBlockStore>,
) -> JoinHandle<()> {
    tokio::spawn(async move {
        let mut last_optimizer_run = 0;
        let mut block_notifier = block_notifier;
        // this is the critical write loop
        loop {
            match block_notifier.recv().await {
                Ok(block) => {
                    if block.commitment_config != CommitmentConfig::confirmed() {
                        debug!(
                            "Skip block {}@{} due to commitment level",
                            block.slot, block.commitment_config.commitment
                        );
                        continue;
                    }
                    let started = Instant::now();
                    debug!(
                        "Storage task received block: {}@{} with {} txs",
                        block.slot,
                        block.commitment_config.commitment,
                        block.transactions.len()
                    );

                    if block_notifier.len() > CHANNEL_SIZE_WARNING_THRESHOLD {
                        warn!(
                            "(soft_realtime) Block queue is growing - {} elements",
                            block_notifier.len()
                        );
                    }

                    // TODO we should intercept finalized blocks and try to update only the status optimistically

                    // avoid backpressure here!
                    block_storage.save_block(&block).await.unwrap();

                    // we should be faster than 150ms here
                    let elapsed = started.elapsed();
                    debug!(
                        "Successfully stored block {} to postgres, which took {:.2}ms - {} queue elements remaining",
                        block.slot,
                        elapsed.as_secs_f64() * 1000.0, block_notifier.len()
                    );
                    if elapsed > Duration::from_millis(150) {
                        warn!("(soft_realtime) Write operation was slow!");
                    }

                    // run at least every 10 slots, or after a 4-slot debounce when there is idle capacity
                    if block.slot > last_optimizer_run + OPTIMIZE_EVERY_N_SLOTS
                        || (block.slot > last_optimizer_run + OPTIMIZE_DEBOUNCE_SLOTS
                            && started.elapsed() < Duration::from_millis(200)
                            && block_notifier.is_empty())
                    {
                        debug!(
                            "Use extra time to do some optimization (slot {})",
                            block.slot
                        );
                        block_storage
                            .optimize_blocks_table(block.slot)
                            .await
                            .unwrap();
                        last_optimizer_run = block.slot;
                    }
                } // -- Ok
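                // tokio broadcast receivers that fall too far behind skip the
                // oldest messages; Lagged(n) reports how many were dropped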
                Err(RecvError::Lagged(missed_blocks)) => {
                    warn!(
                        "Could not keep up with producer - missed {} blocks",
                        missed_blocks
                    );
                }
                Err(other_err) => {
                    warn!("Error receiving block: {:?}", other_err);
                }
            }

            // ...
        }
    })
}

fn block_debug_listen(block_notifier: BlockStream) -> JoinHandle<()> {
    tokio::spawn(async move {
        let mut last_highest_slot_number = 0;
        let mut block_notifier = block_notifier;

        loop {
            match block_notifier.recv().await {
                Ok(block) => {
                    debug!(
                        "Saw block: {}@{} with {} txs",
                        block.slot,
                        block.commitment_config.commitment,
                        block.transactions.len()
                    );

                    // check monotonicity
                    // note: this succeeds if poll_block parallelism is 1 (see NUM_PARALLEL_BLOCKS)
                    if block.commitment_config == CommitmentConfig::confirmed() {
                        if block.slot > last_highest_slot_number {
                            last_highest_slot_number = block.slot;
                        } else {
                            // note: ATM this fails very often (using the RPC poller)
                            warn!(
                                "Monotonicity check failed - block {} is out of order, last highest was {}",
                                block.slot, last_highest_slot_number
                            );
                        }
                    }
                } // -- Ok
                Err(RecvError::Lagged(missed_blocks)) => {
                    warn!(
                        "Could not keep up with producer - missed {} blocks",
                        missed_blocks
                    );
                }
                Err(other_err) => {
                    panic!("Error receiving block: {:?}", other_err);
                }
            }

            // ...
        }
    })
}

fn spawn_client_to_blockstorage(
    block_storage_query: Arc<PostgresQueryBlockStore>,
    mut blocks_notifier: Receiver<ProducedBlock>,
) -> JoinHandle<()> {
    tokio::spawn(async move {
        // note: no startup delay
        loop {
            match blocks_notifier.recv().await {
                Ok(produced_block) => {
                    if produced_block.commitment_config != CommitmentConfig::confirmed() {
                        continue;
                    }
                    let confirmed_slot = produced_block.slot;
                    // we cannot expect the most recent data, so query a few slots back
                    // (saturating_sub guards against underflow on very low slot numbers)
                    let query_slot = confirmed_slot.saturating_sub(3);
                    match block_storage_query.query_block(query_slot).await {
                        Ok(pb) => {
                            info!(
                                "Query result for slot {}: {}",
                                query_slot,
                                to_string_without_transactions(&pb)
                            );
                            for tx in pb.transactions.iter().take(10) {
                                info!(" - tx: {}", tx.signature);
                            }
                            if pb.transactions.len() > 10 {
                                info!(" - ... and {} more", pb.transactions.len() - 10);
                            }
                        }
                        Err(err) => {
                            info!("Query did not return produced block: {}", err);
                        }
                    }
                    // example log output:
                    // Query result for slot 245710738
                    // Inserting block 245710741 to schema rpc2a_epoch_581 postgres took 1.52ms
                }
                Err(_err) => {
                    warn!("Aborting client");
                    break;
                }
            }
            sleep(Duration::from_secs(1)).await;
        }
    })
}

fn to_string_without_transactions(produced_block: &ProducedBlock) -> String {
    format!(
        r#"
        leader_id={:?}
        blockhash={}
        block_height={}
        slot={}
        parent_slot={}
        block_time={}
        commitment_config={}
        previous_blockhash={}
        num_transactions={}
        "#,
        produced_block.leader_id,
        produced_block.blockhash,
        produced_block.block_height,
        produced_block.slot,
        produced_block.parent_slot,
        produced_block.block_time,
        produced_block.commitment_config.commitment,
        produced_block.previous_blockhash,
        produced_block.transactions.len(),
        // rewards
        // transactions
    )
}

fn configure_panic_hook() {
    let default_panic = std::panic::take_hook();
    std::panic::set_hook(Box::new(move |panic_info| {
        default_panic(panic_info);
        // e.g. panicked at 'BANG', lite-rpc/tests/blockstore_integration_tests:260:25
        error!("{}", panic_info);
        eprintln!("{}", panic_info);
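        // exit the whole process: by default a panic inside a spawned tokio
        // task only kills that task, and the test would keep running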
        process::exit(12);
    }));
}