solana/src/tvu.rs

//! The `tvu` module implements the Transaction Validation Unit, a
//! multi-stage transaction validation pipeline in software.
//!
//! 1. BlobFetchStage
//! - Incoming blobs are picked up from the TVU sockets and repair socket.
//! 2. RetransmitStage
//! - Blobs are windowed until a contiguous chunk is available. This stage also repairs and
//!   retransmits blobs that are in the queue.
//! 3. ReplayStage
//! - Transactions in blobs are processed and applied to the bank.
//! - TODO We need to verify the signatures in the blobs.
//! 4. StorageStage
//! - Generates the keys used to encrypt the ledger and samples it for storage mining.
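//!
//! A rough sketch of how data flows through these stages (informal; see `Tvu::new`
//! for the exact wiring):
//!
//! ```text
//! tvu/repair sockets -> BlobFetchStage -> RetransmitStage -> Blocktree
//!                                                                |
//!                                               (ledger signal)  v
//!         StorageStage <- EntryStreamStage <-------------- ReplayStage
//! ```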
use crate::bank::Bank;
use crate::blob_fetch_stage::BlobFetchStage;
use crate::blocktree::Blocktree;
use crate::cluster_info::ClusterInfo;
use crate::entry_stream_stage::EntryStreamStage;
use crate::replay_stage::ReplayStage;
use crate::retransmit_stage::RetransmitStage;
use crate::service::Service;
use crate::storage_stage::{StorageStage, StorageState};
use crate::streamer::BlobSender;
use crate::tpu::{TpuReturnType, TpuRotationReceiver, TpuRotationSender};
use crate::voting_keypair::VotingKeypair;
use solana_sdk::hash::Hash;
use solana_sdk::signature::{Keypair, KeypairUtil};
use std::net::UdpSocket;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::mpsc::{channel, Receiver, SyncSender};
use std::sync::{Arc, RwLock};
use std::thread;

pub type TvuReturnType = TpuReturnType;
pub type TvuRotationSender = TpuRotationSender;
pub type TvuRotationReceiver = TpuRotationReceiver;

pub struct Tvu {
    fetch_stage: BlobFetchStage,
    retransmit_stage: RetransmitStage,
    replay_stage: ReplayStage,
    entry_stream_stage: EntryStreamStage,
    storage_stage: StorageStage,
    exit: Arc<AtomicBool>,
    last_entry_id: Arc<RwLock<Hash>>,
}
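
/// The UDP sockets a `Tvu` reads from. In the tests below these come from a
/// `Node`: `fetch` is `node.sockets.tvu`, `repair` is `node.sockets.repair`, and
/// `retransmit` is `node.sockets.retransmit`.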
pub struct Sockets {
    pub fetch: Vec<UdpSocket>,
    pub repair: UdpSocket,
    pub retransmit: UdpSocket,
}

impl Tvu {
    /// This service receives messages from a leader in the network and processes the transactions
    /// on the bank state.
    /// # Arguments
    /// * `bank` - The bank state.
    /// * `entry_height` - Initial ledger height
    /// * `blob_index` - Index of last processed blob
    /// * `last_entry_id` - Hash of the last entry
    /// * `cluster_info` - The cluster_info state.
    /// * `sockets` - My fetch, repair, and retransmit sockets
    /// * `blocktree` - the ledger itself
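    ///
    /// # Example (sketch)
    ///
    /// A minimal construction sketch that mirrors `test_tvu_exit` below; the bank,
    /// cluster info, sockets, blocktree, and channels are assumed to already exist:
    ///
    /// ```ignore
    /// let (tvu, _blob_sender) = Tvu::new(
    ///     Some(voting_keypair),
    ///     &bank,
    ///     0,                          // blob_index
    ///     0,                          // entry_height
    ///     last_entry_id,
    ///     &cluster_info,
    ///     sockets,
    ///     blocktree,
    ///     STORAGE_ROTATE_TEST_COUNT,
    ///     &to_leader_sender,
    ///     &StorageState::default(),
    ///     None,                       // entry_stream
    ///     ledger_signal_sender,
    ///     ledger_signal_receiver,
    /// );
    /// tvu.close().expect("close");
    /// ```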
    #[allow(clippy::new_ret_no_self, clippy::too_many_arguments)]
    pub fn new(
        voting_keypair: Option<Arc<VotingKeypair>>,
        bank: &Arc<Bank>,
        blob_index: u64,
        entry_height: u64,
        last_entry_id: Hash,
        cluster_info: &Arc<RwLock<ClusterInfo>>,
        sockets: Sockets,
        blocktree: Arc<Blocktree>,
        storage_rotate_count: u64,
        to_leader_sender: &TvuRotationSender,
        storage_state: &StorageState,
        entry_stream: Option<&String>,
        ledger_signal_sender: SyncSender<bool>,
        ledger_signal_receiver: Receiver<bool>,
    ) -> (Self, BlobSender) {
        let exit = Arc::new(AtomicBool::new(false));
        let keypair: Arc<Keypair> = cluster_info
            .read()
            .expect("Unable to read from cluster_info during Tvu creation")
            .keypair
            .clone();

        let Sockets {
            repair: repair_socket,
            fetch: fetch_sockets,
            retransmit: retransmit_socket,
        } = sockets;

        let (blob_fetch_sender, blob_fetch_receiver) = channel();
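        // The repair socket doubles as another blob source: repaired blobs are fed
        // through the same fetch stage as blobs arriving on the TVU sockets.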
        let repair_socket = Arc::new(repair_socket);
        let mut blob_sockets: Vec<Arc<UdpSocket>> =
            fetch_sockets.into_iter().map(Arc::new).collect();
        blob_sockets.push(repair_socket.clone());
        let fetch_stage =
            BlobFetchStage::new_multi_socket(blob_sockets, &blob_fetch_sender, exit.clone());

        //TODO
        //the packets coming out of blob_receiver need to be sent to the GPU and verified
        //then sent to the window, which does the erasure coding reconstruction
        let retransmit_stage = RetransmitStage::new(
            bank,
            blocktree.clone(),
            &cluster_info,
            Arc::new(retransmit_socket),
            repair_socket,
            blob_fetch_receiver,
            bank.leader_scheduler.clone(),
            exit.clone(),
        );
        let l_last_entry_id = Arc::new(RwLock::new(last_entry_id));

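        // ReplayStage consumes entries from the blocktree (woken by the ledger signal
        // channel), applies them to the bank, and forwards them on the returned
        // channel to the entry stream and storage stages below.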
        let (replay_stage, ledger_entry_receiver) = ReplayStage::new(
            keypair.pubkey(),
            voting_keypair,
            blocktree.clone(),
            bank.clone(),
            cluster_info.clone(),
            exit.clone(),
            blob_index,
            l_last_entry_id.clone(),
            to_leader_sender,
            ledger_signal_sender,
            ledger_signal_receiver,
        );

        let (entry_stream_stage, entry_stream_receiver) = EntryStreamStage::new(
            ledger_entry_receiver,
            entry_stream,
            bank.leader_scheduler.clone(),
            exit.clone(),
        );

        let storage_stage = StorageStage::new(
            storage_state,
            entry_stream_receiver,
            Some(blocktree),
            &keypair,
            &exit.clone(),
            entry_height,
            storage_rotate_count,
            &cluster_info,
        );

        (
            Tvu {
                fetch_stage,
                retransmit_stage,
                replay_stage,
                entry_stream_stage,
                storage_stage,
                exit,
                last_entry_id: l_last_entry_id,
            },
            blob_fetch_sender,
        )
    }

    #[cfg(test)]
    pub fn get_pause(&self) -> Arc<AtomicBool> {
        self.replay_stage.get_pause()
    }

    pub fn get_state(&self) -> Hash {
        *self.last_entry_id.read().unwrap()
    }

    pub fn is_exited(&self) -> bool {
        self.exit.load(Ordering::Relaxed)
    }

    pub fn exit(&self) {
        // Call exit to make sure the replay stage is unblocked from any channel it may be
        // blocked on. The replay stage will then set self.exit and cause the rest of the
        // pipeline to exit.
        self.replay_stage.exit();
    }

    pub fn close(self) -> thread::Result<()> {
        self.exit();
        self.join()
    }
}

impl Service for Tvu {
    type JoinReturnType = ();

    fn join(self) -> thread::Result<()> {
        self.retransmit_stage.join()?;
        self.fetch_stage.join()?;
        self.storage_stage.join()?;
        self.entry_stream_stage.join()?;
        self.replay_stage.join()?;
        Ok(())
    }
}

#[cfg(test)]
pub mod tests {
    use super::*;
    use crate::bank::Bank;
    use crate::blocktree::get_tmp_ledger_path;
    use crate::cluster_info::{ClusterInfo, Node};
    use crate::entry::Entry;
    use crate::genesis_block::GenesisBlock;
    use crate::gossip_service::GossipService;
    use crate::packet::SharedBlob;
    use crate::storage_stage::STORAGE_ROTATE_TEST_COUNT;
    use crate::streamer;
    use bincode::serialize;
    use solana_sdk::system_transaction::SystemTransaction;
    use std::fs::remove_dir_all;
    use std::time::Duration;

    fn new_gossip(
        cluster_info: Arc<RwLock<ClusterInfo>>,
        gossip: UdpSocket,
        exit: Arc<AtomicBool>,
    ) -> GossipService {
        GossipService::new(&cluster_info, None, gossip, exit)
    }

    #[test]
    fn test_tvu_exit() {
        solana_logger::setup();
        let leader = Node::new_localhost();
        let target1_keypair = Keypair::new();
        let target1 = Node::new_localhost_with_pubkey(target1_keypair.pubkey());

        let starting_balance = 10_000;
        let (genesis_block, _mint_keypair) = GenesisBlock::new(starting_balance);

        let bank = Arc::new(Bank::new(&genesis_block));

        //start cluster_info1
        let mut cluster_info1 = ClusterInfo::new(target1.info.clone());
        cluster_info1.insert_info(leader.info.clone());
        cluster_info1.set_leader(leader.info.id);
        let cref1 = Arc::new(RwLock::new(cluster_info1));

        let cur_hash = Hash::default();
        let blocktree_path = get_tmp_ledger_path("test_replay");
        let (blocktree, l_sender, l_receiver) = Blocktree::open_with_signal(&blocktree_path)
            .expect("Expected to successfully open ledger");
        let vote_account_keypair = Arc::new(Keypair::new());
        let voting_keypair = VotingKeypair::new_local(&vote_account_keypair);
        let (sender, _receiver) = channel();
        let (tvu, _blob_sender) = Tvu::new(
            Some(Arc::new(voting_keypair)),
            &bank,
            0,
            0,
            cur_hash,
            &cref1,
            {
                Sockets {
                    repair: target1.sockets.repair,
                    retransmit: target1.sockets.retransmit,
                    fetch: target1.sockets.tvu,
                }
            },
            Arc::new(blocktree),
            STORAGE_ROTATE_TEST_COUNT,
            &sender,
            &StorageState::default(),
            None,
            l_sender,
            l_receiver,
        );
        tvu.close().expect("close");
    }

    /// Test that a message sent from the leader to target1 is replayed to target2
    #[test]
    #[ignore]
    fn test_replay() {
        solana_logger::setup();
        let leader = Node::new_localhost();
        let target1_keypair = Keypair::new();
        let target1 = Node::new_localhost_with_pubkey(target1_keypair.pubkey());
        let target2 = Node::new_localhost();
        let exit = Arc::new(AtomicBool::new(false));

        //start cluster_info_l
        let mut cluster_info_l = ClusterInfo::new(leader.info.clone());
        cluster_info_l.set_leader(leader.info.id);

        let cref_l = Arc::new(RwLock::new(cluster_info_l));
        let dr_l = new_gossip(cref_l, leader.sockets.gossip, exit.clone());

        //start cluster_info2
        let mut cluster_info2 = ClusterInfo::new(target2.info.clone());
        cluster_info2.insert_info(leader.info.clone());
        cluster_info2.set_leader(leader.info.id);
        let leader_id = leader.info.id;
        let cref2 = Arc::new(RwLock::new(cluster_info2));
        let dr_2 = new_gossip(cref2, target2.sockets.gossip, exit.clone());

        // setup some blob services to send blobs into the socket
        // to simulate the source peer and get blobs out of the socket to
        // simulate target peer
        let (s_reader, r_reader) = channel();
        let blob_sockets: Vec<Arc<UdpSocket>> =
            target2.sockets.tvu.into_iter().map(Arc::new).collect();

        let t_receiver = streamer::blob_receiver(blob_sockets[0].clone(), exit.clone(), s_reader);

        // simulate leader sending messages
        let (s_responder, r_responder) = channel();
        let t_responder = streamer::responder(
            "test_replay",
            Arc::new(leader.sockets.retransmit),
            r_responder,
        );

        let starting_balance = 10_000;
        let (genesis_block, mint_keypair) = GenesisBlock::new(starting_balance);
        let tvu_addr = target1.info.tvu;
        let bank = Arc::new(Bank::new(&genesis_block));

        //start cluster_info1
        let mut cluster_info1 = ClusterInfo::new(target1.info.clone());
        cluster_info1.insert_info(leader.info.clone());
        cluster_info1.set_leader(leader.info.id);
        let cref1 = Arc::new(RwLock::new(cluster_info1));
        let dr_1 = new_gossip(cref1.clone(), target1.sockets.gossip, exit.clone());

        let mut cur_hash = Hash::default();
        let blocktree_path = get_tmp_ledger_path("test_replay");
        let (blocktree, l_sender, l_receiver) = Blocktree::open_with_signal(&blocktree_path)
            .expect("Expected to successfully open ledger");
        let vote_account_keypair = Arc::new(Keypair::new());
        let voting_keypair = VotingKeypair::new_local(&vote_account_keypair);
        let (sender, _) = channel();
        let (tvu, _) = Tvu::new(
            Some(Arc::new(voting_keypair)),
            &bank,
            0,
            0,
            cur_hash,
            &cref1,
            {
                Sockets {
                    repair: target1.sockets.repair,
                    retransmit: target1.sockets.retransmit,
                    fetch: target1.sockets.tvu,
                }
            },
            Arc::new(blocktree),
            STORAGE_ROTATE_TEST_COUNT,
            &sender,
            &StorageState::default(),
            None,
            l_sender,
            l_receiver,
        );

        let mut alice_ref_balance = starting_balance;
        let mut msgs = Vec::new();
        let mut blob_idx = 0;
        let num_transfers = 10;
        let transfer_amount = 501;
        let bob_keypair = Keypair::new();
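
        // Build `num_transfers` batches of entries: ticks surrounding a single
        // transfer from the mint to bob, each entry serialized into a blob addressed
        // to target1's TVU port.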
        for i in 0..num_transfers {
            let entry0 = Entry::new(&cur_hash, 0, i, vec![]);
            cur_hash = entry0.id;
            bank.register_tick(&cur_hash);
            let entry_tick0 = Entry::new(&cur_hash, 0, i + 1, vec![]);
            cur_hash = entry_tick0.id;

            let tx0 = SystemTransaction::new_account(
                &mint_keypair,
                bob_keypair.pubkey(),
                transfer_amount,
                cur_hash,
                0,
            );
            bank.register_tick(&cur_hash);
            let entry_tick1 = Entry::new(&cur_hash, 0, i + 1, vec![]);
            cur_hash = entry_tick1.id;
            let entry1 = Entry::new(&cur_hash, 0, i + num_transfers, vec![tx0]);
            bank.register_tick(&entry1.id);
            let entry_tick2 = Entry::new(&entry1.id, 0, i + 1, vec![]);
            cur_hash = entry_tick2.id;

            alice_ref_balance -= transfer_amount;

            for entry in vec![entry0, entry_tick0, entry_tick1, entry1, entry_tick2] {
                let b = SharedBlob::default();
                {
                    let mut w = b.write().unwrap();
                    w.set_index(blob_idx);
                    blob_idx += 1;
                    w.set_id(&leader_id);

                    let serialized_entry = serialize(&entry).unwrap();

                    w.data_mut()[..serialized_entry.len()].copy_from_slice(&serialized_entry);
                    w.set_size(serialized_entry.len());
                    w.meta.set_addr(&tvu_addr);
                }
                msgs.push(b);
            }
        }

        // send the blobs into the socket
        s_responder.send(msgs).expect("send");
        drop(s_responder);

        // receive retransmitted messages
        let timer = Duration::new(1, 0);
        while let Ok(_msg) = r_reader.recv_timeout(timer) {
            trace!("got msg");
        }

        let alice_balance = bank.get_balance(&mint_keypair.pubkey());
        assert_eq!(alice_balance, alice_ref_balance);

        let bob_balance = bank.get_balance(&bob_keypair.pubkey());
        assert_eq!(bob_balance, starting_balance - alice_ref_balance);

        tvu.close().expect("close");
        exit.store(true, Ordering::Relaxed);
        dr_l.join().expect("join");
        dr_2.join().expect("join");
        dr_1.join().expect("join");
        t_receiver.join().expect("join");
        t_responder.join().expect("join");
        Blocktree::destroy(&blocktree_path).expect("Expected successful database destruction");
        let _ignored = remove_dir_all(&blocktree_path);
    }
}
}