solana/src/tpu.rs

//! The `tpu` module implements the Transaction Processing Unit, a
//! multi-stage transaction processing pipeline in software.
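//!
//! The TPU runs in one of two modes: `Leader`, which drives the full
//! fetch/sigverify/banking/broadcast pipeline, or `Forwarder`, which relays
//! incoming transactions to the cluster's current leader.
//!
//! A rough usage sketch (not a compiled doctest; the id, cluster info, bank,
//! sockets, and other arguments are assumed to come from the surrounding
//! fullnode setup):
//!
//! ```rust,ignore
//! let mut tpu = Tpu::new(id, &cluster_info);
//! tpu.switch_to_forwarder(leader_id, forwarder_sockets);
//! // ...later, once this node is scheduled to lead `slot`:
//! tpu.switch_to_leader(
//!     bank,
//!     tick_config,
//!     leader_sockets,
//!     broadcast_socket,
//!     sigverify_disabled,
//!     slot,
//!     last_entry_id,
//!     &blocktree,
//! );
//! tpu.close().unwrap();
//! ```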
use crate::banking_stage::{BankingStage, UnprocessedPackets};
use crate::blocktree::Blocktree;
use crate::broadcast_service::BroadcastService;
use crate::cluster_info::ClusterInfo;
use crate::cluster_info_vote_listener::ClusterInfoVoteListener;
use crate::fetch_stage::FetchStage;
use crate::poh_service::PohServiceConfig;
use crate::service::Service;
use crate::sigverify_stage::SigVerifyStage;
use crate::tpu_forwarder::TpuForwarder;
use solana_runtime::bank::Bank;
use solana_sdk::hash::Hash;
use solana_sdk::pubkey::Pubkey;
use std::net::UdpSocket;
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::mpsc::channel;
use std::sync::{Arc, RwLock};
use std::thread;
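
/// Which set of services the TPU is currently running.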
pub enum TpuMode {
    Leader(LeaderServices),
    Forwarder(ForwarderServices),
}
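
/// The services that make up the leader's transaction-processing pipeline.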
pub struct LeaderServices {
    fetch_stage: FetchStage,
    sigverify_stage: SigVerifyStage,
    banking_stage: BankingStage,
    cluster_info_vote_listener: ClusterInfoVoteListener,
    broadcast_service: BroadcastService,
}

impl LeaderServices {
    fn new(
        fetch_stage: FetchStage,
        sigverify_stage: SigVerifyStage,
        banking_stage: BankingStage,
        cluster_info_vote_listener: ClusterInfoVoteListener,
        broadcast_service: BroadcastService,
    ) -> Self {
        LeaderServices {
            fetch_stage,
            sigverify_stage,
            banking_stage,
            cluster_info_vote_listener,
            broadcast_service,
        }
    }

    fn exit(&self) {
        self.fetch_stage.close();
    }

    fn join(self) -> thread::Result<()> {
        let mut results = vec![];
        results.push(self.fetch_stage.join());
        results.push(self.sigverify_stage.join());
        results.push(self.cluster_info_vote_listener.join());
        results.push(self.banking_stage.join());
        let broadcast_result = self.broadcast_service.join();
        for result in results {
            result?;
        }
        let _ = broadcast_result?;
        Ok(())
    }

    fn close(self) -> thread::Result<()> {
        self.exit();
        self.join()
    }
}
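
/// The single service run when this node only forwards transactions to the
/// current leader.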
pub struct ForwarderServices {
    tpu_forwarder: TpuForwarder,
}

impl ForwarderServices {
    fn new(tpu_forwarder: TpuForwarder) -> Self {
        ForwarderServices { tpu_forwarder }
    }

    fn exit(&self) {
        self.tpu_forwarder.close();
    }

    fn join(self) -> thread::Result<()> {
        self.tpu_forwarder.join()
    }

    fn close(self) -> thread::Result<()> {
        self.exit();
        self.join()
    }
}
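
/// The Transaction Processing Unit. Holds the services for whichever mode the
/// node is currently in, or none before the first mode switch.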
pub struct Tpu {
    tpu_mode: Option<TpuMode>,
    exit: Arc<AtomicBool>,
    id: Pubkey,
    cluster_info: Arc<RwLock<ClusterInfo>>,
}

impl Tpu {
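    /// Creates a TPU with no active mode. Call `switch_to_leader` or
    /// `switch_to_forwarder` to start processing transactions.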
    pub fn new(id: Pubkey, cluster_info: &Arc<RwLock<ClusterInfo>>) -> Self {
        Self {
            tpu_mode: None,
            exit: Arc::new(AtomicBool::new(false)),
            id,
            cluster_info: cluster_info.clone(),
        }
    }
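
    /// Signals the active mode's services, if any, to begin shutting down.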
    fn mode_exit(&mut self) {
        match &mut self.tpu_mode {
            Some(TpuMode::Leader(svcs)) => {
                svcs.exit();
            }
            Some(TpuMode::Forwarder(svcs)) => {
                svcs.exit();
            }
            None => (),
        }
    }
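
    /// Shuts down and joins the active mode's services, leaving the TPU with
    /// no mode.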
    fn mode_close(&mut self) {
        let tpu_mode = self.tpu_mode.take();
        if let Some(tpu_mode) = tpu_mode {
            match tpu_mode {
                TpuMode::Leader(svcs) => {
                    let _ = svcs.close();
                }
                TpuMode::Forwarder(svcs) => {
                    let _ = svcs.close();
                }
            }
        }
    }
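
    /// Sends any packets left unprocessed by the old mode to the given TPU
    /// address over a throwaway UDP socket.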
    fn forward_unprocessed_packets(
        tpu: &std::net::SocketAddr,
        unprocessed_packets: UnprocessedPackets,
    ) -> std::io::Result<()> {
        let socket = UdpSocket::bind("0.0.0.0:0")?;
        for (packets, start_index) in unprocessed_packets {
            let packets = packets.read().unwrap();
            for packet in packets.packets.iter().skip(start_index) {
                socket.send_to(&packet.data[..packet.meta.size], tpu)?;
            }
        }
        Ok(())
    }
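
    /// Tears down the current mode and hands any transactions it had not yet
    /// processed to the cluster's current leader.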
    fn close_and_forward_unprocessed_packets(&mut self) {
        self.mode_exit();
        let unprocessed_packets = match self.tpu_mode.as_mut() {
            Some(TpuMode::Leader(svcs)) => {
                svcs.banking_stage.join_and_collect_unprocessed_packets()
            }
            Some(TpuMode::Forwarder(svcs)) => {
                svcs.tpu_forwarder.join_and_collect_unprocessed_packets()
            }
            None => vec![],
        };
        if !unprocessed_packets.is_empty() {
            let tpu = self.cluster_info.read().unwrap().leader_data().unwrap().tpu;
            info!("forwarding unprocessed packets to new leader at {:?}", tpu);
            Tpu::forward_unprocessed_packets(&tpu, unprocessed_packets).unwrap_or_else(|err| {
                warn!("Failed to forward unprocessed transactions: {:?}", err)
            });
        }
        self.mode_close();
    }
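
    /// Drops the current mode and begins forwarding incoming transactions to
    /// the node identified by `leader_id`.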
    pub fn switch_to_forwarder(&mut self, leader_id: Pubkey, transactions_sockets: Vec<UdpSocket>) {
        self.close_and_forward_unprocessed_packets();
        self.cluster_info.write().unwrap().set_leader(leader_id);

        let tpu_forwarder = TpuForwarder::new(transactions_sockets, self.cluster_info.clone());
        self.tpu_mode = Some(TpuMode::Forwarder(ForwarderServices::new(tpu_forwarder)));
    }
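
    /// Drops the current mode and starts the full leader pipeline for `slot`,
    /// advertising this node as the cluster's leader.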
    #[allow(clippy::too_many_arguments)]
    pub fn switch_to_leader(
        &mut self,
        bank: Arc<Bank>,
        tick_duration: PohServiceConfig,
        transactions_sockets: Vec<UdpSocket>,
        broadcast_socket: UdpSocket,
        sigverify_disabled: bool,
        slot: u64,
        last_entry_id: Hash,
        blocktree: &Arc<Blocktree>,
    ) {
        self.close_and_forward_unprocessed_packets();

        self.cluster_info.write().unwrap().set_leader(self.id);
        self.exit = Arc::new(AtomicBool::new(false));
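
        // Wire up the leader pipeline: fetch -> sigverify -> banking -> broadcast,
        // with gossip votes fed into the same packet channel as fetched transactions.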
        let (packet_sender, packet_receiver) = channel();
        let fetch_stage = FetchStage::new_with_sender(
            transactions_sockets,
            self.exit.clone(),
            &packet_sender.clone(),
        );
        let cluster_info_vote_listener = ClusterInfoVoteListener::new(
            self.exit.clone(),
            self.cluster_info.clone(),
            packet_sender,
        );
        let (sigverify_stage, verified_receiver) =
            SigVerifyStage::new(packet_receiver, sigverify_disabled);

        // TODO: Fix BankingStage/BroadcastService to operate on `slot` directly instead of
        // `max_tick_height`
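        // Each slot is `ticks_per_slot` ticks long, so slot `slot` ends at tick
        // `(slot + 1) * ticks_per_slot - 1`.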
        let max_tick_height = (slot + 1) * bank.ticks_per_slot() - 1;
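        // Resume this slot's blob numbering where the blocktree's metadata says
        // the already-recorded blobs leave off.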
        let blob_index = blocktree
            .meta(slot)
            .expect("Database error")
            .map(|meta| meta.consumed)
            .unwrap_or(0);
        let (banking_stage, entry_receiver) = BankingStage::new(
            &bank,
            verified_receiver,
            tick_duration,
            &last_entry_id,
            max_tick_height,
            self.id,
        );
        let broadcast_service = BroadcastService::new(
            slot,
            bank,
            broadcast_socket,
            self.cluster_info.clone(),
            blob_index,
            entry_receiver,
            self.exit.clone(),
            blocktree,
        );

        let svcs = LeaderServices::new(
            fetch_stage,
            sigverify_stage,
            banking_stage,
            cluster_info_vote_listener,
            broadcast_service,
        );
        self.tpu_mode = Some(TpuMode::Leader(svcs));
    }
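
    /// Returns `Some(true)` in leader mode, `Some(false)` in forwarder mode,
    /// and `None` if no mode has been selected yet.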
    pub fn is_leader(&self) -> Option<bool> {
        match self.tpu_mode {
            Some(TpuMode::Leader(_)) => Some(true),
            Some(TpuMode::Forwarder(_)) => Some(false),
            None => None,
        }
    }

    pub fn exit(&self) {
        self.exit.store(true, Ordering::Relaxed);
    }

    pub fn is_exited(&self) -> bool {
        self.exit.load(Ordering::Relaxed)
    }

    pub fn close(mut self) -> thread::Result<()> {
        self.mode_close();
        self.join()
    }
}

impl Service for Tpu {
    type JoinReturnType = ();

    fn join(self) -> thread::Result<()> {
        match self.tpu_mode {
            Some(TpuMode::Leader(svcs)) => svcs.join()?,
            Some(TpuMode::Forwarder(svcs)) => svcs.join()?,
            None => (),
        }
        Ok(())
    }
}