2018-05-22 14:26:28 -07:00
|
|
|
//! The `replicate_stage` replicates transactions broadcast by the leader.
|
|
|
|
|
2018-05-22 15:30:46 -07:00
|
|
|
use bank::Bank;
|
2018-10-08 19:55:54 -07:00
|
|
|
use cluster_info::ClusterInfo;
|
2018-07-16 18:33:50 -07:00
|
|
|
use counter::Counter;
|
2018-09-21 16:01:24 -07:00
|
|
|
use entry::EntryReceiver;
|
2018-10-10 16:49:41 -07:00
|
|
|
use leader_scheduler::LeaderScheduler;
|
2018-09-21 16:01:24 -07:00
|
|
|
use ledger::{Block, LedgerWriter};
|
2018-08-06 11:35:45 -07:00
|
|
|
use log::Level;
|
2018-07-05 15:29:49 -07:00
|
|
|
use result::{Error, Result};
|
2018-07-03 21:14:08 -07:00
|
|
|
use service::Service;
|
2018-10-10 16:49:41 -07:00
|
|
|
use signature::{Keypair, KeypairUtil};
|
2018-07-05 12:01:40 -07:00
|
|
|
use std::net::UdpSocket;
|
2018-09-25 15:41:29 -07:00
|
|
|
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
2018-07-05 12:01:40 -07:00
|
|
|
use std::sync::mpsc::channel;
|
2018-07-05 15:29:49 -07:00
|
|
|
use std::sync::mpsc::RecvTimeoutError;
|
2018-07-05 12:01:40 -07:00
|
|
|
use std::sync::{Arc, RwLock};
|
2018-07-03 21:14:08 -07:00
|
|
|
use std::thread::{self, Builder, JoinHandle};
|
2018-05-22 15:30:46 -07:00
|
|
|
use std::time::Duration;
|
2018-09-26 20:58:06 -07:00
|
|
|
use std::time::Instant;
|
2018-09-10 08:32:52 -07:00
|
|
|
use streamer::{responder, BlobSender};
|
|
|
|
use vote_stage::send_validator_vote;
|
2018-05-22 14:26:28 -07:00
|
|
|
|
2018-10-10 16:49:41 -07:00
|
|
|
/// Reason the replicate stage's worker thread exited, returned from `join`.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum ReplicateStageReturnType {
    // The replicate thread observed that this node is the scheduled leader;
    // carries the entry height at which the rotation was detected.
    LeaderRotation(u64),
}
|
|
|
|
|
2018-09-25 15:41:29 -07:00
|
|
|
// Implement a destructor for the ReplicateStage thread to signal it exited
// even on panics
struct Finalizer {
    // Flag flipped to `true` when this Finalizer is dropped (including during
    // unwinding), signaling the rest of the node that the thread has exited.
    exit_sender: Arc<AtomicBool>,
}

impl Finalizer {
    /// Wrap `exit_sender` so it is set whenever the owning scope exits.
    fn new(exit_sender: Arc<AtomicBool>) -> Self {
        Finalizer { exit_sender }
    }
}

// Implement a destructor for Finalizer.
impl Drop for Finalizer {
    fn drop(&mut self) {
        // Store through the Arc directly; the previous `self.exit_sender.clone()`
        // was a needless refcount bump — `store` is reachable via Deref.
        self.exit_sender.store(true, Ordering::Relaxed);
    }
}
|
|
|
|
|
2018-05-22 14:26:28 -07:00
|
|
|
/// Stage that applies entries broadcast by the leader to the local bank,
/// votes, and records the entries in the local ledger.
pub struct ReplicateStage {
    // Thread that ships vote blobs out over UDP.
    t_responder: JoinHandle<()>,
    // Main replicate thread; yields `Some(LeaderRotation(height))` if it
    // exited because this node is scheduled to become the leader.
    t_replicate: JoinHandle<Option<ReplicateStageReturnType>>,
}
|
|
|
|
|
|
|
|
impl ReplicateStage {
|
2018-06-15 14:27:06 -07:00
|
|
|
/// Process entry blobs, already in order
|
2018-07-05 12:01:40 -07:00
|
|
|
fn replicate_requests(
|
|
|
|
bank: &Arc<Bank>,
|
2018-10-08 19:55:54 -07:00
|
|
|
cluster_info: &Arc<RwLock<ClusterInfo>>,
|
2018-09-21 16:01:24 -07:00
|
|
|
window_receiver: &EntryReceiver,
|
2018-08-05 22:04:27 -07:00
|
|
|
ledger_writer: Option<&mut LedgerWriter>,
|
2018-09-10 08:32:52 -07:00
|
|
|
keypair: &Arc<Keypair>,
|
2018-09-26 20:58:06 -07:00
|
|
|
vote_blob_sender: Option<&BlobSender>,
|
2018-10-10 16:49:41 -07:00
|
|
|
entry_height: &mut u64,
|
|
|
|
leader_scheduler: &Arc<RwLock<LeaderScheduler>>,
|
2018-07-05 12:01:40 -07:00
|
|
|
) -> Result<()> {
|
2018-05-22 15:30:46 -07:00
|
|
|
let timer = Duration::new(1, 0);
|
2018-09-21 16:01:24 -07:00
|
|
|
//coalesce all the available entries into a single vote
|
|
|
|
let mut entries = window_receiver.recv_timeout(timer)?;
|
2018-07-05 12:01:40 -07:00
|
|
|
while let Ok(mut more) = window_receiver.try_recv() {
|
2018-09-21 16:01:24 -07:00
|
|
|
entries.append(&mut more);
|
2018-07-05 12:01:40 -07:00
|
|
|
}
|
2018-08-06 00:59:42 -07:00
|
|
|
|
2018-10-10 16:49:41 -07:00
|
|
|
let mut res = Ok(());
|
|
|
|
{
|
|
|
|
let mut num_entries_to_write = entries.len();
|
|
|
|
for (i, entry) in entries.iter().enumerate() {
|
|
|
|
res = bank.process_entry(&entry);
|
|
|
|
Bank::process_entry_votes(
|
|
|
|
&bank,
|
|
|
|
&entry,
|
|
|
|
*entry_height + i as u64 + 1,
|
|
|
|
&mut *leader_scheduler.write().unwrap(),
|
|
|
|
);
|
|
|
|
|
|
|
|
{
|
|
|
|
let ls_lock = leader_scheduler.read().unwrap();
|
|
|
|
if ls_lock.is_leader_rotation_height(
|
|
|
|
// i is zero indexed, so current entry height for this entry is actually the
|
|
|
|
// old entry height + i + 1
|
|
|
|
*entry_height + i as u64 + 1,
|
|
|
|
) {
|
|
|
|
let my_id = keypair.pubkey();
|
|
|
|
let scheduled_leader =
|
|
|
|
ls_lock.get_scheduled_leader(*entry_height + i as u64 + 1).expect("Scheduled leader id should never be unknown while processing entries");
|
|
|
|
cluster_info.write().unwrap().set_leader(scheduled_leader);
|
|
|
|
if my_id == scheduled_leader {
|
|
|
|
num_entries_to_write = i + 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if res.is_err() {
|
|
|
|
// TODO: This will return early from the first entry that has an erroneous
|
|
|
|
// transaction, instad of processing the rest of the entries in the vector
|
|
|
|
// of received entries. This is in line with previous behavior when
|
|
|
|
// bank.process_entries() was used to process the entries, but doesn't solve the
|
|
|
|
// issue that the bank state was still changed, leading to inconsistencies with the
|
|
|
|
// leader as the leader currently should not be publishing erroneous transactions
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// If leader rotation happened, only write the entries up to leader rotation.
|
|
|
|
entries.truncate(num_entries_to_write);
|
|
|
|
}
|
2018-09-26 20:58:06 -07:00
|
|
|
|
|
|
|
if let Some(sender) = vote_blob_sender {
|
2018-10-08 19:55:54 -07:00
|
|
|
send_validator_vote(bank, keypair, cluster_info, sender)?;
|
2018-09-26 20:58:06 -07:00
|
|
|
}
|
2018-09-10 08:32:52 -07:00
|
|
|
|
2018-10-10 16:49:41 -07:00
|
|
|
cluster_info.write().unwrap().insert_votes(&entries.votes());
|
2018-08-06 00:59:42 -07:00
|
|
|
|
2018-08-06 11:35:45 -07:00
|
|
|
inc_new_counter_info!(
|
2018-07-16 18:33:50 -07:00
|
|
|
"replicate-transactions",
|
|
|
|
entries.iter().map(|x| x.transactions.len()).sum()
|
|
|
|
);
|
2018-08-06 00:59:42 -07:00
|
|
|
|
2018-10-10 16:49:41 -07:00
|
|
|
let entries_len = entries.len() as u64;
|
2018-08-06 00:59:42 -07:00
|
|
|
// TODO: move this to another stage?
|
2018-10-10 16:49:41 -07:00
|
|
|
// TODO: In line with previous behavior, this will write all the entries even if
|
|
|
|
// an error occurred processing one of the entries (causing the rest of the entries to
|
|
|
|
// not be processed).
|
2018-08-05 22:04:27 -07:00
|
|
|
if let Some(ledger_writer) = ledger_writer {
|
2018-08-06 00:59:42 -07:00
|
|
|
ledger_writer.write_entries(entries)?;
|
2018-08-05 22:04:27 -07:00
|
|
|
}
|
2018-08-03 11:06:06 -07:00
|
|
|
|
2018-10-10 16:49:41 -07:00
|
|
|
*entry_height += entries_len;
|
2018-09-03 02:48:11 -07:00
|
|
|
res?;
|
2018-05-22 15:30:46 -07:00
|
|
|
Ok(())
|
|
|
|
}
|
2018-09-25 15:41:29 -07:00
|
|
|
|
2018-07-05 12:01:40 -07:00
|
|
|
pub fn new(
|
2018-09-14 01:53:18 -07:00
|
|
|
keypair: Arc<Keypair>,
|
2018-07-05 12:01:40 -07:00
|
|
|
bank: Arc<Bank>,
|
2018-10-08 19:55:54 -07:00
|
|
|
cluster_info: Arc<RwLock<ClusterInfo>>,
|
2018-09-21 16:01:24 -07:00
|
|
|
window_receiver: EntryReceiver,
|
2018-08-05 22:04:27 -07:00
|
|
|
ledger_path: Option<&str>,
|
2018-09-25 15:41:29 -07:00
|
|
|
exit: Arc<AtomicBool>,
|
2018-10-10 16:49:41 -07:00
|
|
|
entry_height: u64,
|
|
|
|
leader_scheduler: Arc<RwLock<LeaderScheduler>>,
|
2018-07-05 12:01:40 -07:00
|
|
|
) -> Self {
|
|
|
|
let (vote_blob_sender, vote_blob_receiver) = channel();
|
|
|
|
let send = UdpSocket::bind("0.0.0.0:0").expect("bind");
|
2018-09-18 08:02:57 -07:00
|
|
|
let t_responder = responder("replicate_stage", Arc::new(send), vote_blob_receiver);
|
2018-07-19 21:27:35 -07:00
|
|
|
|
2018-08-10 17:56:08 -07:00
|
|
|
let mut ledger_writer = ledger_path.map(|p| LedgerWriter::open(p, false).unwrap());
|
2018-09-10 08:32:52 -07:00
|
|
|
let keypair = Arc::new(keypair);
|
2018-05-22 15:30:46 -07:00
|
|
|
|
2018-07-05 12:01:40 -07:00
|
|
|
let t_replicate = Builder::new()
|
2018-05-30 13:38:15 -07:00
|
|
|
.name("solana-replicate-stage".to_string())
|
2018-09-25 15:41:29 -07:00
|
|
|
.spawn(move || {
|
2018-09-24 21:31:20 -07:00
|
|
|
let _exit = Finalizer::new(exit);
|
2018-09-26 20:58:06 -07:00
|
|
|
let now = Instant::now();
|
|
|
|
let mut next_vote_secs = 1;
|
2018-10-10 16:49:41 -07:00
|
|
|
let mut entry_height_ = entry_height;
|
2018-09-25 15:41:29 -07:00
|
|
|
loop {
|
2018-10-10 16:49:41 -07:00
|
|
|
let leader_id = leader_scheduler
|
|
|
|
.read()
|
|
|
|
.unwrap()
|
|
|
|
.get_scheduled_leader(entry_height_)
|
|
|
|
.expect("Scheduled leader id should never be unknown at this point");
|
|
|
|
if leader_id == keypair.pubkey() {
|
|
|
|
return Some(ReplicateStageReturnType::LeaderRotation(entry_height_));
|
|
|
|
}
|
|
|
|
|
2018-09-26 20:58:06 -07:00
|
|
|
// Only vote once a second.
|
|
|
|
let vote_sender = if now.elapsed().as_secs() > next_vote_secs {
|
|
|
|
next_vote_secs += 1;
|
|
|
|
Some(&vote_blob_sender)
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
};
|
|
|
|
|
2018-09-25 15:41:29 -07:00
|
|
|
if let Err(e) = Self::replicate_requests(
|
|
|
|
&bank,
|
2018-10-08 19:55:54 -07:00
|
|
|
&cluster_info,
|
2018-09-25 15:41:29 -07:00
|
|
|
&window_receiver,
|
|
|
|
ledger_writer.as_mut(),
|
|
|
|
&keypair,
|
2018-09-26 20:58:06 -07:00
|
|
|
vote_sender,
|
2018-10-10 16:49:41 -07:00
|
|
|
&mut entry_height_,
|
|
|
|
&leader_scheduler,
|
2018-09-25 15:41:29 -07:00
|
|
|
) {
|
|
|
|
match e {
|
|
|
|
Error::RecvTimeoutError(RecvTimeoutError::Disconnected) => break,
|
|
|
|
Error::RecvTimeoutError(RecvTimeoutError::Timeout) => (),
|
|
|
|
_ => error!("{:?}", e),
|
|
|
|
}
|
2018-07-05 15:29:49 -07:00
|
|
|
}
|
2018-05-30 13:38:15 -07:00
|
|
|
}
|
2018-07-19 21:27:35 -07:00
|
|
|
|
2018-10-10 16:49:41 -07:00
|
|
|
None
|
|
|
|
}).unwrap();
|
2018-07-19 21:27:35 -07:00
|
|
|
|
2018-10-10 16:49:41 -07:00
|
|
|
ReplicateStage {
|
|
|
|
t_responder,
|
|
|
|
t_replicate,
|
|
|
|
}
|
2018-05-22 14:26:28 -07:00
|
|
|
}
|
|
|
|
}
|
2018-07-03 21:14:08 -07:00
|
|
|
|
|
|
|
impl Service for ReplicateStage {
    type JoinReturnType = Option<ReplicateStageReturnType>;

    /// Wait for both internal threads to finish. A panic in the responder
    /// thread is propagated via `?`; otherwise this returns the replicate
    /// thread's result (`Some(LeaderRotation(..))` if it exited because this
    /// node became the scheduled leader, `None` on normal shutdown).
    fn join(self) -> thread::Result<Option<ReplicateStageReturnType>> {
        self.t_responder.join()?;
        self.t_replicate.join()
    }
}
|
2018-09-13 14:00:17 -07:00
|
|
|
|
2018-10-10 16:49:41 -07:00
|
|
|
#[cfg(test)]
mod test {
    use cluster_info::{ClusterInfo, Node};
    use fullnode::Fullnode;
    use leader_scheduler::{make_active_set_entries, LeaderScheduler, LeaderSchedulerConfig};
    use ledger::{genesis, next_entries_mut, LedgerWriter};
    use logger;
    use replicate_stage::{ReplicateStage, ReplicateStageReturnType};
    use service::Service;
    use signature::{Keypair, KeypairUtil};
    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::mpsc::channel;
    use std::sync::{Arc, RwLock};

    /// End-to-end check that ReplicateStage exits with
    /// `LeaderRotation(bootstrap_height)` once it processes enough entries to
    /// reach the height at which this node becomes the scheduled leader, and
    /// that the ledger stops at exactly that height.
    #[test]
    #[ignore]
    pub fn test_replicate_stage_leader_rotation_exit() {
        logger::setup();

        // Set up dummy node to host a ReplicateStage
        let my_keypair = Keypair::new();
        let my_id = my_keypair.pubkey();
        let my_node = Node::new_localhost_with_pubkey(my_id);
        let cluster_info_me = ClusterInfo::new(my_node.info.clone()).expect("ClusterInfo::new");

        // Create a ledger
        let (mint, my_ledger_path) = genesis("test_replicate_stage_leader_rotation_exit", 10_000);
        let genesis_entries = mint.create_entries();
        let mut last_id = genesis_entries
            .last()
            .expect("expected at least one genesis entry")
            .id;

        // Write two entries to the ledger so that the validator is in the active set:
        // 1) Give the validator a nonzero number of tokens 2) A vote from the validator.
        // This will cause leader rotation after the bootstrap height
        let mut ledger_writer = LedgerWriter::open(&my_ledger_path, false).unwrap();
        let bootstrap_entries = make_active_set_entries(&my_keypair, &mint.keypair(), &last_id);
        last_id = bootstrap_entries.last().unwrap().id;
        let ledger_initial_len = (genesis_entries.len() + bootstrap_entries.len()) as u64;
        ledger_writer.write_entries(bootstrap_entries).unwrap();

        // Set up the LeaderScheduler so that this node becomes the leader at
        // bootstrap_height = num_bootstrap_slots * leader_rotation_interval
        let old_leader_id = Keypair::new().pubkey();
        let leader_rotation_interval = 10;
        let num_bootstrap_slots = 2;
        let bootstrap_height = num_bootstrap_slots * leader_rotation_interval;
        let leader_scheduler_config = LeaderSchedulerConfig::new(
            old_leader_id,
            Some(bootstrap_height),
            Some(leader_rotation_interval),
            Some(leader_rotation_interval * 2),
            // Active-set window; see LeaderSchedulerConfig::new for parameter order.
            Some(bootstrap_height),
        );

        let mut leader_scheduler = LeaderScheduler::new(&leader_scheduler_config);

        // Set up the bank
        let (bank, _, _) = Fullnode::new_bank_from_ledger(&my_ledger_path, &mut leader_scheduler);

        let leader_scheduler = Arc::new(RwLock::new(leader_scheduler));

        // Set up the replicate stage
        let (entry_sender, entry_receiver) = channel();
        let exit = Arc::new(AtomicBool::new(false));
        let replicate_stage = ReplicateStage::new(
            Arc::new(my_keypair),
            Arc::new(bank),
            Arc::new(RwLock::new(cluster_info_me)),
            entry_receiver,
            Some(&my_ledger_path),
            exit.clone(),
            ledger_initial_len,
            leader_scheduler.clone(),
        );

        // Send enough entries to trigger leader rotation
        let extra_entries = leader_rotation_interval;
        let total_entries_to_send = (bootstrap_height + extra_entries) as usize;
        let mut num_hashes = 0;
        let mut entries_to_send = vec![];

        while entries_to_send.len() < total_entries_to_send {
            let entries = next_entries_mut(&mut last_id, &mut num_hashes, vec![]);
            last_id = entries.last().expect("expected at least one entry").id;
            entries_to_send.extend(entries);
        }

        entries_to_send.truncate(total_entries_to_send);
        entry_sender.send(entries_to_send).unwrap();

        // Wait for replicate_stage to exit and check return value is correct
        assert_eq!(
            Some(ReplicateStageReturnType::LeaderRotation(bootstrap_height)),
            replicate_stage.join().expect("replicate stage join")
        );

        // The Finalizer guard must have flipped the exit flag on thread exit.
        assert_eq!(exit.load(Ordering::Relaxed), true);

        // Check ledger height is correct
        let mut leader_scheduler = Arc::try_unwrap(leader_scheduler)
            .expect("Multiple references to this RwLock still exist")
            .into_inner()
            .expect("RwLock for LeaderScheduler is still locked");

        let (_, entry_height, _) =
            Fullnode::new_bank_from_ledger(&my_ledger_path, &mut leader_scheduler);

        // Entries past the rotation point must not have been written.
        assert_eq!(entry_height, bootstrap_height);
    }
}
|