verify that blobs match a known leader for the slot (#3927)
* validate that blobs match a known leader for the slot * clippy
This commit is contained in:
parent
b27b515186
commit
4b04c37c36
|
@ -21,7 +21,6 @@ use solana_client::rpc_client::RpcClient;
|
||||||
use solana_client::rpc_request::RpcRequest;
|
use solana_client::rpc_request::RpcRequest;
|
||||||
use solana_client::thin_client::{create_client, ThinClient};
|
use solana_client::thin_client::{create_client, ThinClient};
|
||||||
use solana_sdk::client::{AsyncClient, SyncClient};
|
use solana_sdk::client::{AsyncClient, SyncClient};
|
||||||
|
|
||||||
use solana_sdk::hash::{Hash, Hasher};
|
use solana_sdk::hash::{Hash, Hasher};
|
||||||
use solana_sdk::signature::{Keypair, KeypairUtil, Signature};
|
use solana_sdk::signature::{Keypair, KeypairUtil, Signature};
|
||||||
use solana_sdk::system_transaction;
|
use solana_sdk::system_transaction;
|
||||||
|
@ -235,6 +234,7 @@ impl Replicator {
|
||||||
let (retransmit_sender, retransmit_receiver) = channel();
|
let (retransmit_sender, retransmit_receiver) = channel();
|
||||||
|
|
||||||
let window_service = WindowService::new(
|
let window_service = WindowService::new(
|
||||||
|
None, //TODO: need a way to validate blobs... https://github.com/solana-labs/solana/issues/3924
|
||||||
blocktree.clone(),
|
blocktree.clone(),
|
||||||
cluster_info.clone(),
|
cluster_info.clone(),
|
||||||
blob_fetch_receiver,
|
blob_fetch_receiver,
|
||||||
|
|
|
@ -71,6 +71,7 @@ fn retransmitter(
|
||||||
cluster_info: Arc<RwLock<ClusterInfo>>,
|
cluster_info: Arc<RwLock<ClusterInfo>>,
|
||||||
r: BlobReceiver,
|
r: BlobReceiver,
|
||||||
) -> JoinHandle<()> {
|
) -> JoinHandle<()> {
|
||||||
|
let bank_forks = bank_forks.clone();
|
||||||
Builder::new()
|
Builder::new()
|
||||||
.name("solana-retransmitter".to_string())
|
.name("solana-retransmitter".to_string())
|
||||||
.spawn(move || {
|
.spawn(move || {
|
||||||
|
@ -99,7 +100,7 @@ pub struct RetransmitStage {
|
||||||
impl RetransmitStage {
|
impl RetransmitStage {
|
||||||
#[allow(clippy::new_ret_no_self)]
|
#[allow(clippy::new_ret_no_self)]
|
||||||
pub fn new(
|
pub fn new(
|
||||||
bank_forks: &Arc<RwLock<BankForks>>,
|
bank_forks: Arc<RwLock<BankForks>>,
|
||||||
blocktree: Arc<Blocktree>,
|
blocktree: Arc<Blocktree>,
|
||||||
cluster_info: &Arc<RwLock<ClusterInfo>>,
|
cluster_info: &Arc<RwLock<ClusterInfo>>,
|
||||||
retransmit_socket: Arc<UdpSocket>,
|
retransmit_socket: Arc<UdpSocket>,
|
||||||
|
@ -116,6 +117,7 @@ impl RetransmitStage {
|
||||||
retransmit_receiver,
|
retransmit_receiver,
|
||||||
);
|
);
|
||||||
let window_service = WindowService::new(
|
let window_service = WindowService::new(
|
||||||
|
Some(bank_forks),
|
||||||
blocktree,
|
blocktree,
|
||||||
cluster_info.clone(),
|
cluster_info.clone(),
|
||||||
fetch_stage_receiver,
|
fetch_stage_receiver,
|
||||||
|
|
|
@ -102,7 +102,7 @@ impl Tvu {
|
||||||
//the packets coming out of blob_receiver need to be sent to the GPU and verified
|
//the packets coming out of blob_receiver need to be sent to the GPU and verified
|
||||||
//then sent to the window, which does the erasure coding reconstruction
|
//then sent to the window, which does the erasure coding reconstruction
|
||||||
let retransmit_stage = RetransmitStage::new(
|
let retransmit_stage = RetransmitStage::new(
|
||||||
&bank_forks,
|
bank_forks.clone(),
|
||||||
blocktree.clone(),
|
blocktree.clone(),
|
||||||
&cluster_info,
|
&cluster_info,
|
||||||
Arc::new(retransmit_socket),
|
Arc::new(retransmit_socket),
|
||||||
|
|
|
@ -1,15 +1,17 @@
|
||||||
//! `window_service` handles the data plane incoming blobs, storing them in
|
//! `window_service` handles the data plane incoming blobs, storing them in
|
||||||
//! blocktree and retransmitting where required
|
//! blocktree and retransmitting where required
|
||||||
//!
|
//!
|
||||||
|
use crate::bank_forks::BankForks;
|
||||||
use crate::blocktree::Blocktree;
|
use crate::blocktree::Blocktree;
|
||||||
use crate::cluster_info::ClusterInfo;
|
use crate::cluster_info::ClusterInfo;
|
||||||
use crate::packet::{SharedBlob, BLOB_HEADER_SIZE};
|
use crate::leader_schedule_utils::slot_leader_at;
|
||||||
|
use crate::packet::{Blob, SharedBlob, BLOB_HEADER_SIZE};
|
||||||
use crate::repair_service::{RepairService, RepairSlotRange};
|
use crate::repair_service::{RepairService, RepairSlotRange};
|
||||||
use crate::result::{Error, Result};
|
use crate::result::{Error, Result};
|
||||||
use crate::service::Service;
|
use crate::service::Service;
|
||||||
use crate::streamer::{BlobReceiver, BlobSender};
|
use crate::streamer::{BlobReceiver, BlobSender};
|
||||||
use solana_metrics::counter::Counter;
|
use solana_metrics::counter::Counter;
|
||||||
use solana_metrics::{influxdb, submit};
|
use solana_runtime::bank::Bank;
|
||||||
use solana_sdk::pubkey::Pubkey;
|
use solana_sdk::pubkey::Pubkey;
|
||||||
use solana_sdk::timing::duration_as_ms;
|
use solana_sdk::timing::duration_as_ms;
|
||||||
use std::net::UdpSocket;
|
use std::net::UdpSocket;
|
||||||
|
@ -67,33 +69,56 @@ fn process_blobs(blobs: &[SharedBlob], blocktree: &Arc<Blocktree>) -> Result<()>
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// drop blobs that are from myself or not from the correct leader for the
|
||||||
|
/// blob's slot
|
||||||
|
fn should_retransmit_and_persist(blob: &Blob, bank: Option<&Arc<Bank>>, my_id: &Pubkey) -> bool {
|
||||||
|
let slot_leader_id = bank.and_then(|bank| slot_leader_at(blob.slot(), &bank));
|
||||||
|
|
||||||
|
if blob.id() == *my_id {
|
||||||
|
inc_new_counter_info!("streamer-recv_window-circular_transmission", 1);
|
||||||
|
false
|
||||||
|
} else if slot_leader_id == None {
|
||||||
|
inc_new_counter_info!("streamer-recv_window-unknown_leader", 1);
|
||||||
|
true
|
||||||
|
} else if slot_leader_id != Some(blob.id()) {
|
||||||
|
inc_new_counter_info!("streamer-recv_window-wrong_leader", 1);
|
||||||
|
false
|
||||||
|
} else {
|
||||||
|
true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn recv_window(
|
fn recv_window(
|
||||||
|
bank_forks: Option<&Arc<RwLock<BankForks>>>,
|
||||||
blocktree: &Arc<Blocktree>,
|
blocktree: &Arc<Blocktree>,
|
||||||
id: &Pubkey,
|
my_id: &Pubkey,
|
||||||
r: &BlobReceiver,
|
r: &BlobReceiver,
|
||||||
retransmit: &BlobSender,
|
retransmit: &BlobSender,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let timer = Duration::from_millis(200);
|
let timer = Duration::from_millis(200);
|
||||||
let mut dq = r.recv_timeout(timer)?;
|
let mut blobs = r.recv_timeout(timer)?;
|
||||||
|
|
||||||
while let Ok(mut nq) = r.try_recv() {
|
while let Ok(mut blob) = r.try_recv() {
|
||||||
dq.append(&mut nq)
|
blobs.append(&mut blob)
|
||||||
}
|
}
|
||||||
let now = Instant::now();
|
let now = Instant::now();
|
||||||
inc_new_counter_info!("streamer-recv_window-recv", dq.len(), 100);
|
inc_new_counter_info!("streamer-recv_window-recv", blobs.len());
|
||||||
|
|
||||||
submit(
|
blobs.retain(|blob| {
|
||||||
influxdb::Point::new("recv-window")
|
should_retransmit_and_persist(
|
||||||
.add_field("count", influxdb::Value::Integer(dq.len() as i64))
|
&blob.read().unwrap(),
|
||||||
.to_owned(),
|
bank_forks
|
||||||
);
|
.map(|bank_forks| bank_forks.read().unwrap().working_bank())
|
||||||
|
.as_ref(),
|
||||||
|
my_id,
|
||||||
|
)
|
||||||
|
});
|
||||||
|
|
||||||
retransmit_blobs(&dq, retransmit, id)?;
|
retransmit_blobs(&blobs, retransmit, my_id)?;
|
||||||
|
|
||||||
//send a contiguous set of blocks
|
trace!("{} num blobs received: {}", my_id, blobs.len());
|
||||||
trace!("{} num blobs received: {}", id, dq.len());
|
|
||||||
|
|
||||||
process_blobs(&dq, blocktree)?;
|
process_blobs(&blobs, blocktree)?;
|
||||||
|
|
||||||
trace!(
|
trace!(
|
||||||
"Elapsed processing time in recv_window(): {}",
|
"Elapsed processing time in recv_window(): {}",
|
||||||
|
@ -128,6 +153,7 @@ pub struct WindowService {
|
||||||
|
|
||||||
impl WindowService {
|
impl WindowService {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
|
bank_forks: Option<Arc<RwLock<BankForks>>>,
|
||||||
blocktree: Arc<Blocktree>,
|
blocktree: Arc<Blocktree>,
|
||||||
cluster_info: Arc<RwLock<ClusterInfo>>,
|
cluster_info: Arc<RwLock<ClusterInfo>>,
|
||||||
r: BlobReceiver,
|
r: BlobReceiver,
|
||||||
|
@ -144,6 +170,7 @@ impl WindowService {
|
||||||
repair_slot_range,
|
repair_slot_range,
|
||||||
);
|
);
|
||||||
let exit = exit.clone();
|
let exit = exit.clone();
|
||||||
|
let bank_forks = bank_forks.clone();
|
||||||
let t_window = Builder::new()
|
let t_window = Builder::new()
|
||||||
.name("solana-window".to_string())
|
.name("solana-window".to_string())
|
||||||
.spawn(move || {
|
.spawn(move || {
|
||||||
|
@ -154,7 +181,9 @@ impl WindowService {
|
||||||
if exit.load(Ordering::Relaxed) {
|
if exit.load(Ordering::Relaxed) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if let Err(e) = recv_window(&blocktree, &id, &r, &retransmit) {
|
if let Err(e) =
|
||||||
|
recv_window(bank_forks.as_ref(), &blocktree, &id, &r, &retransmit)
|
||||||
|
{
|
||||||
match e {
|
match e {
|
||||||
Error::RecvTimeoutError(RecvTimeoutError::Disconnected) => break,
|
Error::RecvTimeoutError(RecvTimeoutError::Disconnected) => break,
|
||||||
Error::RecvTimeoutError(RecvTimeoutError::Timeout) => (),
|
Error::RecvTimeoutError(RecvTimeoutError::Timeout) => (),
|
||||||
|
@ -187,12 +216,15 @@ impl Service for WindowService {
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::bank_forks::BankForks;
|
||||||
use crate::blocktree::{get_tmp_ledger_path, Blocktree};
|
use crate::blocktree::{get_tmp_ledger_path, Blocktree};
|
||||||
use crate::cluster_info::{ClusterInfo, Node};
|
use crate::cluster_info::{ClusterInfo, Node};
|
||||||
use crate::entry::{make_consecutive_blobs, make_tiny_test_entries, EntrySlice};
|
use crate::entry::{make_consecutive_blobs, make_tiny_test_entries, EntrySlice};
|
||||||
use crate::packet::index_blobs;
|
use crate::packet::{index_blobs, Blob};
|
||||||
use crate::service::Service;
|
use crate::service::Service;
|
||||||
use crate::streamer::{blob_receiver, responder};
|
use crate::streamer::{blob_receiver, responder};
|
||||||
|
use solana_runtime::bank::Bank;
|
||||||
|
use solana_sdk::genesis_block::GenesisBlock;
|
||||||
use solana_sdk::hash::Hash;
|
use solana_sdk::hash::Hash;
|
||||||
use std::fs::remove_dir_all;
|
use std::fs::remove_dir_all;
|
||||||
use std::net::UdpSocket;
|
use std::net::UdpSocket;
|
||||||
|
@ -224,6 +256,46 @@ mod test {
|
||||||
Blocktree::destroy(&blocktree_path).expect("Expected successful database destruction");
|
Blocktree::destroy(&blocktree_path).expect("Expected successful database destruction");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_should_retransmit_and_persist() {
|
||||||
|
let me_id = Pubkey::new_rand();
|
||||||
|
let leader_id = Pubkey::new_rand();
|
||||||
|
let bank = Arc::new(Bank::new(
|
||||||
|
&GenesisBlock::new_with_leader(100, &leader_id, 10).0,
|
||||||
|
));
|
||||||
|
|
||||||
|
let mut blob = Blob::default();
|
||||||
|
blob.set_id(&leader_id);
|
||||||
|
|
||||||
|
// without a Bank and blobs not from me, blob continues
|
||||||
|
assert_eq!(should_retransmit_and_persist(&blob, None, &me_id), true);
|
||||||
|
|
||||||
|
// with a Bank for slot 0, blob continues
|
||||||
|
assert_eq!(
|
||||||
|
should_retransmit_and_persist(&blob, Some(&bank), &me_id),
|
||||||
|
true
|
||||||
|
);
|
||||||
|
|
||||||
|
// set the blob to have come from the wrong leader
|
||||||
|
blob.set_id(&Pubkey::new_rand());
|
||||||
|
assert_eq!(
|
||||||
|
should_retransmit_and_persist(&blob, Some(&bank), &me_id),
|
||||||
|
false
|
||||||
|
);
|
||||||
|
|
||||||
|
// with a Bank and no idea who leader is, we keep the blobs (for now)
|
||||||
|
// TODO: persistr in blocktree that we didn't know who the leader was at the time?
|
||||||
|
blob.set_slot(100);
|
||||||
|
assert_eq!(
|
||||||
|
should_retransmit_and_persist(&blob, Some(&bank), &me_id),
|
||||||
|
true
|
||||||
|
);
|
||||||
|
|
||||||
|
// if the blob came back from me, it doesn't continue, whether or not I have a bank
|
||||||
|
blob.set_id(&me_id);
|
||||||
|
assert_eq!(should_retransmit_and_persist(&blob, None, &me_id), false);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
pub fn window_send_test() {
|
pub fn window_send_test() {
|
||||||
solana_logger::setup();
|
solana_logger::setup();
|
||||||
|
@ -244,6 +316,10 @@ mod test {
|
||||||
Blocktree::open(&blocktree_path).expect("Expected to be able to open database ledger"),
|
Blocktree::open(&blocktree_path).expect("Expected to be able to open database ledger"),
|
||||||
);
|
);
|
||||||
let t_window = WindowService::new(
|
let t_window = WindowService::new(
|
||||||
|
Some(Arc::new(RwLock::new(BankForks::new(
|
||||||
|
0,
|
||||||
|
Bank::new(&GenesisBlock::new_with_leader(100, &me_id, 10).0),
|
||||||
|
)))),
|
||||||
blocktree,
|
blocktree,
|
||||||
subs,
|
subs,
|
||||||
r_reader,
|
r_reader,
|
||||||
|
@ -316,6 +392,10 @@ mod test {
|
||||||
Blocktree::open(&blocktree_path).expect("Expected to be able to open database ledger"),
|
Blocktree::open(&blocktree_path).expect("Expected to be able to open database ledger"),
|
||||||
);
|
);
|
||||||
let t_window = WindowService::new(
|
let t_window = WindowService::new(
|
||||||
|
Some(Arc::new(RwLock::new(BankForks::new(
|
||||||
|
0,
|
||||||
|
Bank::new(&GenesisBlock::new_with_leader(100, &me_id, 10).0),
|
||||||
|
)))),
|
||||||
blocktree,
|
blocktree,
|
||||||
subs.clone(),
|
subs.clone(),
|
||||||
r_reader,
|
r_reader,
|
||||||
|
|
Loading…
Reference in New Issue