Broadcast stage tuning (#5989)

parent 6657312f44
commit ca9d4e34df
@@ -52,7 +52,7 @@ pub const NUM_THREADS: u32 = 4;
 const TOTAL_BUFFERED_PACKETS: usize = 500_000;

-const MAX_NUM_TRANSACTIONS_PER_BATCH: usize = 512;
+const MAX_NUM_TRANSACTIONS_PER_BATCH: usize = 128;

 /// Stores the stage's thread handle and output receiver.
 pub struct BankingStage {
@@ -8,7 +8,7 @@ use crate::poh_recorder::WorkingBankEntry;
 use crate::result::{Error, Result};
 use crate::service::Service;
 use crate::staking_utils;
-use solana_metrics::{datapoint, inc_new_counter_error, inc_new_counter_info};
+use solana_metrics::{datapoint_info, inc_new_counter_error, inc_new_counter_info};
 use std::net::UdpSocket;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::mpsc::{Receiver, RecvTimeoutError};
@@ -15,6 +15,11 @@ pub(super) struct ReceiveResults {
     pub last_tick: u64,
 }

+/// This parameter tunes how many entries are received in one iteration of the recv loop.
+/// It prevents the broadcast stage from consuming more entries than it can keep up with,
+/// which could otherwise delay shredding and broadcasting shreds to peer validators.
+const RECEIVE_ENTRY_COUNT_THRESHOLD: usize = 8;
+
 pub(super) fn recv_slot_entries(receiver: &Receiver<WorkingBankEntry>) -> Result<ReceiveResults> {
     let timer = Duration::new(1, 0);
     let (bank, (entry, mut last_tick)) = receiver.recv_timeout(timer)?;
@@ -38,6 +43,10 @@ pub(super) fn recv_slot_entries(receiver: &Receiver<WorkingBankEntry>) -> Result
         last_tick = tick_height;
         entries.push(entry);

+        if entries.len() >= RECEIVE_ENTRY_COUNT_THRESHOLD {
+            break;
+        }
+
         assert!(last_tick <= max_tick_height);
         if last_tick == max_tick_height {
             break;
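The hunk above caps how many entries a single recv iteration may drain. Below is a minimal standalone sketch of that bounded-drain pattern; the function name and item type are illustrative, and the real recv_slot_entries additionally tracks the bank and tick heights:

use std::sync::mpsc::{Receiver, RecvTimeoutError};
use std::time::Duration;

const RECEIVE_ENTRY_COUNT_THRESHOLD: usize = 8;

// Block for the first item, then drain opportunistically, but stop once the
// batch reaches the threshold so shredding/broadcast can start sooner.
fn recv_batch(receiver: &Receiver<u64>) -> Result<Vec<u64>, RecvTimeoutError> {
    let mut batch = vec![receiver.recv_timeout(Duration::new(1, 0))?];
    while let Ok(item) = receiver.try_recv() {
        batch.push(item);
        if batch.len() >= RECEIVE_ENTRY_COUNT_THRESHOLD {
            break;
        }
    }
    Ok(batch)
}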
@@ -23,16 +23,19 @@ impl StandardBroadcastRun {
     fn update_broadcast_stats(
         &mut self,
+        receive_entries_elapsed: u64,
+        shredding_elapsed: u64,
+        insert_shreds_elapsed: u64,
         broadcast_elapsed: u64,
         run_elapsed: u64,
         num_entries: usize,
-        to_blobs_elapsed: u64,
+        num_shreds: usize,
         blob_index: u64,
     ) {
         inc_new_counter_info!("broadcast_service-time_ms", broadcast_elapsed as usize);

         self.stats.num_entries.push(num_entries);
-        self.stats.to_blobs_elapsed.push(to_blobs_elapsed);
+        self.stats.to_blobs_elapsed.push(shredding_elapsed);
         self.stats.run_elapsed.push(run_elapsed);
         if self.stats.num_entries.len() >= 16 {
             info!(
@@ -44,7 +47,16 @@ impl StandardBroadcastRun {
             self.stats.run_elapsed.clear();
         }

-        datapoint!("broadcast-service", ("transmit-index", blob_index, i64));
+        datapoint_info!(
+            "broadcast-service",
+            ("num_entries", num_entries as i64, i64),
+            ("num_shreds", num_shreds as i64, i64),
+            ("receive_time", receive_entries_elapsed as i64, i64),
+            ("shredding_time", shredding_elapsed as i64, i64),
+            ("insert_shred_time", insert_shreds_elapsed as i64, i64),
+            ("broadcast_time", broadcast_elapsed as i64, i64),
+            ("transmit-index", blob_index as i64, i64),
+        );
     }
 }
@@ -65,7 +77,6 @@ impl BroadcastRun for StandardBroadcastRun {
         inc_new_counter_info!("broadcast_service-entries_received", num_entries);

         // 2) Convert entries to blobs + generate coding blobs
-        let to_blobs_start = Instant::now();
         let keypair = &cluster_info.read().unwrap().keypair.clone();
         let latest_shred_index = blocktree
             .meta(bank.slot())
@@ -79,6 +90,7 @@ impl BroadcastRun for StandardBroadcastRun {
             0
         };

+        let to_shreds_start = Instant::now();
         let (shred_infos, latest_shred_index) = entries_to_shreds(
             receive_results.entries,
             last_tick,
@@ -88,14 +100,15 @@ impl BroadcastRun for StandardBroadcastRun {
             latest_shred_index,
             parent_slot,
         );
+        let to_shreds_elapsed = to_shreds_start.elapsed();

         let all_seeds: Vec<[u8; 32]> = shred_infos.iter().map(|s| s.seed()).collect();
         let num_shreds = shred_infos.len();
+        let insert_shreds_start = Instant::now();
         blocktree
             .insert_shreds(shred_infos.clone(), None)
             .expect("Failed to insert shreds in blocktree");
+        let insert_shreds_elapsed = insert_shreds_start.elapsed();
-        let to_blobs_elapsed = to_blobs_start.elapsed();

         // 3) Start broadcast step
         let broadcast_start = Instant::now();
@@ -111,14 +124,17 @@ impl BroadcastRun for StandardBroadcastRun {
             stakes.as_ref(),
         )?;

-        inc_new_counter_debug!("streamer-broadcast-sent", num_shreds);

         let broadcast_elapsed = broadcast_start.elapsed();
         self.update_broadcast_stats(
+            duration_as_ms(&receive_elapsed),
+            duration_as_ms(&to_shreds_elapsed),
+            duration_as_ms(&insert_shreds_elapsed),
             duration_as_ms(&broadcast_elapsed),
-            duration_as_ms(&(receive_elapsed + to_blobs_elapsed + broadcast_elapsed)),
+            duration_as_ms(
+                &(receive_elapsed + to_shreds_elapsed + insert_shreds_elapsed + broadcast_elapsed),
+            ),
             num_entries,
-            duration_as_ms(&to_blobs_elapsed),
+            num_shreds,
             latest_shred_index,
         );
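Each stage above is timed the same way: take an Instant before the work, call .elapsed() after it, and convert to milliseconds for reporting. A small self-contained sketch of that pattern; the duration_as_ms below is a stand-in for the helper the code above imports:

use std::time::{Duration, Instant};

// Stand-in for the `duration_as_ms` helper used in the diff above.
fn duration_as_ms(d: &Duration) -> u64 {
    d.as_secs() * 1000 + u64::from(d.subsec_millis())
}

fn main() {
    let to_shreds_start = Instant::now();
    // ... shred entries here ...
    let to_shreds_elapsed = to_shreds_start.elapsed();

    let insert_shreds_start = Instant::now();
    // ... insert shreds into the blocktree here ...
    let insert_shreds_elapsed = insert_shreds_start.elapsed();

    // Durations add, so a total can be reported alongside the per-stage times.
    println!(
        "shredding={}ms insert={}ms total={}ms",
        duration_as_ms(&to_shreds_elapsed),
        duration_as_ms(&insert_shreds_elapsed),
        duration_as_ms(&(to_shreds_elapsed + insert_shreds_elapsed)),
    );
}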
@@ -694,7 +694,12 @@ impl ReplayStage {
         let bank_progress = &mut progress
             .entry(bank.slot())
             .or_insert_with(|| ForkProgress::new(bank.last_blockhash()));
-        let result = Self::verify_and_process_entries(&bank, &entries, &bank_progress.last_entry);
+        let result = Self::verify_and_process_entries(
+            &bank,
+            &entries,
+            &bank_progress.last_entry,
+            bank_progress.num_blobs,
+        );
         bank_progress.num_blobs += num;
         if let Some(last_entry) = entries.last() {
             bank_progress.last_entry = last_entry.hash;
@@ -707,14 +712,16 @@ impl ReplayStage {
         bank: &Bank,
         entries: &[Entry],
         last_entry: &Hash,
+        shred_index: usize,
     ) -> Result<()> {
         if !entries.verify(last_entry) {
             warn!(
-                "entry verification failed {} {} {} {}",
+                "entry verification failed {} {} {} {} {}",
                 entries.len(),
                 bank.tick_height(),
                 last_entry,
-                bank.last_blockhash()
+                bank.last_blockhash(),
+                shred_index
             );

             datapoint_error!(
@@ -23,8 +23,6 @@ lazy_static! {
         { serialized_size(&DataShredHeader::default()).unwrap() as usize };
     static ref SIZE_OF_SIGNATURE: usize =
         { bincode::serialized_size(&Signature::default()).unwrap() as usize };
-    static ref SIZE_OF_EMPTY_VEC: usize =
-        { bincode::serialized_size(&vec![0u8; 0]).unwrap() as usize };
     static ref SIZE_OF_SHRED_TYPE: usize = { bincode::serialized_size(&0u8).unwrap() as usize };
 }
@@ -37,6 +35,69 @@ thread_local!(static PAR_THREAD_POOL: RefCell<ThreadPool> = RefCell::new(rayon::
 pub const DATA_SHRED: u8 = 0b1010_0101;
 pub const CODING_SHRED: u8 = 0b0101_1010;

+/// This limit comes from reed solomon library, but unfortunately they don't have
+/// a public constant defined for it.
+const MAX_DATA_SHREDS_PER_FEC_BLOCK: u32 = 16;
+
+/// Based on rse benchmarks, the optimal erasure config uses 16 data shreds and 4 coding shreds
+pub const RECOMMENDED_FEC_RATE: f32 = 0.25;
+
+const LAST_SHRED_IN_SLOT: u8 = 0b0000_0001;
+const DATA_COMPLETE_SHRED: u8 = 0b0000_0010;
+
+/// A common header that is present at start of every shred
+#[derive(Serialize, Clone, Deserialize, Default, PartialEq, Debug)]
+pub struct ShredCommonHeader {
+    pub signature: Signature,
+    pub slot: u64,
+    pub index: u32,
+}
+
+/// A common header that is present at start of every data shred
+#[derive(Serialize, Clone, Deserialize, PartialEq, Debug)]
+pub struct DataShredHeader {
+    pub common_header: CodingShredHeader,
+    pub data_header: ShredCommonHeader,
+    pub parent_offset: u16,
+    pub flags: u8,
+}
+
+/// The coding shred header has FEC information
+#[derive(Serialize, Clone, Deserialize, PartialEq, Debug)]
+pub struct CodingShredHeader {
+    pub shred_type: u8,
+    pub coding_header: ShredCommonHeader,
+    pub num_data_shreds: u16,
+    pub num_coding_shreds: u16,
+    pub position: u16,
+}
+
+impl Default for DataShredHeader {
+    fn default() -> Self {
+        DataShredHeader {
+            common_header: CodingShredHeader {
+                shred_type: DATA_SHRED,
+                ..CodingShredHeader::default()
+            },
+            data_header: ShredCommonHeader::default(),
+            parent_offset: 0,
+            flags: 0,
+        }
+    }
+}
+
+impl Default for CodingShredHeader {
+    fn default() -> Self {
+        CodingShredHeader {
+            shred_type: CODING_SHRED,
+            coding_header: ShredCommonHeader::default(),
+            num_data_shreds: 0,
+            num_coding_shreds: 0,
+            position: 0,
+        }
+    }
+}
+
 #[derive(Clone, Debug, PartialEq)]
 pub struct Shred {
     pub headers: DataShredHeader,
@@ -180,69 +241,6 @@ impl Shred {
         }
     }
 }

-/// This limit comes from reed solomon library, but unfortunately they don't have
-/// a public constant defined for it.
-const MAX_DATA_SHREDS_PER_FEC_BLOCK: u32 = 16;
-
-/// Based on rse benchmarks, the optimal erasure config uses 16 data shreds and 4 coding shreds
-pub const RECOMMENDED_FEC_RATE: f32 = 0.25;
-
-const LAST_SHRED_IN_SLOT: u8 = 0b0000_0001;
-const DATA_COMPLETE_SHRED: u8 = 0b0000_0010;
-
-/// A common header that is present at start of every shred
-#[derive(Serialize, Clone, Deserialize, Default, PartialEq, Debug)]
-pub struct ShredCommonHeader {
-    pub signature: Signature,
-    pub slot: u64,
-    pub index: u32,
-}
-
-/// A common header that is present at start of every data shred
-#[derive(Serialize, Clone, Deserialize, PartialEq, Debug)]
-pub struct DataShredHeader {
-    pub common_header: CodingShredHeader,
-    pub data_header: ShredCommonHeader,
-    pub parent_offset: u16,
-    pub flags: u8,
-}
-
-/// The coding shred header has FEC information
-#[derive(Serialize, Clone, Deserialize, PartialEq, Debug)]
-pub struct CodingShredHeader {
-    pub shred_type: u8,
-    pub coding_header: ShredCommonHeader,
-    pub num_data_shreds: u16,
-    pub num_coding_shreds: u16,
-    pub position: u16,
-}
-
-impl Default for DataShredHeader {
-    fn default() -> Self {
-        DataShredHeader {
-            common_header: CodingShredHeader {
-                shred_type: DATA_SHRED,
-                ..CodingShredHeader::default()
-            },
-            data_header: ShredCommonHeader::default(),
-            parent_offset: 0,
-            flags: 0,
-        }
-    }
-}
-
-impl Default for CodingShredHeader {
-    fn default() -> Self {
-        CodingShredHeader {
-            shred_type: CODING_SHRED,
-            coding_header: ShredCommonHeader::default(),
-            num_data_shreds: 0,
-            num_coding_shreds: 0,
-            position: 0,
-        }
-    }
-}
-
 #[derive(Debug)]
 pub struct Shredder {
     slot: u64,
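For reference, RECOMMENDED_FEC_RATE expresses coding shreds as a fraction of data shreds: 16 data shreds at a rate of 0.25 give the 4 coding shreds the comment mentions. A sketch of that arithmetic; the ceil rounding is an assumption, since this diff does not show how the Shredder actually derives its erasure config:

// Illustrative only: how a 0.25 FEC rate maps data shreds to coding shreds.
pub const RECOMMENDED_FEC_RATE: f32 = 0.25;
const MAX_DATA_SHREDS_PER_FEC_BLOCK: u32 = 16;

fn num_coding_shreds(num_data: u32, fec_rate: f32) -> u32 {
    // 16 data shreds at rate 0.25 -> 4 coding shreds per FEC block.
    (num_data as f32 * fec_rate).ceil() as u32
}

fn main() {
    assert_eq!(
        num_coding_shreds(MAX_DATA_SHREDS_PER_FEC_BLOCK, RECOMMENDED_FEC_RATE),
        4
    );
}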