track fec set turbine stats (#23989)
This commit is contained in:
parent
6a7f6585ce
commit
ee6bb0d5d3
|
@ -1682,6 +1682,9 @@ impl ReplayStage {
|
||||||
blockstore
|
blockstore
|
||||||
.set_dead_slot(slot)
|
.set_dead_slot(slot)
|
||||||
.expect("Failed to mark slot as dead in blockstore");
|
.expect("Failed to mark slot as dead in blockstore");
|
||||||
|
|
||||||
|
blockstore.slots_stats.mark_dead(slot);
|
||||||
|
|
||||||
rpc_subscriptions.notify_slot_update(SlotUpdate::Dead {
|
rpc_subscriptions.notify_slot_update(SlotUpdate::Dead {
|
||||||
slot,
|
slot,
|
||||||
err: format!("error: {:?}", err),
|
err: format!("error: {:?}", err),
|
||||||
|
@ -1788,6 +1791,9 @@ impl ReplayStage {
|
||||||
epoch_slots_frozen_slots,
|
epoch_slots_frozen_slots,
|
||||||
drop_bank_sender,
|
drop_bank_sender,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
blockstore.slots_stats.mark_rooted(new_root);
|
||||||
|
|
||||||
rpc_subscriptions.notify_roots(rooted_slots);
|
rpc_subscriptions.notify_roots(rooted_slots);
|
||||||
if let Some(sender) = bank_notification_sender {
|
if let Some(sender) = bank_notification_sender {
|
||||||
sender
|
sender
|
||||||
|
@ -2931,6 +2937,7 @@ impl ReplayStage {
|
||||||
accounts_background_request_sender,
|
accounts_background_request_sender,
|
||||||
highest_confirmed_root,
|
highest_confirmed_root,
|
||||||
);
|
);
|
||||||
|
|
||||||
drop_bank_sender
|
drop_bank_sender
|
||||||
.send(removed_banks)
|
.send(removed_banks)
|
||||||
.unwrap_or_else(|err| warn!("bank drop failed: {:?}", err));
|
.unwrap_or_else(|err| warn!("bank drop failed: {:?}", err));
|
||||||
|
|
|
@ -179,7 +179,7 @@ pub struct Blockstore {
|
||||||
pub shred_timing_point_sender: Option<PohTimingSender>,
|
pub shred_timing_point_sender: Option<PohTimingSender>,
|
||||||
pub lowest_cleanup_slot: RwLock<Slot>,
|
pub lowest_cleanup_slot: RwLock<Slot>,
|
||||||
no_compaction: bool,
|
no_compaction: bool,
|
||||||
slots_stats: Mutex<SlotsStats>,
|
pub slots_stats: SlotsStats,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct IndexMetaWorkingSetEntry {
|
pub struct IndexMetaWorkingSetEntry {
|
||||||
|
@ -451,7 +451,7 @@ impl Blockstore {
|
||||||
last_root,
|
last_root,
|
||||||
lowest_cleanup_slot: RwLock::<Slot>::default(),
|
lowest_cleanup_slot: RwLock::<Slot>::default(),
|
||||||
no_compaction: false,
|
no_compaction: false,
|
||||||
slots_stats: Mutex::<SlotsStats>::default(),
|
slots_stats: SlotsStats::default(),
|
||||||
};
|
};
|
||||||
if initialize_transaction_status_index {
|
if initialize_transaction_status_index {
|
||||||
blockstore.initialize_transaction_status_index()?;
|
blockstore.initialize_transaction_status_index()?;
|
||||||
|
@ -1217,10 +1217,10 @@ impl Blockstore {
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
self.slots_stats
|
self.slots_stats
|
||||||
.lock()
|
.record_shred(shred.slot(), shred.fec_set_index(), shred_source, None);
|
||||||
.unwrap()
|
|
||||||
.add_shred(slot, shred_source);
|
|
||||||
// insert coding shred into rocks
|
// insert coding shred into rocks
|
||||||
let result = self
|
let result = self
|
||||||
.insert_coding_shred(index_meta, &shred, write_batch)
|
.insert_coding_shred(index_meta, &shred, write_batch)
|
||||||
|
@ -1652,13 +1652,13 @@ impl Blockstore {
|
||||||
end_index,
|
end_index,
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
{
|
|
||||||
let mut slots_stats = self.slots_stats.lock().unwrap();
|
self.slots_stats.record_shred(
|
||||||
slots_stats.add_shred(slot_meta.slot, shred_source);
|
shred.slot(),
|
||||||
if slot_meta.is_full() {
|
shred.fec_set_index(),
|
||||||
slots_stats.set_full(slot_meta);
|
shred_source,
|
||||||
}
|
Some(slot_meta),
|
||||||
}
|
);
|
||||||
|
|
||||||
// slot is full, send slot full timing to poh_timing_report service.
|
// slot is full, send slot full timing to poh_timing_report service.
|
||||||
if slot_meta.is_full() {
|
if slot_meta.is_full() {
|
||||||
|
|
|
@ -8,7 +8,6 @@ pub mod bigtable_delete;
|
||||||
pub mod bigtable_upload;
|
pub mod bigtable_upload;
|
||||||
pub mod bigtable_upload_service;
|
pub mod bigtable_upload_service;
|
||||||
pub mod block_error;
|
pub mod block_error;
|
||||||
mod slot_stats;
|
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
pub mod blockstore;
|
pub mod blockstore;
|
||||||
pub mod ancestor_iterator;
|
pub mod ancestor_iterator;
|
||||||
|
@ -25,6 +24,7 @@ pub mod next_slots_iterator;
|
||||||
pub mod rooted_slot_iterator;
|
pub mod rooted_slot_iterator;
|
||||||
pub mod shred;
|
pub mod shred;
|
||||||
pub mod sigverify_shreds;
|
pub mod sigverify_shreds;
|
||||||
|
pub mod slot_stats;
|
||||||
pub mod staking_utils;
|
pub mod staking_utils;
|
||||||
|
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
|
|
|
@ -1,21 +1,16 @@
|
||||||
use {
|
use {
|
||||||
crate::blockstore_meta::SlotMeta, bitflags::bitflags, lru::LruCache, solana_sdk::clock::Slot,
|
crate::blockstore_meta::SlotMeta,
|
||||||
|
bitflags::bitflags,
|
||||||
|
lru::LruCache,
|
||||||
|
solana_sdk::clock::Slot,
|
||||||
|
std::{
|
||||||
|
collections::HashMap,
|
||||||
|
sync::{Mutex, MutexGuard},
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
const SLOTS_STATS_CACHE_CAPACITY: usize = 300;
|
const SLOTS_STATS_CACHE_CAPACITY: usize = 300;
|
||||||
|
|
||||||
macro_rules! get_mut_entry (
|
|
||||||
($cache:expr, $key:expr) => (
|
|
||||||
match $cache.get_mut(&$key) {
|
|
||||||
Some(entry) => entry,
|
|
||||||
None => {
|
|
||||||
$cache.put($key, SlotStats::default());
|
|
||||||
$cache.get_mut(&$key).unwrap()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
);
|
|
||||||
);
|
|
||||||
|
|
||||||
#[derive(Copy, Clone, Debug)]
|
#[derive(Copy, Clone, Debug)]
|
||||||
pub(crate) enum ShredSource {
|
pub(crate) enum ShredSource {
|
||||||
Turbine,
|
Turbine,
|
||||||
|
@ -32,59 +27,139 @@ bitflags! {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Clone, Default)]
|
||||||
struct SlotStats {
|
pub struct SlotStats {
|
||||||
flags: SlotFlags,
|
turbine_fec_set_index_counts: HashMap</*fec_set_index*/ u32, /*count*/ usize>,
|
||||||
num_repaired: usize,
|
num_repaired: usize,
|
||||||
num_recovered: usize,
|
num_recovered: usize,
|
||||||
|
last_index: u64,
|
||||||
|
flags: SlotFlags,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) struct SlotsStats(LruCache<Slot, SlotStats>);
|
impl SlotStats {
|
||||||
|
pub fn get_min_index_count(&self) -> usize {
|
||||||
|
self.turbine_fec_set_index_counts
|
||||||
|
.iter()
|
||||||
|
.map(|(_, cnt)| *cnt)
|
||||||
|
.min()
|
||||||
|
.unwrap_or(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn report(&self, slot: Slot) {
|
||||||
|
let min_fec_set_count = self.get_min_index_count();
|
||||||
|
datapoint_info!(
|
||||||
|
"slot_stats_tracking_complete",
|
||||||
|
("slot", slot, i64),
|
||||||
|
("last_index", self.last_index, i64),
|
||||||
|
("num_repaired", self.num_repaired, i64),
|
||||||
|
("num_recovered", self.num_recovered, i64),
|
||||||
|
("min_turbine_fec_set_count", min_fec_set_count, i64),
|
||||||
|
("is_full", self.flags.contains(SlotFlags::FULL), bool),
|
||||||
|
("is_rooted", self.flags.contains(SlotFlags::ROOTED), bool),
|
||||||
|
("is_dead", self.flags.contains(SlotFlags::DEAD), bool),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct SlotsStats {
|
||||||
|
pub stats: Mutex<LruCache<Slot, SlotStats>>,
|
||||||
|
}
|
||||||
|
|
||||||
impl Default for SlotsStats {
|
impl Default for SlotsStats {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
// LruCache::unbounded because capacity is enforced manually.
|
Self {
|
||||||
Self(LruCache::unbounded())
|
stats: Mutex::new(LruCache::new(SLOTS_STATS_CACHE_CAPACITY)),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SlotsStats {
|
impl SlotsStats {
|
||||||
pub(crate) fn add_shred(&mut self, slot: Slot, source: ShredSource) {
|
fn get_or_default_with_eviction_check<'a>(
|
||||||
let entry = get_mut_entry!(self.0, slot);
|
stats: &'a mut MutexGuard<LruCache<Slot, SlotStats>>,
|
||||||
match source {
|
slot: Slot,
|
||||||
ShredSource::Turbine => (),
|
) -> (&'a mut SlotStats, Option<(Slot, SlotStats)>) {
|
||||||
ShredSource::Repaired => entry.num_repaired += 1,
|
let evicted = if stats.len() == stats.cap() {
|
||||||
ShredSource::Recovered => entry.num_recovered += 1,
|
match stats.peek_lru() {
|
||||||
|
Some((s, _)) if *s == slot => None,
|
||||||
|
_ => stats.pop_lru(),
|
||||||
}
|
}
|
||||||
self.maybe_evict_cache();
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
stats.get_or_insert(slot, SlotStats::default);
|
||||||
|
(stats.get_mut(&slot).unwrap(), evicted)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn set_full(&mut self, slot_meta: &SlotMeta) {
|
pub(crate) fn record_shred(
|
||||||
|
&self,
|
||||||
|
slot: Slot,
|
||||||
|
fec_set_index: u32,
|
||||||
|
source: ShredSource,
|
||||||
|
slot_meta: Option<&SlotMeta>,
|
||||||
|
) {
|
||||||
|
let mut slot_full_reporting_info = None;
|
||||||
|
let mut stats = self.stats.lock().unwrap();
|
||||||
|
let (mut slot_stats, evicted) = Self::get_or_default_with_eviction_check(&mut stats, slot);
|
||||||
|
match source {
|
||||||
|
ShredSource::Recovered => slot_stats.num_recovered += 1,
|
||||||
|
ShredSource::Repaired => slot_stats.num_repaired += 1,
|
||||||
|
ShredSource::Turbine => {
|
||||||
|
*slot_stats
|
||||||
|
.turbine_fec_set_index_counts
|
||||||
|
.entry(fec_set_index)
|
||||||
|
.or_default() += 1
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(meta) = slot_meta {
|
||||||
|
if meta.is_full() {
|
||||||
|
slot_stats.last_index = meta.last_index.unwrap_or_default();
|
||||||
|
if !slot_stats.flags.contains(SlotFlags::FULL) {
|
||||||
|
slot_stats.flags |= SlotFlags::FULL;
|
||||||
|
slot_full_reporting_info =
|
||||||
|
Some((slot_stats.num_repaired, slot_stats.num_recovered));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
drop(stats);
|
||||||
|
if let Some((num_repaired, num_recovered)) = slot_full_reporting_info {
|
||||||
|
let slot_meta = slot_meta.unwrap();
|
||||||
let total_time_ms =
|
let total_time_ms =
|
||||||
solana_sdk::timing::timestamp().saturating_sub(slot_meta.first_shred_timestamp);
|
solana_sdk::timing::timestamp().saturating_sub(slot_meta.first_shred_timestamp);
|
||||||
let last_index = slot_meta
|
let last_index = slot_meta
|
||||||
.last_index
|
.last_index
|
||||||
.and_then(|ix| i64::try_from(ix).ok())
|
.and_then(|ix| i64::try_from(ix).ok())
|
||||||
.unwrap_or(-1);
|
.unwrap_or(-1);
|
||||||
let entry = get_mut_entry!(self.0, slot_meta.slot);
|
|
||||||
if !entry.flags.contains(SlotFlags::FULL) {
|
|
||||||
datapoint_info!(
|
datapoint_info!(
|
||||||
"shred_insert_is_full",
|
"shred_insert_is_full",
|
||||||
("total_time_ms", total_time_ms, i64),
|
("total_time_ms", total_time_ms, i64),
|
||||||
("slot", slot_meta.slot, i64),
|
("slot", slot, i64),
|
||||||
("last_index", last_index, i64),
|
("last_index", last_index, i64),
|
||||||
("num_repaired", entry.num_repaired, i64),
|
("num_repaired", num_repaired, i64),
|
||||||
("num_recovered", entry.num_recovered, i64),
|
("num_recovered", num_recovered, i64),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
entry.flags |= SlotFlags::FULL;
|
if let Some((evicted_slot, evicted_stats)) = evicted {
|
||||||
self.maybe_evict_cache();
|
evicted_stats.report(evicted_slot);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn maybe_evict_cache(&mut self) {
|
fn add_flag(&self, slot: Slot, flag: SlotFlags) {
|
||||||
while self.0.len() > SLOTS_STATS_CACHE_CAPACITY {
|
let evicted = {
|
||||||
let (_slot, _entry) = self.0.pop_lru().unwrap();
|
let mut stats = self.stats.lock().unwrap();
|
||||||
// TODO: submit metrics for (slot, entry).
|
let (slot_stats, evicted) = Self::get_or_default_with_eviction_check(&mut stats, slot);
|
||||||
|
slot_stats.flags |= flag;
|
||||||
|
evicted
|
||||||
|
};
|
||||||
|
if let Some((evicted_slot, evicted_stats)) = evicted {
|
||||||
|
evicted_stats.report(evicted_slot);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn mark_dead(&self, slot: Slot) {
|
||||||
|
self.add_flag(slot, SlotFlags::DEAD);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn mark_rooted(&self, slot: Slot) {
|
||||||
|
self.add_flag(slot, SlotFlags::ROOTED);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue