Add repair number per slot (#18082)

sakridge 2021-06-30 18:20:07 +02:00 committed by GitHub
parent 00d1125e98
commit 8d9a6deda4
2 changed files with 144 additions and 55 deletions
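
In short: the window service now passes a per-shred is_repaired flag into Blockstore::insert_shreds_handle_duplicate, each insert is tagged with a ShredSource (Turbine, Repaired, or Recovered), and per-slot counters are accumulated and reported through the shred_insert_is_full datapoint once a slot becomes full. A minimal sketch of that counting pattern, kept separate from the Blockstore internals (the SlotsStats methods below, record and on_slot_full, are illustrative helpers, not the real API):

    use std::collections::BTreeMap;
    use std::time::Instant;

    type Slot = u64;

    #[derive(Default)]
    struct SlotStats {
        num_repaired: usize,
        num_recovered: usize,
    }

    struct SlotsStats {
        last_cleanup_ts: Instant,
        stats: BTreeMap<Slot, SlotStats>,
    }

    impl SlotsStats {
        fn new() -> Self {
            Self {
                last_cleanup_ts: Instant::now(),
                stats: BTreeMap::new(),
            }
        }

        // Bump the counter for `slot` when a repaired or recovered shred is inserted.
        fn record(&mut self, slot: Slot, repaired: bool, recovered: bool) {
            let entry = self.stats.entry(slot).or_default();
            if repaired {
                entry.num_repaired += 1;
            }
            if recovered {
                entry.num_recovered += 1;
            }
        }

        // When a slot becomes full, take its counters out of the map and
        // occasionally drop entries below the current root so the map stays bounded.
        fn on_slot_full(&mut self, slot: Slot, root: Slot) -> (usize, usize) {
            let counts = self
                .stats
                .remove(&slot)
                .map(|e| (e.num_repaired, e.num_recovered))
                .unwrap_or((0, 0));
            if self.last_cleanup_ts.elapsed().as_secs() > 30 {
                self.stats = self.stats.split_off(&root);
                self.last_cleanup_ts = Instant::now();
            }
            counts
        }
    }

The actual change stores SlotsStats behind an Arc<Mutex<...>> on Blockstore and prunes entries below the latest root at most once every 30 seconds, as the blockstore diff below shows.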


@@ -188,9 +188,14 @@ where
     }
     prune_shreds_invalid_repair(&mut shreds, &mut repair_infos, outstanding_requests);
+    let repairs: Vec<_> = repair_infos
+        .iter()
+        .map(|repair_info| repair_info.is_some())
+        .collect();
     let (completed_data_sets, inserted_indices) = blockstore.insert_shreds_handle_duplicate(
         shreds,
+        repairs,
         Some(leader_schedule_cache),
         false,
         &handle_duplicate,


@@ -45,7 +45,7 @@ use std::{
     borrow::Cow,
     cell::RefCell,
     cmp,
-    collections::{HashMap, HashSet},
+    collections::{BTreeMap, HashMap, HashSet},
     convert::TryInto,
     fs,
     io::{Error as IoError, ErrorKind},
@@ -56,6 +56,7 @@ use std::{
         mpsc::{sync_channel, Receiver, SyncSender, TrySendError},
         Arc, Mutex, RwLock, RwLockWriteGuard,
     },
+    time::Instant,
 };
 use thiserror::Error;
 use trees::{Tree, TreeWalk};
@@ -148,6 +149,27 @@ pub struct Blockstore {
     pub completed_slots_senders: Vec<CompletedSlotsSender>,
     pub lowest_cleanup_slot: Arc<RwLock<Slot>>,
     no_compaction: bool,
+    slots_stats: Arc<Mutex<SlotsStats>>,
+}
+
+struct SlotsStats {
+    last_cleanup_ts: Instant,
+    stats: BTreeMap<Slot, SlotStats>,
+}
+
+impl Default for SlotsStats {
+    fn default() -> Self {
+        SlotsStats {
+            last_cleanup_ts: Instant::now(),
+            stats: BTreeMap::new(),
+        }
+    }
+}
+
+#[derive(Default)]
+struct SlotStats {
+    num_repaired: usize,
+    num_recovered: usize,
 }

 pub struct IndexMetaWorkingSetEntry {
@@ -165,6 +187,13 @@ pub struct SlotMetaWorkingSetEntry {
     did_insert_occur: bool,
 }

+#[derive(PartialEq, Debug, Clone)]
+enum ShredSource {
+    Turbine,
+    Repaired,
+    Recovered,
+}
+
 #[derive(Default)]
 pub struct BlockstoreInsertionMetrics {
     pub num_shreds: usize,
@@ -367,6 +396,7 @@ impl Blockstore {
             last_root,
             lowest_cleanup_slot: Arc::new(RwLock::new(0)),
             no_compaction: false,
+            slots_stats: Arc::new(Mutex::new(SlotsStats::default())),
         };
         if initialize_transaction_status_index {
             blockstore.initialize_transaction_status_index()?;
@@ -758,6 +788,7 @@ impl Blockstore {
     pub fn insert_shreds_handle_duplicate<F>(
         &self,
         shreds: Vec<Shred>,
+        is_repaired: Vec<bool>,
         leader_schedule: Option<&Arc<LeaderScheduleCache>>,
         is_trusted: bool,
         handle_duplicate: &F,
@@ -766,6 +797,7 @@ impl Blockstore {
     where
         F: Fn(Shred),
     {
+        assert_eq!(shreds.len(), is_repaired.len());
         let mut total_start = Measure::start("Total elapsed");
         let mut start = Measure::start("Blockstore lock");
         let _lock = self.insert_shreds_lock.lock().unwrap();
@@ -787,46 +819,56 @@ impl Blockstore {
         let mut index_meta_time = 0;
         let mut newly_completed_data_sets: Vec<CompletedDataSetInfo> = vec![];
         let mut inserted_indices = Vec::new();
-        shreds.into_iter().enumerate().for_each(|(i, shred)| {
-            if shred.is_data() {
-                let shred_slot = shred.slot();
-                if let Ok(completed_data_sets) = self.check_insert_data_shred(
-                    shred,
-                    &mut erasure_metas,
-                    &mut index_working_set,
-                    &mut slot_meta_working_set,
-                    &mut write_batch,
-                    &mut just_inserted_data_shreds,
-                    &mut index_meta_time,
-                    is_trusted,
-                    handle_duplicate,
-                    leader_schedule,
-                    false,
-                ) {
-                    newly_completed_data_sets.extend(completed_data_sets.into_iter().map(
-                        |(start_index, end_index)| CompletedDataSetInfo {
-                            slot: shred_slot,
-                            start_index,
-                            end_index,
-                        },
-                    ));
-                    inserted_indices.push(i);
-                    num_inserted += 1;
-                }
-            } else if shred.is_code() {
-                self.check_cache_coding_shred(
-                    shred,
-                    &mut erasure_metas,
-                    &mut index_working_set,
-                    &mut just_inserted_coding_shreds,
-                    &mut index_meta_time,
-                    handle_duplicate,
-                    is_trusted,
-                );
-            } else {
-                panic!("There should be no other case");
-            }
-        });
+        shreds
+            .into_iter()
+            .zip(is_repaired.into_iter())
+            .enumerate()
+            .for_each(|(i, (shred, is_repaired))| {
+                if shred.is_data() {
+                    let shred_slot = shred.slot();
+                    let shred_source = if is_repaired {
+                        ShredSource::Repaired
+                    } else {
+                        ShredSource::Turbine
+                    };
+                    if let Ok(completed_data_sets) = self.check_insert_data_shred(
+                        shred,
+                        &mut erasure_metas,
+                        &mut index_working_set,
+                        &mut slot_meta_working_set,
+                        &mut write_batch,
+                        &mut just_inserted_data_shreds,
+                        &mut index_meta_time,
+                        is_trusted,
+                        handle_duplicate,
+                        leader_schedule,
+                        shred_source,
+                    ) {
+                        newly_completed_data_sets.extend(completed_data_sets.into_iter().map(
+                            |(start_index, end_index)| CompletedDataSetInfo {
+                                slot: shred_slot,
+                                start_index,
+                                end_index,
+                            },
+                        ));
+                        inserted_indices.push(i);
+                        num_inserted += 1;
+                    }
+                } else if shred.is_code() {
+                    self.check_cache_coding_shred(
+                        shred,
+                        &mut erasure_metas,
+                        &mut index_working_set,
+                        &mut just_inserted_coding_shreds,
+                        &mut index_meta_time,
+                        handle_duplicate,
+                        is_trusted,
+                        is_repaired,
+                    );
+                } else {
+                    panic!("There should be no other case");
+                }
+            });

         start.stop();
         let insert_shreds_elapsed = start.as_us();
@@ -861,7 +903,7 @@ impl Blockstore {
                             is_trusted,
                             &handle_duplicate,
                             leader_schedule,
-                            true,
+                            ShredSource::Recovered,
                         ) {
                             Err(InsertDataShredError::Exists) => {
                                 num_recovered_exists += 1;
@@ -993,8 +1035,10 @@ impl Blockstore {
         leader_schedule: Option<&Arc<LeaderScheduleCache>>,
         is_trusted: bool,
     ) -> Result<(Vec<CompletedDataSetInfo>, Vec<usize>)> {
+        let shreds_len = shreds.len();
         self.insert_shreds_handle_duplicate(
             shreds,
+            vec![false; shreds_len],
             leader_schedule,
             is_trusted,
             &|_| {},
@@ -1038,6 +1082,7 @@ impl Blockstore {
         index_meta_time: &mut u64,
         handle_duplicate: &F,
         is_trusted: bool,
+        is_repaired: bool,
     ) -> bool
     where
         F: Fn(Shred),
@@ -1105,6 +1150,12 @@ impl Blockstore {
             return false;
         }

+        if is_repaired {
+            let mut slots_stats = self.slots_stats.lock().unwrap();
+            let mut e = slots_stats.stats.entry(slot).or_default();
+            e.num_repaired += 1;
+        }
+
         // Should be safe to modify index_meta here. Two cases
         // 1) Recovery happens: Then all inserted erasure metas are removed
         // from just_received_coding_shreds, and nothing will be committed by
@@ -1165,7 +1216,7 @@ impl Blockstore {
         is_trusted: bool,
         handle_duplicate: &F,
         leader_schedule: Option<&Arc<LeaderScheduleCache>>,
-        is_recovered: bool,
+        shred_source: ShredSource,
     ) -> std::result::Result<Vec<(u32, u32)>, InsertDataShredError>
     where
         F: Fn(Shred),
@@ -1208,15 +1259,20 @@ impl Blockstore {
                 just_inserted_data_shreds,
                 &self.last_root,
                 leader_schedule,
-                is_recovered,
+                shred_source.clone(),
             ) {
                 return Err(InsertDataShredError::InvalidShred);
             }
         }

         let set_index = u64::from(shred.common_header.fec_set_index);
-        let newly_completed_data_sets =
-            self.insert_data_shred(slot_meta, index_meta.data_mut(), &shred, write_batch)?;
+        let newly_completed_data_sets = self.insert_data_shred(
+            slot_meta,
+            index_meta.data_mut(),
+            &shred,
+            write_batch,
+            shred_source,
+        )?;
         just_inserted_data_shreds.insert((slot, shred_index), shred);
         index_meta_working_set_entry.did_insert_occur = true;
         slot_meta_entry.did_insert_occur = true;
@@ -1295,7 +1351,7 @@ impl Blockstore {
         just_inserted_data_shreds: &HashMap<(u64, u64), Shred>,
         last_root: &RwLock<u64>,
         leader_schedule: Option<&Arc<LeaderScheduleCache>>,
-        is_recovered: bool,
+        shred_source: ShredSource,
     ) -> bool {
         use crate::shred::SHRED_PAYLOAD_SIZE;
         let shred_index = u64::from(shred.index());
@@ -1371,8 +1427,8 @@ impl Blockstore {
                     (
                         "error",
                         format!(
-                            "Leader {:?}, slot {}: received index {} >= slot.last_index {}, is_recovered: {}",
-                            leader_pubkey, slot, shred_index, last_index, is_recovered
+                            "Leader {:?}, slot {}: received index {} >= slot.last_index {}, shred_source: {:?}",
+                            leader_pubkey, slot, shred_index, last_index, shred_source
                         ),
                         String
                     )
@@ -1407,8 +1463,8 @@ impl Blockstore {
                     (
                         "error",
                         format!(
-                            "Leader {:?}, slot {}: received shred_index {} < slot.received {}, is_recovered: {}",
-                            leader_pubkey, slot, shred_index, slot_meta.received, is_recovered
+                            "Leader {:?}, slot {}: received shred_index {} < slot.received {}, shred_source: {:?}",
+                            leader_pubkey, slot, shred_index, slot_meta.received, shred_source
                         ),
                         String
                     )
@@ -1426,6 +1482,7 @@ impl Blockstore {
         data_index: &mut ShredIndex,
         shred: &Shred,
         write_batch: &mut WriteBatch,
+        shred_source: ShredSource,
     ) -> Result<Vec<(u32, u32)>> {
         let slot = shred.slot();
         let index = u64::from(shred.index());
@@ -1476,7 +1533,30 @@ impl Blockstore {
             shred.reference_tick(),
             data_index,
         );
+        if shred_source == ShredSource::Repaired || shred_source == ShredSource::Recovered {
+            let mut slots_stats = self.slots_stats.lock().unwrap();
+            let mut e = slots_stats.stats.entry(slot_meta.slot).or_default();
+            if shred_source == ShredSource::Repaired {
+                e.num_repaired += 1;
+            }
+            if shred_source == ShredSource::Recovered {
+                e.num_recovered += 1;
+            }
+        }
         if slot_meta.is_full() {
+            let (num_repaired, num_recovered) = {
+                let mut slots_stats = self.slots_stats.lock().unwrap();
+                if let Some(e) = slots_stats.stats.remove(&slot_meta.slot) {
+                    if slots_stats.last_cleanup_ts.elapsed().as_secs() > 30 {
+                        let root = self.last_root();
+                        slots_stats.stats = slots_stats.stats.split_off(&root);
+                        slots_stats.last_cleanup_ts = Instant::now();
+                    }
+                    (e.num_repaired, e.num_recovered)
+                } else {
+                    (0, 0)
+                }
+            };
             datapoint_info!(
                 "shred_insert_is_full",
                 (
@@ -1486,6 +1566,8 @@ impl Blockstore {
                 ),
                 ("slot", slot_meta.slot, i64),
                 ("last_index", slot_meta.last_index, i64),
+                ("num_repaired", num_repaired, i64),
+                ("num_recovered", num_recovered, i64),
             );
         }
         trace!("inserted shred into slot {:?} and index {:?}", slot, index);
@@ -5405,7 +5487,7 @@ pub mod tests {
             &HashMap::new(),
             &last_root,
             None,
-            false
+            ShredSource::Turbine
         ));

         // Ensure that an empty shred (one with no data) would get inserted. Such shreds
@@ -5428,7 +5510,7 @@ pub mod tests {
             &HashMap::new(),
             &last_root,
             None,
-            false
+            ShredSource::Repaired,
         ));
         empty_shred.data_header.size = 0;
         assert!(!blockstore.should_insert_data_shred(
@@ -5437,7 +5519,7 @@ pub mod tests {
             &HashMap::new(),
             &last_root,
             None,
-            false
+            ShredSource::Recovered,
         ));

         // Trying to insert another "is_last" shred with index < the received index should fail
@@ -5461,7 +5543,7 @@ pub mod tests {
             &HashMap::new(),
             &last_root,
             None,
-            false
+            ShredSource::Repaired,
         ));
         assert!(blockstore.has_duplicate_shreds_in_slot(0));

@@ -5482,7 +5564,7 @@ pub mod tests {
             &HashMap::new(),
             &last_root,
             None,
-            false
+            ShredSource::Repaired,
         ));
     }
     Blockstore::destroy(&blockstore_path).expect("Expected successful database destruction");
@@ -5550,6 +5632,7 @@ pub mod tests {
                 panic!("no dupes");
             },
             false,
+            false,
         ));

         // insert again fails on dupe
@@ -5565,6 +5648,7 @@ pub mod tests {
                 counter.fetch_add(1, Ordering::Relaxed);
             },
             false,
+            false,
         ));
         assert_eq!(counter.load(Ordering::Relaxed), 1);
     }
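
For callers, the visible API change is the extra is_repaired vector passed to insert_shreds_handle_duplicate; it must be the same length as shreds (the new assert_eq! enforces this), and the plain insert_shreds wrapper now forwards vec![false; shreds.len()]. A self-contained sketch of how the flags pair up with shreds in the insertion loop (the classify helper and the generic shred parameter S are illustrative, not part of the real code):

    // Illustrative pairing of shreds with their source; mirrors the zip in
    // insert_shreds_handle_duplicate but uses a generic `S` instead of the real Shred.
    #[derive(Debug, Clone, PartialEq)]
    enum ShredSource {
        Turbine,
        Repaired,
        Recovered,
    }

    fn classify<S>(shreds: Vec<S>, is_repaired: Vec<bool>) -> Vec<(S, ShredSource)> {
        // The new code asserts both vectors have the same length before zipping.
        assert_eq!(shreds.len(), is_repaired.len());
        shreds
            .into_iter()
            .zip(is_repaired)
            .map(|(shred, repaired)| {
                let source = if repaired {
                    ShredSource::Repaired
                } else {
                    ShredSource::Turbine
                };
                (shred, source)
            })
            .collect()
    }

ShredSource::Recovered does not appear in this pairing step because it is applied separately, when shreds rebuilt from erasure coding are re-inserted.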