Refactor blockstore recovery code (#10008)
This commit is contained in:
parent
1e80044e93
commit
9575afc8fa
|
@ -547,49 +547,18 @@ impl Blockstore {
|
||||||
Ok(slot_iterator.map(move |(rooted_slot, _)| rooted_slot))
|
Ok(slot_iterator.map(move |(rooted_slot, _)| rooted_slot))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn try_shred_recovery(
|
fn get_recovery_data_shreds(
|
||||||
db: &Database,
|
index: &mut Index,
|
||||||
erasure_metas: &HashMap<(u64, u64), ErasureMeta>,
|
set_index: u64,
|
||||||
index_working_set: &mut HashMap<u64, IndexMetaWorkingSetEntry>,
|
slot: Slot,
|
||||||
|
erasure_meta: &ErasureMeta,
|
||||||
|
available_shreds: &mut Vec<Shred>,
|
||||||
prev_inserted_datas: &mut HashMap<(u64, u64), Shred>,
|
prev_inserted_datas: &mut HashMap<(u64, u64), Shred>,
|
||||||
prev_inserted_codes: &mut HashMap<(u64, u64), Shred>,
|
data_cf: &LedgerColumn<cf::ShredData>,
|
||||||
) -> Vec<Shred> {
|
) {
|
||||||
let data_cf = db.column::<cf::ShredData>();
|
|
||||||
let code_cf = db.column::<cf::ShredCode>();
|
|
||||||
let mut recovered_data_shreds = vec![];
|
|
||||||
// Recovery rules:
|
|
||||||
// 1. Only try recovery around indexes for which new data or coding shreds are received
|
|
||||||
// 2. For new data shreds, check if an erasure set exists. If not, don't try recovery
|
|
||||||
// 3. Before trying recovery, check if enough number of shreds have been received
|
|
||||||
// 3a. Enough number of shreds = (#data + #coding shreds) > erasure.num_data
|
|
||||||
for (&(slot, set_index), erasure_meta) in erasure_metas.iter() {
|
|
||||||
let submit_metrics = |attempted: bool, status: String, recovered: usize| {
|
|
||||||
datapoint_debug!(
|
|
||||||
"blockstore-erasure",
|
|
||||||
("slot", slot as i64, i64),
|
|
||||||
("start_index", set_index as i64, i64),
|
|
||||||
(
|
|
||||||
"end_index",
|
|
||||||
(erasure_meta.set_index + erasure_meta.config.num_data() as u64) as i64,
|
|
||||||
i64
|
|
||||||
),
|
|
||||||
("recovery_attempted", attempted, bool),
|
|
||||||
("recovery_status", status, String),
|
|
||||||
("recovered", recovered as i64, i64),
|
|
||||||
);
|
|
||||||
};
|
|
||||||
|
|
||||||
let index_meta_entry = index_working_set.get_mut(&slot).expect("Index");
|
|
||||||
let index = &mut index_meta_entry.index;
|
|
||||||
match erasure_meta.status(&index) {
|
|
||||||
ErasureMetaStatus::CanRecover => {
|
|
||||||
// Find shreds for this erasure set and try recovery
|
|
||||||
let slot = index.slot;
|
|
||||||
let mut available_shreds = vec![];
|
|
||||||
(set_index..set_index + erasure_meta.config.num_data() as u64).for_each(|i| {
|
(set_index..set_index + erasure_meta.config.num_data() as u64).for_each(|i| {
|
||||||
if index.data().is_present(i) {
|
if index.data().is_present(i) {
|
||||||
if let Some(shred) =
|
if let Some(shred) = prev_inserted_datas.remove(&(slot, i)).or_else(|| {
|
||||||
prev_inserted_datas.remove(&(slot, i)).or_else(|| {
|
|
||||||
let some_data = data_cf
|
let some_data = data_cf
|
||||||
.get_bytes((slot, i))
|
.get_bytes((slot, i))
|
||||||
.expect("Database failure, could not fetch data shred");
|
.expect("Database failure, could not fetch data shred");
|
||||||
|
@ -599,15 +568,23 @@ impl Blockstore {
|
||||||
warn!("Data shred deleted while reading for recovery");
|
warn!("Data shred deleted while reading for recovery");
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
})
|
}) {
|
||||||
{
|
|
||||||
available_shreds.push(shred);
|
available_shreds.push(shred);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_recovery_coding_shreds(
|
||||||
|
index: &mut Index,
|
||||||
|
slot: Slot,
|
||||||
|
erasure_meta: &ErasureMeta,
|
||||||
|
available_shreds: &mut Vec<Shred>,
|
||||||
|
prev_inserted_codes: &mut HashMap<(u64, u64), Shred>,
|
||||||
|
code_cf: &LedgerColumn<cf::ShredCode>,
|
||||||
|
) {
|
||||||
(erasure_meta.first_coding_index
|
(erasure_meta.first_coding_index
|
||||||
..erasure_meta.first_coding_index
|
..erasure_meta.first_coding_index + erasure_meta.config.num_coding() as u64)
|
||||||
+ erasure_meta.config.num_coding() as u64)
|
|
||||||
.for_each(|i| {
|
.for_each(|i| {
|
||||||
if let Some(shred) = prev_inserted_codes
|
if let Some(shred) = prev_inserted_codes
|
||||||
.remove(&(slot, i))
|
.remove(&(slot, i))
|
||||||
|
@ -638,6 +615,41 @@ impl Blockstore {
|
||||||
available_shreds.push(shred);
|
available_shreds.push(shred);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn recover_shreds(
|
||||||
|
index: &mut Index,
|
||||||
|
set_index: u64,
|
||||||
|
erasure_meta: &ErasureMeta,
|
||||||
|
prev_inserted_datas: &mut HashMap<(u64, u64), Shred>,
|
||||||
|
prev_inserted_codes: &mut HashMap<(u64, u64), Shred>,
|
||||||
|
recovered_data_shreds: &mut Vec<Shred>,
|
||||||
|
data_cf: &LedgerColumn<cf::ShredData>,
|
||||||
|
code_cf: &LedgerColumn<cf::ShredCode>,
|
||||||
|
) {
|
||||||
|
// Find shreds for this erasure set and try recovery
|
||||||
|
let slot = index.slot;
|
||||||
|
let mut available_shreds = vec![];
|
||||||
|
|
||||||
|
Self::get_recovery_data_shreds(
|
||||||
|
index,
|
||||||
|
set_index,
|
||||||
|
slot,
|
||||||
|
erasure_meta,
|
||||||
|
&mut available_shreds,
|
||||||
|
prev_inserted_datas,
|
||||||
|
data_cf,
|
||||||
|
);
|
||||||
|
|
||||||
|
Self::get_recovery_coding_shreds(
|
||||||
|
index,
|
||||||
|
slot,
|
||||||
|
erasure_meta,
|
||||||
|
&mut available_shreds,
|
||||||
|
prev_inserted_codes,
|
||||||
|
code_cf,
|
||||||
|
);
|
||||||
|
|
||||||
if let Ok(mut result) = Shredder::try_recovery(
|
if let Ok(mut result) = Shredder::try_recovery(
|
||||||
available_shreds,
|
available_shreds,
|
||||||
erasure_meta.config.num_data(),
|
erasure_meta.config.num_data(),
|
||||||
|
@ -646,12 +658,74 @@ impl Blockstore {
|
||||||
erasure_meta.first_coding_index as usize,
|
erasure_meta.first_coding_index as usize,
|
||||||
slot,
|
slot,
|
||||||
) {
|
) {
|
||||||
submit_metrics(true, "complete".into(), result.len());
|
Self::submit_metrics(
|
||||||
|
slot,
|
||||||
|
set_index,
|
||||||
|
erasure_meta,
|
||||||
|
true,
|
||||||
|
"complete".into(),
|
||||||
|
result.len(),
|
||||||
|
);
|
||||||
recovered_data_shreds.append(&mut result);
|
recovered_data_shreds.append(&mut result);
|
||||||
} else {
|
} else {
|
||||||
submit_metrics(true, "incomplete".into(), 0);
|
Self::submit_metrics(slot, set_index, erasure_meta, true, "incomplete".into(), 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn submit_metrics(
|
||||||
|
slot: Slot,
|
||||||
|
set_index: u64,
|
||||||
|
erasure_meta: &ErasureMeta,
|
||||||
|
attempted: bool,
|
||||||
|
status: String,
|
||||||
|
recovered: usize,
|
||||||
|
) {
|
||||||
|
datapoint_debug!(
|
||||||
|
"blockstore-erasure",
|
||||||
|
("slot", slot as i64, i64),
|
||||||
|
("start_index", set_index as i64, i64),
|
||||||
|
(
|
||||||
|
"end_index",
|
||||||
|
(erasure_meta.set_index + erasure_meta.config.num_data() as u64) as i64,
|
||||||
|
i64
|
||||||
|
),
|
||||||
|
("recovery_attempted", attempted, bool),
|
||||||
|
("recovery_status", status, String),
|
||||||
|
("recovered", recovered as i64, i64),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn try_shred_recovery(
|
||||||
|
db: &Database,
|
||||||
|
erasure_metas: &HashMap<(u64, u64), ErasureMeta>,
|
||||||
|
index_working_set: &mut HashMap<u64, IndexMetaWorkingSetEntry>,
|
||||||
|
prev_inserted_datas: &mut HashMap<(u64, u64), Shred>,
|
||||||
|
prev_inserted_codes: &mut HashMap<(u64, u64), Shred>,
|
||||||
|
) -> Vec<Shred> {
|
||||||
|
let data_cf = db.column::<cf::ShredData>();
|
||||||
|
let code_cf = db.column::<cf::ShredCode>();
|
||||||
|
let mut recovered_data_shreds = vec![];
|
||||||
|
// Recovery rules:
|
||||||
|
// 1. Only try recovery around indexes for which new data or coding shreds are received
|
||||||
|
// 2. For new data shreds, check if an erasure set exists. If not, don't try recovery
|
||||||
|
// 3. Before trying recovery, check if enough number of shreds have been received
|
||||||
|
// 3a. Enough number of shreds = (#data + #coding shreds) > erasure.num_data
|
||||||
|
for (&(slot, set_index), erasure_meta) in erasure_metas.iter() {
|
||||||
|
let index_meta_entry = index_working_set.get_mut(&slot).expect("Index");
|
||||||
|
let index = &mut index_meta_entry.index;
|
||||||
|
match erasure_meta.status(&index) {
|
||||||
|
ErasureMetaStatus::CanRecover => {
|
||||||
|
Self::recover_shreds(
|
||||||
|
index,
|
||||||
|
set_index,
|
||||||
|
erasure_meta,
|
||||||
|
prev_inserted_datas,
|
||||||
|
prev_inserted_codes,
|
||||||
|
&mut recovered_data_shreds,
|
||||||
|
&data_cf,
|
||||||
|
&code_cf,
|
||||||
|
);
|
||||||
|
}
|
||||||
ErasureMetaStatus::DataFull => {
|
ErasureMetaStatus::DataFull => {
|
||||||
(set_index..set_index + erasure_meta.config.num_coding() as u64).for_each(
|
(set_index..set_index + erasure_meta.config.num_coding() as u64).for_each(
|
||||||
|i| {
|
|i| {
|
||||||
|
@ -665,10 +739,24 @@ impl Blockstore {
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
submit_metrics(false, "complete".into(), 0);
|
Self::submit_metrics(
|
||||||
|
slot,
|
||||||
|
set_index,
|
||||||
|
erasure_meta,
|
||||||
|
false,
|
||||||
|
"complete".into(),
|
||||||
|
0,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
ErasureMetaStatus::StillNeed(needed) => {
|
ErasureMetaStatus::StillNeed(needed) => {
|
||||||
submit_metrics(false, format!("still need: {}", needed), 0);
|
Self::submit_metrics(
|
||||||
|
slot,
|
||||||
|
set_index,
|
||||||
|
erasure_meta,
|
||||||
|
false,
|
||||||
|
format!("still need: {}", needed),
|
||||||
|
0,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue