solana/ledger/tests/shred.rs

#![allow(clippy::arithmetic_side_effects)]
use {
    solana_entry::entry::Entry,
    solana_ledger::shred::{
        max_entries_per_n_shred, verify_test_data_shred, ProcessShredsStats, ReedSolomonCache,
        Shred, Shredder, DATA_SHREDS_PER_FEC_BLOCK, LEGACY_SHRED_DATA_CAPACITY,
    },
    solana_sdk::{
        clock::Slot,
        hash::Hash,
        signature::{Keypair, Signer},
        system_transaction,
    },
    std::{
        collections::{BTreeMap, HashSet},
        convert::TryInto,
        sync::Arc,
    },
};

type IndexShredsMap = BTreeMap<u32, Vec<Shred>>;
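
// Shreds `num_fec_sets` worth of entries, drops half of every erasure set,
// and checks that Reed-Solomon recovery restores the missing data shreds and
// that deshredding the result reproduces the original serialized entries.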
#[test]
fn test_multi_fec_block_coding() {
    let keypair = Arc::new(Keypair::new());
    let slot = 0x1234_5678_9abc_def0;
    let shredder = Shredder::new(slot, slot - 5, 0, 0).unwrap();
    let num_fec_sets = 100;
    let num_data_shreds = DATA_SHREDS_PER_FEC_BLOCK * num_fec_sets;
    let keypair0 = Keypair::new();
    let keypair1 = Keypair::new();
    let tx0 = system_transaction::transfer(&keypair0, &keypair1.pubkey(), 1, Hash::default());
    let entry = Entry::new(&Hash::default(), 1, vec![tx0]);
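    // Generate just enough random transfer entries to fill `num_data_shreds`
    // legacy-sized data shreds.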
    let num_entries = max_entries_per_n_shred(
        &entry,
        num_data_shreds as u64,
        Some(LEGACY_SHRED_DATA_CAPACITY),
    );
    let entries: Vec<_> = (0..num_entries)
        .map(|_| {
            let keypair0 = Keypair::new();
            let keypair1 = Keypair::new();
            let tx0 =
                system_transaction::transfer(&keypair0, &keypair1.pubkey(), 1, Hash::default());
            Entry::new(&Hash::default(), 1, vec![tx0])
        })
        .collect();
    let reed_solomon_cache = ReedSolomonCache::default();
    let serialized_entries = bincode::serialize(&entries).unwrap();
    let (data_shreds, coding_shreds) = shredder.entries_to_shreds(
        &keypair,
        &entries,
        true,  // is_last_in_slot
        0,     // next_shred_index
        0,     // next_code_index
        false, // merkle_variant
        &reed_solomon_cache,
        &mut ProcessShredsStats::default(),
    );
    let next_index = data_shreds.last().unwrap().index() + 1;
    assert_eq!(next_index as usize, num_data_shreds);
    assert_eq!(data_shreds.len(), num_data_shreds);
    assert_eq!(coding_shreds.len(), num_data_shreds);
    for c in &coding_shreds {
        assert!(!c.is_data());
    }
    let mut all_shreds = vec![];
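    // For each FEC set: take its data shreds followed by its coding shreds,
    // drop every even-positioned shred, recover the missing data shreds, and
    // verify them.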
    for i in 0..num_fec_sets {
        let shred_start_index = DATA_SHREDS_PER_FEC_BLOCK * i;
        let end_index = shred_start_index + DATA_SHREDS_PER_FEC_BLOCK - 1;
        let fec_set_shreds = data_shreds[shred_start_index..=end_index]
            .iter()
            .cloned()
            .chain(coding_shreds[shred_start_index..=end_index].iter().cloned())
            .collect::<Vec<_>>();
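        // Keep only the odd-positioned shreds; recovery below must reproduce
        // each dropped (even-positioned) data shred.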
        let mut shred_info: Vec<Shred> = fec_set_shreds
            .iter()
            .enumerate()
            .filter_map(|(i, b)| if i % 2 != 0 { Some(b.clone()) } else { None })
            .collect();
        let recovered_data =
            Shredder::try_recovery(shred_info.clone(), &reed_solomon_cache).unwrap();
        for (i, recovered_shred) in recovered_data.into_iter().enumerate() {
            let index = shred_start_index + (i * 2);
            verify_test_data_shred(
                &recovered_shred,
                index.try_into().unwrap(),
                slot,
                slot - 5,
                &keypair.pubkey(),
                true,
                index == end_index,
                index == end_index,
            );
            shred_info.insert(i * 2, recovered_shred);
        }
        all_shreds.extend(shred_info.into_iter().take(DATA_SHREDS_PER_FEC_BLOCK));
    }
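    // Deshredding the full set of data shreds (recovered ones included) should
    // yield the original serialized entries.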
    let result = Shredder::deshred(&all_shreds[..]).unwrap();
    assert_eq!(serialized_entries[..], result[..serialized_entries.len()]);
}
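
// Shreds the same entries twice so the slot contains FEC sets of different
// sizes, then checks that dropping every other shred in each set still allows
// recovery of the missing data shreds.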
#[test]
fn test_multi_fec_block_different_size_coding() {
    let slot = 0x1234_5678_9abc_def0;
    let parent_slot = slot - 5;
    let keypair = Arc::new(Keypair::new());
    let (fec_data, fec_coding, num_shreds_per_iter) =
        setup_different_sized_fec_blocks(slot, parent_slot, keypair.clone());
    let total_num_data_shreds: usize = fec_data.values().map(|x| x.len()).sum();
    let reed_solomon_cache = ReedSolomonCache::default();
    // Test recovery
    for (fec_data_shreds, fec_coding_shreds) in fec_data.values().zip(fec_coding.values()) {
        let first_data_index = fec_data_shreds.first().unwrap().index() as usize;
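        // Keep every other data and coding shred; the odd-indexed data shreds
        // are the ones recovery must reproduce.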
        let all_shreds: Vec<Shred> = fec_data_shreds
            .iter()
            .step_by(2)
            .chain(fec_coding_shreds.iter().step_by(2))
            .cloned()
            .collect();
        let recovered_data = Shredder::try_recovery(all_shreds, &reed_solomon_cache).unwrap();
        // Necessary in order to ensure the last shred in the slot
        // is part of the recovered set, and that the below `index`
        // calculation in the loop is correct
        assert!(fec_data_shreds.len() % 2 == 0);
        for (i, recovered_shred) in recovered_data.into_iter().enumerate() {
            let index = first_data_index + (i * 2) + 1;
            verify_test_data_shred(
                &recovered_shred,
                index.try_into().unwrap(),
                slot,
                parent_slot,
                &keypair.pubkey(),
                true,
                index == total_num_data_shreds - 1,
                index % num_shreds_per_iter == num_shreds_per_iter - 1,
            );
        }
    }
}
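
// Groups data and coding shreds by their FEC set index, asserting along the
// way that no (slot, index) pair appears twice.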
fn sort_data_coding_into_fec_sets(
    data_shreds: Vec<Shred>,
    coding_shreds: Vec<Shred>,
    fec_data: &mut IndexShredsMap,
    fec_coding: &mut IndexShredsMap,
    data_slot_and_index: &mut HashSet<(Slot, u32)>,
    coding_slot_and_index: &mut HashSet<(Slot, u32)>,
) {
    for shred in data_shreds {
        assert!(shred.is_data());
        let key = (shred.slot(), shred.index());
        // Make sure there are no duplicates for same key
        assert!(!data_slot_and_index.contains(&key));
        data_slot_and_index.insert(key);
        let fec_entry = fec_data.entry(shred.fec_set_index()).or_default();
        fec_entry.push(shred);
    }
    for shred in coding_shreds {
        assert!(!shred.is_data());
        let key = (shred.slot(), shred.index());
        // Make sure there are no duplicates for same key
        assert!(!coding_slot_and_index.contains(&key));
        coding_slot_and_index.insert(key);
        let fec_entry = fec_coding.entry(shred.fec_set_index()).or_default();
        fec_entry.push(shred);
    }
}
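
// Shreds the same entries twice with one shredder so the slot ends up with
// FEC sets of different sizes; returns the data and coding shreds grouped by
// FEC set index, plus the number of data shreds produced per iteration.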
#[allow(clippy::assertions_on_constants)]
fn setup_different_sized_fec_blocks(
    slot: Slot,
    parent_slot: Slot,
    keypair: Arc<Keypair>,
) -> (IndexShredsMap, IndexShredsMap, usize) {
    let shredder = Shredder::new(slot, parent_slot, 0, 0).unwrap();
    let keypair0 = Keypair::new();
    let keypair1 = Keypair::new();
    let tx0 = system_transaction::transfer(&keypair0, &keypair1.pubkey(), 1, Hash::default());
    let entry = Entry::new(&Hash::default(), 1, vec![tx0]);
    // Make enough entries for `DATA_SHREDS_PER_FEC_BLOCK + 2` shreds so one
    // fec set will have `DATA_SHREDS_PER_FEC_BLOCK` shreds and the next
    // will have 2 shreds.
    assert!(DATA_SHREDS_PER_FEC_BLOCK > 2);
    let num_shreds_per_iter = DATA_SHREDS_PER_FEC_BLOCK + 2;
    let num_entries = max_entries_per_n_shred(
        &entry,
        num_shreds_per_iter as u64,
        Some(LEGACY_SHRED_DATA_CAPACITY),
    );
    let entries: Vec<_> = (0..num_entries)
        .map(|_| {
            let keypair0 = Keypair::new();
            let keypair1 = Keypair::new();
            let tx0 =
                system_transaction::transfer(&keypair0, &keypair1.pubkey(), 1, Hash::default());
            Entry::new(&Hash::default(), 1, vec![tx0])
        })
        .collect();
    // Run the shredder twice, generate data and coding shreds
    let mut next_shred_index = 0;
    let mut next_code_index = 0;
    let mut fec_data = BTreeMap::new();
    let mut fec_coding = BTreeMap::new();
    let mut data_slot_and_index = HashSet::new();
    let mut coding_slot_and_index = HashSet::new();
    let total_num_data_shreds: usize = 2 * num_shreds_per_iter;
    let reed_solomon_cache = ReedSolomonCache::default();
    for i in 0..2 {
        let is_last = i == 1;
        let (data_shreds, coding_shreds) = shredder.entries_to_shreds(
            &keypair,
            &entries,
            is_last,
            next_shred_index,
            next_code_index,
            false, // merkle_variant
            &reed_solomon_cache,
            &mut ProcessShredsStats::default(),
        );
        for shred in &data_shreds {
            if (shred.index() as usize) == total_num_data_shreds - 1 {
                assert!(shred.data_complete());
                assert!(shred.last_in_slot());
            } else if (shred.index() as usize) % num_shreds_per_iter == num_shreds_per_iter - 1 {
                assert!(shred.data_complete());
            } else {
                assert!(!shred.data_complete());
                assert!(!shred.last_in_slot());
            }
        }
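        // Indices must continue across iterations so the second batch starts
        // where the first one ended.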
        assert_eq!(data_shreds.len(), num_shreds_per_iter);
        next_shred_index = data_shreds.last().unwrap().index() + 1;
        next_code_index = coding_shreds.last().unwrap().index() + 1;
        sort_data_coding_into_fec_sets(
            data_shreds,
            coding_shreds,
            &mut fec_data,
            &mut fec_coding,
            &mut data_slot_and_index,
            &mut coding_slot_and_index,
        );
    }
    assert_eq!(fec_data.len(), fec_coding.len());
    (fec_data, fec_coding, num_shreds_per_iter)
}