#![allow(clippy::integer_arithmetic)]
#![feature(test)]

extern crate test;

use {
    rand::seq::SliceRandom,
    raptorq::{Decoder, Encoder},
    solana_entry::entry::{create_ticks, Entry},
    solana_ledger::shred::{
        max_entries_per_n_shred, max_ticks_per_n_shreds, ProcessShredsStats, ReedSolomonCache,
        Shred, ShredFlags, Shredder, DATA_SHREDS_PER_FEC_BLOCK, LEGACY_SHRED_DATA_CAPACITY,
    },
    solana_perf::test_tx,
    solana_sdk::{hash::Hash, packet::PACKET_DATA_SIZE, signature::Keypair},
    test::Bencher,
};

// Copied these values here to avoid exposing shred internals
// solely for the sake of benchmarks.
//
// size of nonce: 4
// size of common shred header: 83
// size of coding shred header: 6
const VALID_SHRED_DATA_LEN: usize = PACKET_DATA_SIZE - 4 - 83 - 6;

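// Builds a synthetic Entry holding `txs_per_entry` copies of a test
// transaction, used as input for the shredding benchmarks below.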
fn make_test_entry(txs_per_entry: u64) -> Entry {
    Entry {
        num_hashes: 100_000,
        hash: Hash::default(),
        transactions: vec![test_tx::test_tx().into(); txs_per_entry as usize],
    }
}

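// Repeats make_test_entry `num_entries` times; the entries are not
// PoH-chained to one another, hence "unchained".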
fn make_large_unchained_entries(txs_per_entry: u64, num_entries: u64) -> Vec<Entry> {
    (0..num_entries)
        .map(|_| make_test_entry(txs_per_entry))
        .collect()
}

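// Shreds enough synthetic entries to yield at least `num_shreds` legacy data
// shreds and returns the data shreds; coding shreds are discarded.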
fn make_shreds(num_shreds: usize) -> Vec<Shred> {
    let txs_per_entry = 128;
    let num_entries = max_entries_per_n_shred(
        &make_test_entry(txs_per_entry),
        2 * num_shreds as u64,
        Some(LEGACY_SHRED_DATA_CAPACITY),
    );
    let entries = make_large_unchained_entries(txs_per_entry, num_entries);
    let shredder = Shredder::new(1, 0, 0, 0).unwrap();
    let (data_shreds, _) = shredder.entries_to_shreds(
        &Keypair::new(),
        &entries,
        true,  // is_last_in_slot
        0,     // next_shred_index
        0,     // next_code_index
        false, // merkle_variant
        &ReedSolomonCache::default(),
        &mut ProcessShredsStats::default(),
    );
    assert!(data_shreds.len() >= num_shreds);
    data_shreds
}

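// Concatenates the payloads of the first `num_shreds` data shreds (truncated
// to VALID_SHRED_DATA_LEN each) into a single flat buffer for the RaptorQ
// benchmarks.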
fn make_concatenated_shreds(num_shreds: usize) -> Vec<u8> {
    let data_shreds = make_shreds(num_shreds);
    let mut data: Vec<u8> = vec![0; num_shreds * VALID_SHRED_DATA_LEN];
    for (i, shred) in (data_shreds[0..num_shreds]).iter().enumerate() {
        data[i * VALID_SHRED_DATA_LEN..(i + 1) * VALID_SHRED_DATA_LEN]
            .copy_from_slice(&shred.payload()[..VALID_SHRED_DATA_LEN]);
    }

    data
}

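// Benches shredding ~1MB of tick entries into (Merkle) data and coding shreds.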
#[bench]
fn bench_shredder_ticks(bencher: &mut Bencher) {
    let kp = Keypair::new();
    let shred_size = LEGACY_SHRED_DATA_CAPACITY;
    let num_shreds = ((1000 * 1000) + (shred_size - 1)) / shred_size; // ~1Mb
    let num_ticks = max_ticks_per_n_shreds(1, Some(LEGACY_SHRED_DATA_CAPACITY)) * num_shreds as u64;
    let entries = create_ticks(num_ticks, 0, Hash::default());
    let reed_solomon_cache = ReedSolomonCache::default();
    bencher.iter(|| {
        let shredder = Shredder::new(1, 0, 0, 0).unwrap();
        shredder.entries_to_shreds(
            &kp,
            &entries,
            true, // is_last_in_slot
            0,    // next_shred_index
            0,    // next_code_index
            true, // merkle_variant
            &reed_solomon_cache,
            &mut ProcessShredsStats::default(),
        );
    })
}

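// Same as bench_shredder_ticks, but the ~1MB of entries is made up of
// transaction-heavy entries rather than plain ticks.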
#[bench]
fn bench_shredder_large_entries(bencher: &mut Bencher) {
    let kp = Keypair::new();
    let shred_size = LEGACY_SHRED_DATA_CAPACITY;
    let num_shreds = ((1000 * 1000) + (shred_size - 1)) / shred_size; // ~1Mb
    let txs_per_entry = 128;
    let num_entries = max_entries_per_n_shred(
        &make_test_entry(txs_per_entry),
        num_shreds as u64,
        Some(shred_size),
    );
    let entries = make_large_unchained_entries(txs_per_entry, num_entries);
    let reed_solomon_cache = ReedSolomonCache::default();
    bencher.iter(|| {
        let shredder = Shredder::new(1, 0, 0, 0).unwrap();
        shredder.entries_to_shreds(
            &kp,
            &entries,
            true, // is_last_in_slot
            0,    // next_shred_index
            0,    // next_code_index
            true, // merkle_variant
            &reed_solomon_cache,
            &mut ProcessShredsStats::default(),
        );
    })
}

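// Benches Shredder::deshred, reassembling the serialized entries from ~10MB
// worth of data shreds.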
#[bench]
fn bench_deshredder(bencher: &mut Bencher) {
    let kp = Keypair::new();
    let shred_size = LEGACY_SHRED_DATA_CAPACITY;
    // ~10Mb
    let num_shreds = ((10000 * 1000) + (shred_size - 1)) / shred_size;
    let num_ticks = max_ticks_per_n_shreds(1, Some(shred_size)) * num_shreds as u64;
    let entries = create_ticks(num_ticks, 0, Hash::default());
    let shredder = Shredder::new(1, 0, 0, 0).unwrap();
    let (data_shreds, _) = shredder.entries_to_shreds(
        &kp,
        &entries,
        true, // is_last_in_slot
        0,    // next_shred_index
        0,    // next_code_index
        true, // merkle_variant
        &ReedSolomonCache::default(),
        &mut ProcessShredsStats::default(),
    );
    bencher.iter(|| {
        let raw = &mut Shredder::deshred(&data_shreds).unwrap();
        assert_ne!(raw.len(), 0);
    })
}

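// Benches deserializing a single data shred from its serialized payload.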
#[bench]
fn bench_deserialize_hdr(bencher: &mut Bencher) {
    let data = vec![0; LEGACY_SHRED_DATA_CAPACITY];

    let shred = Shred::new_from_data(2, 1, 1, &data, ShredFlags::LAST_SHRED_IN_SLOT, 0, 0, 1);

    bencher.iter(|| {
        let payload = shred.payload().clone();
        let _ = Shred::new_from_serialized_shred(payload).unwrap();
    })
}

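// Benches Reed-Solomon coding-shred generation for one FEC block of data shreds.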
#[bench]
fn bench_shredder_coding(bencher: &mut Bencher) {
    let symbol_count = DATA_SHREDS_PER_FEC_BLOCK;
    let data_shreds = make_shreds(symbol_count);
    let reed_solomon_cache = ReedSolomonCache::default();
    bencher.iter(|| {
        Shredder::generate_coding_shreds(
            &data_shreds[..symbol_count],
            0, // next_code_index
            &reed_solomon_cache,
        )
        .len();
    })
}

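// Benches Reed-Solomon recovery: the data shreds of one FEC block are
// reconstructed from the coding shreds alone.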
#[bench]
fn bench_shredder_decoding(bencher: &mut Bencher) {
    let symbol_count = DATA_SHREDS_PER_FEC_BLOCK;
    let data_shreds = make_shreds(symbol_count);
    let reed_solomon_cache = ReedSolomonCache::default();
    let coding_shreds = Shredder::generate_coding_shreds(
        &data_shreds[..symbol_count],
        0, // next_code_index
        &reed_solomon_cache,
    );
    bencher.iter(|| {
        Shredder::try_recovery(coding_shreds[..].to_vec(), &reed_solomon_cache).unwrap();
    })
}

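// Benches RaptorQ encoding over the same amount of shred data, for comparison
// with the Reed-Solomon coding benchmark above.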
#[bench]
fn bench_shredder_coding_raptorq(bencher: &mut Bencher) {
    let symbol_count = DATA_SHREDS_PER_FEC_BLOCK;
    let data = make_concatenated_shreds(symbol_count);
    bencher.iter(|| {
        let encoder = Encoder::with_defaults(&data, VALID_SHRED_DATA_LEN as u16);
        encoder.get_encoded_packets(symbol_count as u32);
    })
}

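// Benches RaptorQ decoding after shuffling the encoded packets and dropping
// just under half of them.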
#[bench]
fn bench_shredder_decoding_raptorq(bencher: &mut Bencher) {
    let symbol_count = DATA_SHREDS_PER_FEC_BLOCK;
    let data = make_concatenated_shreds(symbol_count);
    let encoder = Encoder::with_defaults(&data, VALID_SHRED_DATA_LEN as u16);
    let mut packets = encoder.get_encoded_packets(symbol_count as u32);
    packets.shuffle(&mut rand::thread_rng());

    // Simulate randomly losing just under 50% of the packets.
    packets.truncate(packets.len() - packets.len() / 2 + 1);

    bencher.iter(|| {
        let mut decoder = Decoder::new(encoder.get_config());
        let mut result = None;
        for packet in &packets {
            result = decoder.decode(packet.clone());
            if result.is_some() {
                break;
            }
        }
        assert_eq!(result.unwrap(), data);
    })
}