Disables EAH with short epochs (#28803)
This commit is contained in:
parent
b572422d24
commit
d798e751a0
|
@ -6202,6 +6202,7 @@ dependencies = [
|
||||||
"symlink",
|
"symlink",
|
||||||
"tar",
|
"tar",
|
||||||
"tempfile",
|
"tempfile",
|
||||||
|
"test-case",
|
||||||
"thiserror",
|
"thiserror",
|
||||||
"zstd",
|
"zstd",
|
||||||
]
|
]
|
||||||
|
|
|
@ -139,6 +139,7 @@ impl TestEnvironment {
|
||||||
assert!(bank
|
assert!(bank
|
||||||
.feature_set
|
.feature_set
|
||||||
.is_active(&feature_set::epoch_accounts_hash::id()));
|
.is_active(&feature_set::epoch_accounts_hash::id()));
|
||||||
|
assert!(epoch_accounts_hash::is_enabled_this_epoch(&bank));
|
||||||
|
|
||||||
bank.set_startup_verification_complete();
|
bank.set_startup_verification_complete();
|
||||||
|
|
||||||
|
|
|
@ -37,7 +37,6 @@ use {
|
||||||
},
|
},
|
||||||
solana_sdk::{
|
solana_sdk::{
|
||||||
clock::Slot,
|
clock::Slot,
|
||||||
epoch_schedule::EpochSchedule,
|
|
||||||
genesis_config::{
|
genesis_config::{
|
||||||
ClusterType::{self, Development, Devnet, MainnetBeta, Testnet},
|
ClusterType::{self, Development, Devnet, MainnetBeta, Testnet},
|
||||||
GenesisConfig,
|
GenesisConfig,
|
||||||
|
@ -95,8 +94,6 @@ impl SnapshotTestConfig {
|
||||||
&solana_sdk::pubkey::new_rand(), // validator_pubkey
|
&solana_sdk::pubkey::new_rand(), // validator_pubkey
|
||||||
1, // validator_stake_lamports
|
1, // validator_stake_lamports
|
||||||
);
|
);
|
||||||
// NOTE: Must set `warmup == false` until EAH can handle short epochs
|
|
||||||
genesis_config_info.genesis_config.epoch_schedule = EpochSchedule::without_warmup();
|
|
||||||
genesis_config_info.genesis_config.cluster_type = cluster_type;
|
genesis_config_info.genesis_config.cluster_type = cluster_type;
|
||||||
let bank0 = Bank::new_with_paths_for_tests(
|
let bank0 = Bank::new_with_paths_for_tests(
|
||||||
&genesis_config_info.genesis_config,
|
&genesis_config_info.genesis_config,
|
||||||
|
|
|
@ -179,29 +179,34 @@ well.
|
||||||
|
|
||||||
An EAH is requested by `BankForks::set_root()`, which happens while setting
|
An EAH is requested by `BankForks::set_root()`, which happens while setting
|
||||||
*roots*. The EAH is stored into `Bank`s when they are *frozen*. Banks are
|
*roots*. The EAH is stored into `Bank`s when they are *frozen*. Banks are
|
||||||
frozen 32 slots before they are rooted. For the expected behavior, the EAH
|
frozen *at least* 32 slots before they are rooted. For the expected behavior,
|
||||||
start slot really should be 32 slots before the stop slot. If the number of
|
the EAH start slot really should be at least 32 slots before the stop slot. If the
|
||||||
slots per epoch is small, this can result in surprising behavior.
|
number of slots per epoch is small, this can result in surprising behavior.
|
||||||
|
|
||||||
Example 1: Assume there are 64 slots per epoch. The EAH start offset is 16
|
Example: Assume there are 40 slots per epoch. The EAH start offset is 10, and
|
||||||
and the EAH stop offset is 48. The difference is 32. So when Bank 48 is
|
the EAH stop offset is 30. When Bank 30 is frozen it will include the EAH in
|
||||||
frozen before Bank 16 is rooted, a new EAH request has not yet been requested;
|
its hash. However, Bank 10 has not yet been rooted, so a new EAH has not been
|
||||||
the EAH from the previous epoch is still valid and will be used by Bank 48.
|
calculated for this epoch. This means Bank 30 will have included the EAH *from
|
||||||
|
the previous epoch* in its hash.
|
||||||
|
|
||||||
Example 2: Assume there are 66 slots per epoch, then the EAH start offset is
|
Later, when Bank 10 is rooted, it will request a new EAH be calculated. If a
|
||||||
still 16 and the EAH stop offset is now 49. The difference is now 33. When
|
snapshot is taken for Bank 12 (or any bank between 10 and 30), it will include
|
||||||
Bank 49 is frozen, Bank 16 will already have been rooted, and thus sent an EAH
|
the EAH *from this epoch*. If a node loads the snapshot from Bank 12, once it
|
||||||
request; Bank 49 will wait for the new EAH calculation to complete.
|
gets to freezing Bank 30, it will end up with a different bank hash since it
|
||||||
|
included the EAH from this epoch (whereas the other node's Bank 30 included the
|
||||||
|
EAH from the previous epoch). Different bank hashes will result in consensus
|
||||||
|
failures.
|
||||||
|
|
||||||
Example 3: Assume there are 32 slots per epoch (the minimum allowed). The EAH
|
The above example is clearly bad. It can be observed that short epochs only
|
||||||
start offset is 8, and the EAH stop offset is 24. Similar to Example 1, Bank
|
occur (1) during warmup, or (2) in tests. Real clusters have much longer
|
||||||
24 is frozen around when Bank 24 *of the previous epoch* is rooted. This
|
epochs (432,000 slots by default).
|
||||||
ensures that when the EAH is stored, it'll be for the previous epoch.
|
|
||||||
|
|
||||||
In these examples the observed behavior of the EAH is different than when using
|
Tests can be fixed as needed; that leaves fixing warmup. Since warmup is
|
||||||
the normal 432,000 slots per epoch. The EAH is still valid and correct with a
|
transient, we disable EAH until slots-per-epoch is large enough. More
|
||||||
small number of slots per epoch; it now has a delay of one epoch. Since the
|
precisely, we disable EAH until the `calculation window` is big enough. During
|
||||||
epochs themselves can be much faster, security is not reduced.
|
warmup, slots-per-epoch doubles each epoch until reaching the desired number,
|
||||||
|
so only a few epochs will skip EAH (which also amounts to a small total number of
|
||||||
|
slots).
|
||||||
|
|
||||||
|
|
||||||
#### Warping
|
#### Warping
|
||||||
|
|
|
@ -76,6 +76,7 @@ libsecp256k1 = "0.6.0"
|
||||||
rand_chacha = "0.2.2"
|
rand_chacha = "0.2.2"
|
||||||
solana-logger = { path = "../logger", version = "=1.15.0" }
|
solana-logger = { path = "../logger", version = "=1.15.0" }
|
||||||
static_assertions = "1.1.0"
|
static_assertions = "1.1.0"
|
||||||
|
test-case = "2.1.0"
|
||||||
|
|
||||||
[package.metadata.docs.rs]
|
[package.metadata.docs.rs]
|
||||||
targets = ["x86_64-unknown-linux-gnu"]
|
targets = ["x86_64-unknown-linux-gnu"]
|
||||||
|
|
|
@ -6748,6 +6748,10 @@ impl Bank {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !epoch_accounts_hash::is_enabled_this_epoch(self) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
let stop_slot = epoch_accounts_hash::calculation_stop(self);
|
let stop_slot = epoch_accounts_hash::calculation_stop(self);
|
||||||
self.parent_slot() < stop_slot && self.slot() >= stop_slot
|
self.parent_slot() < stop_slot && self.slot() >= stop_slot
|
||||||
}
|
}
|
||||||
|
@ -7722,15 +7726,15 @@ impl Bank {
|
||||||
/// EAH *must* be included. This means if an EAH calculation is currently in-flight we will
|
/// EAH *must* be included. This means if an EAH calculation is currently in-flight we will
|
||||||
/// wait for it to complete.
|
/// wait for it to complete.
|
||||||
pub fn get_epoch_accounts_hash_to_serialize(&self) -> Option<EpochAccountsHash> {
|
pub fn get_epoch_accounts_hash_to_serialize(&self) -> Option<EpochAccountsHash> {
|
||||||
let (epoch_accounts_hash, measure) = measure!(
|
let should_get_epoch_accounts_hash = epoch_accounts_hash::is_enabled_this_epoch(self)
|
||||||
epoch_accounts_hash::is_in_calculation_window(self).then(|| {
|
&& epoch_accounts_hash::is_in_calculation_window(self);
|
||||||
self.rc
|
let (epoch_accounts_hash, measure) = measure!(should_get_epoch_accounts_hash.then(|| {
|
||||||
.accounts
|
self.rc
|
||||||
.accounts_db
|
.accounts
|
||||||
.epoch_accounts_hash_manager
|
.accounts_db
|
||||||
.wait_get_epoch_accounts_hash()
|
.epoch_accounts_hash_manager
|
||||||
})
|
.wait_get_epoch_accounts_hash()
|
||||||
);
|
}));
|
||||||
|
|
||||||
datapoint_info!(
|
datapoint_info!(
|
||||||
"bank-get_epoch_accounts_hash_to_serialize",
|
"bank-get_epoch_accounts_hash_to_serialize",
|
||||||
|
|
|
@ -618,6 +618,10 @@ impl BankForks {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if !epoch_accounts_hash::is_enabled_this_epoch(bank) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
let start_slot = epoch_accounts_hash::calculation_start(bank);
|
let start_slot = epoch_accounts_hash::calculation_start(bank);
|
||||||
bank.slot() > self.last_accounts_hash_slot
|
bank.slot() > self.last_accounts_hash_slot
|
||||||
&& bank.parent_slot() < start_slot
|
&& bank.parent_slot() < start_slot
|
||||||
|
|
|
@ -2,9 +2,36 @@
|
||||||
|
|
||||||
use {
|
use {
|
||||||
crate::bank::Bank,
|
crate::bank::Bank,
|
||||||
solana_sdk::clock::{Epoch, Slot},
|
solana_sdk::{
|
||||||
|
clock::{Epoch, Slot},
|
||||||
|
vote::state::MAX_LOCKOUT_HISTORY,
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Is the EAH enabled this Epoch?
|
||||||
|
#[must_use]
|
||||||
|
pub fn is_enabled_this_epoch(bank: &Bank) -> bool {
|
||||||
|
// The EAH calculation "start" is based on when a bank is *rooted*, and "stop" is based on when a
|
||||||
|
// bank is *frozen*. Banks are rooted after exceeding the maximum lockout, so there is a delay
|
||||||
|
// of at least `maximum lockout` number of slots the EAH calculation must take into
|
||||||
|
// consideration. To ensure an EAH calculation has started by the time that calculation is
|
||||||
|
// needed, the calculation interval must be at least `maximum lockout` plus some buffer to
|
||||||
|
// handle when banks are not rooted every single slot.
|
||||||
|
const MINIMUM_CALCULATION_INTERVAL: u64 =
|
||||||
|
(MAX_LOCKOUT_HISTORY as u64).saturating_add(CALCULATION_INTERVAL_BUFFER);
|
||||||
|
// The calculation buffer is a best-attempt at median worst-case for how many bank ancestors can
|
||||||
|
// accumulate before the bank is rooted.
|
||||||
|
// [brooks] On Wed Oct 26 12:15:21 2022, over the previous 6 hour period against mainnet-beta,
|
||||||
|
// I saw multiple validators reporting metrics in the 120s for `total_parent_banks`. The mean
|
||||||
|
// is 2 to 3, but a number of nodes also reported values in the low 20s. A value of 150 should
|
||||||
|
// capture the majority of validators, and will not be an issue for clusters running with
|
||||||
|
// normal slots-per-epoch; this really will only affect tests and epoch schedule warmup.
|
||||||
|
const CALCULATION_INTERVAL_BUFFER: u64 = 150;
|
||||||
|
|
||||||
|
let calculation_interval = calculation_interval(bank);
|
||||||
|
calculation_interval >= MINIMUM_CALCULATION_INTERVAL
|
||||||
|
}
|
||||||
|
|
||||||
/// Calculation of the EAH occurs once per epoch. All nodes in the cluster must agree on which
|
/// Calculation of the EAH occurs once per epoch. All nodes in the cluster must agree on which
|
||||||
/// slot the EAH is based on. This slot will be at an offset into the epoch, and referred to as
|
/// slot the EAH is based on. This slot will be at an offset into the epoch, and referred to as
|
||||||
/// the "start" slot for the EAH calculation.
|
/// the "start" slot for the EAH calculation.
|
||||||
|
@ -38,13 +65,19 @@ pub fn calculation_stop(bank: &Bank) -> Slot {
|
||||||
calculation_info(bank).calculation_stop
|
calculation_info(bank).calculation_stop
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Is this bank in the calculation window?
|
/// Get the number of slots from EAH calculation start to stop; known as the calculation interval
|
||||||
#[must_use]
|
#[must_use]
|
||||||
#[inline]
|
#[inline]
|
||||||
|
pub fn calculation_interval(bank: &Bank) -> u64 {
|
||||||
|
calculation_info(bank).calculation_interval
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Is this bank in the calculation window?
|
||||||
|
#[must_use]
|
||||||
pub fn is_in_calculation_window(bank: &Bank) -> bool {
|
pub fn is_in_calculation_window(bank: &Bank) -> bool {
|
||||||
let bank_slot = bank.slot();
|
|
||||||
let info = calculation_info(bank);
|
let info = calculation_info(bank);
|
||||||
bank_slot >= info.calculation_start && bank_slot < info.calculation_stop
|
let range = info.calculation_start..info.calculation_stop;
|
||||||
|
range.contains(&bank.slot())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// For the epoch that `bank` is in, get all the EAH calculation information
|
/// For the epoch that `bank` is in, get all the EAH calculation information
|
||||||
|
@ -60,6 +93,7 @@ pub fn calculation_info(bank: &Bank) -> CalculationInfo {
|
||||||
let last_slot_in_epoch = epoch_schedule.get_last_slot_in_epoch(epoch);
|
let last_slot_in_epoch = epoch_schedule.get_last_slot_in_epoch(epoch);
|
||||||
let calculation_start = first_slot_in_epoch.saturating_add(calculation_offset_start);
|
let calculation_start = first_slot_in_epoch.saturating_add(calculation_offset_start);
|
||||||
let calculation_stop = first_slot_in_epoch.saturating_add(calculation_offset_stop);
|
let calculation_stop = first_slot_in_epoch.saturating_add(calculation_offset_stop);
|
||||||
|
let calculation_interval = calculation_offset_stop.saturating_sub(calculation_offset_start);
|
||||||
|
|
||||||
CalculationInfo {
|
CalculationInfo {
|
||||||
epoch,
|
epoch,
|
||||||
|
@ -70,6 +104,7 @@ pub fn calculation_info(bank: &Bank) -> CalculationInfo {
|
||||||
calculation_offset_stop,
|
calculation_offset_stop,
|
||||||
calculation_start,
|
calculation_start,
|
||||||
calculation_stop,
|
calculation_stop,
|
||||||
|
calculation_interval,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -103,6 +138,8 @@ pub struct CalculationInfo {
|
||||||
pub calculation_start: Slot,
|
pub calculation_start: Slot,
|
||||||
/// Absolute slot where the EAH calculation stops
|
/// Absolute slot where the EAH calculation stops
|
||||||
pub calculation_stop: Slot,
|
pub calculation_stop: Slot,
|
||||||
|
/// Number of slots from EAH calculation start to stop
|
||||||
|
pub calculation_interval: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
@ -110,8 +147,23 @@ mod tests {
|
||||||
use {
|
use {
|
||||||
super::*,
|
super::*,
|
||||||
solana_sdk::{epoch_schedule::EpochSchedule, genesis_config::GenesisConfig},
|
solana_sdk::{epoch_schedule::EpochSchedule, genesis_config::GenesisConfig},
|
||||||
|
test_case::test_case,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#[test_case( 32 => false)] // minimum slots per epoch
|
||||||
|
#[test_case( 361 => false)] // below minimum slots per epoch *for EAH*
|
||||||
|
#[test_case( 362 => false)] // minimum slots per epoch *for EAH*
|
||||||
|
#[test_case( 8_192 => true)] // default dev slots per epoch
|
||||||
|
#[test_case(432_000 => true)] // default slots per epoch
|
||||||
|
fn test_is_enabled_this_epoch(slots_per_epoch: u64) -> bool {
|
||||||
|
let genesis_config = GenesisConfig {
|
||||||
|
epoch_schedule: EpochSchedule::custom(slots_per_epoch, slots_per_epoch, false),
|
||||||
|
..GenesisConfig::default()
|
||||||
|
};
|
||||||
|
let bank = Bank::new_for_tests(&genesis_config);
|
||||||
|
is_enabled_this_epoch(&bank)
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_calculation_offset_bounds() {
|
fn test_calculation_offset_bounds() {
|
||||||
let bank = Bank::default_for_tests();
|
let bank = Bank::default_for_tests();
|
||||||
|
@ -130,7 +182,7 @@ mod tests {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_calculation_info() {
|
fn test_calculation_info() {
|
||||||
for slots_per_epoch in [32, 100, 65_536, 432_000, 123_456_789] {
|
for slots_per_epoch in [32, 361, 362, 8_192, 65_536, 432_000, 123_456_789] {
|
||||||
for warmup in [false, true] {
|
for warmup in [false, true] {
|
||||||
let genesis_config = GenesisConfig {
|
let genesis_config = GenesisConfig {
|
||||||
epoch_schedule: EpochSchedule::custom(slots_per_epoch, slots_per_epoch, warmup),
|
epoch_schedule: EpochSchedule::custom(slots_per_epoch, slots_per_epoch, warmup),
|
||||||
|
@ -140,9 +192,10 @@ mod tests {
|
||||||
assert!(info.calculation_offset_start < info.calculation_offset_stop);
|
assert!(info.calculation_offset_start < info.calculation_offset_stop);
|
||||||
assert!(info.calculation_offset_start < info.slots_per_epoch);
|
assert!(info.calculation_offset_start < info.slots_per_epoch);
|
||||||
assert!(info.calculation_offset_stop < info.slots_per_epoch);
|
assert!(info.calculation_offset_stop < info.slots_per_epoch);
|
||||||
assert!(info.calculation_start < info.calculation_stop,);
|
assert!(info.calculation_start < info.calculation_stop);
|
||||||
assert!(info.calculation_start > info.first_slot_in_epoch,);
|
assert!(info.calculation_start > info.first_slot_in_epoch);
|
||||||
assert!(info.calculation_stop < info.last_slot_in_epoch,);
|
assert!(info.calculation_stop < info.last_slot_in_epoch);
|
||||||
|
assert!(info.calculation_interval > 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -223,7 +223,8 @@ fn test_bank_serialize_style(
|
||||||
initial_epoch_accounts_hash: bool,
|
initial_epoch_accounts_hash: bool,
|
||||||
) {
|
) {
|
||||||
solana_logger::setup();
|
solana_logger::setup();
|
||||||
let (genesis_config, _) = create_genesis_config(500);
|
let (mut genesis_config, _) = create_genesis_config(500);
|
||||||
|
genesis_config.epoch_schedule = EpochSchedule::custom(400, 400, false);
|
||||||
let bank0 = Arc::new(Bank::new_for_tests(&genesis_config));
|
let bank0 = Arc::new(Bank::new_for_tests(&genesis_config));
|
||||||
let eah_start_slot = epoch_accounts_hash::calculation_start(&bank0);
|
let eah_start_slot = epoch_accounts_hash::calculation_start(&bank0);
|
||||||
let bank1 = Bank::new_from_parent(&bank0, &Pubkey::default(), 1);
|
let bank1 = Bank::new_from_parent(&bank0, &Pubkey::default(), 1);
|
||||||
|
|
Loading…
Reference in New Issue