replay: send duplicate proofs from blockstore to state machine (#32962)
* replay: send duplicate proofs from blockstore to state machine * pr feedback: bank.slot() -> slot * pr feedback
This commit is contained in:
parent
d077b13efa
commit
a8e83c8720
|
@ -293,6 +293,7 @@ impl Tower {
|
||||||
bank_forks.frozen_banks().values().cloned().collect(),
|
bank_forks.frozen_banks().values().cloned().collect(),
|
||||||
node_pubkey,
|
node_pubkey,
|
||||||
vote_account,
|
vote_account,
|
||||||
|
vec![],
|
||||||
);
|
);
|
||||||
let root = root_bank.slot();
|
let root = root_bank.slot();
|
||||||
|
|
||||||
|
|
|
@ -549,6 +549,7 @@ impl ReplayStage {
|
||||||
&bank_forks,
|
&bank_forks,
|
||||||
&my_pubkey,
|
&my_pubkey,
|
||||||
&vote_account,
|
&vote_account,
|
||||||
|
&blockstore,
|
||||||
);
|
);
|
||||||
let mut current_leader = None;
|
let mut current_leader = None;
|
||||||
let mut last_reset = Hash::default();
|
let mut last_reset = Hash::default();
|
||||||
|
@ -1230,16 +1231,29 @@ impl ReplayStage {
|
||||||
bank_forks: &RwLock<BankForks>,
|
bank_forks: &RwLock<BankForks>,
|
||||||
my_pubkey: &Pubkey,
|
my_pubkey: &Pubkey,
|
||||||
vote_account: &Pubkey,
|
vote_account: &Pubkey,
|
||||||
|
blockstore: &Blockstore,
|
||||||
) -> (ProgressMap, HeaviestSubtreeForkChoice) {
|
) -> (ProgressMap, HeaviestSubtreeForkChoice) {
|
||||||
let (root_bank, frozen_banks) = {
|
let (root_bank, frozen_banks, duplicate_slot_hashes) = {
|
||||||
let bank_forks = bank_forks.read().unwrap();
|
let bank_forks = bank_forks.read().unwrap();
|
||||||
|
let duplicate_slots = blockstore
|
||||||
|
.duplicate_slots_iterator(bank_forks.root_bank().slot())
|
||||||
|
.unwrap();
|
||||||
|
let duplicate_slot_hashes = duplicate_slots
|
||||||
|
.filter_map(|slot| bank_forks.bank_hash(slot).map(|hash| (slot, hash)));
|
||||||
(
|
(
|
||||||
bank_forks.root_bank(),
|
bank_forks.root_bank(),
|
||||||
bank_forks.frozen_banks().values().cloned().collect(),
|
bank_forks.frozen_banks().values().cloned().collect(),
|
||||||
|
duplicate_slot_hashes.collect::<Vec<(Slot, Hash)>>(),
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
|
||||||
Self::initialize_progress_and_fork_choice(&root_bank, frozen_banks, my_pubkey, vote_account)
|
Self::initialize_progress_and_fork_choice(
|
||||||
|
&root_bank,
|
||||||
|
frozen_banks,
|
||||||
|
my_pubkey,
|
||||||
|
vote_account,
|
||||||
|
duplicate_slot_hashes,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn initialize_progress_and_fork_choice(
|
pub fn initialize_progress_and_fork_choice(
|
||||||
|
@ -1247,6 +1261,7 @@ impl ReplayStage {
|
||||||
mut frozen_banks: Vec<Arc<Bank>>,
|
mut frozen_banks: Vec<Arc<Bank>>,
|
||||||
my_pubkey: &Pubkey,
|
my_pubkey: &Pubkey,
|
||||||
vote_account: &Pubkey,
|
vote_account: &Pubkey,
|
||||||
|
duplicate_slot_hashes: Vec<(Slot, Hash)>,
|
||||||
) -> (ProgressMap, HeaviestSubtreeForkChoice) {
|
) -> (ProgressMap, HeaviestSubtreeForkChoice) {
|
||||||
let mut progress = ProgressMap::default();
|
let mut progress = ProgressMap::default();
|
||||||
|
|
||||||
|
@ -1261,11 +1276,15 @@ impl ReplayStage {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
let root = root_bank.slot();
|
let root = root_bank.slot();
|
||||||
let heaviest_subtree_fork_choice = HeaviestSubtreeForkChoice::new_from_frozen_banks(
|
let mut heaviest_subtree_fork_choice = HeaviestSubtreeForkChoice::new_from_frozen_banks(
|
||||||
(root, root_bank.hash()),
|
(root, root_bank.hash()),
|
||||||
&frozen_banks,
|
&frozen_banks,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
for slot_hash in duplicate_slot_hashes {
|
||||||
|
heaviest_subtree_fork_choice.mark_fork_invalid_candidate(&slot_hash);
|
||||||
|
}
|
||||||
|
|
||||||
(progress, heaviest_subtree_fork_choice)
|
(progress, heaviest_subtree_fork_choice)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2086,6 +2105,30 @@ impl ReplayStage {
|
||||||
purge_repair_slot_counter,
|
purge_repair_slot_counter,
|
||||||
SlotStateUpdate::Dead(dead_state),
|
SlotStateUpdate::Dead(dead_state),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// If we previously marked this slot as duplicate in blockstore, let the state machine know
|
||||||
|
if !duplicate_slots_tracker.contains(&slot) && blockstore.get_duplicate_slot(slot).is_some()
|
||||||
|
{
|
||||||
|
let duplicate_state = DuplicateState::new_from_state(
|
||||||
|
slot,
|
||||||
|
gossip_duplicate_confirmed_slots,
|
||||||
|
heaviest_subtree_fork_choice,
|
||||||
|
|| true,
|
||||||
|
|| None,
|
||||||
|
);
|
||||||
|
check_slot_agrees_with_cluster(
|
||||||
|
slot,
|
||||||
|
root,
|
||||||
|
blockstore,
|
||||||
|
duplicate_slots_tracker,
|
||||||
|
epoch_slots_frozen_slots,
|
||||||
|
heaviest_subtree_fork_choice,
|
||||||
|
duplicate_slots_to_repair,
|
||||||
|
ancestor_hashes_replay_update_sender,
|
||||||
|
purge_repair_slot_counter,
|
||||||
|
SlotStateUpdate::Duplicate(duplicate_state),
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
|
@ -2827,6 +2870,30 @@ impl ReplayStage {
|
||||||
purge_repair_slot_counter,
|
purge_repair_slot_counter,
|
||||||
SlotStateUpdate::BankFrozen(bank_frozen_state),
|
SlotStateUpdate::BankFrozen(bank_frozen_state),
|
||||||
);
|
);
|
||||||
|
// If we previously marked this slot as duplicate in blockstore, let the state machine know
|
||||||
|
if !duplicate_slots_tracker.contains(&bank.slot())
|
||||||
|
&& blockstore.get_duplicate_slot(bank.slot()).is_some()
|
||||||
|
{
|
||||||
|
let duplicate_state = DuplicateState::new_from_state(
|
||||||
|
bank.slot(),
|
||||||
|
gossip_duplicate_confirmed_slots,
|
||||||
|
heaviest_subtree_fork_choice,
|
||||||
|
|| false,
|
||||||
|
|| Some(bank.hash()),
|
||||||
|
);
|
||||||
|
check_slot_agrees_with_cluster(
|
||||||
|
bank.slot(),
|
||||||
|
bank_forks.read().unwrap().root(),
|
||||||
|
blockstore,
|
||||||
|
duplicate_slots_tracker,
|
||||||
|
epoch_slots_frozen_slots,
|
||||||
|
heaviest_subtree_fork_choice,
|
||||||
|
duplicate_slots_to_repair,
|
||||||
|
ancestor_hashes_replay_update_sender,
|
||||||
|
purge_repair_slot_counter,
|
||||||
|
SlotStateUpdate::Duplicate(duplicate_state),
|
||||||
|
);
|
||||||
|
}
|
||||||
if let Some(sender) = bank_notification_sender {
|
if let Some(sender) = bank_notification_sender {
|
||||||
sender
|
sender
|
||||||
.sender
|
.sender
|
||||||
|
|
|
@ -26,6 +26,7 @@ use {
|
||||||
solana_ledger::{
|
solana_ledger::{
|
||||||
ancestor_iterator::AncestorIterator,
|
ancestor_iterator::AncestorIterator,
|
||||||
blockstore::{Blockstore, PurgeType},
|
blockstore::{Blockstore, PurgeType},
|
||||||
|
blockstore_meta::DuplicateSlotProof,
|
||||||
blockstore_options::{AccessType, BlockstoreOptions},
|
blockstore_options::{AccessType, BlockstoreOptions},
|
||||||
leader_schedule::{FixedSchedule, LeaderSchedule},
|
leader_schedule::{FixedSchedule, LeaderSchedule},
|
||||||
},
|
},
|
||||||
|
@ -153,6 +154,23 @@ pub fn wait_for_last_vote_in_tower_to_land_in_ledger(
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Waits roughly 10 seconds for duplicate proof to appear in blockstore at `dup_slot`. Returns proof if found.
|
||||||
|
pub fn wait_for_duplicate_proof(ledger_path: &Path, dup_slot: Slot) -> Option<DuplicateSlotProof> {
|
||||||
|
for _ in 0..10 {
|
||||||
|
let duplicate_fork_validator_blockstore = open_blockstore(ledger_path);
|
||||||
|
if let Some((found_dup_slot, found_duplicate_proof)) =
|
||||||
|
duplicate_fork_validator_blockstore.get_first_duplicate_proof()
|
||||||
|
{
|
||||||
|
if found_dup_slot == dup_slot {
|
||||||
|
return Some(found_duplicate_proof);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
sleep(Duration::from_millis(1000));
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
pub fn copy_blocks(end_slot: Slot, source: &Blockstore, dest: &Blockstore) {
|
pub fn copy_blocks(end_slot: Slot, source: &Blockstore, dest: &Blockstore) {
|
||||||
for slot in std::iter::once(end_slot).chain(AncestorIterator::new(end_slot, source)) {
|
for slot in std::iter::once(end_slot).chain(AncestorIterator::new(end_slot, source)) {
|
||||||
let source_meta = source.meta(slot).unwrap().unwrap();
|
let source_meta = source.meta(slot).unwrap().unwrap();
|
||||||
|
|
|
@ -24,10 +24,9 @@ use {
|
||||||
ancestor_iterator::AncestorIterator,
|
ancestor_iterator::AncestorIterator,
|
||||||
bank_forks_utils,
|
bank_forks_utils,
|
||||||
blockstore::{entries_to_test_shreds, Blockstore},
|
blockstore::{entries_to_test_shreds, Blockstore},
|
||||||
blockstore_meta::DuplicateSlotProof,
|
|
||||||
blockstore_processor::ProcessOptions,
|
blockstore_processor::ProcessOptions,
|
||||||
leader_schedule::FixedSchedule,
|
leader_schedule::FixedSchedule,
|
||||||
shred::Shred,
|
shred::{ProcessShredsStats, ReedSolomonCache, Shred, Shredder},
|
||||||
use_snapshot_archives_at_startup::UseSnapshotArchivesAtStartup,
|
use_snapshot_archives_at_startup::UseSnapshotArchivesAtStartup,
|
||||||
},
|
},
|
||||||
solana_local_cluster::{
|
solana_local_cluster::{
|
||||||
|
@ -39,7 +38,7 @@ use {
|
||||||
last_root_in_tower, last_vote_in_tower, ms_for_n_slots, open_blockstore,
|
last_root_in_tower, last_vote_in_tower, ms_for_n_slots, open_blockstore,
|
||||||
purge_slots_with_count, remove_tower, remove_tower_if_exists, restore_tower,
|
purge_slots_with_count, remove_tower, remove_tower_if_exists, restore_tower,
|
||||||
run_cluster_partition, run_kill_partition_switch_threshold, save_tower,
|
run_cluster_partition, run_kill_partition_switch_threshold, save_tower,
|
||||||
setup_snapshot_validator_config, test_faulty_node,
|
setup_snapshot_validator_config, test_faulty_node, wait_for_duplicate_proof,
|
||||||
wait_for_last_vote_in_tower_to_land_in_ledger, SnapshotValidatorConfig,
|
wait_for_last_vote_in_tower_to_land_in_ledger, SnapshotValidatorConfig,
|
||||||
ValidatorTestConfig, DEFAULT_CLUSTER_LAMPORTS, DEFAULT_NODE_STAKE, RUST_LOG_FILTER,
|
ValidatorTestConfig, DEFAULT_CLUSTER_LAMPORTS, DEFAULT_NODE_STAKE, RUST_LOG_FILTER,
|
||||||
},
|
},
|
||||||
|
@ -69,7 +68,7 @@ use {
|
||||||
client::{AsyncClient, SyncClient},
|
client::{AsyncClient, SyncClient},
|
||||||
clock::{self, Slot, DEFAULT_TICKS_PER_SLOT, MAX_PROCESSING_AGE},
|
clock::{self, Slot, DEFAULT_TICKS_PER_SLOT, MAX_PROCESSING_AGE},
|
||||||
commitment_config::CommitmentConfig,
|
commitment_config::CommitmentConfig,
|
||||||
epoch_schedule::MINIMUM_SLOTS_PER_EPOCH,
|
epoch_schedule::{DEFAULT_SLOTS_PER_EPOCH, MINIMUM_SLOTS_PER_EPOCH},
|
||||||
genesis_config::ClusterType,
|
genesis_config::ClusterType,
|
||||||
hard_forks::HardForks,
|
hard_forks::HardForks,
|
||||||
hash::Hash,
|
hash::Hash,
|
||||||
|
@ -5145,22 +5144,6 @@ fn test_duplicate_shreds_switch_failure() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn wait_for_duplicate_proof(ledger_path: &Path, dup_slot: Slot) -> Option<DuplicateSlotProof> {
|
|
||||||
for _ in 0..10 {
|
|
||||||
let duplicate_fork_validator_blockstore = open_blockstore(ledger_path);
|
|
||||||
if let Some((found_dup_slot, found_duplicate_proof)) =
|
|
||||||
duplicate_fork_validator_blockstore.get_first_duplicate_proof()
|
|
||||||
{
|
|
||||||
if found_dup_slot == dup_slot {
|
|
||||||
return Some(found_duplicate_proof);
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
sleep(Duration::from_millis(1000));
|
|
||||||
}
|
|
||||||
None
|
|
||||||
}
|
|
||||||
|
|
||||||
solana_logger::setup_with_default(RUST_LOG_FILTER);
|
solana_logger::setup_with_default(RUST_LOG_FILTER);
|
||||||
let validator_keypairs = [
|
let validator_keypairs = [
|
||||||
"28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
|
"28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
|
||||||
|
@ -5506,3 +5489,156 @@ fn test_duplicate_shreds_switch_failure() {
|
||||||
SocketAddrSpace::Unspecified,
|
SocketAddrSpace::Unspecified,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Forks previously marked invalid should be marked as such in fork choice on restart.
///
/// Scenario exercised by this test:
/// 1. Start a two-node cluster: a "target" validator with `DEFAULT_NODE_STAKE` and a
///    "majority" validator with 100x that stake whose voting is disabled. A fixed leader
///    schedule gives the majority validator the first `dup_slot + 5` slots and the target
///    validator the following `DEFAULT_SLOTS_PER_EPOCH` slots.
/// 2. Wait until the target's last tower vote that landed in its ledger is past `dup_slot`.
/// 3. Build a second version of `dup_slot` out of ticks, signed with the majority keypair,
///    send those shreds to the target, and wait for a duplicate proof to be stored.
/// 4. Stop the target, purge slots from `dup_slot + 5` onwards, and restart it.
/// 5. Poll the target's blockstore until `dup_slot`'s parent has a full child slot whose
///    data shreds all verify against the target's pubkey, i.e. a block the target itself
///    produced on a fork off `parent`.
#[test]
#[serial]
fn test_invalid_forks_persisted_on_restart() {
    solana_logger::setup_with("info,solana_metrics=off,solana_ledger=off");

    let dup_slot = 10;
    let validator_keypairs = [
        "28bN3xyvrP4E8LwEgtLjhnkb7cY4amQb6DrYAbAYjgRV4GAGgkVM2K7wnxnAS7WDneuavza7x21MiafLu1HkwQt4",
        "2saHBBoTkLMmttmPQP8KfBkcCw45S5cwtV3wTdGCscRC8uxdgvHxpHiWXKx4LvJjNJtnNcbSv5NdheokFFqnNDt8",
    ]
    .iter()
    .map(|s| (Arc::new(Keypair::from_base58_string(s)), true))
    .collect::<Vec<_>>();
    let majority_keypair = validator_keypairs[1].0.clone();

    let validators = validator_keypairs
        .iter()
        .map(|(kp, _)| kp.pubkey())
        .collect::<Vec<_>>();

    let node_stakes = vec![DEFAULT_NODE_STAKE, 100 * DEFAULT_NODE_STAKE];
    let (target_pubkey, majority_pubkey) = (validators[0], validators[1]);
    // Need majority validator to make the dup_slot
    let validator_to_slots = vec![
        (majority_pubkey, dup_slot as usize + 5),
        (target_pubkey, DEFAULT_SLOTS_PER_EPOCH as usize),
    ];
    let leader_schedule = create_custom_leader_schedule(validator_to_slots.into_iter());
    let mut default_config = ValidatorConfig::default_for_test();
    default_config.fixed_leader_schedule = Some(FixedSchedule {
        leader_schedule: Arc::new(leader_schedule),
    });
    let mut validator_configs = make_identical_validator_configs(&default_config, 2);
    // Majority shouldn't duplicate confirm anything
    validator_configs[1].voting_disabled = true;

    let mut cluster = LocalCluster::new(
        &mut ClusterConfig {
            cluster_lamports: DEFAULT_CLUSTER_LAMPORTS + node_stakes.iter().sum::<u64>(),
            validator_configs,
            node_stakes,
            validator_keys: Some(validator_keypairs),
            skip_warmup_slots: true,
            ..ClusterConfig::default()
        },
        SocketAddrSpace::Unspecified,
    );

    let target_ledger_path = cluster.ledger_path(&target_pubkey);

    // Wait for us to vote past duplicate slot
    let timer = Instant::now();
    loop {
        if let Some(slot) =
            wait_for_last_vote_in_tower_to_land_in_ledger(&target_ledger_path, &target_pubkey)
        {
            if slot > dup_slot {
                break;
            }
        }

        assert!(
            timer.elapsed() < Duration::from_secs(30),
            "Did not make more than 10 blocks in 30 seconds"
        );
        sleep(Duration::from_millis(100));
    }

    // Send duplicate
    let parent = {
        let blockstore = open_blockstore(&target_ledger_path);
        let parent = blockstore
            .meta(dup_slot)
            .expect("Reading dup_slot meta from blockstore must succeed")
            .expect("Meta for dup_slot must exist once we have voted past it")
            .parent_slot
            .expect("dup_slot must have a parent");

        // Tick-only entries for the alternate version of `dup_slot`: 64 ticks for every
        // slot between `parent` and `dup_slot` (at least one slot's worth).
        let entries = create_ticks(
            64 * (std::cmp::max(1, dup_slot - parent)),
            0,
            cluster.genesis_config.hash(),
        );
        let last_hash = entries.last().unwrap().hash;
        let version = solana_sdk::shred_version::version_from_hash(&last_hash);
        let dup_shreds = Shredder::new(dup_slot, parent, 0, version)
            .unwrap()
            .entries_to_shreds(
                &majority_keypair,
                &entries,
                true,  // is_full_slot
                0,     // next_shred_index,
                0,     // next_code_index
                false, // merkle_variant
                &ReedSolomonCache::default(),
                &mut ProcessShredsStats::default(),
            )
            .0;

        info!("Sending duplicate shreds for {dup_slot}");
        cluster.send_shreds_to_validator(dup_shreds.iter().collect(), &target_pubkey);
        // `Option::expect` does not interpolate its message, so build the panic message
        // with `panic!` to actually include the slot number on failure.
        wait_for_duplicate_proof(&target_ledger_path, dup_slot)
            .unwrap_or_else(|| panic!("Duplicate proof for {dup_slot} not found"));
        parent
    };

    info!("Duplicate proof for {dup_slot} has landed, restarting node");
    let info = cluster.exit_node(&target_pubkey);

    {
        let blockstore = open_blockstore(&target_ledger_path);
        purge_slots_with_count(&blockstore, dup_slot + 5, 100);
    }

    // Restart, should create an entirely new fork
    cluster.restart_node(&target_pubkey, info, SocketAddrSpace::Unspecified);

    info!("Waiting for fork built off {parent}");
    let timer = Instant::now();
    // Children of `parent` that were already full and have been inspected once.
    let mut checked_children: HashSet<Slot> = HashSet::default();
    let mut done = false;
    while !done {
        let blockstore = open_blockstore(&target_ledger_path);
        let parent_meta = blockstore.meta(parent).unwrap().expect("Meta must exist");
        for child in parent_meta.next_slots {
            if checked_children.contains(&child) {
                continue;
            }

            if blockstore.is_full(child) {
                let shreds = blockstore
                    .get_data_shreds_for_slot(child, 0)
                    .expect("Child is full");
                // The block is ours only if every data shred verifies against our pubkey.
                let is_our_block = shreds.iter().all(|shred| shred.verify(&target_pubkey));
                if is_our_block {
                    done = true;
                }
                checked_children.insert(child);
            }
        }

        assert!(
            timer.elapsed() < Duration::from_secs(30),
            "Did not create a new fork off parent {parent} in 30 seconds after restart"
        );
        sleep(Duration::from_millis(100));
    }
}
|
||||||
|
|
Loading…
Reference in New Issue