validator: remove optional remote accounts hash consistency check (#31279)

This commit is contained in:
Trent Nelson 2023-05-16 14:23:13 -06:00 committed by GitHub
parent a9b19f5b14
commit ad67fd5be5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 11 additions and 252 deletions

View File

@ -1,8 +1,7 @@
// Service to verify accounts hashes with other known validator nodes. // Service to verify accounts hashes with other known validator nodes.
// //
// Each interval, publish the snapshot hash which is the full accounts state // Each interval, publish the snapshot hash which is the full accounts state
// hash on gossip. Monitor gossip for messages from validators in the `--known-validator`s // hash on gossip.
// set and halt the node if a mismatch is detected.
use { use {
crossbeam_channel::{Receiver, Sender}, crossbeam_channel::{Receiver, Sender},
@ -26,10 +25,8 @@ use {
solana_sdk::{ solana_sdk::{
clock::{Slot, DEFAULT_MS_PER_SLOT}, clock::{Slot, DEFAULT_MS_PER_SLOT},
hash::Hash, hash::Hash,
pubkey::Pubkey,
}, },
std::{ std::{
collections::{HashMap, HashSet},
sync::{ sync::{
atomic::{AtomicBool, Ordering}, atomic::{AtomicBool, Ordering},
Arc, Arc,
@ -52,8 +49,6 @@ impl AccountsHashVerifier {
snapshot_package_sender: Option<Sender<SnapshotPackage>>, snapshot_package_sender: Option<Sender<SnapshotPackage>>,
exit: Arc<AtomicBool>, exit: Arc<AtomicBool>,
cluster_info: Arc<ClusterInfo>, cluster_info: Arc<ClusterInfo>,
known_validators: Option<HashSet<Pubkey>>,
halt_on_known_validators_accounts_hash_mismatch: bool,
accounts_hash_fault_injector: Option<AccountsHashFaultInjector>, accounts_hash_fault_injector: Option<AccountsHashFaultInjector>,
snapshot_config: SnapshotConfig, snapshot_config: SnapshotConfig,
) -> Self { ) -> Self {
@ -92,11 +87,8 @@ impl AccountsHashVerifier {
let (_, handling_time_us) = measure_us!(Self::process_accounts_package( let (_, handling_time_us) = measure_us!(Self::process_accounts_package(
accounts_package, accounts_package,
&cluster_info, &cluster_info,
known_validators.as_ref(),
halt_on_known_validators_accounts_hash_mismatch,
snapshot_package_sender.as_ref(), snapshot_package_sender.as_ref(),
&mut hashes, &mut hashes,
&exit,
&snapshot_config, &snapshot_config,
accounts_hash_fault_injector, accounts_hash_fault_injector,
)); ));
@ -212,11 +204,8 @@ impl AccountsHashVerifier {
fn process_accounts_package( fn process_accounts_package(
accounts_package: AccountsPackage, accounts_package: AccountsPackage,
cluster_info: &ClusterInfo, cluster_info: &ClusterInfo,
known_validators: Option<&HashSet<Pubkey>>,
halt_on_known_validator_accounts_hash_mismatch: bool,
snapshot_package_sender: Option<&Sender<SnapshotPackage>>, snapshot_package_sender: Option<&Sender<SnapshotPackage>>,
hashes: &mut Vec<(Slot, Hash)>, hashes: &mut Vec<(Slot, Hash)>,
exit: &AtomicBool,
snapshot_config: &SnapshotConfig, snapshot_config: &SnapshotConfig,
accounts_hash_fault_injector: Option<AccountsHashFaultInjector>, accounts_hash_fault_injector: Option<AccountsHashFaultInjector>,
) { ) {
@ -228,10 +217,7 @@ impl AccountsHashVerifier {
Self::push_accounts_hashes_to_cluster( Self::push_accounts_hashes_to_cluster(
&accounts_package, &accounts_package,
cluster_info, cluster_info,
known_validators,
halt_on_known_validator_accounts_hash_mismatch,
hashes, hashes,
exit,
accounts_hash, accounts_hash,
accounts_hash_fault_injector, accounts_hash_fault_injector,
); );
@ -504,10 +490,7 @@ impl AccountsHashVerifier {
fn push_accounts_hashes_to_cluster( fn push_accounts_hashes_to_cluster(
accounts_package: &AccountsPackage, accounts_package: &AccountsPackage,
cluster_info: &ClusterInfo, cluster_info: &ClusterInfo,
known_validators: Option<&HashSet<Pubkey>>,
halt_on_known_validator_accounts_hash_mismatch: bool,
hashes: &mut Vec<(Slot, Hash)>, hashes: &mut Vec<(Slot, Hash)>,
exit: &AtomicBool,
accounts_hash: AccountsHashEnum, accounts_hash: AccountsHashEnum,
accounts_hash_fault_injector: Option<AccountsHashFaultInjector>, accounts_hash_fault_injector: Option<AccountsHashFaultInjector>,
) { ) {
@ -518,16 +501,6 @@ impl AccountsHashVerifier {
retain_max_n_elements(hashes, MAX_ACCOUNTS_HASHES); retain_max_n_elements(hashes, MAX_ACCOUNTS_HASHES);
if halt_on_known_validator_accounts_hash_mismatch {
let mut slot_to_hash = HashMap::new();
for (slot, hash) in hashes.iter() {
slot_to_hash.insert(*slot, *hash);
}
if Self::should_halt(cluster_info, known_validators, &mut slot_to_hash) {
exit.store(true, Ordering::Relaxed);
}
}
cluster_info.push_accounts_hashes(hashes.clone()); cluster_info.push_accounts_hashes(hashes.clone());
} }
@ -555,52 +528,6 @@ impl AccountsHashVerifier {
.expect("send snapshot package"); .expect("send snapshot package");
} }
fn should_halt(
cluster_info: &ClusterInfo,
known_validators: Option<&HashSet<Pubkey>>,
slot_to_hash: &mut HashMap<Slot, Hash>,
) -> bool {
let mut verified_count = 0;
let mut highest_slot = 0;
if let Some(known_validators) = known_validators {
for known_validator in known_validators {
let is_conflicting = cluster_info.get_accounts_hash_for_node(known_validator, |accounts_hashes|
{
accounts_hashes.iter().any(|(slot, hash)| {
if let Some(reference_hash) = slot_to_hash.get(slot) {
if *hash != *reference_hash {
error!("Fatal! Exiting! Known validator {} produced conflicting hashes for slot: {} ({} != {})",
known_validator,
slot,
hash,
reference_hash,
);
true
} else {
verified_count += 1;
false
}
} else {
highest_slot = std::cmp::max(*slot, highest_slot);
slot_to_hash.insert(*slot, *hash);
false
}
})
}).unwrap_or(false);
if is_conflicting {
return true;
}
}
}
datapoint_info!(
"accounts_hash_verifier",
("highest_slot_verified", highest_slot, i64),
("num_verified", verified_count, i64),
);
false
}
pub fn join(self) -> thread::Result<()> { pub fn join(self) -> thread::Result<()> {
self.t_accounts_hash_verifier.join() self.t_accounts_hash_verifier.join()
} }
@ -611,10 +538,9 @@ mod tests {
use { use {
super::*, super::*,
rand::seq::SliceRandom, rand::seq::SliceRandom,
solana_gossip::{cluster_info::make_accounts_hashes_message, contact_info::ContactInfo}, solana_gossip::contact_info::ContactInfo,
solana_runtime::snapshot_package::SnapshotType, solana_runtime::snapshot_package::SnapshotType,
solana_sdk::{ solana_sdk::{
hash::hash,
signature::{Keypair, Signer}, signature::{Keypair, Signer},
timing::timestamp, timing::timestamp,
}, },
@ -628,44 +554,12 @@ mod tests {
ClusterInfo::new(contact_info, keypair, SocketAddrSpace::Unspecified) ClusterInfo::new(contact_info, keypair, SocketAddrSpace::Unspecified)
} }
/// `should_halt` must return `false` when there is nothing to compare and
/// `true` once a known validator has gossiped a hash that disagrees with the
/// local reference hash for the same slot.
#[test]
fn test_should_halt() {
    let cluster_info = Arc::new(new_test_cluster_info());
    let mut known_validators = HashSet::new();
    let mut slot_to_hash = HashMap::new();

    // Empty validator set and empty reference map: no reason to halt.
    let halt_before = AccountsHashVerifier::should_halt(
        &cluster_info,
        Some(&known_validators),
        &mut slot_to_hash,
    );
    assert!(!halt_before);

    // Publish `hash1` for slot 0 on behalf of `validator1`.
    let validator1 = Keypair::new();
    let (hash1, hash2) = (hash(&[1]), hash(&[2]));
    let message = make_accounts_hashes_message(&validator1, vec![(0, hash1)]).unwrap();
    cluster_info.push_message(message);
    cluster_info.flush_push_queue();

    // Locally slot 0 maps to a different hash, and `validator1` is now known.
    slot_to_hash.insert(0, hash2);
    known_validators.insert(validator1.pubkey());

    let halt_after = AccountsHashVerifier::should_halt(
        &cluster_info,
        Some(&known_validators),
        &mut slot_to_hash,
    );
    assert!(halt_after);
}
#[test] #[test]
fn test_max_hashes() { fn test_max_hashes() {
solana_logger::setup(); solana_logger::setup();
let cluster_info = new_test_cluster_info(); let cluster_info = new_test_cluster_info();
let cluster_info = Arc::new(cluster_info); let cluster_info = Arc::new(cluster_info);
let known_validators = HashSet::new();
let exit = Arc::new(AtomicBool::new(false));
let mut hashes = vec![]; let mut hashes = vec![];
let full_snapshot_archive_interval_slots = 100; let full_snapshot_archive_interval_slots = 100;
let snapshot_config = SnapshotConfig { let snapshot_config = SnapshotConfig {
@ -685,11 +579,8 @@ mod tests {
AccountsHashVerifier::process_accounts_package( AccountsHashVerifier::process_accounts_package(
accounts_package, accounts_package,
&cluster_info, &cluster_info,
Some(&known_validators),
false,
None, None,
&mut hashes, &mut hashes,
&exit,
&snapshot_config, &snapshot_config,
None, None,
); );

View File

@ -208,7 +208,6 @@ pub struct ValidatorConfig {
pub repair_validators: Option<HashSet<Pubkey>>, // None = repair from all pub repair_validators: Option<HashSet<Pubkey>>, // None = repair from all
pub repair_whitelist: Arc<RwLock<HashSet<Pubkey>>>, // Empty = repair with all pub repair_whitelist: Arc<RwLock<HashSet<Pubkey>>>, // Empty = repair with all
pub gossip_validators: Option<HashSet<Pubkey>>, // None = gossip with all pub gossip_validators: Option<HashSet<Pubkey>>, // None = gossip with all
pub halt_on_known_validators_accounts_hash_mismatch: bool,
pub accounts_hash_fault_injector: Option<AccountsHashFaultInjector>, pub accounts_hash_fault_injector: Option<AccountsHashFaultInjector>,
pub accounts_hash_interval_slots: u64, pub accounts_hash_interval_slots: u64,
pub max_genesis_archive_unpacked_size: u64, pub max_genesis_archive_unpacked_size: u64,
@ -277,7 +276,6 @@ impl Default for ValidatorConfig {
repair_validators: None, repair_validators: None,
repair_whitelist: Arc::new(RwLock::new(HashSet::default())), repair_whitelist: Arc::new(RwLock::new(HashSet::default())),
gossip_validators: None, gossip_validators: None,
halt_on_known_validators_accounts_hash_mismatch: false,
accounts_hash_fault_injector: None, accounts_hash_fault_injector: None,
accounts_hash_interval_slots: std::u64::MAX, accounts_hash_interval_slots: std::u64::MAX,
max_genesis_archive_unpacked_size: MAX_GENESIS_ARCHIVE_UNPACKED_SIZE, max_genesis_archive_unpacked_size: MAX_GENESIS_ARCHIVE_UNPACKED_SIZE,
@ -737,8 +735,6 @@ impl Validator {
snapshot_package_sender, snapshot_package_sender,
exit.clone(), exit.clone(),
cluster_info.clone(), cluster_info.clone(),
config.known_validators.clone(),
config.halt_on_known_validators_accounts_hash_mismatch,
config.accounts_hash_fault_injector, config.accounts_hash_fault_injector,
config.snapshot_config.clone(), config.snapshot_config.clone(),
); );

View File

@ -195,8 +195,6 @@ impl BackgroundServices {
exit.clone(), exit.clone(),
cluster_info, cluster_info,
None, None,
false,
None,
snapshot_config.clone(), snapshot_config.clone(),
); );

View File

@ -1002,8 +1002,6 @@ fn test_snapshots_with_background_services(
exit.clone(), exit.clone(),
cluster_info, cluster_info,
None, None,
false,
None,
snapshot_test_config.snapshot_config.clone(), snapshot_test_config.snapshot_config.clone(),
); );

View File

@ -265,8 +265,6 @@ pub fn load_bank_forks(
exit.clone(), exit.clone(),
cluster_info, cluster_info,
None, None,
false,
None,
SnapshotConfig::new_load_only(), SnapshotConfig::new_load_only(),
); );
let (snapshot_request_sender, snapshot_request_receiver) = crossbeam_channel::unbounded(); let (snapshot_request_sender, snapshot_request_receiver) = crossbeam_channel::unbounded();

View File

@ -30,8 +30,6 @@ pub fn safe_clone_config(config: &ValidatorConfig) -> ValidatorConfig {
repair_validators: config.repair_validators.clone(), repair_validators: config.repair_validators.clone(),
repair_whitelist: config.repair_whitelist.clone(), repair_whitelist: config.repair_whitelist.clone(),
gossip_validators: config.gossip_validators.clone(), gossip_validators: config.gossip_validators.clone(),
halt_on_known_validators_accounts_hash_mismatch: config
.halt_on_known_validators_accounts_hash_mismatch,
accounts_hash_interval_slots: config.accounts_hash_interval_slots, accounts_hash_interval_slots: config.accounts_hash_interval_slots,
accounts_hash_fault_injector: config.accounts_hash_fault_injector, accounts_hash_fault_injector: config.accounts_hash_fault_injector,
max_genesis_archive_unpacked_size: config.max_genesis_archive_unpacked_size, max_genesis_archive_unpacked_size: config.max_genesis_archive_unpacked_size,

View File

@ -4,10 +4,8 @@
use { use {
common::*, common::*,
log::*, log::*,
rand::{thread_rng, Rng},
serial_test::serial, serial_test::serial,
solana_core::validator::ValidatorConfig, solana_core::validator::ValidatorConfig,
solana_gossip::gossip_service::discover_cluster,
solana_ledger::{ solana_ledger::{
ancestor_iterator::AncestorIterator, blockstore::Blockstore, leader_schedule::FixedSchedule, ancestor_iterator::AncestorIterator, blockstore::Blockstore, leader_schedule::FixedSchedule,
}, },
@ -18,9 +16,8 @@ use {
validator_configs::*, validator_configs::*,
}, },
solana_sdk::{ solana_sdk::{
client::SyncClient,
clock::Slot, clock::Slot,
hash::{extend_and_hash, Hash}, hash::Hash,
poh_config::PohConfig, poh_config::PohConfig,
signature::{Keypair, Signer}, signature::{Keypair, Signer},
}, },
@ -74,119 +71,6 @@ fn test_cluster_partition_1_1_1() {
) )
} }
// Local-cluster test for `halt_on_known_validators_accounts_hash_mismatch`.
//
// A single-node cluster is started whose leader has an accounts hash fault
// injector installed. A second validator that lists the leader as a known
// validator, with the halt flag enabled, is then added. The test passes only
// if cluster discovery for two nodes fails (or reports fewer than two nodes)
// before the entry point's reported slot exceeds 210.
#[test]
#[serial]
fn test_consistency_halt() {
solana_logger::setup_with_default(RUST_LOG_FILTER);
let snapshot_interval_slots = 20;
let num_account_paths = 1;
// Create cluster with a leader producing bad snapshot hashes.
let mut leader_snapshot_test_config =
setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
// Prepare fault hash injection for testing.
// The injector returns a replacement hash only for slots that are a multiple
// of FAULT_INJECTION_RATE_SLOTS; `then` yields `None` for every other slot.
leader_snapshot_test_config
.validator_config
.accounts_hash_fault_injector = Some(|hash: &Hash, slot: Slot| {
const FAULT_INJECTION_RATE_SLOTS: u64 = 40; // Inject a fault hash every 40 slots
(slot % FAULT_INJECTION_RATE_SLOTS == 0).then(|| {
// Derive the fault hash from the given hash extended with a random byte.
let rand = thread_rng().gen_range(0, 10);
let fault_hash = extend_and_hash(hash, &[rand]);
warn!("inserting fault at slot: {}", slot);
fault_hash
})
});
let validator_stake = DEFAULT_NODE_STAKE;
// Single-validator cluster: the faulty leader is the only initial node.
let mut config = ClusterConfig {
node_stakes: vec![validator_stake],
cluster_lamports: DEFAULT_CLUSTER_LAMPORTS,
validator_configs: vec![leader_snapshot_test_config.validator_config],
..ClusterConfig::default()
};
let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
// Fixed 5 second pause before querying gossip for the leader.
sleep(Duration::from_millis(5000));
let cluster_nodes = discover_cluster(
&cluster.entry_point_info.gossip().unwrap(),
1,
SocketAddrSpace::Unspecified,
)
.unwrap();
info!("num_nodes: {}", cluster_nodes.len());
// Add a validator with the leader as trusted, it should halt when it detects
// mismatch.
let mut validator_snapshot_test_config =
setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
let mut known_validators = HashSet::new();
known_validators.insert(*cluster_nodes[0].pubkey());
validator_snapshot_test_config
.validator_config
.known_validators = Some(known_validators);
validator_snapshot_test_config
.validator_config
.halt_on_known_validators_accounts_hash_mismatch = true;
warn!("adding a validator");
cluster.add_validator(
&validator_snapshot_test_config.validator_config,
validator_stake,
Arc::new(Keypair::new()),
None,
SocketAddrSpace::Unspecified,
);
// Both nodes must be discoverable before the halt is awaited.
let num_nodes = 2;
assert_eq!(
discover_cluster(
&cluster.entry_point_info.gossip().unwrap(),
num_nodes,
SocketAddrSpace::Unspecified
)
.unwrap()
.len(),
num_nodes
);
// Check for only 1 node on the network.
// Poll once per second. The loop ends either when discovery of two nodes
// fails or returns fewer than two (the expected outcome), or when the entry
// point reports a slot above 210 without that happening (the test fails).
let mut encountered_error = false;
loop {
let discover = discover_cluster(
&cluster.entry_point_info.gossip().unwrap(),
2,
SocketAddrSpace::Unspecified,
);
match discover {
Err(_) => {
encountered_error = true;
break;
}
Ok(nodes) => {
if nodes.len() < 2 {
encountered_error = true;
break;
}
info!("checking cluster for fewer nodes.. {:?}", nodes.len());
}
}
let client = cluster
.get_validator_client(cluster.entry_point_info.pubkey())
.unwrap();
// Slot-based cutoff; a failed `get_slot` call is ignored and polling continues.
if let Ok(slot) = client.get_slot() {
if slot > 210 {
break;
}
info!("slot: {}", slot);
}
sleep(Duration::from_millis(1000));
}
assert!(encountered_error);
}
// Cluster needs a supermajority to remain, so the minimum size for this test is 4 // Cluster needs a supermajority to remain, so the minimum size for this test is 4
#[test] #[test]
#[serial] #[serial]

View File

@ -1092,14 +1092,6 @@ pub fn app<'a>(version: &'a str, default_args: &'a DefaultArgs) -> App<'a, 'a> {
.multiple(true) .multiple(true)
.help("Specify the configuration file for the Geyser plugin."), .help("Specify the configuration file for the Geyser plugin."),
) )
.arg(
Arg::with_name("halt_on_known_validators_accounts_hash_mismatch")
.alias("halt-on-trusted-validators-accounts-hash-mismatch")
.long("halt-on-known-validators-accounts-hash-mismatch")
.requires("known_validators")
.takes_value(false)
.help("Abort the validator if a bank hash mismatch is detected within known validator set"),
)
.arg( .arg(
Arg::with_name("snapshot_archive_format") Arg::with_name("snapshot_archive_format")
.long("snapshot-archive-format") .long("snapshot-archive-format")
@ -1732,6 +1724,14 @@ fn deprecated_arguments() -> Vec<DeprecatedArg> {
.long("enable-quic-servers"), .long("enable-quic-servers"),
usage_warning: "The quic server is now enabled by default.", usage_warning: "The quic server is now enabled by default.",
); );
add_arg!(
Arg::with_name("halt_on_known_validators_accounts_hash_mismatch")
.alias("halt-on-trusted-validators-accounts-hash-mismatch")
.long("halt-on-known-validators-accounts-hash-mismatch")
.requires("known_validators")
.takes_value(false)
.help("Abort the validator if a bank hash mismatch is detected within known validator set"),
);
add_arg!(Arg::with_name("incremental_snapshots") add_arg!(Arg::with_name("incremental_snapshots")
.long("incremental-snapshots") .long("incremental-snapshots")
.takes_value(false) .takes_value(false)

View File

@ -1612,10 +1612,6 @@ pub fn main() {
), ),
}; };
if matches.is_present("halt_on_known_validators_accounts_hash_mismatch") {
validator_config.halt_on_known_validators_accounts_hash_mismatch = true;
}
let public_rpc_addr = matches.value_of("public_rpc_addr").map(|addr| { let public_rpc_addr = matches.value_of("public_rpc_addr").map(|addr| {
solana_net_utils::parse_host_port(addr).unwrap_or_else(|e| { solana_net_utils::parse_host_port(addr).unwrap_or_else(|e| {
eprintln!("failed to parse public rpc address: {e}"); eprintln!("failed to parse public rpc address: {e}");