validator: remove optional remote accounts hash consistency check (#31279)

This commit is contained in:
Trent Nelson 2023-05-16 14:23:13 -06:00 committed by GitHub
parent a9b19f5b14
commit ad67fd5be5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 11 additions and 252 deletions

View File

@ -1,8 +1,7 @@
// Service to verify accounts hashes with other known validator nodes.
//
// Each interval, publish the snapshot hash which is the full accounts state
// hash on gossip. Monitor gossip for messages from validators in the `--known-validator`s
// set and halt the node if a mismatch is detected.
// hash on gossip.
use {
crossbeam_channel::{Receiver, Sender},
@ -26,10 +25,8 @@ use {
solana_sdk::{
clock::{Slot, DEFAULT_MS_PER_SLOT},
hash::Hash,
pubkey::Pubkey,
},
std::{
collections::{HashMap, HashSet},
sync::{
atomic::{AtomicBool, Ordering},
Arc,
@ -52,8 +49,6 @@ impl AccountsHashVerifier {
snapshot_package_sender: Option<Sender<SnapshotPackage>>,
exit: Arc<AtomicBool>,
cluster_info: Arc<ClusterInfo>,
known_validators: Option<HashSet<Pubkey>>,
halt_on_known_validators_accounts_hash_mismatch: bool,
accounts_hash_fault_injector: Option<AccountsHashFaultInjector>,
snapshot_config: SnapshotConfig,
) -> Self {
@ -92,11 +87,8 @@ impl AccountsHashVerifier {
let (_, handling_time_us) = measure_us!(Self::process_accounts_package(
accounts_package,
&cluster_info,
known_validators.as_ref(),
halt_on_known_validators_accounts_hash_mismatch,
snapshot_package_sender.as_ref(),
&mut hashes,
&exit,
&snapshot_config,
accounts_hash_fault_injector,
));
@ -212,11 +204,8 @@ impl AccountsHashVerifier {
fn process_accounts_package(
accounts_package: AccountsPackage,
cluster_info: &ClusterInfo,
known_validators: Option<&HashSet<Pubkey>>,
halt_on_known_validator_accounts_hash_mismatch: bool,
snapshot_package_sender: Option<&Sender<SnapshotPackage>>,
hashes: &mut Vec<(Slot, Hash)>,
exit: &AtomicBool,
snapshot_config: &SnapshotConfig,
accounts_hash_fault_injector: Option<AccountsHashFaultInjector>,
) {
@ -228,10 +217,7 @@ impl AccountsHashVerifier {
Self::push_accounts_hashes_to_cluster(
&accounts_package,
cluster_info,
known_validators,
halt_on_known_validator_accounts_hash_mismatch,
hashes,
exit,
accounts_hash,
accounts_hash_fault_injector,
);
@ -504,10 +490,7 @@ impl AccountsHashVerifier {
fn push_accounts_hashes_to_cluster(
accounts_package: &AccountsPackage,
cluster_info: &ClusterInfo,
known_validators: Option<&HashSet<Pubkey>>,
halt_on_known_validator_accounts_hash_mismatch: bool,
hashes: &mut Vec<(Slot, Hash)>,
exit: &AtomicBool,
accounts_hash: AccountsHashEnum,
accounts_hash_fault_injector: Option<AccountsHashFaultInjector>,
) {
@ -518,16 +501,6 @@ impl AccountsHashVerifier {
retain_max_n_elements(hashes, MAX_ACCOUNTS_HASHES);
if halt_on_known_validator_accounts_hash_mismatch {
let mut slot_to_hash = HashMap::new();
for (slot, hash) in hashes.iter() {
slot_to_hash.insert(*slot, *hash);
}
if Self::should_halt(cluster_info, known_validators, &mut slot_to_hash) {
exit.store(true, Ordering::Relaxed);
}
}
cluster_info.push_accounts_hashes(hashes.clone());
}
@ -555,52 +528,6 @@ impl AccountsHashVerifier {
.expect("send snapshot package");
}
fn should_halt(
cluster_info: &ClusterInfo,
known_validators: Option<&HashSet<Pubkey>>,
slot_to_hash: &mut HashMap<Slot, Hash>,
) -> bool {
let mut verified_count = 0;
let mut highest_slot = 0;
if let Some(known_validators) = known_validators {
for known_validator in known_validators {
let is_conflicting = cluster_info.get_accounts_hash_for_node(known_validator, |accounts_hashes|
{
accounts_hashes.iter().any(|(slot, hash)| {
if let Some(reference_hash) = slot_to_hash.get(slot) {
if *hash != *reference_hash {
error!("Fatal! Exiting! Known validator {} produced conflicting hashes for slot: {} ({} != {})",
known_validator,
slot,
hash,
reference_hash,
);
true
} else {
verified_count += 1;
false
}
} else {
highest_slot = std::cmp::max(*slot, highest_slot);
slot_to_hash.insert(*slot, *hash);
false
}
})
}).unwrap_or(false);
if is_conflicting {
return true;
}
}
}
datapoint_info!(
"accounts_hash_verifier",
("highest_slot_verified", highest_slot, i64),
("num_verified", verified_count, i64),
);
false
}
/// Consumes the service, joins `t_accounts_hash_verifier`, and returns the
/// result of that join.
pub fn join(self) -> thread::Result<()> {
    let verifier_handle = self.t_accounts_hash_verifier;
    verifier_handle.join()
}
@ -611,10 +538,9 @@ mod tests {
use {
super::*,
rand::seq::SliceRandom,
solana_gossip::{cluster_info::make_accounts_hashes_message, contact_info::ContactInfo},
solana_gossip::contact_info::ContactInfo,
solana_runtime::snapshot_package::SnapshotType,
solana_sdk::{
hash::hash,
signature::{Keypair, Signer},
timing::timestamp,
},
@ -628,44 +554,12 @@ mod tests {
ClusterInfo::new(contact_info, keypair, SocketAddrSpace::Unspecified)
}
#[test]
fn test_should_halt() {
    let cluster_info = Arc::new(new_test_cluster_info());
    let mut trusted = HashSet::new();
    let mut local_hashes = HashMap::new();

    // Empty known-validator set and empty local map: no halt expected.
    let halt = AccountsHashVerifier::should_halt(
        &cluster_info,
        Some(&trusted),
        &mut local_hashes,
    );
    assert!(!halt);

    let peer = Keypair::new();
    let peer_hash = hash(&[1]);
    let local_hash = hash(&[2]);

    // Publish the peer's hash for slot 0 through the local cluster info.
    let message = make_accounts_hashes_message(&peer, vec![(0, peer_hash)]).unwrap();
    cluster_info.push_message(message);
    cluster_info.flush_push_queue();

    // Record a different local hash for slot 0 and mark the peer as known;
    // the check is now expected to report a halt.
    local_hashes.insert(0, local_hash);
    trusted.insert(peer.pubkey());
    let halt = AccountsHashVerifier::should_halt(
        &cluster_info,
        Some(&trusted),
        &mut local_hashes,
    );
    assert!(halt);
}
#[test]
fn test_max_hashes() {
solana_logger::setup();
let cluster_info = new_test_cluster_info();
let cluster_info = Arc::new(cluster_info);
let known_validators = HashSet::new();
let exit = Arc::new(AtomicBool::new(false));
let mut hashes = vec![];
let full_snapshot_archive_interval_slots = 100;
let snapshot_config = SnapshotConfig {
@ -685,11 +579,8 @@ mod tests {
AccountsHashVerifier::process_accounts_package(
accounts_package,
&cluster_info,
Some(&known_validators),
false,
None,
&mut hashes,
&exit,
&snapshot_config,
None,
);

View File

@ -208,7 +208,6 @@ pub struct ValidatorConfig {
pub repair_validators: Option<HashSet<Pubkey>>, // None = repair from all
pub repair_whitelist: Arc<RwLock<HashSet<Pubkey>>>, // Empty = repair with all
pub gossip_validators: Option<HashSet<Pubkey>>, // None = gossip with all
pub halt_on_known_validators_accounts_hash_mismatch: bool,
pub accounts_hash_fault_injector: Option<AccountsHashFaultInjector>,
pub accounts_hash_interval_slots: u64,
pub max_genesis_archive_unpacked_size: u64,
@ -277,7 +276,6 @@ impl Default for ValidatorConfig {
repair_validators: None,
repair_whitelist: Arc::new(RwLock::new(HashSet::default())),
gossip_validators: None,
halt_on_known_validators_accounts_hash_mismatch: false,
accounts_hash_fault_injector: None,
accounts_hash_interval_slots: std::u64::MAX,
max_genesis_archive_unpacked_size: MAX_GENESIS_ARCHIVE_UNPACKED_SIZE,
@ -737,8 +735,6 @@ impl Validator {
snapshot_package_sender,
exit.clone(),
cluster_info.clone(),
config.known_validators.clone(),
config.halt_on_known_validators_accounts_hash_mismatch,
config.accounts_hash_fault_injector,
config.snapshot_config.clone(),
);

View File

@ -195,8 +195,6 @@ impl BackgroundServices {
exit.clone(),
cluster_info,
None,
false,
None,
snapshot_config.clone(),
);

View File

@ -1002,8 +1002,6 @@ fn test_snapshots_with_background_services(
exit.clone(),
cluster_info,
None,
false,
None,
snapshot_test_config.snapshot_config.clone(),
);

View File

@ -265,8 +265,6 @@ pub fn load_bank_forks(
exit.clone(),
cluster_info,
None,
false,
None,
SnapshotConfig::new_load_only(),
);
let (snapshot_request_sender, snapshot_request_receiver) = crossbeam_channel::unbounded();

View File

@ -30,8 +30,6 @@ pub fn safe_clone_config(config: &ValidatorConfig) -> ValidatorConfig {
repair_validators: config.repair_validators.clone(),
repair_whitelist: config.repair_whitelist.clone(),
gossip_validators: config.gossip_validators.clone(),
halt_on_known_validators_accounts_hash_mismatch: config
.halt_on_known_validators_accounts_hash_mismatch,
accounts_hash_interval_slots: config.accounts_hash_interval_slots,
accounts_hash_fault_injector: config.accounts_hash_fault_injector,
max_genesis_archive_unpacked_size: config.max_genesis_archive_unpacked_size,

View File

@ -4,10 +4,8 @@
use {
common::*,
log::*,
rand::{thread_rng, Rng},
serial_test::serial,
solana_core::validator::ValidatorConfig,
solana_gossip::gossip_service::discover_cluster,
solana_ledger::{
ancestor_iterator::AncestorIterator, blockstore::Blockstore, leader_schedule::FixedSchedule,
},
@ -18,9 +16,8 @@ use {
validator_configs::*,
},
solana_sdk::{
client::SyncClient,
clock::Slot,
hash::{extend_and_hash, Hash},
hash::Hash,
poh_config::PohConfig,
signature::{Keypair, Signer},
},
@ -74,119 +71,6 @@ fn test_cluster_partition_1_1_1() {
)
}
// Starts a single-node cluster whose leader has an accounts hash fault
// injector installed, then adds a second validator that lists the leader as a
// known validator and has `halt_on_known_validators_accounts_hash_mismatch`
// enabled. The test passes only if cluster discovery later fails or reports
// fewer than 2 nodes before the entry point's slot exceeds 210.
#[test]
#[serial]
fn test_consistency_halt() {
solana_logger::setup_with_default(RUST_LOG_FILTER);
let snapshot_interval_slots = 20;
let num_account_paths = 1;
// Create cluster with a leader producing bad snapshot hashes.
let mut leader_snapshot_test_config =
setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
// Prepare fault hash injection for testing.
leader_snapshot_test_config
.validator_config
.accounts_hash_fault_injector = Some(|hash: &Hash, slot: Slot| {
const FAULT_INJECTION_RATE_SLOTS: u64 = 40; // Inject a fault hash every 40 slots
// Returns `Some(fault_hash)` only on slots that are a multiple of the
// injection rate, `None` otherwise.
(slot % FAULT_INJECTION_RATE_SLOTS == 0).then(|| {
let rand = thread_rng().gen_range(0, 10);
let fault_hash = extend_and_hash(hash, &[rand]);
warn!("inserting fault at slot: {}", slot);
fault_hash
})
});
let validator_stake = DEFAULT_NODE_STAKE;
let mut config = ClusterConfig {
node_stakes: vec![validator_stake],
cluster_lamports: DEFAULT_CLUSTER_LAMPORTS,
validator_configs: vec![leader_snapshot_test_config.validator_config],
..ClusterConfig::default()
};
let mut cluster = LocalCluster::new(&mut config, SocketAddrSpace::Unspecified);
// Wait 5 seconds before querying gossip for the initial node.
sleep(Duration::from_millis(5000));
let cluster_nodes = discover_cluster(
&cluster.entry_point_info.gossip().unwrap(),
1,
SocketAddrSpace::Unspecified,
)
.unwrap();
info!("num_nodes: {}", cluster_nodes.len());
// Add a validator with the leader as trusted, it should halt when it detects
// mismatch.
let mut validator_snapshot_test_config =
setup_snapshot_validator_config(snapshot_interval_slots, num_account_paths);
let mut known_validators = HashSet::new();
known_validators.insert(*cluster_nodes[0].pubkey());
validator_snapshot_test_config
.validator_config
.known_validators = Some(known_validators);
validator_snapshot_test_config
.validator_config
.halt_on_known_validators_accounts_hash_mismatch = true;
warn!("adding a validator");
cluster.add_validator(
&validator_snapshot_test_config.validator_config,
validator_stake,
Arc::new(Keypair::new()),
None,
SocketAddrSpace::Unspecified,
);
// Confirm that both nodes are discoverable before polling for the drop-out.
let num_nodes = 2;
assert_eq!(
discover_cluster(
&cluster.entry_point_info.gossip().unwrap(),
num_nodes,
SocketAddrSpace::Unspecified
)
.unwrap()
.len(),
num_nodes
);
// Check for only 1 node on the network.
let mut encountered_error = false;
loop {
let discover = discover_cluster(
&cluster.entry_point_info.gossip().unwrap(),
2,
SocketAddrSpace::Unspecified,
);
// A discovery error or fewer than 2 nodes both count as the expected outcome.
match discover {
Err(_) => {
encountered_error = true;
break;
}
Ok(nodes) => {
if nodes.len() < 2 {
encountered_error = true;
break;
}
info!("checking cluster for fewer nodes.. {:?}", nodes.len());
}
}
let client = cluster
.get_validator_client(cluster.entry_point_info.pubkey())
.unwrap();
// Give up once the entry point reports a slot above 210. This exit leaves
// `encountered_error` false, so the final assertion fails.
if let Ok(slot) = client.get_slot() {
if slot > 210 {
break;
}
info!("slot: {}", slot);
}
// Poll once per second.
sleep(Duration::from_millis(1000));
}
assert!(encountered_error);
}
// Cluster needs a supermajority to remain, so the minimum size for this test is 4
#[test]
#[serial]

View File

@ -1092,14 +1092,6 @@ pub fn app<'a>(version: &'a str, default_args: &'a DefaultArgs) -> App<'a, 'a> {
.multiple(true)
.help("Specify the configuration file for the Geyser plugin."),
)
.arg(
Arg::with_name("halt_on_known_validators_accounts_hash_mismatch")
.alias("halt-on-trusted-validators-accounts-hash-mismatch")
.long("halt-on-known-validators-accounts-hash-mismatch")
.requires("known_validators")
.takes_value(false)
.help("Abort the validator if a bank hash mismatch is detected within known validator set"),
)
.arg(
Arg::with_name("snapshot_archive_format")
.long("snapshot-archive-format")
@ -1732,6 +1724,14 @@ fn deprecated_arguments() -> Vec<DeprecatedArg> {
.long("enable-quic-servers"),
usage_warning: "The quic server is now enabled by default.",
);
add_arg!(
Arg::with_name("halt_on_known_validators_accounts_hash_mismatch")
.alias("halt-on-trusted-validators-accounts-hash-mismatch")
.long("halt-on-known-validators-accounts-hash-mismatch")
.requires("known_validators")
.takes_value(false)
.help("Abort the validator if a bank hash mismatch is detected within known validator set"),
);
add_arg!(Arg::with_name("incremental_snapshots")
.long("incremental-snapshots")
.takes_value(false)

View File

@ -1612,10 +1612,6 @@ pub fn main() {
),
};
if matches.is_present("halt_on_known_validators_accounts_hash_mismatch") {
validator_config.halt_on_known_validators_accounts_hash_mismatch = true;
}
let public_rpc_addr = matches.value_of("public_rpc_addr").map(|addr| {
solana_net_utils::parse_host_port(addr).unwrap_or_else(|e| {
eprintln!("failed to parse public rpc address: {e}");