add support for a repair protocol whitelist (#29161)
This commit is contained in:
parent
acb7eb226b
commit
a44ea779bd
|
@ -523,6 +523,7 @@ impl AncestorHashesService {
|
||||||
let serve_repair = ServeRepair::new(
|
let serve_repair = ServeRepair::new(
|
||||||
repair_info.cluster_info.clone(),
|
repair_info.cluster_info.clone(),
|
||||||
repair_info.bank_forks.clone(),
|
repair_info.bank_forks.clone(),
|
||||||
|
repair_info.repair_whitelist.clone(),
|
||||||
);
|
);
|
||||||
let mut repair_stats = AncestorRepairRequestsStats::default();
|
let mut repair_stats = AncestorRepairRequestsStats::default();
|
||||||
|
|
||||||
|
@ -969,8 +970,11 @@ mod test {
|
||||||
Arc::new(keypair),
|
Arc::new(keypair),
|
||||||
SocketAddrSpace::Unspecified,
|
SocketAddrSpace::Unspecified,
|
||||||
);
|
);
|
||||||
let responder_serve_repair =
|
let responder_serve_repair = ServeRepair::new(
|
||||||
ServeRepair::new(Arc::new(cluster_info), vote_simulator.bank_forks);
|
Arc::new(cluster_info),
|
||||||
|
vote_simulator.bank_forks,
|
||||||
|
Arc::<RwLock<HashSet<_>>>::default(), // repair whitelist
|
||||||
|
);
|
||||||
|
|
||||||
// Set up thread to give us responses
|
// Set up thread to give us responses
|
||||||
let ledger_path = get_tmp_ledger_path!();
|
let ledger_path = get_tmp_ledger_path!();
|
||||||
|
@ -1054,8 +1058,12 @@ mod test {
|
||||||
Arc::new(keypair),
|
Arc::new(keypair),
|
||||||
SocketAddrSpace::Unspecified,
|
SocketAddrSpace::Unspecified,
|
||||||
));
|
));
|
||||||
let requester_serve_repair =
|
let repair_whitelist = Arc::new(RwLock::new(HashSet::default()));
|
||||||
ServeRepair::new(requester_cluster_info.clone(), bank_forks.clone());
|
let requester_serve_repair = ServeRepair::new(
|
||||||
|
requester_cluster_info.clone(),
|
||||||
|
bank_forks.clone(),
|
||||||
|
repair_whitelist.clone(),
|
||||||
|
);
|
||||||
let (duplicate_slots_reset_sender, _duplicate_slots_reset_receiver) = unbounded();
|
let (duplicate_slots_reset_sender, _duplicate_slots_reset_receiver) = unbounded();
|
||||||
let repair_info = RepairInfo {
|
let repair_info = RepairInfo {
|
||||||
bank_forks,
|
bank_forks,
|
||||||
|
@ -1064,6 +1072,7 @@ mod test {
|
||||||
epoch_schedule,
|
epoch_schedule,
|
||||||
duplicate_slots_reset_sender,
|
duplicate_slots_reset_sender,
|
||||||
repair_validators: None,
|
repair_validators: None,
|
||||||
|
repair_whitelist,
|
||||||
};
|
};
|
||||||
|
|
||||||
let (ancestor_hashes_replay_update_sender, ancestor_hashes_replay_update_receiver) =
|
let (ancestor_hashes_replay_update_sender, ancestor_hashes_replay_update_receiver) =
|
||||||
|
|
|
@ -173,7 +173,10 @@ pub struct RepairInfo {
|
||||||
pub cluster_slots: Arc<ClusterSlots>,
|
pub cluster_slots: Arc<ClusterSlots>,
|
||||||
pub epoch_schedule: EpochSchedule,
|
pub epoch_schedule: EpochSchedule,
|
||||||
pub duplicate_slots_reset_sender: DuplicateSlotsResetSender,
|
pub duplicate_slots_reset_sender: DuplicateSlotsResetSender,
|
||||||
|
// Validators from which repairs are requested
|
||||||
pub repair_validators: Option<HashSet<Pubkey>>,
|
pub repair_validators: Option<HashSet<Pubkey>>,
|
||||||
|
// Validators which should be given priority when serving repairs
|
||||||
|
pub repair_whitelist: Arc<RwLock<HashSet<Pubkey>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct RepairSlotRange {
|
pub struct RepairSlotRange {
|
||||||
|
@ -251,6 +254,7 @@ impl RepairService {
|
||||||
let serve_repair = ServeRepair::new(
|
let serve_repair = ServeRepair::new(
|
||||||
repair_info.cluster_info.clone(),
|
repair_info.cluster_info.clone(),
|
||||||
repair_info.bank_forks.clone(),
|
repair_info.bank_forks.clone(),
|
||||||
|
repair_info.repair_whitelist.clone(),
|
||||||
);
|
);
|
||||||
let id = repair_info.cluster_info.id();
|
let id = repair_info.cluster_info.id();
|
||||||
let mut repair_stats = RepairStats::default();
|
let mut repair_stats = RepairStats::default();
|
||||||
|
@ -1084,7 +1088,11 @@ mod test {
|
||||||
let cluster_slots = ClusterSlots::default();
|
let cluster_slots = ClusterSlots::default();
|
||||||
let cluster_info = Arc::new(new_test_cluster_info(Node::new_localhost().info));
|
let cluster_info = Arc::new(new_test_cluster_info(Node::new_localhost().info));
|
||||||
let identity_keypair = cluster_info.keypair().clone();
|
let identity_keypair = cluster_info.keypair().clone();
|
||||||
let serve_repair = ServeRepair::new(cluster_info, bank_forks);
|
let serve_repair = ServeRepair::new(
|
||||||
|
cluster_info,
|
||||||
|
bank_forks,
|
||||||
|
Arc::new(RwLock::new(HashSet::default())),
|
||||||
|
);
|
||||||
let mut duplicate_slot_repair_statuses = HashMap::new();
|
let mut duplicate_slot_repair_statuses = HashMap::new();
|
||||||
let dead_slot = 9;
|
let dead_slot = 9;
|
||||||
let receive_socket = &UdpSocket::bind("0.0.0.0:0").unwrap();
|
let receive_socket = &UdpSocket::bind("0.0.0.0:0").unwrap();
|
||||||
|
@ -1179,7 +1187,11 @@ mod test {
|
||||||
UdpSocket::bind("0.0.0.0:0").unwrap().local_addr().unwrap(),
|
UdpSocket::bind("0.0.0.0:0").unwrap().local_addr().unwrap(),
|
||||||
));
|
));
|
||||||
let cluster_info = Arc::new(new_test_cluster_info(Node::new_localhost().info));
|
let cluster_info = Arc::new(new_test_cluster_info(Node::new_localhost().info));
|
||||||
let serve_repair = ServeRepair::new(cluster_info.clone(), bank_forks);
|
let serve_repair = ServeRepair::new(
|
||||||
|
cluster_info.clone(),
|
||||||
|
bank_forks,
|
||||||
|
Arc::new(RwLock::new(HashSet::default())),
|
||||||
|
);
|
||||||
let valid_repair_peer = Node::new_localhost().info;
|
let valid_repair_peer = Node::new_localhost().info;
|
||||||
|
|
||||||
// Signal that this peer has confirmed the dead slot, and is thus
|
// Signal that this peer has confirmed the dead slot, and is thus
|
||||||
|
|
|
@ -159,6 +159,7 @@ struct ServeRepairStats {
|
||||||
dropped_requests_outbound_bandwidth: usize,
|
dropped_requests_outbound_bandwidth: usize,
|
||||||
dropped_requests_load_shed: usize,
|
dropped_requests_load_shed: usize,
|
||||||
dropped_requests_low_stake: usize,
|
dropped_requests_low_stake: usize,
|
||||||
|
whitelisted_requests: usize,
|
||||||
total_dropped_response_packets: usize,
|
total_dropped_response_packets: usize,
|
||||||
total_response_packets: usize,
|
total_response_packets: usize,
|
||||||
total_response_bytes_staked: usize,
|
total_response_bytes_staked: usize,
|
||||||
|
@ -281,6 +282,7 @@ impl RepairProtocol {
|
||||||
pub struct ServeRepair {
|
pub struct ServeRepair {
|
||||||
cluster_info: Arc<ClusterInfo>,
|
cluster_info: Arc<ClusterInfo>,
|
||||||
bank_forks: Arc<RwLock<BankForks>>,
|
bank_forks: Arc<RwLock<BankForks>>,
|
||||||
|
repair_whitelist: Arc<RwLock<HashSet<Pubkey>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cache entry for repair peers for a slot.
|
// Cache entry for repair peers for a slot.
|
||||||
|
@ -316,11 +318,23 @@ impl RepairPeers {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A decoded repair request bundled with the per-request metadata gathered while
// decoding. The batch of these is sorted by `Reverse((whitelisted, stake))` and
// truncated to MAX_REQUESTS_PER_ITERATION before being handed to `handle_packets`.
struct RepairRequestWithMeta {
|
||||||
|
// The deserialized repair protocol message.
request: RepairProtocol,
|
||||||
|
// Address paired with the request; `handle_packets` passes it to the ping-cache check.
// NOTE(review): presumably the packet's source address - the assignment is outside this view.
from_addr: SocketAddr,
|
||||||
|
// Stake value recorded for the request; secondary sort key (higher first).
stake: u64,
|
||||||
|
// True when `request.sender()` was in the repair whitelist at decode time;
// primary sort key, so whitelisted requests are kept ahead of all others.
whitelisted: bool,
|
||||||
|
}
|
||||||
|
|
||||||
impl ServeRepair {
|
impl ServeRepair {
|
||||||
pub fn new(cluster_info: Arc<ClusterInfo>, bank_forks: Arc<RwLock<BankForks>>) -> Self {
|
pub fn new(
|
||||||
|
cluster_info: Arc<ClusterInfo>,
|
||||||
|
bank_forks: Arc<RwLock<BankForks>>,
|
||||||
|
repair_whitelist: Arc<RwLock<HashSet<Pubkey>>>,
|
||||||
|
) -> Self {
|
||||||
Self {
|
Self {
|
||||||
cluster_info,
|
cluster_info,
|
||||||
bank_forks,
|
bank_forks,
|
||||||
|
repair_whitelist,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -456,7 +470,11 @@ impl ServeRepair {
|
||||||
let my_id = identity_keypair.pubkey();
|
let my_id = identity_keypair.pubkey();
|
||||||
|
|
||||||
let max_buffered_packets = if root_bank.cluster_type() != ClusterType::MainnetBeta {
|
let max_buffered_packets = if root_bank.cluster_type() != ClusterType::MainnetBeta {
|
||||||
|
if self.repair_whitelist.read().unwrap().len() > 0 {
|
||||||
|
4 * MAX_REQUESTS_PER_ITERATION
|
||||||
|
} else {
|
||||||
2 * MAX_REQUESTS_PER_ITERATION
|
2 * MAX_REQUESTS_PER_ITERATION
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
MAX_REQUESTS_PER_ITERATION
|
MAX_REQUESTS_PER_ITERATION
|
||||||
};
|
};
|
||||||
|
@ -475,7 +493,10 @@ impl ServeRepair {
|
||||||
stats.total_requests += total_requests;
|
stats.total_requests += total_requests;
|
||||||
|
|
||||||
let decode_start = Instant::now();
|
let decode_start = Instant::now();
|
||||||
let mut decoded_reqs = Vec::default();
|
let mut decoded_requests = Vec::default();
|
||||||
|
let mut whitelisted_request_count: usize = 0;
|
||||||
|
{
|
||||||
|
let whitelist = self.repair_whitelist.read().unwrap();
|
||||||
for packet in reqs_v.iter().flatten() {
|
for packet in reqs_v.iter().flatten() {
|
||||||
let request: RepairProtocol = match packet.deserialize_slice(..) {
|
let request: RepairProtocol = match packet.deserialize_slice(..) {
|
||||||
Ok(request) => request,
|
Ok(request) => request,
|
||||||
|
@ -512,21 +533,34 @@ impl ServeRepair {
|
||||||
} else {
|
} else {
|
||||||
stats.handle_requests_staked += 1;
|
stats.handle_requests_staked += 1;
|
||||||
}
|
}
|
||||||
decoded_reqs.push((request, from_addr, *stake));
|
|
||||||
|
let whitelisted = whitelist.contains(request.sender());
|
||||||
|
if whitelisted {
|
||||||
|
whitelisted_request_count += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
decoded_requests.push(RepairRequestWithMeta {
|
||||||
|
request,
|
||||||
|
from_addr,
|
||||||
|
stake: *stake,
|
||||||
|
whitelisted,
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
stats.decode_time_us += decode_start.elapsed().as_micros() as u64;
|
stats.decode_time_us += decode_start.elapsed().as_micros() as u64;
|
||||||
|
stats.whitelisted_requests += whitelisted_request_count.min(MAX_REQUESTS_PER_ITERATION);
|
||||||
|
|
||||||
if decoded_reqs.len() > MAX_REQUESTS_PER_ITERATION {
|
if decoded_requests.len() > MAX_REQUESTS_PER_ITERATION {
|
||||||
stats.dropped_requests_low_stake += decoded_reqs.len() - MAX_REQUESTS_PER_ITERATION;
|
stats.dropped_requests_low_stake += decoded_requests.len() - MAX_REQUESTS_PER_ITERATION;
|
||||||
decoded_reqs.sort_unstable_by_key(|(_, _, stake)| Reverse(*stake));
|
decoded_requests.sort_unstable_by_key(|r| Reverse((r.whitelisted, r.stake)));
|
||||||
decoded_reqs.truncate(MAX_REQUESTS_PER_ITERATION);
|
decoded_requests.truncate(MAX_REQUESTS_PER_ITERATION);
|
||||||
}
|
}
|
||||||
|
|
||||||
self.handle_packets(
|
self.handle_packets(
|
||||||
ping_cache,
|
ping_cache,
|
||||||
recycler,
|
recycler,
|
||||||
blockstore,
|
blockstore,
|
||||||
decoded_reqs,
|
decoded_requests,
|
||||||
response_sender,
|
response_sender,
|
||||||
stats,
|
stats,
|
||||||
data_budget,
|
data_budget,
|
||||||
|
@ -564,6 +598,7 @@ impl ServeRepair {
|
||||||
stats.dropped_requests_low_stake,
|
stats.dropped_requests_low_stake,
|
||||||
i64
|
i64
|
||||||
),
|
),
|
||||||
|
("whitelisted_requests", stats.whitelisted_requests, i64),
|
||||||
(
|
(
|
||||||
"total_dropped_response_packets",
|
"total_dropped_response_packets",
|
||||||
stats.total_dropped_response_packets,
|
stats.total_dropped_response_packets,
|
||||||
|
@ -778,7 +813,7 @@ impl ServeRepair {
|
||||||
ping_cache: &mut PingCache,
|
ping_cache: &mut PingCache,
|
||||||
recycler: &PacketBatchRecycler,
|
recycler: &PacketBatchRecycler,
|
||||||
blockstore: &Blockstore,
|
blockstore: &Blockstore,
|
||||||
requests: Vec<(RepairProtocol, SocketAddr, /*stake*/ u64)>,
|
requests: Vec<RepairRequestWithMeta>,
|
||||||
response_sender: &PacketBatchSender,
|
response_sender: &PacketBatchSender,
|
||||||
stats: &mut ServeRepairStats,
|
stats: &mut ServeRepairStats,
|
||||||
data_budget: &DataBudget,
|
data_budget: &DataBudget,
|
||||||
|
@ -787,7 +822,16 @@ impl ServeRepair {
|
||||||
let mut pending_pings = Vec::default();
|
let mut pending_pings = Vec::default();
|
||||||
|
|
||||||
let requests_len = requests.len();
|
let requests_len = requests.len();
|
||||||
for (i, (request, from_addr, stake)) in requests.into_iter().enumerate() {
|
for (
|
||||||
|
i,
|
||||||
|
RepairRequestWithMeta {
|
||||||
|
request,
|
||||||
|
from_addr,
|
||||||
|
stake,
|
||||||
|
..
|
||||||
|
},
|
||||||
|
) in requests.into_iter().enumerate()
|
||||||
|
{
|
||||||
if !matches!(&request, RepairProtocol::Pong(_)) {
|
if !matches!(&request, RepairProtocol::Pong(_)) {
|
||||||
let (check, ping_pkt) =
|
let (check, ping_pkt) =
|
||||||
Self::check_ping_cache(ping_cache, &request, &from_addr, &identity_keypair);
|
Self::check_ping_cache(ping_cache, &request, &from_addr, &identity_keypair);
|
||||||
|
@ -1246,7 +1290,11 @@ mod tests {
|
||||||
let bank_forks = Arc::new(RwLock::new(BankForks::new(bank)));
|
let bank_forks = Arc::new(RwLock::new(BankForks::new(bank)));
|
||||||
let me = ContactInfo::new_localhost(&solana_sdk::pubkey::new_rand(), timestamp());
|
let me = ContactInfo::new_localhost(&solana_sdk::pubkey::new_rand(), timestamp());
|
||||||
let cluster_info = Arc::new(new_test_cluster_info(me));
|
let cluster_info = Arc::new(new_test_cluster_info(me));
|
||||||
let serve_repair = ServeRepair::new(cluster_info.clone(), bank_forks);
|
let serve_repair = ServeRepair::new(
|
||||||
|
cluster_info.clone(),
|
||||||
|
bank_forks,
|
||||||
|
Arc::new(RwLock::new(HashSet::default())),
|
||||||
|
);
|
||||||
let keypair = cluster_info.keypair().clone();
|
let keypair = cluster_info.keypair().clone();
|
||||||
let repair_peer_id = solana_sdk::pubkey::new_rand();
|
let repair_peer_id = solana_sdk::pubkey::new_rand();
|
||||||
let repair_request = ShredRepairType::Orphan(123);
|
let repair_request = ShredRepairType::Orphan(123);
|
||||||
|
@ -1292,7 +1340,11 @@ mod tests {
|
||||||
let mut bank = Bank::new_for_tests(&genesis_config);
|
let mut bank = Bank::new_for_tests(&genesis_config);
|
||||||
bank.feature_set = Arc::new(FeatureSet::all_enabled());
|
bank.feature_set = Arc::new(FeatureSet::all_enabled());
|
||||||
let bank_forks = Arc::new(RwLock::new(BankForks::new(bank)));
|
let bank_forks = Arc::new(RwLock::new(BankForks::new(bank)));
|
||||||
let serve_repair = ServeRepair::new(cluster_info, bank_forks);
|
let serve_repair = ServeRepair::new(
|
||||||
|
cluster_info,
|
||||||
|
bank_forks,
|
||||||
|
Arc::new(RwLock::new(HashSet::default())),
|
||||||
|
);
|
||||||
|
|
||||||
let request_bytes = serve_repair
|
let request_bytes = serve_repair
|
||||||
.ancestor_repair_request_bytes(&keypair, &repair_peer_id, slot, nonce)
|
.ancestor_repair_request_bytes(&keypair, &repair_peer_id, slot, nonce)
|
||||||
|
@ -1326,7 +1378,11 @@ mod tests {
|
||||||
let bank_forks = Arc::new(RwLock::new(BankForks::new(bank)));
|
let bank_forks = Arc::new(RwLock::new(BankForks::new(bank)));
|
||||||
let me = ContactInfo::new_localhost(&solana_sdk::pubkey::new_rand(), timestamp());
|
let me = ContactInfo::new_localhost(&solana_sdk::pubkey::new_rand(), timestamp());
|
||||||
let cluster_info = Arc::new(new_test_cluster_info(me));
|
let cluster_info = Arc::new(new_test_cluster_info(me));
|
||||||
let serve_repair = ServeRepair::new(cluster_info.clone(), bank_forks);
|
let serve_repair = ServeRepair::new(
|
||||||
|
cluster_info.clone(),
|
||||||
|
bank_forks,
|
||||||
|
Arc::new(RwLock::new(HashSet::default())),
|
||||||
|
);
|
||||||
let keypair = cluster_info.keypair().clone();
|
let keypair = cluster_info.keypair().clone();
|
||||||
let repair_peer_id = solana_sdk::pubkey::new_rand();
|
let repair_peer_id = solana_sdk::pubkey::new_rand();
|
||||||
|
|
||||||
|
@ -1653,7 +1709,11 @@ mod tests {
|
||||||
let cluster_slots = ClusterSlots::default();
|
let cluster_slots = ClusterSlots::default();
|
||||||
let me = ContactInfo::new_localhost(&solana_sdk::pubkey::new_rand(), timestamp());
|
let me = ContactInfo::new_localhost(&solana_sdk::pubkey::new_rand(), timestamp());
|
||||||
let cluster_info = Arc::new(new_test_cluster_info(me));
|
let cluster_info = Arc::new(new_test_cluster_info(me));
|
||||||
let serve_repair = ServeRepair::new(cluster_info.clone(), bank_forks);
|
let serve_repair = ServeRepair::new(
|
||||||
|
cluster_info.clone(),
|
||||||
|
bank_forks,
|
||||||
|
Arc::new(RwLock::new(HashSet::default())),
|
||||||
|
);
|
||||||
let identity_keypair = cluster_info.keypair().clone();
|
let identity_keypair = cluster_info.keypair().clone();
|
||||||
let mut outstanding_requests = OutstandingShredRepairs::default();
|
let mut outstanding_requests = OutstandingShredRepairs::default();
|
||||||
let rv = serve_repair.repair_request(
|
let rv = serve_repair.repair_request(
|
||||||
|
@ -1984,7 +2044,11 @@ mod tests {
|
||||||
cluster_info.insert_info(contact_info2.clone());
|
cluster_info.insert_info(contact_info2.clone());
|
||||||
cluster_info.insert_info(contact_info3.clone());
|
cluster_info.insert_info(contact_info3.clone());
|
||||||
let identity_keypair = cluster_info.keypair().clone();
|
let identity_keypair = cluster_info.keypair().clone();
|
||||||
let serve_repair = ServeRepair::new(cluster_info, bank_forks);
|
let serve_repair = ServeRepair::new(
|
||||||
|
cluster_info,
|
||||||
|
bank_forks,
|
||||||
|
Arc::new(RwLock::new(HashSet::default())),
|
||||||
|
);
|
||||||
|
|
||||||
// If:
|
// If:
|
||||||
// 1) repair validator set doesn't exist in gossip
|
// 1) repair validator set doesn't exist in gossip
|
||||||
|
|
|
@ -80,7 +80,10 @@ pub struct TvuSockets {
|
||||||
pub struct TvuConfig {
|
pub struct TvuConfig {
|
||||||
pub max_ledger_shreds: Option<u64>,
|
pub max_ledger_shreds: Option<u64>,
|
||||||
pub shred_version: u16,
|
pub shred_version: u16,
|
||||||
|
// Validators from which repairs are requested
|
||||||
pub repair_validators: Option<HashSet<Pubkey>>,
|
pub repair_validators: Option<HashSet<Pubkey>>,
|
||||||
|
// Validators which should be given priority when serving repairs
|
||||||
|
pub repair_whitelist: Arc<RwLock<HashSet<Pubkey>>>,
|
||||||
pub wait_for_vote_to_start_leader: bool,
|
pub wait_for_vote_to_start_leader: bool,
|
||||||
pub replay_slots_concurrently: bool,
|
pub replay_slots_concurrently: bool,
|
||||||
}
|
}
|
||||||
|
@ -189,6 +192,7 @@ impl Tvu {
|
||||||
epoch_schedule,
|
epoch_schedule,
|
||||||
duplicate_slots_reset_sender,
|
duplicate_slots_reset_sender,
|
||||||
repair_validators: tvu_config.repair_validators,
|
repair_validators: tvu_config.repair_validators,
|
||||||
|
repair_whitelist: tvu_config.repair_whitelist,
|
||||||
cluster_info: cluster_info.clone(),
|
cluster_info: cluster_info.clone(),
|
||||||
cluster_slots: cluster_slots.clone(),
|
cluster_slots: cluster_slots.clone(),
|
||||||
};
|
};
|
||||||
|
|
|
@ -140,6 +140,7 @@ pub struct ValidatorConfig {
|
||||||
pub new_hard_forks: Option<Vec<Slot>>,
|
pub new_hard_forks: Option<Vec<Slot>>,
|
||||||
pub known_validators: Option<HashSet<Pubkey>>, // None = trust all
|
pub known_validators: Option<HashSet<Pubkey>>, // None = trust all
|
||||||
pub repair_validators: Option<HashSet<Pubkey>>, // None = repair from all
|
pub repair_validators: Option<HashSet<Pubkey>>, // None = repair from all
|
||||||
|
pub repair_whitelist: Arc<RwLock<HashSet<Pubkey>>>, // Empty = repair with all
|
||||||
pub gossip_validators: Option<HashSet<Pubkey>>, // None = gossip with all
|
pub gossip_validators: Option<HashSet<Pubkey>>, // None = gossip with all
|
||||||
pub halt_on_known_validators_accounts_hash_mismatch: bool,
|
pub halt_on_known_validators_accounts_hash_mismatch: bool,
|
||||||
pub accounts_hash_fault_injection_slots: u64, // 0 = no fault injection
|
pub accounts_hash_fault_injection_slots: u64, // 0 = no fault injection
|
||||||
|
@ -201,6 +202,7 @@ impl Default for ValidatorConfig {
|
||||||
new_hard_forks: None,
|
new_hard_forks: None,
|
||||||
known_validators: None,
|
known_validators: None,
|
||||||
repair_validators: None,
|
repair_validators: None,
|
||||||
|
repair_whitelist: Arc::new(RwLock::new(HashSet::default())),
|
||||||
gossip_validators: None,
|
gossip_validators: None,
|
||||||
halt_on_known_validators_accounts_hash_mismatch: false,
|
halt_on_known_validators_accounts_hash_mismatch: false,
|
||||||
accounts_hash_fault_injection_slots: 0,
|
accounts_hash_fault_injection_slots: 0,
|
||||||
|
@ -870,7 +872,11 @@ impl Validator {
|
||||||
Some(stats_reporter_sender.clone()),
|
Some(stats_reporter_sender.clone()),
|
||||||
&exit,
|
&exit,
|
||||||
);
|
);
|
||||||
let serve_repair = ServeRepair::new(cluster_info.clone(), bank_forks.clone());
|
let serve_repair = ServeRepair::new(
|
||||||
|
cluster_info.clone(),
|
||||||
|
bank_forks.clone(),
|
||||||
|
config.repair_whitelist.clone(),
|
||||||
|
);
|
||||||
let serve_repair_service = ServeRepairService::new(
|
let serve_repair_service = ServeRepairService::new(
|
||||||
serve_repair,
|
serve_repair,
|
||||||
blockstore.clone(),
|
blockstore.clone(),
|
||||||
|
@ -964,6 +970,7 @@ impl Validator {
|
||||||
max_ledger_shreds: config.max_ledger_shreds,
|
max_ledger_shreds: config.max_ledger_shreds,
|
||||||
shred_version: node.info.shred_version,
|
shred_version: node.info.shred_version,
|
||||||
repair_validators: config.repair_validators.clone(),
|
repair_validators: config.repair_validators.clone(),
|
||||||
|
repair_whitelist: config.repair_whitelist.clone(),
|
||||||
wait_for_vote_to_start_leader,
|
wait_for_vote_to_start_leader,
|
||||||
replay_slots_concurrently: config.replay_slots_concurrently,
|
replay_slots_concurrently: config.replay_slots_concurrently,
|
||||||
},
|
},
|
||||||
|
|
|
@ -27,6 +27,7 @@ pub fn safe_clone_config(config: &ValidatorConfig) -> ValidatorConfig {
|
||||||
new_hard_forks: config.new_hard_forks.clone(),
|
new_hard_forks: config.new_hard_forks.clone(),
|
||||||
known_validators: config.known_validators.clone(),
|
known_validators: config.known_validators.clone(),
|
||||||
repair_validators: config.repair_validators.clone(),
|
repair_validators: config.repair_validators.clone(),
|
||||||
|
repair_whitelist: config.repair_whitelist.clone(),
|
||||||
gossip_validators: config.gossip_validators.clone(),
|
gossip_validators: config.gossip_validators.clone(),
|
||||||
halt_on_known_validators_accounts_hash_mismatch: config
|
halt_on_known_validators_accounts_hash_mismatch: config
|
||||||
.halt_on_known_validators_accounts_hash_mismatch,
|
.halt_on_known_validators_accounts_hash_mismatch,
|
||||||
|
|
|
@ -974,6 +974,10 @@ impl TestValidator {
|
||||||
pub fn bank_forks(&self) -> Arc<RwLock<BankForks>> {
|
pub fn bank_forks(&self) -> Arc<RwLock<BankForks>> {
|
||||||
self.validator.as_ref().unwrap().bank_forks.clone()
|
self.validator.as_ref().unwrap().bank_forks.clone()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns a repair whitelist handle for this test validator.
///
/// NOTE(review): every call builds a brand-new, empty set instead of handing out a
/// shared handle, so writes made through the returned `Arc` are not visible to any
/// other holder - confirm this is the intended behavior for the test validator.
pub fn repair_whitelist(&self) -> Arc<RwLock<HashSet<Pubkey>>> {
    Arc::default()
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Drop for TestValidator {
|
impl Drop for TestValidator {
|
||||||
|
|
|
@ -17,7 +17,7 @@ use {
|
||||||
signature::{read_keypair_file, Keypair, Signer},
|
signature::{read_keypair_file, Keypair, Signer},
|
||||||
},
|
},
|
||||||
std::{
|
std::{
|
||||||
collections::HashMap,
|
collections::{HashMap, HashSet},
|
||||||
error,
|
error,
|
||||||
fmt::{self, Display},
|
fmt::{self, Display},
|
||||||
net::SocketAddr,
|
net::SocketAddr,
|
||||||
|
@ -33,6 +33,7 @@ pub struct AdminRpcRequestMetadataPostInit {
|
||||||
pub cluster_info: Arc<ClusterInfo>,
|
pub cluster_info: Arc<ClusterInfo>,
|
||||||
pub bank_forks: Arc<RwLock<BankForks>>,
|
pub bank_forks: Arc<RwLock<BankForks>>,
|
||||||
pub vote_account: Pubkey,
|
pub vote_account: Pubkey,
|
||||||
|
pub repair_whitelist: Arc<RwLock<HashSet<Pubkey>>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
|
@ -80,6 +81,11 @@ pub struct AdminRpcContactInfo {
|
||||||
pub shred_version: u16,
|
pub shred_version: u16,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Payload returned by the `repairWhitelist` admin RPC. It is serializable so the
// CLI can emit it as JSON, and it implements `Display` for plain-text output.
#[derive(Debug, Deserialize, Serialize)]
|
||||||
|
pub struct AdminRpcRepairWhitelist {
|
||||||
|
// Validator identities in the whitelist, copied out of the shared set at query time.
pub whitelist: Vec<Pubkey>,
|
||||||
|
}
|
||||||
|
|
||||||
impl From<ContactInfo> for AdminRpcContactInfo {
|
impl From<ContactInfo> for AdminRpcContactInfo {
|
||||||
fn from(contact_info: ContactInfo) -> Self {
|
fn from(contact_info: ContactInfo) -> Self {
|
||||||
let ContactInfo {
|
let ContactInfo {
|
||||||
|
@ -133,6 +139,12 @@ impl Display for AdminRpcContactInfo {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Display for AdminRpcRepairWhitelist {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
writeln!(f, "Repair whitelist: {:?}", &self.whitelist)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[rpc]
|
#[rpc]
|
||||||
pub trait AdminRpc {
|
pub trait AdminRpc {
|
||||||
type Metadata;
|
type Metadata;
|
||||||
|
@ -183,6 +195,12 @@ pub trait AdminRpc {
|
||||||
|
|
||||||
#[rpc(meta, name = "contactInfo")]
|
#[rpc(meta, name = "contactInfo")]
|
||||||
fn contact_info(&self, meta: Self::Metadata) -> Result<AdminRpcContactInfo>;
|
fn contact_info(&self, meta: Self::Metadata) -> Result<AdminRpcContactInfo>;
|
||||||
|
|
||||||
|
#[rpc(meta, name = "repairWhitelist")]
|
||||||
|
fn repair_whitelist(&self, meta: Self::Metadata) -> Result<AdminRpcRepairWhitelist>;
|
||||||
|
|
||||||
|
#[rpc(meta, name = "setRepairWhitelist")]
|
||||||
|
fn set_repair_whitelist(&self, meta: Self::Metadata, whitelist: Vec<Pubkey>) -> Result<()>;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct AdminRpcImpl;
|
pub struct AdminRpcImpl;
|
||||||
|
@ -321,6 +339,35 @@ impl AdminRpc for AdminRpcImpl {
|
||||||
fn contact_info(&self, meta: Self::Metadata) -> Result<AdminRpcContactInfo> {
|
fn contact_info(&self, meta: Self::Metadata) -> Result<AdminRpcContactInfo> {
|
||||||
meta.with_post_init(|post_init| Ok(post_init.cluster_info.my_contact_info().into()))
|
meta.with_post_init(|post_init| Ok(post_init.cluster_info.my_contact_info().into()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn repair_whitelist(&self, meta: Self::Metadata) -> Result<AdminRpcRepairWhitelist> {
|
||||||
|
debug!("repair_whitelist request received");
|
||||||
|
|
||||||
|
meta.with_post_init(|post_init| {
|
||||||
|
let whitelist: Vec<_> = post_init
|
||||||
|
.repair_whitelist
|
||||||
|
.read()
|
||||||
|
.unwrap()
|
||||||
|
.iter()
|
||||||
|
.copied()
|
||||||
|
.collect();
|
||||||
|
Ok(AdminRpcRepairWhitelist { whitelist })
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn set_repair_whitelist(&self, meta: Self::Metadata, whitelist: Vec<Pubkey>) -> Result<()> {
|
||||||
|
debug!("set_repair_whitelist request received");
|
||||||
|
|
||||||
|
let whitelist: HashSet<Pubkey> = whitelist.into_iter().collect();
|
||||||
|
meta.with_post_init(|post_init| {
|
||||||
|
*post_init.repair_whitelist.write().unwrap() = whitelist;
|
||||||
|
warn!(
|
||||||
|
"Repair whitelist set to {:?}",
|
||||||
|
&post_init.repair_whitelist.read().unwrap()
|
||||||
|
);
|
||||||
|
Ok(())
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl AdminRpcImpl {
|
impl AdminRpcImpl {
|
||||||
|
|
|
@ -498,6 +498,7 @@ fn main() {
|
||||||
bank_forks: test_validator.bank_forks(),
|
bank_forks: test_validator.bank_forks(),
|
||||||
cluster_info: test_validator.cluster_info(),
|
cluster_info: test_validator.cluster_info(),
|
||||||
vote_account: test_validator.vote_account_address(),
|
vote_account: test_validator.vote_account_address(),
|
||||||
|
repair_whitelist: test_validator.repair_whitelist(),
|
||||||
});
|
});
|
||||||
if let Some(dashboard) = dashboard {
|
if let Some(dashboard) = dashboard {
|
||||||
dashboard.run(Duration::from_millis(250));
|
dashboard.run(Duration::from_millis(250));
|
||||||
|
|
|
@ -714,6 +714,18 @@ pub fn app<'a>(version: &'a str, default_args: &'a DefaultArgs) -> App<'a, 'a> {
|
||||||
.help("A list of validators to request repairs from. If specified, repair will not \
|
.help("A list of validators to request repairs from. If specified, repair will not \
|
||||||
request from validators outside this set [default: all validators]")
|
request from validators outside this set [default: all validators]")
|
||||||
)
|
)
|
||||||
|
.arg(
|
||||||
|
Arg::with_name("repair_whitelist")
|
||||||
|
.hidden(true)
|
||||||
|
.long("repair-whitelist")
|
||||||
|
.validator(is_pubkey)
|
||||||
|
.value_name("VALIDATOR IDENTITY")
|
||||||
|
.multiple(true)
|
||||||
|
.takes_value(true)
|
||||||
|
.help("A list of validators to prioritize repairs from. If specified, repair requests \
|
||||||
|
from validators in the list will be prioritized over requests from other validators. \
|
||||||
|
[default: all validators]")
|
||||||
|
)
|
||||||
.arg(
|
.arg(
|
||||||
Arg::with_name("gossip_validators")
|
Arg::with_name("gossip_validators")
|
||||||
.long("gossip-validator")
|
.long("gossip-validator")
|
||||||
|
@ -1387,6 +1399,46 @@ pub fn app<'a>(version: &'a str, default_args: &'a DefaultArgs) -> App<'a, 'a> {
|
||||||
.help("Output display mode")
|
.help("Output display mode")
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
.subcommand(
|
||||||
|
SubCommand::with_name("repair-whitelist")
|
||||||
|
.about("Manage the validator's repair protocol whitelist")
|
||||||
|
.setting(AppSettings::SubcommandRequiredElseHelp)
|
||||||
|
.setting(AppSettings::InferSubcommands)
|
||||||
|
.subcommand(
|
||||||
|
SubCommand::with_name("get")
|
||||||
|
.about("Display the validator's repair protocol whitelist")
|
||||||
|
.arg(
|
||||||
|
Arg::with_name("output")
|
||||||
|
.long("output")
|
||||||
|
.takes_value(true)
|
||||||
|
.value_name("MODE")
|
||||||
|
.possible_values(&["json", "json-compact"])
|
||||||
|
.help("Output display mode")
|
||||||
|
)
|
||||||
|
)
|
||||||
|
.subcommand(
|
||||||
|
SubCommand::with_name("set")
|
||||||
|
.about("Set the validator's repair protocol whitelist")
|
||||||
|
.setting(AppSettings::ArgRequiredElseHelp)
|
||||||
|
.arg(
|
||||||
|
Arg::with_name("whitelist")
|
||||||
|
.long("whitelist")
|
||||||
|
.validator(is_pubkey)
|
||||||
|
.value_name("VALIDATOR IDENTITY")
|
||||||
|
.multiple(true)
|
||||||
|
.takes_value(true)
|
||||||
|
.help("Set the validator's repair protocol whitelist")
|
||||||
|
)
|
||||||
|
.after_help("Note: repair protocol whitelist changes only apply to the currently \
|
||||||
|
running validator instance")
|
||||||
|
)
|
||||||
|
.subcommand(
|
||||||
|
SubCommand::with_name("remove-all")
|
||||||
|
.about("Clear the validator's repair protocol whitelist")
|
||||||
|
.after_help("Note: repair protocol whitelist changes only apply to the currently \
|
||||||
|
running validator instance")
|
||||||
|
)
|
||||||
|
)
|
||||||
.subcommand(
|
.subcommand(
|
||||||
SubCommand::with_name("init")
|
SubCommand::with_name("init")
|
||||||
.about("Initialize the ledger directory then exit")
|
.about("Initialize the ledger directory then exit")
|
||||||
|
|
|
@ -344,6 +344,22 @@ fn wait_for_restart_window(
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn set_repair_whitelist(
|
||||||
|
ledger_path: &Path,
|
||||||
|
whitelist: Vec<Pubkey>,
|
||||||
|
) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let admin_client = admin_rpc_service::connect(ledger_path);
|
||||||
|
admin_rpc_service::runtime()
|
||||||
|
.block_on(async move { admin_client.await?.set_repair_whitelist(whitelist).await })
|
||||||
|
.map_err(|err| {
|
||||||
|
std::io::Error::new(
|
||||||
|
std::io::ErrorKind::Other,
|
||||||
|
format!("setRepairWhitelist request failed: {}", err),
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns the default fifo shred storage size (include both data and coding
|
/// Returns the default fifo shred storage size (include both data and coding
|
||||||
/// shreds) based on the validator config.
|
/// shreds) based on the validator config.
|
||||||
fn default_fifo_shred_storage_size(vc: &ValidatorConfig) -> Option<u64> {
|
fn default_fifo_shred_storage_size(vc: &ValidatorConfig) -> Option<u64> {
|
||||||
|
@ -667,6 +683,59 @@ pub fn main() {
|
||||||
});
|
});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
("repair-whitelist", Some(repair_whitelist_subcommand_matches)) => {
|
||||||
|
match repair_whitelist_subcommand_matches.subcommand() {
|
||||||
|
("get", Some(subcommand_matches)) => {
|
||||||
|
let output_mode = subcommand_matches.value_of("output");
|
||||||
|
let admin_client = admin_rpc_service::connect(&ledger_path);
|
||||||
|
let repair_whitelist = admin_rpc_service::runtime()
|
||||||
|
.block_on(async move { admin_client.await?.repair_whitelist().await })
|
||||||
|
.unwrap_or_else(|err| {
|
||||||
|
eprintln!("Repair whitelist query failed: {}", err);
|
||||||
|
exit(1);
|
||||||
|
});
|
||||||
|
if let Some(mode) = output_mode {
|
||||||
|
match mode {
|
||||||
|
"json" => println!(
|
||||||
|
"{}",
|
||||||
|
serde_json::to_string_pretty(&repair_whitelist).unwrap()
|
||||||
|
),
|
||||||
|
"json-compact" => {
|
||||||
|
print!("{}", serde_json::to_string(&repair_whitelist).unwrap())
|
||||||
|
}
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
print!("{}", repair_whitelist);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
("set", Some(subcommand_matches)) => {
|
||||||
|
let whitelist = if subcommand_matches.is_present("whitelist") {
|
||||||
|
let validators_set: HashSet<_> =
|
||||||
|
values_t_or_exit!(subcommand_matches, "whitelist", Pubkey)
|
||||||
|
.into_iter()
|
||||||
|
.collect();
|
||||||
|
validators_set.into_iter().collect::<Vec<_>>()
|
||||||
|
} else {
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
set_repair_whitelist(&ledger_path, whitelist).unwrap_or_else(|err| {
|
||||||
|
eprintln!("{err}");
|
||||||
|
exit(1);
|
||||||
|
});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
("remove-all", _) => {
|
||||||
|
set_repair_whitelist(&ledger_path, Vec::default()).unwrap_or_else(|err| {
|
||||||
|
eprintln!("{err}");
|
||||||
|
exit(1);
|
||||||
|
});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
_ => unreachable!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -786,6 +855,13 @@ pub fn main() {
|
||||||
"repair_validators",
|
"repair_validators",
|
||||||
"--repair-validator",
|
"--repair-validator",
|
||||||
);
|
);
|
||||||
|
let repair_whitelist = validators_set(
|
||||||
|
&identity_keypair.pubkey(),
|
||||||
|
&matches,
|
||||||
|
"repair_whitelist",
|
||||||
|
"--repair-whitelist",
|
||||||
|
);
|
||||||
|
let repair_whitelist = Arc::new(RwLock::new(repair_whitelist.unwrap_or_default()));
|
||||||
let gossip_validators = validators_set(
|
let gossip_validators = validators_set(
|
||||||
&identity_keypair.pubkey(),
|
&identity_keypair.pubkey(),
|
||||||
&matches,
|
&matches,
|
||||||
|
@ -1094,6 +1170,7 @@ pub fn main() {
|
||||||
wait_for_supermajority: value_t!(matches, "wait_for_supermajority", Slot).ok(),
|
wait_for_supermajority: value_t!(matches, "wait_for_supermajority", Slot).ok(),
|
||||||
known_validators,
|
known_validators,
|
||||||
repair_validators,
|
repair_validators,
|
||||||
|
repair_whitelist: repair_whitelist.clone(),
|
||||||
gossip_validators,
|
gossip_validators,
|
||||||
wal_recovery_mode,
|
wal_recovery_mode,
|
||||||
poh_verify: !matches.is_present("skip_poh_verify"),
|
poh_verify: !matches.is_present("skip_poh_verify"),
|
||||||
|
@ -1572,6 +1649,7 @@ pub fn main() {
|
||||||
bank_forks: validator.bank_forks.clone(),
|
bank_forks: validator.bank_forks.clone(),
|
||||||
cluster_info: validator.cluster_info.clone(),
|
cluster_info: validator.cluster_info.clone(),
|
||||||
vote_account,
|
vote_account,
|
||||||
|
repair_whitelist,
|
||||||
});
|
});
|
||||||
|
|
||||||
if let Some(filename) = init_complete_file {
|
if let Some(filename) = init_complete_file {
|
||||||
|
|
Loading…
Reference in New Issue