add support for a repair protocol whitelist (#29161)
This commit is contained in:
parent
acb7eb226b
commit
a44ea779bd
|
@ -523,6 +523,7 @@ impl AncestorHashesService {
|
|||
let serve_repair = ServeRepair::new(
|
||||
repair_info.cluster_info.clone(),
|
||||
repair_info.bank_forks.clone(),
|
||||
repair_info.repair_whitelist.clone(),
|
||||
);
|
||||
let mut repair_stats = AncestorRepairRequestsStats::default();
|
||||
|
||||
|
@ -969,8 +970,11 @@ mod test {
|
|||
Arc::new(keypair),
|
||||
SocketAddrSpace::Unspecified,
|
||||
);
|
||||
let responder_serve_repair =
|
||||
ServeRepair::new(Arc::new(cluster_info), vote_simulator.bank_forks);
|
||||
let responder_serve_repair = ServeRepair::new(
|
||||
Arc::new(cluster_info),
|
||||
vote_simulator.bank_forks,
|
||||
Arc::<RwLock<HashSet<_>>>::default(), // repair whitelist
|
||||
);
|
||||
|
||||
// Set up thread to give us responses
|
||||
let ledger_path = get_tmp_ledger_path!();
|
||||
|
@ -1054,8 +1058,12 @@ mod test {
|
|||
Arc::new(keypair),
|
||||
SocketAddrSpace::Unspecified,
|
||||
));
|
||||
let requester_serve_repair =
|
||||
ServeRepair::new(requester_cluster_info.clone(), bank_forks.clone());
|
||||
let repair_whitelist = Arc::new(RwLock::new(HashSet::default()));
|
||||
let requester_serve_repair = ServeRepair::new(
|
||||
requester_cluster_info.clone(),
|
||||
bank_forks.clone(),
|
||||
repair_whitelist.clone(),
|
||||
);
|
||||
let (duplicate_slots_reset_sender, _duplicate_slots_reset_receiver) = unbounded();
|
||||
let repair_info = RepairInfo {
|
||||
bank_forks,
|
||||
|
@ -1064,6 +1072,7 @@ mod test {
|
|||
epoch_schedule,
|
||||
duplicate_slots_reset_sender,
|
||||
repair_validators: None,
|
||||
repair_whitelist,
|
||||
};
|
||||
|
||||
let (ancestor_hashes_replay_update_sender, ancestor_hashes_replay_update_receiver) =
|
||||
|
|
|
@ -173,7 +173,10 @@ pub struct RepairInfo {
|
|||
pub cluster_slots: Arc<ClusterSlots>,
|
||||
pub epoch_schedule: EpochSchedule,
|
||||
pub duplicate_slots_reset_sender: DuplicateSlotsResetSender,
|
||||
// Validators from which repairs are requested
|
||||
pub repair_validators: Option<HashSet<Pubkey>>,
|
||||
// Validators which should be given priority when serving
|
||||
pub repair_whitelist: Arc<RwLock<HashSet<Pubkey>>>,
|
||||
}
|
||||
|
||||
pub struct RepairSlotRange {
|
||||
|
@ -251,6 +254,7 @@ impl RepairService {
|
|||
let serve_repair = ServeRepair::new(
|
||||
repair_info.cluster_info.clone(),
|
||||
repair_info.bank_forks.clone(),
|
||||
repair_info.repair_whitelist.clone(),
|
||||
);
|
||||
let id = repair_info.cluster_info.id();
|
||||
let mut repair_stats = RepairStats::default();
|
||||
|
@ -1084,7 +1088,11 @@ mod test {
|
|||
let cluster_slots = ClusterSlots::default();
|
||||
let cluster_info = Arc::new(new_test_cluster_info(Node::new_localhost().info));
|
||||
let identity_keypair = cluster_info.keypair().clone();
|
||||
let serve_repair = ServeRepair::new(cluster_info, bank_forks);
|
||||
let serve_repair = ServeRepair::new(
|
||||
cluster_info,
|
||||
bank_forks,
|
||||
Arc::new(RwLock::new(HashSet::default())),
|
||||
);
|
||||
let mut duplicate_slot_repair_statuses = HashMap::new();
|
||||
let dead_slot = 9;
|
||||
let receive_socket = &UdpSocket::bind("0.0.0.0:0").unwrap();
|
||||
|
@ -1179,7 +1187,11 @@ mod test {
|
|||
UdpSocket::bind("0.0.0.0:0").unwrap().local_addr().unwrap(),
|
||||
));
|
||||
let cluster_info = Arc::new(new_test_cluster_info(Node::new_localhost().info));
|
||||
let serve_repair = ServeRepair::new(cluster_info.clone(), bank_forks);
|
||||
let serve_repair = ServeRepair::new(
|
||||
cluster_info.clone(),
|
||||
bank_forks,
|
||||
Arc::new(RwLock::new(HashSet::default())),
|
||||
);
|
||||
let valid_repair_peer = Node::new_localhost().info;
|
||||
|
||||
// Signal that this peer has confirmed the dead slot, and is thus
|
||||
|
|
|
@ -159,6 +159,7 @@ struct ServeRepairStats {
|
|||
dropped_requests_outbound_bandwidth: usize,
|
||||
dropped_requests_load_shed: usize,
|
||||
dropped_requests_low_stake: usize,
|
||||
whitelisted_requests: usize,
|
||||
total_dropped_response_packets: usize,
|
||||
total_response_packets: usize,
|
||||
total_response_bytes_staked: usize,
|
||||
|
@ -281,6 +282,7 @@ impl RepairProtocol {
|
|||
pub struct ServeRepair {
|
||||
cluster_info: Arc<ClusterInfo>,
|
||||
bank_forks: Arc<RwLock<BankForks>>,
|
||||
repair_whitelist: Arc<RwLock<HashSet<Pubkey>>>,
|
||||
}
|
||||
|
||||
// Cache entry for repair peers for a slot.
|
||||
|
@ -316,11 +318,23 @@ impl RepairPeers {
|
|||
}
|
||||
}
|
||||
|
||||
struct RepairRequestWithMeta {
|
||||
request: RepairProtocol,
|
||||
from_addr: SocketAddr,
|
||||
stake: u64,
|
||||
whitelisted: bool,
|
||||
}
|
||||
|
||||
impl ServeRepair {
|
||||
pub fn new(cluster_info: Arc<ClusterInfo>, bank_forks: Arc<RwLock<BankForks>>) -> Self {
|
||||
pub fn new(
|
||||
cluster_info: Arc<ClusterInfo>,
|
||||
bank_forks: Arc<RwLock<BankForks>>,
|
||||
repair_whitelist: Arc<RwLock<HashSet<Pubkey>>>,
|
||||
) -> Self {
|
||||
Self {
|
||||
cluster_info,
|
||||
bank_forks,
|
||||
repair_whitelist,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -456,7 +470,11 @@ impl ServeRepair {
|
|||
let my_id = identity_keypair.pubkey();
|
||||
|
||||
let max_buffered_packets = if root_bank.cluster_type() != ClusterType::MainnetBeta {
|
||||
2 * MAX_REQUESTS_PER_ITERATION
|
||||
if self.repair_whitelist.read().unwrap().len() > 0 {
|
||||
4 * MAX_REQUESTS_PER_ITERATION
|
||||
} else {
|
||||
2 * MAX_REQUESTS_PER_ITERATION
|
||||
}
|
||||
} else {
|
||||
MAX_REQUESTS_PER_ITERATION
|
||||
};
|
||||
|
@ -475,58 +493,74 @@ impl ServeRepair {
|
|||
stats.total_requests += total_requests;
|
||||
|
||||
let decode_start = Instant::now();
|
||||
let mut decoded_reqs = Vec::default();
|
||||
for packet in reqs_v.iter().flatten() {
|
||||
let request: RepairProtocol = match packet.deserialize_slice(..) {
|
||||
Ok(request) => request,
|
||||
Err(_) => {
|
||||
let mut decoded_requests = Vec::default();
|
||||
let mut whitelisted_request_count: usize = 0;
|
||||
{
|
||||
let whitelist = self.repair_whitelist.read().unwrap();
|
||||
for packet in reqs_v.iter().flatten() {
|
||||
let request: RepairProtocol = match packet.deserialize_slice(..) {
|
||||
Ok(request) => request,
|
||||
Err(_) => {
|
||||
stats.err_malformed += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let from_addr = packet.meta().socket_addr();
|
||||
if !ContactInfo::is_valid_address(&from_addr, &socket_addr_space) {
|
||||
stats.err_malformed += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
let from_addr = packet.meta().socket_addr();
|
||||
if !ContactInfo::is_valid_address(&from_addr, &socket_addr_space) {
|
||||
stats.err_malformed += 1;
|
||||
continue;
|
||||
}
|
||||
if request.supports_signature() {
|
||||
// collect stats for signature verification
|
||||
Self::verify_signed_packet(&my_id, packet, &request, stats);
|
||||
} else {
|
||||
stats.unsigned_requests += 1;
|
||||
}
|
||||
|
||||
if request.supports_signature() {
|
||||
// collect stats for signature verification
|
||||
Self::verify_signed_packet(&my_id, packet, &request, stats);
|
||||
} else {
|
||||
stats.unsigned_requests += 1;
|
||||
}
|
||||
if request.sender() == &my_id {
|
||||
stats.self_repair += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if request.sender() == &my_id {
|
||||
stats.self_repair += 1;
|
||||
continue;
|
||||
}
|
||||
let stake = epoch_staked_nodes
|
||||
.as_ref()
|
||||
.and_then(|stakes| stakes.get(request.sender()))
|
||||
.unwrap_or(&0);
|
||||
if *stake == 0 {
|
||||
stats.handle_requests_unstaked += 1;
|
||||
} else {
|
||||
stats.handle_requests_staked += 1;
|
||||
}
|
||||
|
||||
let stake = epoch_staked_nodes
|
||||
.as_ref()
|
||||
.and_then(|stakes| stakes.get(request.sender()))
|
||||
.unwrap_or(&0);
|
||||
if *stake == 0 {
|
||||
stats.handle_requests_unstaked += 1;
|
||||
} else {
|
||||
stats.handle_requests_staked += 1;
|
||||
let whitelisted = whitelist.contains(request.sender());
|
||||
if whitelisted {
|
||||
whitelisted_request_count += 1;
|
||||
}
|
||||
|
||||
decoded_requests.push(RepairRequestWithMeta {
|
||||
request,
|
||||
from_addr,
|
||||
stake: *stake,
|
||||
whitelisted,
|
||||
});
|
||||
}
|
||||
decoded_reqs.push((request, from_addr, *stake));
|
||||
}
|
||||
stats.decode_time_us += decode_start.elapsed().as_micros() as u64;
|
||||
stats.whitelisted_requests += whitelisted_request_count.min(MAX_REQUESTS_PER_ITERATION);
|
||||
|
||||
if decoded_reqs.len() > MAX_REQUESTS_PER_ITERATION {
|
||||
stats.dropped_requests_low_stake += decoded_reqs.len() - MAX_REQUESTS_PER_ITERATION;
|
||||
decoded_reqs.sort_unstable_by_key(|(_, _, stake)| Reverse(*stake));
|
||||
decoded_reqs.truncate(MAX_REQUESTS_PER_ITERATION);
|
||||
if decoded_requests.len() > MAX_REQUESTS_PER_ITERATION {
|
||||
stats.dropped_requests_low_stake += decoded_requests.len() - MAX_REQUESTS_PER_ITERATION;
|
||||
decoded_requests.sort_unstable_by_key(|r| Reverse((r.whitelisted, r.stake)));
|
||||
decoded_requests.truncate(MAX_REQUESTS_PER_ITERATION);
|
||||
}
|
||||
|
||||
self.handle_packets(
|
||||
ping_cache,
|
||||
recycler,
|
||||
blockstore,
|
||||
decoded_reqs,
|
||||
decoded_requests,
|
||||
response_sender,
|
||||
stats,
|
||||
data_budget,
|
||||
|
@ -564,6 +598,7 @@ impl ServeRepair {
|
|||
stats.dropped_requests_low_stake,
|
||||
i64
|
||||
),
|
||||
("whitelisted_requests", stats.whitelisted_requests, i64),
|
||||
(
|
||||
"total_dropped_response_packets",
|
||||
stats.total_dropped_response_packets,
|
||||
|
@ -778,7 +813,7 @@ impl ServeRepair {
|
|||
ping_cache: &mut PingCache,
|
||||
recycler: &PacketBatchRecycler,
|
||||
blockstore: &Blockstore,
|
||||
requests: Vec<(RepairProtocol, SocketAddr, /*stake*/ u64)>,
|
||||
requests: Vec<RepairRequestWithMeta>,
|
||||
response_sender: &PacketBatchSender,
|
||||
stats: &mut ServeRepairStats,
|
||||
data_budget: &DataBudget,
|
||||
|
@ -787,7 +822,16 @@ impl ServeRepair {
|
|||
let mut pending_pings = Vec::default();
|
||||
|
||||
let requests_len = requests.len();
|
||||
for (i, (request, from_addr, stake)) in requests.into_iter().enumerate() {
|
||||
for (
|
||||
i,
|
||||
RepairRequestWithMeta {
|
||||
request,
|
||||
from_addr,
|
||||
stake,
|
||||
..
|
||||
},
|
||||
) in requests.into_iter().enumerate()
|
||||
{
|
||||
if !matches!(&request, RepairProtocol::Pong(_)) {
|
||||
let (check, ping_pkt) =
|
||||
Self::check_ping_cache(ping_cache, &request, &from_addr, &identity_keypair);
|
||||
|
@ -1246,7 +1290,11 @@ mod tests {
|
|||
let bank_forks = Arc::new(RwLock::new(BankForks::new(bank)));
|
||||
let me = ContactInfo::new_localhost(&solana_sdk::pubkey::new_rand(), timestamp());
|
||||
let cluster_info = Arc::new(new_test_cluster_info(me));
|
||||
let serve_repair = ServeRepair::new(cluster_info.clone(), bank_forks);
|
||||
let serve_repair = ServeRepair::new(
|
||||
cluster_info.clone(),
|
||||
bank_forks,
|
||||
Arc::new(RwLock::new(HashSet::default())),
|
||||
);
|
||||
let keypair = cluster_info.keypair().clone();
|
||||
let repair_peer_id = solana_sdk::pubkey::new_rand();
|
||||
let repair_request = ShredRepairType::Orphan(123);
|
||||
|
@ -1292,7 +1340,11 @@ mod tests {
|
|||
let mut bank = Bank::new_for_tests(&genesis_config);
|
||||
bank.feature_set = Arc::new(FeatureSet::all_enabled());
|
||||
let bank_forks = Arc::new(RwLock::new(BankForks::new(bank)));
|
||||
let serve_repair = ServeRepair::new(cluster_info, bank_forks);
|
||||
let serve_repair = ServeRepair::new(
|
||||
cluster_info,
|
||||
bank_forks,
|
||||
Arc::new(RwLock::new(HashSet::default())),
|
||||
);
|
||||
|
||||
let request_bytes = serve_repair
|
||||
.ancestor_repair_request_bytes(&keypair, &repair_peer_id, slot, nonce)
|
||||
|
@ -1326,7 +1378,11 @@ mod tests {
|
|||
let bank_forks = Arc::new(RwLock::new(BankForks::new(bank)));
|
||||
let me = ContactInfo::new_localhost(&solana_sdk::pubkey::new_rand(), timestamp());
|
||||
let cluster_info = Arc::new(new_test_cluster_info(me));
|
||||
let serve_repair = ServeRepair::new(cluster_info.clone(), bank_forks);
|
||||
let serve_repair = ServeRepair::new(
|
||||
cluster_info.clone(),
|
||||
bank_forks,
|
||||
Arc::new(RwLock::new(HashSet::default())),
|
||||
);
|
||||
let keypair = cluster_info.keypair().clone();
|
||||
let repair_peer_id = solana_sdk::pubkey::new_rand();
|
||||
|
||||
|
@ -1653,7 +1709,11 @@ mod tests {
|
|||
let cluster_slots = ClusterSlots::default();
|
||||
let me = ContactInfo::new_localhost(&solana_sdk::pubkey::new_rand(), timestamp());
|
||||
let cluster_info = Arc::new(new_test_cluster_info(me));
|
||||
let serve_repair = ServeRepair::new(cluster_info.clone(), bank_forks);
|
||||
let serve_repair = ServeRepair::new(
|
||||
cluster_info.clone(),
|
||||
bank_forks,
|
||||
Arc::new(RwLock::new(HashSet::default())),
|
||||
);
|
||||
let identity_keypair = cluster_info.keypair().clone();
|
||||
let mut outstanding_requests = OutstandingShredRepairs::default();
|
||||
let rv = serve_repair.repair_request(
|
||||
|
@ -1984,7 +2044,11 @@ mod tests {
|
|||
cluster_info.insert_info(contact_info2.clone());
|
||||
cluster_info.insert_info(contact_info3.clone());
|
||||
let identity_keypair = cluster_info.keypair().clone();
|
||||
let serve_repair = ServeRepair::new(cluster_info, bank_forks);
|
||||
let serve_repair = ServeRepair::new(
|
||||
cluster_info,
|
||||
bank_forks,
|
||||
Arc::new(RwLock::new(HashSet::default())),
|
||||
);
|
||||
|
||||
// If:
|
||||
// 1) repair validator set doesn't exist in gossip
|
||||
|
|
|
@ -80,7 +80,10 @@ pub struct TvuSockets {
|
|||
pub struct TvuConfig {
|
||||
pub max_ledger_shreds: Option<u64>,
|
||||
pub shred_version: u16,
|
||||
// Validators from which repairs are requested
|
||||
pub repair_validators: Option<HashSet<Pubkey>>,
|
||||
// Validators which should be given priority when serving repairs
|
||||
pub repair_whitelist: Arc<RwLock<HashSet<Pubkey>>>,
|
||||
pub wait_for_vote_to_start_leader: bool,
|
||||
pub replay_slots_concurrently: bool,
|
||||
}
|
||||
|
@ -189,6 +192,7 @@ impl Tvu {
|
|||
epoch_schedule,
|
||||
duplicate_slots_reset_sender,
|
||||
repair_validators: tvu_config.repair_validators,
|
||||
repair_whitelist: tvu_config.repair_whitelist,
|
||||
cluster_info: cluster_info.clone(),
|
||||
cluster_slots: cluster_slots.clone(),
|
||||
};
|
||||
|
|
|
@ -140,6 +140,7 @@ pub struct ValidatorConfig {
|
|||
pub new_hard_forks: Option<Vec<Slot>>,
|
||||
pub known_validators: Option<HashSet<Pubkey>>, // None = trust all
|
||||
pub repair_validators: Option<HashSet<Pubkey>>, // None = repair from all
|
||||
pub repair_whitelist: Arc<RwLock<HashSet<Pubkey>>>, // Empty = repair with all
|
||||
pub gossip_validators: Option<HashSet<Pubkey>>, // None = gossip with all
|
||||
pub halt_on_known_validators_accounts_hash_mismatch: bool,
|
||||
pub accounts_hash_fault_injection_slots: u64, // 0 = no fault injection
|
||||
|
@ -201,6 +202,7 @@ impl Default for ValidatorConfig {
|
|||
new_hard_forks: None,
|
||||
known_validators: None,
|
||||
repair_validators: None,
|
||||
repair_whitelist: Arc::new(RwLock::new(HashSet::default())),
|
||||
gossip_validators: None,
|
||||
halt_on_known_validators_accounts_hash_mismatch: false,
|
||||
accounts_hash_fault_injection_slots: 0,
|
||||
|
@ -870,7 +872,11 @@ impl Validator {
|
|||
Some(stats_reporter_sender.clone()),
|
||||
&exit,
|
||||
);
|
||||
let serve_repair = ServeRepair::new(cluster_info.clone(), bank_forks.clone());
|
||||
let serve_repair = ServeRepair::new(
|
||||
cluster_info.clone(),
|
||||
bank_forks.clone(),
|
||||
config.repair_whitelist.clone(),
|
||||
);
|
||||
let serve_repair_service = ServeRepairService::new(
|
||||
serve_repair,
|
||||
blockstore.clone(),
|
||||
|
@ -964,6 +970,7 @@ impl Validator {
|
|||
max_ledger_shreds: config.max_ledger_shreds,
|
||||
shred_version: node.info.shred_version,
|
||||
repair_validators: config.repair_validators.clone(),
|
||||
repair_whitelist: config.repair_whitelist.clone(),
|
||||
wait_for_vote_to_start_leader,
|
||||
replay_slots_concurrently: config.replay_slots_concurrently,
|
||||
},
|
||||
|
|
|
@ -27,6 +27,7 @@ pub fn safe_clone_config(config: &ValidatorConfig) -> ValidatorConfig {
|
|||
new_hard_forks: config.new_hard_forks.clone(),
|
||||
known_validators: config.known_validators.clone(),
|
||||
repair_validators: config.repair_validators.clone(),
|
||||
repair_whitelist: config.repair_whitelist.clone(),
|
||||
gossip_validators: config.gossip_validators.clone(),
|
||||
halt_on_known_validators_accounts_hash_mismatch: config
|
||||
.halt_on_known_validators_accounts_hash_mismatch,
|
||||
|
|
|
@ -974,6 +974,10 @@ impl TestValidator {
|
|||
pub fn bank_forks(&self) -> Arc<RwLock<BankForks>> {
|
||||
self.validator.as_ref().unwrap().bank_forks.clone()
|
||||
}
|
||||
|
||||
pub fn repair_whitelist(&self) -> Arc<RwLock<HashSet<Pubkey>>> {
|
||||
Arc::new(RwLock::new(HashSet::default()))
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for TestValidator {
|
||||
|
|
|
@ -17,7 +17,7 @@ use {
|
|||
signature::{read_keypair_file, Keypair, Signer},
|
||||
},
|
||||
std::{
|
||||
collections::HashMap,
|
||||
collections::{HashMap, HashSet},
|
||||
error,
|
||||
fmt::{self, Display},
|
||||
net::SocketAddr,
|
||||
|
@ -33,6 +33,7 @@ pub struct AdminRpcRequestMetadataPostInit {
|
|||
pub cluster_info: Arc<ClusterInfo>,
|
||||
pub bank_forks: Arc<RwLock<BankForks>>,
|
||||
pub vote_account: Pubkey,
|
||||
pub repair_whitelist: Arc<RwLock<HashSet<Pubkey>>>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
|
@ -80,6 +81,11 @@ pub struct AdminRpcContactInfo {
|
|||
pub shred_version: u16,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct AdminRpcRepairWhitelist {
|
||||
pub whitelist: Vec<Pubkey>,
|
||||
}
|
||||
|
||||
impl From<ContactInfo> for AdminRpcContactInfo {
|
||||
fn from(contact_info: ContactInfo) -> Self {
|
||||
let ContactInfo {
|
||||
|
@ -133,6 +139,12 @@ impl Display for AdminRpcContactInfo {
|
|||
}
|
||||
}
|
||||
|
||||
impl Display for AdminRpcRepairWhitelist {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
writeln!(f, "Repair whitelist: {:?}", &self.whitelist)
|
||||
}
|
||||
}
|
||||
|
||||
#[rpc]
|
||||
pub trait AdminRpc {
|
||||
type Metadata;
|
||||
|
@ -183,6 +195,12 @@ pub trait AdminRpc {
|
|||
|
||||
#[rpc(meta, name = "contactInfo")]
|
||||
fn contact_info(&self, meta: Self::Metadata) -> Result<AdminRpcContactInfo>;
|
||||
|
||||
#[rpc(meta, name = "repairWhitelist")]
|
||||
fn repair_whitelist(&self, meta: Self::Metadata) -> Result<AdminRpcRepairWhitelist>;
|
||||
|
||||
#[rpc(meta, name = "setRepairWhitelist")]
|
||||
fn set_repair_whitelist(&self, meta: Self::Metadata, whitelist: Vec<Pubkey>) -> Result<()>;
|
||||
}
|
||||
|
||||
pub struct AdminRpcImpl;
|
||||
|
@ -321,6 +339,35 @@ impl AdminRpc for AdminRpcImpl {
|
|||
fn contact_info(&self, meta: Self::Metadata) -> Result<AdminRpcContactInfo> {
|
||||
meta.with_post_init(|post_init| Ok(post_init.cluster_info.my_contact_info().into()))
|
||||
}
|
||||
|
||||
fn repair_whitelist(&self, meta: Self::Metadata) -> Result<AdminRpcRepairWhitelist> {
|
||||
debug!("repair_whitelist request received");
|
||||
|
||||
meta.with_post_init(|post_init| {
|
||||
let whitelist: Vec<_> = post_init
|
||||
.repair_whitelist
|
||||
.read()
|
||||
.unwrap()
|
||||
.iter()
|
||||
.copied()
|
||||
.collect();
|
||||
Ok(AdminRpcRepairWhitelist { whitelist })
|
||||
})
|
||||
}
|
||||
|
||||
fn set_repair_whitelist(&self, meta: Self::Metadata, whitelist: Vec<Pubkey>) -> Result<()> {
|
||||
debug!("set_repair_whitelist request received");
|
||||
|
||||
let whitelist: HashSet<Pubkey> = whitelist.into_iter().collect();
|
||||
meta.with_post_init(|post_init| {
|
||||
*post_init.repair_whitelist.write().unwrap() = whitelist;
|
||||
warn!(
|
||||
"Repair whitelist set to {:?}",
|
||||
&post_init.repair_whitelist.read().unwrap()
|
||||
);
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl AdminRpcImpl {
|
||||
|
|
|
@ -498,6 +498,7 @@ fn main() {
|
|||
bank_forks: test_validator.bank_forks(),
|
||||
cluster_info: test_validator.cluster_info(),
|
||||
vote_account: test_validator.vote_account_address(),
|
||||
repair_whitelist: test_validator.repair_whitelist(),
|
||||
});
|
||||
if let Some(dashboard) = dashboard {
|
||||
dashboard.run(Duration::from_millis(250));
|
||||
|
|
|
@ -714,6 +714,18 @@ pub fn app<'a>(version: &'a str, default_args: &'a DefaultArgs) -> App<'a, 'a> {
|
|||
.help("A list of validators to request repairs from. If specified, repair will not \
|
||||
request from validators outside this set [default: all validators]")
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("repair_whitelist")
|
||||
.hidden(true)
|
||||
.long("repair-whitelist")
|
||||
.validator(is_pubkey)
|
||||
.value_name("VALIDATOR IDENTITY")
|
||||
.multiple(true)
|
||||
.takes_value(true)
|
||||
.help("A list of validators to prioritize repairs from. If specified, repair requests \
|
||||
from validators in the list will be prioritized over requests from other validators. \
|
||||
[default: all validators]")
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("gossip_validators")
|
||||
.long("gossip-validator")
|
||||
|
@ -1387,6 +1399,46 @@ pub fn app<'a>(version: &'a str, default_args: &'a DefaultArgs) -> App<'a, 'a> {
|
|||
.help("Output display mode")
|
||||
)
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("repair-whitelist")
|
||||
.about("Manage the validator's repair protocol whitelist")
|
||||
.setting(AppSettings::SubcommandRequiredElseHelp)
|
||||
.setting(AppSettings::InferSubcommands)
|
||||
.subcommand(
|
||||
SubCommand::with_name("get")
|
||||
.about("Display the validator's repair protocol whitelist")
|
||||
.arg(
|
||||
Arg::with_name("output")
|
||||
.long("output")
|
||||
.takes_value(true)
|
||||
.value_name("MODE")
|
||||
.possible_values(&["json", "json-compact"])
|
||||
.help("Output display mode")
|
||||
)
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("set")
|
||||
.about("Set the validator's repair protocol whitelist")
|
||||
.setting(AppSettings::ArgRequiredElseHelp)
|
||||
.arg(
|
||||
Arg::with_name("whitelist")
|
||||
.long("whitelist")
|
||||
.validator(is_pubkey)
|
||||
.value_name("VALIDATOR IDENTITY")
|
||||
.multiple(true)
|
||||
.takes_value(true)
|
||||
.help("Set the validator's repair protocol whitelist")
|
||||
)
|
||||
.after_help("Note: repair protocol whitelist changes only apply to the currently \
|
||||
running validator instance")
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("remove-all")
|
||||
.about("Clear the validator's repair protocol whitelist")
|
||||
.after_help("Note: repair protocol whitelist changes only apply to the currently \
|
||||
running validator instance")
|
||||
)
|
||||
)
|
||||
.subcommand(
|
||||
SubCommand::with_name("init")
|
||||
.about("Initialize the ledger directory then exit")
|
||||
|
|
|
@ -344,6 +344,22 @@ fn wait_for_restart_window(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn set_repair_whitelist(
|
||||
ledger_path: &Path,
|
||||
whitelist: Vec<Pubkey>,
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let admin_client = admin_rpc_service::connect(ledger_path);
|
||||
admin_rpc_service::runtime()
|
||||
.block_on(async move { admin_client.await?.set_repair_whitelist(whitelist).await })
|
||||
.map_err(|err| {
|
||||
std::io::Error::new(
|
||||
std::io::ErrorKind::Other,
|
||||
format!("setRepairWhitelist request failed: {}", err),
|
||||
)
|
||||
})?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns the default fifo shred storage size (include both data and coding
|
||||
/// shreds) based on the validator config.
|
||||
fn default_fifo_shred_storage_size(vc: &ValidatorConfig) -> Option<u64> {
|
||||
|
@ -667,6 +683,59 @@ pub fn main() {
|
|||
});
|
||||
return;
|
||||
}
|
||||
("repair-whitelist", Some(repair_whitelist_subcommand_matches)) => {
|
||||
match repair_whitelist_subcommand_matches.subcommand() {
|
||||
("get", Some(subcommand_matches)) => {
|
||||
let output_mode = subcommand_matches.value_of("output");
|
||||
let admin_client = admin_rpc_service::connect(&ledger_path);
|
||||
let repair_whitelist = admin_rpc_service::runtime()
|
||||
.block_on(async move { admin_client.await?.repair_whitelist().await })
|
||||
.unwrap_or_else(|err| {
|
||||
eprintln!("Repair whitelist query failed: {}", err);
|
||||
exit(1);
|
||||
});
|
||||
if let Some(mode) = output_mode {
|
||||
match mode {
|
||||
"json" => println!(
|
||||
"{}",
|
||||
serde_json::to_string_pretty(&repair_whitelist).unwrap()
|
||||
),
|
||||
"json-compact" => {
|
||||
print!("{}", serde_json::to_string(&repair_whitelist).unwrap())
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
} else {
|
||||
print!("{}", repair_whitelist);
|
||||
}
|
||||
return;
|
||||
}
|
||||
("set", Some(subcommand_matches)) => {
|
||||
let whitelist = if subcommand_matches.is_present("whitelist") {
|
||||
let validators_set: HashSet<_> =
|
||||
values_t_or_exit!(subcommand_matches, "whitelist", Pubkey)
|
||||
.into_iter()
|
||||
.collect();
|
||||
validators_set.into_iter().collect::<Vec<_>>()
|
||||
} else {
|
||||
return;
|
||||
};
|
||||
set_repair_whitelist(&ledger_path, whitelist).unwrap_or_else(|err| {
|
||||
eprintln!("{err}");
|
||||
exit(1);
|
||||
});
|
||||
return;
|
||||
}
|
||||
("remove-all", _) => {
|
||||
set_repair_whitelist(&ledger_path, Vec::default()).unwrap_or_else(|err| {
|
||||
eprintln!("{err}");
|
||||
exit(1);
|
||||
});
|
||||
return;
|
||||
}
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
|
@ -786,6 +855,13 @@ pub fn main() {
|
|||
"repair_validators",
|
||||
"--repair-validator",
|
||||
);
|
||||
let repair_whitelist = validators_set(
|
||||
&identity_keypair.pubkey(),
|
||||
&matches,
|
||||
"repair_whitelist",
|
||||
"--repair-whitelist",
|
||||
);
|
||||
let repair_whitelist = Arc::new(RwLock::new(repair_whitelist.unwrap_or_default()));
|
||||
let gossip_validators = validators_set(
|
||||
&identity_keypair.pubkey(),
|
||||
&matches,
|
||||
|
@ -1094,6 +1170,7 @@ pub fn main() {
|
|||
wait_for_supermajority: value_t!(matches, "wait_for_supermajority", Slot).ok(),
|
||||
known_validators,
|
||||
repair_validators,
|
||||
repair_whitelist: repair_whitelist.clone(),
|
||||
gossip_validators,
|
||||
wal_recovery_mode,
|
||||
poh_verify: !matches.is_present("skip_poh_verify"),
|
||||
|
@ -1572,6 +1649,7 @@ pub fn main() {
|
|||
bank_forks: validator.bank_forks.clone(),
|
||||
cluster_info: validator.cluster_info.clone(),
|
||||
vote_account,
|
||||
repair_whitelist,
|
||||
});
|
||||
|
||||
if let Some(filename) = init_complete_file {
|
||||
|
|
Loading…
Reference in New Issue