sends repair requests over QUIC protocol (#33016)
The commit implements client-side of serve-repair and ancestor-hash-service over QUIC protocol.
This commit is contained in:
parent
cf35799b2a
commit
e01269a9de
|
@ -7,12 +7,15 @@ use {
|
||||||
},
|
},
|
||||||
outstanding_requests::OutstandingRequests,
|
outstanding_requests::OutstandingRequests,
|
||||||
packet_threshold::DynamicPacketToProcessThreshold,
|
packet_threshold::DynamicPacketToProcessThreshold,
|
||||||
|
quic_endpoint::LocalRequest,
|
||||||
repair_service::{AncestorDuplicateSlotsSender, RepairInfo, RepairStatsGroup},
|
repair_service::{AncestorDuplicateSlotsSender, RepairInfo, RepairStatsGroup},
|
||||||
|
request_response::RequestResponse,
|
||||||
serve_repair::{
|
serve_repair::{
|
||||||
self, AncestorHashesRepairType, AncestorHashesResponse, RepairProtocol, ServeRepair,
|
self, AncestorHashesRepairType, AncestorHashesResponse, RepairProtocol, ServeRepair,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
replay_stage::DUPLICATE_THRESHOLD,
|
replay_stage::DUPLICATE_THRESHOLD,
|
||||||
|
shred_fetch_stage::receive_repair_quic_packets,
|
||||||
},
|
},
|
||||||
bincode::serialize,
|
bincode::serialize,
|
||||||
crossbeam_channel::{unbounded, Receiver, RecvTimeoutError, Sender},
|
crossbeam_channel::{unbounded, Receiver, RecvTimeoutError, Sender},
|
||||||
|
@ -36,7 +39,7 @@ use {
|
||||||
std::{
|
std::{
|
||||||
collections::HashSet,
|
collections::HashSet,
|
||||||
io::{Cursor, Read},
|
io::{Cursor, Read},
|
||||||
net::UdpSocket,
|
net::{SocketAddr, UdpSocket},
|
||||||
sync::{
|
sync::{
|
||||||
atomic::{AtomicBool, Ordering},
|
atomic::{AtomicBool, Ordering},
|
||||||
Arc, RwLock,
|
Arc, RwLock,
|
||||||
|
@ -44,6 +47,7 @@ use {
|
||||||
thread::{self, sleep, Builder, JoinHandle},
|
thread::{self, sleep, Builder, JoinHandle},
|
||||||
time::{Duration, Instant},
|
time::{Duration, Instant},
|
||||||
},
|
},
|
||||||
|
tokio::sync::mpsc::Sender as AsyncSender,
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
|
@ -149,6 +153,7 @@ impl AncestorHashesService {
|
||||||
exit: Arc<AtomicBool>,
|
exit: Arc<AtomicBool>,
|
||||||
blockstore: Arc<Blockstore>,
|
blockstore: Arc<Blockstore>,
|
||||||
ancestor_hashes_request_socket: Arc<UdpSocket>,
|
ancestor_hashes_request_socket: Arc<UdpSocket>,
|
||||||
|
quic_endpoint_sender: AsyncSender<LocalRequest>,
|
||||||
repair_info: RepairInfo,
|
repair_info: RepairInfo,
|
||||||
ancestor_hashes_replay_update_receiver: AncestorHashesReplayUpdateReceiver,
|
ancestor_hashes_replay_update_receiver: AncestorHashesReplayUpdateReceiver,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
|
@ -157,16 +162,31 @@ impl AncestorHashesService {
|
||||||
let t_receiver = streamer::receiver(
|
let t_receiver = streamer::receiver(
|
||||||
ancestor_hashes_request_socket.clone(),
|
ancestor_hashes_request_socket.clone(),
|
||||||
exit.clone(),
|
exit.clone(),
|
||||||
response_sender,
|
response_sender.clone(),
|
||||||
Recycler::default(),
|
Recycler::default(),
|
||||||
Arc::new(StreamerReceiveStats::new(
|
Arc::new(StreamerReceiveStats::new(
|
||||||
"ancestor_hashes_response_receiver",
|
"ancestor_hashes_response_receiver",
|
||||||
)),
|
)),
|
||||||
Duration::from_millis(1), // coalesce
|
Duration::from_millis(1), // coalesce
|
||||||
false,
|
false, // use_pinned_memory
|
||||||
None,
|
None, // in_vote_only_mode
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let (quic_endpoint_response_sender, quic_endpoint_response_receiver) = unbounded();
|
||||||
|
let t_receiver_quic = {
|
||||||
|
let exit = exit.clone();
|
||||||
|
Builder::new()
|
||||||
|
.name(String::from("solAncHashQuic"))
|
||||||
|
.spawn(|| {
|
||||||
|
receive_repair_quic_packets(
|
||||||
|
quic_endpoint_response_receiver,
|
||||||
|
response_sender,
|
||||||
|
Recycler::default(),
|
||||||
|
exit,
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.unwrap()
|
||||||
|
};
|
||||||
let ancestor_hashes_request_statuses: Arc<DashMap<Slot, AncestorRequestStatus>> =
|
let ancestor_hashes_request_statuses: Arc<DashMap<Slot, AncestorRequestStatus>> =
|
||||||
Arc::new(DashMap::new());
|
Arc::new(DashMap::new());
|
||||||
let (retryable_slots_sender, retryable_slots_receiver) = unbounded();
|
let (retryable_slots_sender, retryable_slots_receiver) = unbounded();
|
||||||
|
@ -188,14 +208,22 @@ impl AncestorHashesService {
|
||||||
let t_ancestor_requests = Self::run_manage_ancestor_requests(
|
let t_ancestor_requests = Self::run_manage_ancestor_requests(
|
||||||
ancestor_hashes_request_statuses,
|
ancestor_hashes_request_statuses,
|
||||||
ancestor_hashes_request_socket,
|
ancestor_hashes_request_socket,
|
||||||
|
quic_endpoint_sender,
|
||||||
|
quic_endpoint_response_sender,
|
||||||
repair_info,
|
repair_info,
|
||||||
outstanding_requests,
|
outstanding_requests,
|
||||||
exit,
|
exit,
|
||||||
ancestor_hashes_replay_update_receiver,
|
ancestor_hashes_replay_update_receiver,
|
||||||
retryable_slots_receiver,
|
retryable_slots_receiver,
|
||||||
);
|
);
|
||||||
let thread_hdls = vec![t_receiver, t_ancestor_hashes_responses, t_ancestor_requests];
|
Self {
|
||||||
Self { thread_hdls }
|
thread_hdls: vec![
|
||||||
|
t_receiver,
|
||||||
|
t_receiver_quic,
|
||||||
|
t_ancestor_hashes_responses,
|
||||||
|
t_ancestor_requests,
|
||||||
|
],
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn join(self) -> thread::Result<()> {
|
pub(crate) fn join(self) -> thread::Result<()> {
|
||||||
|
@ -551,6 +579,8 @@ impl AncestorHashesService {
|
||||||
fn run_manage_ancestor_requests(
|
fn run_manage_ancestor_requests(
|
||||||
ancestor_hashes_request_statuses: Arc<DashMap<Slot, AncestorRequestStatus>>,
|
ancestor_hashes_request_statuses: Arc<DashMap<Slot, AncestorRequestStatus>>,
|
||||||
ancestor_hashes_request_socket: Arc<UdpSocket>,
|
ancestor_hashes_request_socket: Arc<UdpSocket>,
|
||||||
|
quic_endpoint_sender: AsyncSender<LocalRequest>,
|
||||||
|
quic_endpoint_response_sender: Sender<(SocketAddr, Vec<u8>)>,
|
||||||
repair_info: RepairInfo,
|
repair_info: RepairInfo,
|
||||||
outstanding_requests: Arc<RwLock<OutstandingAncestorHashesRepairs>>,
|
outstanding_requests: Arc<RwLock<OutstandingAncestorHashesRepairs>>,
|
||||||
exit: Arc<AtomicBool>,
|
exit: Arc<AtomicBool>,
|
||||||
|
@ -587,10 +617,11 @@ impl AncestorHashesService {
|
||||||
if exit.load(Ordering::Relaxed) {
|
if exit.load(Ordering::Relaxed) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
Self::manage_ancestor_requests(
|
Self::manage_ancestor_requests(
|
||||||
&ancestor_hashes_request_statuses,
|
&ancestor_hashes_request_statuses,
|
||||||
&ancestor_hashes_request_socket,
|
&ancestor_hashes_request_socket,
|
||||||
|
&quic_endpoint_sender,
|
||||||
|
&quic_endpoint_response_sender,
|
||||||
&repair_info,
|
&repair_info,
|
||||||
&outstanding_requests,
|
&outstanding_requests,
|
||||||
&ancestor_hashes_replay_update_receiver,
|
&ancestor_hashes_replay_update_receiver,
|
||||||
|
@ -612,6 +643,8 @@ impl AncestorHashesService {
|
||||||
fn manage_ancestor_requests(
|
fn manage_ancestor_requests(
|
||||||
ancestor_hashes_request_statuses: &DashMap<Slot, AncestorRequestStatus>,
|
ancestor_hashes_request_statuses: &DashMap<Slot, AncestorRequestStatus>,
|
||||||
ancestor_hashes_request_socket: &UdpSocket,
|
ancestor_hashes_request_socket: &UdpSocket,
|
||||||
|
quic_endpoint_sender: &AsyncSender<LocalRequest>,
|
||||||
|
quic_endpoint_response_sender: &Sender<(SocketAddr, Vec<u8>)>,
|
||||||
repair_info: &RepairInfo,
|
repair_info: &RepairInfo,
|
||||||
outstanding_requests: &RwLock<OutstandingAncestorHashesRepairs>,
|
outstanding_requests: &RwLock<OutstandingAncestorHashesRepairs>,
|
||||||
ancestor_hashes_replay_update_receiver: &AncestorHashesReplayUpdateReceiver,
|
ancestor_hashes_replay_update_receiver: &AncestorHashesReplayUpdateReceiver,
|
||||||
|
@ -710,6 +743,8 @@ impl AncestorHashesService {
|
||||||
if Self::initiate_ancestor_hashes_requests_for_duplicate_slot(
|
if Self::initiate_ancestor_hashes_requests_for_duplicate_slot(
|
||||||
ancestor_hashes_request_statuses,
|
ancestor_hashes_request_statuses,
|
||||||
ancestor_hashes_request_socket,
|
ancestor_hashes_request_socket,
|
||||||
|
quic_endpoint_sender,
|
||||||
|
quic_endpoint_response_sender,
|
||||||
&repair_info.cluster_slots,
|
&repair_info.cluster_slots,
|
||||||
serve_repair,
|
serve_repair,
|
||||||
&repair_info.repair_validators,
|
&repair_info.repair_validators,
|
||||||
|
@ -787,6 +822,8 @@ impl AncestorHashesService {
|
||||||
fn initiate_ancestor_hashes_requests_for_duplicate_slot(
|
fn initiate_ancestor_hashes_requests_for_duplicate_slot(
|
||||||
ancestor_hashes_request_statuses: &DashMap<Slot, AncestorRequestStatus>,
|
ancestor_hashes_request_statuses: &DashMap<Slot, AncestorRequestStatus>,
|
||||||
ancestor_hashes_request_socket: &UdpSocket,
|
ancestor_hashes_request_socket: &UdpSocket,
|
||||||
|
quic_endpoint_sender: &AsyncSender<LocalRequest>,
|
||||||
|
quic_endpoint_response_sender: &Sender<(SocketAddr, Vec<u8>)>,
|
||||||
cluster_slots: &ClusterSlots,
|
cluster_slots: &ClusterSlots,
|
||||||
serve_repair: &ServeRepair,
|
serve_repair: &ServeRepair,
|
||||||
repair_validators: &Option<HashSet<Pubkey>>,
|
repair_validators: &Option<HashSet<Pubkey>>,
|
||||||
|
@ -811,10 +848,11 @@ impl AncestorHashesService {
|
||||||
repair_stats
|
repair_stats
|
||||||
.ancestor_requests
|
.ancestor_requests
|
||||||
.update(pubkey, duplicate_slot, 0);
|
.update(pubkey, duplicate_slot, 0);
|
||||||
|
let ancestor_hashes_repair_type = AncestorHashesRepairType(duplicate_slot);
|
||||||
let nonce = outstanding_requests
|
let nonce = outstanding_requests
|
||||||
.write()
|
.write()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.add_request(AncestorHashesRepairType(duplicate_slot), timestamp());
|
.add_request(ancestor_hashes_repair_type, timestamp());
|
||||||
let Ok(request_bytes) = serve_repair.ancestor_repair_request_bytes(
|
let Ok(request_bytes) = serve_repair.ancestor_repair_request_bytes(
|
||||||
identity_keypair,
|
identity_keypair,
|
||||||
pubkey,
|
pubkey,
|
||||||
|
@ -827,7 +865,21 @@ impl AncestorHashesService {
|
||||||
Protocol::UDP => {
|
Protocol::UDP => {
|
||||||
let _ = ancestor_hashes_request_socket.send_to(&request_bytes, socket_addr);
|
let _ = ancestor_hashes_request_socket.send_to(&request_bytes, socket_addr);
|
||||||
}
|
}
|
||||||
Protocol::QUIC => todo!(),
|
Protocol::QUIC => {
|
||||||
|
let num_expected_responses =
|
||||||
|
usize::try_from(ancestor_hashes_repair_type.num_expected_responses())
|
||||||
|
.unwrap();
|
||||||
|
let request = LocalRequest {
|
||||||
|
remote_address: *socket_addr,
|
||||||
|
bytes: request_bytes,
|
||||||
|
num_expected_responses,
|
||||||
|
response_sender: quic_endpoint_response_sender.clone(),
|
||||||
|
};
|
||||||
|
if quic_endpoint_sender.blocking_send(request).is_err() {
|
||||||
|
// The receiver end of the channel is disconnected.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1441,10 +1493,14 @@ mod test {
|
||||||
repair_validators,
|
repair_validators,
|
||||||
..
|
..
|
||||||
} = repair_info;
|
} = repair_info;
|
||||||
|
let (quic_endpoint_response_sender, _quic_endpoint_response_receiver) = unbounded();
|
||||||
|
let (quic_endpoint_sender, _quic_endpoint_sender) =
|
||||||
|
tokio::sync::mpsc::channel(/*buffer:*/ 128);
|
||||||
AncestorHashesService::initiate_ancestor_hashes_requests_for_duplicate_slot(
|
AncestorHashesService::initiate_ancestor_hashes_requests_for_duplicate_slot(
|
||||||
&ancestor_hashes_request_statuses,
|
&ancestor_hashes_request_statuses,
|
||||||
&ancestor_hashes_request_socket,
|
&ancestor_hashes_request_socket,
|
||||||
|
&quic_endpoint_sender,
|
||||||
|
&quic_endpoint_response_sender,
|
||||||
&cluster_slots,
|
&cluster_slots,
|
||||||
&requester_serve_repair,
|
&requester_serve_repair,
|
||||||
&repair_validators,
|
&repair_validators,
|
||||||
|
@ -1494,6 +1550,8 @@ mod test {
|
||||||
AncestorHashesService::initiate_ancestor_hashes_requests_for_duplicate_slot(
|
AncestorHashesService::initiate_ancestor_hashes_requests_for_duplicate_slot(
|
||||||
&ancestor_hashes_request_statuses,
|
&ancestor_hashes_request_statuses,
|
||||||
&ancestor_hashes_request_socket,
|
&ancestor_hashes_request_socket,
|
||||||
|
&quic_endpoint_sender,
|
||||||
|
&quic_endpoint_response_sender,
|
||||||
&cluster_slots,
|
&cluster_slots,
|
||||||
&requester_serve_repair,
|
&requester_serve_repair,
|
||||||
&repair_validators,
|
&repair_validators,
|
||||||
|
@ -1555,6 +1613,8 @@ mod test {
|
||||||
AncestorHashesService::initiate_ancestor_hashes_requests_for_duplicate_slot(
|
AncestorHashesService::initiate_ancestor_hashes_requests_for_duplicate_slot(
|
||||||
&ancestor_hashes_request_statuses,
|
&ancestor_hashes_request_statuses,
|
||||||
&ancestor_hashes_request_socket,
|
&ancestor_hashes_request_socket,
|
||||||
|
&quic_endpoint_sender,
|
||||||
|
&quic_endpoint_response_sender,
|
||||||
&cluster_slots,
|
&cluster_slots,
|
||||||
&requester_serve_repair,
|
&requester_serve_repair,
|
||||||
&repair_validators,
|
&repair_validators,
|
||||||
|
@ -1640,10 +1700,15 @@ mod test {
|
||||||
} = repair_info;
|
} = repair_info;
|
||||||
cluster_info.insert_info(responder_node.info);
|
cluster_info.insert_info(responder_node.info);
|
||||||
bank_forks.read().unwrap().root_bank().epoch_schedule();
|
bank_forks.read().unwrap().root_bank().epoch_schedule();
|
||||||
|
let (quic_endpoint_response_sender, _quic_endpoint_response_receiver) = unbounded();
|
||||||
|
let (quic_endpoint_sender, _quic_endpoint_sender) =
|
||||||
|
tokio::sync::mpsc::channel(/*buffer:*/ 128);
|
||||||
// 1) No signals from ReplayStage, no requests should be made
|
// 1) No signals from ReplayStage, no requests should be made
|
||||||
AncestorHashesService::manage_ancestor_requests(
|
AncestorHashesService::manage_ancestor_requests(
|
||||||
&ancestor_hashes_request_statuses,
|
&ancestor_hashes_request_statuses,
|
||||||
&ancestor_hashes_request_socket,
|
&ancestor_hashes_request_socket,
|
||||||
|
&quic_endpoint_sender,
|
||||||
|
&quic_endpoint_response_sender,
|
||||||
&repair_info,
|
&repair_info,
|
||||||
&outstanding_requests,
|
&outstanding_requests,
|
||||||
&ancestor_hashes_replay_update_receiver,
|
&ancestor_hashes_replay_update_receiver,
|
||||||
|
@ -1686,6 +1751,8 @@ mod test {
|
||||||
AncestorHashesService::manage_ancestor_requests(
|
AncestorHashesService::manage_ancestor_requests(
|
||||||
&ancestor_hashes_request_statuses,
|
&ancestor_hashes_request_statuses,
|
||||||
&ancestor_hashes_request_socket,
|
&ancestor_hashes_request_socket,
|
||||||
|
&quic_endpoint_sender,
|
||||||
|
&quic_endpoint_response_sender,
|
||||||
&repair_info,
|
&repair_info,
|
||||||
&outstanding_requests,
|
&outstanding_requests,
|
||||||
&ancestor_hashes_replay_update_receiver,
|
&ancestor_hashes_replay_update_receiver,
|
||||||
|
@ -1725,6 +1792,8 @@ mod test {
|
||||||
AncestorHashesService::manage_ancestor_requests(
|
AncestorHashesService::manage_ancestor_requests(
|
||||||
&ancestor_hashes_request_statuses,
|
&ancestor_hashes_request_statuses,
|
||||||
&ancestor_hashes_request_socket,
|
&ancestor_hashes_request_socket,
|
||||||
|
&quic_endpoint_sender,
|
||||||
|
&quic_endpoint_response_sender,
|
||||||
&repair_info,
|
&repair_info,
|
||||||
&outstanding_requests,
|
&outstanding_requests,
|
||||||
&ancestor_hashes_replay_update_receiver,
|
&ancestor_hashes_replay_update_receiver,
|
||||||
|
@ -1756,6 +1825,8 @@ mod test {
|
||||||
AncestorHashesService::manage_ancestor_requests(
|
AncestorHashesService::manage_ancestor_requests(
|
||||||
&ancestor_hashes_request_statuses,
|
&ancestor_hashes_request_statuses,
|
||||||
&ancestor_hashes_request_socket,
|
&ancestor_hashes_request_socket,
|
||||||
|
&quic_endpoint_sender,
|
||||||
|
&quic_endpoint_response_sender,
|
||||||
&repair_info,
|
&repair_info,
|
||||||
&outstanding_requests,
|
&outstanding_requests,
|
||||||
&ancestor_hashes_replay_update_receiver,
|
&ancestor_hashes_replay_update_receiver,
|
||||||
|
@ -1793,6 +1864,8 @@ mod test {
|
||||||
AncestorHashesService::manage_ancestor_requests(
|
AncestorHashesService::manage_ancestor_requests(
|
||||||
&ancestor_hashes_request_statuses,
|
&ancestor_hashes_request_statuses,
|
||||||
&ancestor_hashes_request_socket,
|
&ancestor_hashes_request_socket,
|
||||||
|
&quic_endpoint_sender,
|
||||||
|
&quic_endpoint_response_sender,
|
||||||
&repair_info,
|
&repair_info,
|
||||||
&outstanding_requests,
|
&outstanding_requests,
|
||||||
&ancestor_hashes_replay_update_receiver,
|
&ancestor_hashes_replay_update_receiver,
|
||||||
|
@ -1833,6 +1906,8 @@ mod test {
|
||||||
AncestorHashesService::manage_ancestor_requests(
|
AncestorHashesService::manage_ancestor_requests(
|
||||||
&ancestor_hashes_request_statuses,
|
&ancestor_hashes_request_statuses,
|
||||||
&ancestor_hashes_request_socket,
|
&ancestor_hashes_request_socket,
|
||||||
|
&quic_endpoint_sender,
|
||||||
|
&quic_endpoint_response_sender,
|
||||||
&repair_info,
|
&repair_info,
|
||||||
&outstanding_requests,
|
&outstanding_requests,
|
||||||
&ancestor_hashes_replay_update_receiver,
|
&ancestor_hashes_replay_update_receiver,
|
||||||
|
@ -1989,10 +2064,15 @@ mod test {
|
||||||
&leader_schedule_cache,
|
&leader_schedule_cache,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let (quic_endpoint_response_sender, _quic_endpoint_response_receiver) = unbounded();
|
||||||
|
let (quic_endpoint_sender, _quic_endpoint_sender) =
|
||||||
|
tokio::sync::mpsc::channel(/*buffer:*/ 128);
|
||||||
// Simulate making a request
|
// Simulate making a request
|
||||||
AncestorHashesService::manage_ancestor_requests(
|
AncestorHashesService::manage_ancestor_requests(
|
||||||
&ancestor_hashes_request_statuses,
|
&ancestor_hashes_request_statuses,
|
||||||
&ancestor_hashes_request_socket,
|
&ancestor_hashes_request_socket,
|
||||||
|
&quic_endpoint_sender,
|
||||||
|
&quic_endpoint_response_sender,
|
||||||
&repair_info,
|
&repair_info,
|
||||||
&outstanding_requests,
|
&outstanding_requests,
|
||||||
&ancestor_hashes_replay_update_receiver,
|
&ancestor_hashes_replay_update_receiver,
|
||||||
|
@ -2088,6 +2168,9 @@ mod test {
|
||||||
&repair_info.ancestor_duplicate_slots_sender,
|
&repair_info.ancestor_duplicate_slots_sender,
|
||||||
&retryable_slots_sender,
|
&retryable_slots_sender,
|
||||||
);
|
);
|
||||||
|
let (quic_endpoint_response_sender, _quic_endpoint_response_receiver) = unbounded();
|
||||||
|
let (quic_endpoint_sender, _quic_endpoint_sender) =
|
||||||
|
tokio::sync::mpsc::channel(/*buffer:*/ 128);
|
||||||
|
|
||||||
// Simulate ancestor request thread getting the retry signal
|
// Simulate ancestor request thread getting the retry signal
|
||||||
assert!(dead_slot_pool.is_empty());
|
assert!(dead_slot_pool.is_empty());
|
||||||
|
@ -2096,6 +2179,8 @@ mod test {
|
||||||
AncestorHashesService::manage_ancestor_requests(
|
AncestorHashesService::manage_ancestor_requests(
|
||||||
&ancestor_hashes_request_statuses,
|
&ancestor_hashes_request_statuses,
|
||||||
&ancestor_hashes_request_socket,
|
&ancestor_hashes_request_socket,
|
||||||
|
&quic_endpoint_sender,
|
||||||
|
&quic_endpoint_response_sender,
|
||||||
&repair_info,
|
&repair_info,
|
||||||
&outstanding_requests,
|
&outstanding_requests,
|
||||||
&ancestor_hashes_replay_update_receiver,
|
&ancestor_hashes_replay_update_receiver,
|
||||||
|
@ -2134,6 +2219,8 @@ mod test {
|
||||||
AncestorHashesService::manage_ancestor_requests(
|
AncestorHashesService::manage_ancestor_requests(
|
||||||
&ancestor_hashes_request_statuses,
|
&ancestor_hashes_request_statuses,
|
||||||
&ancestor_hashes_request_socket,
|
&ancestor_hashes_request_socket,
|
||||||
|
&quic_endpoint_sender,
|
||||||
|
&quic_endpoint_response_sender,
|
||||||
&repair_info,
|
&repair_info,
|
||||||
&outstanding_requests,
|
&outstanding_requests,
|
||||||
&ancestor_hashes_replay_update_receiver,
|
&ancestor_hashes_replay_update_receiver,
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
#![allow(dead_code)]
|
|
||||||
use {
|
use {
|
||||||
bincode::Options,
|
bincode::Options,
|
||||||
crossbeam_channel::Sender,
|
crossbeam_channel::Sender,
|
||||||
|
|
|
@ -13,6 +13,7 @@ use {
|
||||||
ancestor_hashes_service::{AncestorHashesReplayUpdateReceiver, AncestorHashesService},
|
ancestor_hashes_service::{AncestorHashesReplayUpdateReceiver, AncestorHashesService},
|
||||||
duplicate_repair_status::AncestorDuplicateSlotToRepair,
|
duplicate_repair_status::AncestorDuplicateSlotToRepair,
|
||||||
outstanding_requests::OutstandingRequests,
|
outstanding_requests::OutstandingRequests,
|
||||||
|
quic_endpoint::LocalRequest,
|
||||||
repair_weight::RepairWeight,
|
repair_weight::RepairWeight,
|
||||||
serve_repair::{self, ServeRepair, ShredRepairType, REPAIR_PEERS_CACHE_CAPACITY},
|
serve_repair::{self, ServeRepair, ShredRepairType, REPAIR_PEERS_CACHE_CAPACITY},
|
||||||
},
|
},
|
||||||
|
@ -46,6 +47,7 @@ use {
|
||||||
thread::{self, sleep, Builder, JoinHandle},
|
thread::{self, sleep, Builder, JoinHandle},
|
||||||
time::{Duration, Instant},
|
time::{Duration, Instant},
|
||||||
},
|
},
|
||||||
|
tokio::sync::mpsc::Sender as AsyncSender,
|
||||||
};
|
};
|
||||||
|
|
||||||
// Time to defer repair requests to allow for turbine propagation
|
// Time to defer repair requests to allow for turbine propagation
|
||||||
|
@ -239,6 +241,8 @@ impl RepairService {
|
||||||
exit: Arc<AtomicBool>,
|
exit: Arc<AtomicBool>,
|
||||||
repair_socket: Arc<UdpSocket>,
|
repair_socket: Arc<UdpSocket>,
|
||||||
ancestor_hashes_socket: Arc<UdpSocket>,
|
ancestor_hashes_socket: Arc<UdpSocket>,
|
||||||
|
quic_endpoint_sender: AsyncSender<LocalRequest>,
|
||||||
|
quic_endpoint_response_sender: CrossbeamSender<(SocketAddr, Vec<u8>)>,
|
||||||
repair_info: RepairInfo,
|
repair_info: RepairInfo,
|
||||||
verified_vote_receiver: VerifiedVoteReceiver,
|
verified_vote_receiver: VerifiedVoteReceiver,
|
||||||
outstanding_requests: Arc<RwLock<OutstandingShredRepairs>>,
|
outstanding_requests: Arc<RwLock<OutstandingShredRepairs>>,
|
||||||
|
@ -250,6 +254,7 @@ impl RepairService {
|
||||||
let blockstore = blockstore.clone();
|
let blockstore = blockstore.clone();
|
||||||
let exit = exit.clone();
|
let exit = exit.clone();
|
||||||
let repair_info = repair_info.clone();
|
let repair_info = repair_info.clone();
|
||||||
|
let quic_endpoint_sender = quic_endpoint_sender.clone();
|
||||||
Builder::new()
|
Builder::new()
|
||||||
.name("solRepairSvc".to_string())
|
.name("solRepairSvc".to_string())
|
||||||
.spawn(move || {
|
.spawn(move || {
|
||||||
|
@ -257,6 +262,8 @@ impl RepairService {
|
||||||
&blockstore,
|
&blockstore,
|
||||||
&exit,
|
&exit,
|
||||||
&repair_socket,
|
&repair_socket,
|
||||||
|
&quic_endpoint_sender,
|
||||||
|
&quic_endpoint_response_sender,
|
||||||
repair_info,
|
repair_info,
|
||||||
verified_vote_receiver,
|
verified_vote_receiver,
|
||||||
&outstanding_requests,
|
&outstanding_requests,
|
||||||
|
@ -271,6 +278,7 @@ impl RepairService {
|
||||||
exit,
|
exit,
|
||||||
blockstore,
|
blockstore,
|
||||||
ancestor_hashes_socket,
|
ancestor_hashes_socket,
|
||||||
|
quic_endpoint_sender,
|
||||||
repair_info,
|
repair_info,
|
||||||
ancestor_hashes_replay_update_receiver,
|
ancestor_hashes_replay_update_receiver,
|
||||||
);
|
);
|
||||||
|
@ -281,10 +289,13 @@ impl RepairService {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
fn run(
|
fn run(
|
||||||
blockstore: &Blockstore,
|
blockstore: &Blockstore,
|
||||||
exit: &AtomicBool,
|
exit: &AtomicBool,
|
||||||
repair_socket: &UdpSocket,
|
repair_socket: &UdpSocket,
|
||||||
|
quic_endpoint_sender: &AsyncSender<LocalRequest>,
|
||||||
|
quic_endpoint_response_sender: &CrossbeamSender<(SocketAddr, Vec<u8>)>,
|
||||||
repair_info: RepairInfo,
|
repair_info: RepairInfo,
|
||||||
verified_vote_receiver: VerifiedVoteReceiver,
|
verified_vote_receiver: VerifiedVoteReceiver,
|
||||||
outstanding_requests: &RwLock<OutstandingShredRepairs>,
|
outstanding_requests: &RwLock<OutstandingShredRepairs>,
|
||||||
|
@ -433,9 +444,11 @@ impl RepairService {
|
||||||
&repair_info.repair_validators,
|
&repair_info.repair_validators,
|
||||||
&mut outstanding_requests,
|
&mut outstanding_requests,
|
||||||
identity_keypair,
|
identity_keypair,
|
||||||
|
quic_endpoint_sender,
|
||||||
|
quic_endpoint_response_sender,
|
||||||
repair_protocol,
|
repair_protocol,
|
||||||
)
|
)
|
||||||
.ok()?;
|
.ok()??;
|
||||||
Some((req, to))
|
Some((req, to))
|
||||||
})
|
})
|
||||||
.collect()
|
.collect()
|
||||||
|
|
|
@ -26,11 +26,13 @@ pub enum Error {
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
InvalidContactInfo(#[from] contact_info::Error),
|
InvalidContactInfo(#[from] contact_info::Error),
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
|
RepairVerify(#[from] RepairVerifyError),
|
||||||
|
#[error("Send Error")]
|
||||||
|
SendError,
|
||||||
|
#[error(transparent)]
|
||||||
Serialize(#[from] std::boxed::Box<bincode::ErrorKind>),
|
Serialize(#[from] std::boxed::Box<bincode::ErrorKind>),
|
||||||
#[error(transparent)]
|
#[error(transparent)]
|
||||||
WeightedIndex(#[from] rand::distributions::weighted::WeightedError),
|
WeightedIndex(#[from] rand::distributions::weighted::WeightedError),
|
||||||
#[error(transparent)]
|
|
||||||
RepairVerify(#[from] RepairVerifyError),
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub type Result<T> = std::result::Result<T, Error>;
|
pub type Result<T> = std::result::Result<T, Error>;
|
||||||
|
|
|
@ -3,7 +3,7 @@ use {
|
||||||
cluster_slots_service::cluster_slots::ClusterSlots,
|
cluster_slots_service::cluster_slots::ClusterSlots,
|
||||||
repair::{
|
repair::{
|
||||||
duplicate_repair_status::get_ancestor_hash_repair_sample_size,
|
duplicate_repair_status::get_ancestor_hash_repair_sample_size,
|
||||||
quic_endpoint::RemoteRequest,
|
quic_endpoint::{LocalRequest, RemoteRequest},
|
||||||
repair_response,
|
repair_response,
|
||||||
repair_service::{OutstandingShredRepairs, RepairStats, REPAIR_MS},
|
repair_service::{OutstandingShredRepairs, RepairStats, REPAIR_MS},
|
||||||
request_response::RequestResponse,
|
request_response::RequestResponse,
|
||||||
|
@ -11,7 +11,7 @@ use {
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
bincode::{serialize, Options},
|
bincode::{serialize, Options},
|
||||||
crossbeam_channel::{Receiver, RecvTimeoutError},
|
crossbeam_channel::{Receiver, RecvTimeoutError, Sender},
|
||||||
lru::LruCache,
|
lru::LruCache,
|
||||||
rand::{
|
rand::{
|
||||||
distributions::{Distribution, WeightedError, WeightedIndex},
|
distributions::{Distribution, WeightedError, WeightedIndex},
|
||||||
|
@ -59,7 +59,7 @@ use {
|
||||||
thread::{Builder, JoinHandle},
|
thread::{Builder, JoinHandle},
|
||||||
time::{Duration, Instant},
|
time::{Duration, Instant},
|
||||||
},
|
},
|
||||||
tokio::sync::oneshot::Sender as OneShotSender,
|
tokio::sync::{mpsc::Sender as AsyncSender, oneshot::Sender as OneShotSender},
|
||||||
};
|
};
|
||||||
|
|
||||||
/// the number of slots to respond with when responding to `Orphan` requests
|
/// the number of slots to respond with when responding to `Orphan` requests
|
||||||
|
@ -132,6 +132,7 @@ impl RequestResponse for ShredRepairType {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone)]
|
||||||
pub struct AncestorHashesRepairType(pub Slot);
|
pub struct AncestorHashesRepairType(pub Slot);
|
||||||
impl AncestorHashesRepairType {
|
impl AncestorHashesRepairType {
|
||||||
pub fn slot(&self) -> Slot {
|
pub fn slot(&self) -> Slot {
|
||||||
|
@ -339,7 +340,6 @@ pub(crate) struct RepairPeers {
|
||||||
struct Node {
|
struct Node {
|
||||||
pubkey: Pubkey,
|
pubkey: Pubkey,
|
||||||
serve_repair: SocketAddr,
|
serve_repair: SocketAddr,
|
||||||
#[allow(dead_code)]
|
|
||||||
serve_repair_quic: SocketAddr,
|
serve_repair_quic: SocketAddr,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1027,6 +1027,7 @@ impl ServeRepair {
|
||||||
Self::repair_proto_to_bytes(&request, keypair)
|
Self::repair_proto_to_bytes(&request, keypair)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
pub(crate) fn repair_request(
|
pub(crate) fn repair_request(
|
||||||
&self,
|
&self,
|
||||||
cluster_slots: &ClusterSlots,
|
cluster_slots: &ClusterSlots,
|
||||||
|
@ -1036,8 +1037,10 @@ impl ServeRepair {
|
||||||
repair_validators: &Option<HashSet<Pubkey>>,
|
repair_validators: &Option<HashSet<Pubkey>>,
|
||||||
outstanding_requests: &mut OutstandingShredRepairs,
|
outstanding_requests: &mut OutstandingShredRepairs,
|
||||||
identity_keypair: &Keypair,
|
identity_keypair: &Keypair,
|
||||||
|
quic_endpoint_sender: &AsyncSender<LocalRequest>,
|
||||||
|
quic_endpoint_response_sender: &Sender<(SocketAddr, Vec<u8>)>,
|
||||||
repair_protocol: Protocol,
|
repair_protocol: Protocol,
|
||||||
) -> Result<(SocketAddr, Vec<u8>)> {
|
) -> Result<Option<(SocketAddr, Vec<u8>)>> {
|
||||||
// find a peer that appears to be accepting replication and has the desired slot, as indicated
|
// find a peer that appears to be accepting replication and has the desired slot, as indicated
|
||||||
// by a valid tvu port location
|
// by a valid tvu port location
|
||||||
let slot = repair_request.slot();
|
let slot = repair_request.slot();
|
||||||
|
@ -1067,8 +1070,21 @@ impl ServeRepair {
|
||||||
repair_request
|
repair_request
|
||||||
);
|
);
|
||||||
match repair_protocol {
|
match repair_protocol {
|
||||||
Protocol::UDP => Ok((peer.serve_repair, out)),
|
Protocol::UDP => Ok(Some((peer.serve_repair, out))),
|
||||||
Protocol::QUIC => todo!(),
|
Protocol::QUIC => {
|
||||||
|
let num_expected_responses =
|
||||||
|
usize::try_from(repair_request.num_expected_responses()).unwrap();
|
||||||
|
let request = LocalRequest {
|
||||||
|
remote_address: peer.serve_repair_quic,
|
||||||
|
bytes: out,
|
||||||
|
num_expected_responses,
|
||||||
|
response_sender: quic_endpoint_response_sender.clone(),
|
||||||
|
};
|
||||||
|
quic_endpoint_sender
|
||||||
|
.blocking_send(request)
|
||||||
|
.map_err(|_| Error::SendError)
|
||||||
|
.map(|()| None)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1970,6 +1986,10 @@ mod tests {
|
||||||
);
|
);
|
||||||
let identity_keypair = cluster_info.keypair().clone();
|
let identity_keypair = cluster_info.keypair().clone();
|
||||||
let mut outstanding_requests = OutstandingShredRepairs::default();
|
let mut outstanding_requests = OutstandingShredRepairs::default();
|
||||||
|
let (quic_endpoint_sender, _quic_endpoint_receiver) =
|
||||||
|
tokio::sync::mpsc::channel(/*buffer:*/ 128);
|
||||||
|
let (quic_endpoint_response_sender, _quic_endpoint_response_receiver) =
|
||||||
|
crossbeam_channel::unbounded();
|
||||||
let rv = serve_repair.repair_request(
|
let rv = serve_repair.repair_request(
|
||||||
&cluster_slots,
|
&cluster_slots,
|
||||||
ShredRepairType::Shred(0, 0),
|
ShredRepairType::Shred(0, 0),
|
||||||
|
@ -1978,6 +1998,8 @@ mod tests {
|
||||||
&None,
|
&None,
|
||||||
&mut outstanding_requests,
|
&mut outstanding_requests,
|
||||||
&identity_keypair,
|
&identity_keypair,
|
||||||
|
&quic_endpoint_sender,
|
||||||
|
&quic_endpoint_response_sender,
|
||||||
Protocol::UDP, // repair_protocol
|
Protocol::UDP, // repair_protocol
|
||||||
);
|
);
|
||||||
assert_matches!(rv, Err(Error::ClusterInfo(ClusterInfoError::NoPeers)));
|
assert_matches!(rv, Err(Error::ClusterInfo(ClusterInfoError::NoPeers)));
|
||||||
|
@ -2009,8 +2031,11 @@ mod tests {
|
||||||
&None,
|
&None,
|
||||||
&mut outstanding_requests,
|
&mut outstanding_requests,
|
||||||
&identity_keypair,
|
&identity_keypair,
|
||||||
|
&quic_endpoint_sender,
|
||||||
|
&quic_endpoint_response_sender,
|
||||||
Protocol::UDP, // repair_protocol
|
Protocol::UDP, // repair_protocol
|
||||||
)
|
)
|
||||||
|
.unwrap()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert_eq!(nxt.serve_repair(Protocol::UDP).unwrap(), serve_repair_addr);
|
assert_eq!(nxt.serve_repair(Protocol::UDP).unwrap(), serve_repair_addr);
|
||||||
assert_eq!(rv.0, nxt.serve_repair(Protocol::UDP).unwrap());
|
assert_eq!(rv.0, nxt.serve_repair(Protocol::UDP).unwrap());
|
||||||
|
@ -2046,8 +2071,11 @@ mod tests {
|
||||||
&None,
|
&None,
|
||||||
&mut outstanding_requests,
|
&mut outstanding_requests,
|
||||||
&identity_keypair,
|
&identity_keypair,
|
||||||
|
&quic_endpoint_sender,
|
||||||
|
&quic_endpoint_response_sender,
|
||||||
Protocol::UDP, // repair_protocol
|
Protocol::UDP, // repair_protocol
|
||||||
)
|
)
|
||||||
|
.unwrap()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
if rv.0 == serve_repair_addr {
|
if rv.0 == serve_repair_addr {
|
||||||
one = true;
|
one = true;
|
||||||
|
@ -2294,7 +2322,10 @@ mod tests {
|
||||||
let cluster_slots = ClusterSlots::default();
|
let cluster_slots = ClusterSlots::default();
|
||||||
let cluster_info = Arc::new(new_test_cluster_info());
|
let cluster_info = Arc::new(new_test_cluster_info());
|
||||||
let me = cluster_info.my_contact_info();
|
let me = cluster_info.my_contact_info();
|
||||||
|
let (quic_endpoint_sender, _quic_endpoint_receiver) =
|
||||||
|
tokio::sync::mpsc::channel(/*buffer:*/ 128);
|
||||||
|
let (quic_endpoint_response_sender, _quic_endpoint_response_receiver) =
|
||||||
|
crossbeam_channel::unbounded();
|
||||||
// Insert two peers on the network
|
// Insert two peers on the network
|
||||||
let contact_info2 =
|
let contact_info2 =
|
||||||
ContactInfo::new_localhost(&solana_sdk::pubkey::new_rand(), timestamp());
|
ContactInfo::new_localhost(&solana_sdk::pubkey::new_rand(), timestamp());
|
||||||
|
@ -2325,6 +2356,8 @@ mod tests {
|
||||||
&known_validators,
|
&known_validators,
|
||||||
&mut OutstandingShredRepairs::default(),
|
&mut OutstandingShredRepairs::default(),
|
||||||
&identity_keypair,
|
&identity_keypair,
|
||||||
|
&quic_endpoint_sender,
|
||||||
|
&quic_endpoint_response_sender,
|
||||||
Protocol::UDP, // repair_protocol
|
Protocol::UDP, // repair_protocol
|
||||||
),
|
),
|
||||||
Err(Error::ClusterInfo(ClusterInfoError::NoPeers))
|
Err(Error::ClusterInfo(ClusterInfoError::NoPeers))
|
||||||
|
@ -2345,9 +2378,11 @@ mod tests {
|
||||||
&known_validators,
|
&known_validators,
|
||||||
&mut OutstandingShredRepairs::default(),
|
&mut OutstandingShredRepairs::default(),
|
||||||
&identity_keypair,
|
&identity_keypair,
|
||||||
|
&quic_endpoint_sender,
|
||||||
|
&quic_endpoint_response_sender,
|
||||||
Protocol::UDP, // repair_protocol
|
Protocol::UDP, // repair_protocol
|
||||||
),
|
),
|
||||||
Ok(_)
|
Ok(Some(_))
|
||||||
);
|
);
|
||||||
|
|
||||||
// Using no known validators should default to all
|
// Using no known validators should default to all
|
||||||
|
@ -2369,9 +2404,11 @@ mod tests {
|
||||||
&None,
|
&None,
|
||||||
&mut OutstandingShredRepairs::default(),
|
&mut OutstandingShredRepairs::default(),
|
||||||
&identity_keypair,
|
&identity_keypair,
|
||||||
|
&quic_endpoint_sender,
|
||||||
|
&quic_endpoint_response_sender,
|
||||||
Protocol::UDP, // repair_protocol
|
Protocol::UDP, // repair_protocol
|
||||||
),
|
),
|
||||||
Ok(_)
|
Ok(Some(_))
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -162,8 +162,9 @@ impl ShredFetchStage {
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
pub(crate) fn new(
|
pub(crate) fn new(
|
||||||
sockets: Vec<Arc<UdpSocket>>,
|
sockets: Vec<Arc<UdpSocket>>,
|
||||||
quic_endpoint_receiver: Receiver<(Pubkey, SocketAddr, Bytes)>,
|
turbine_quic_endpoint_receiver: Receiver<(Pubkey, SocketAddr, Bytes)>,
|
||||||
repair_socket: Arc<UdpSocket>,
|
repair_socket: Arc<UdpSocket>,
|
||||||
|
repair_quic_endpoint_receiver: Receiver<(SocketAddr, Vec<u8>)>,
|
||||||
sender: Sender<PacketBatch>,
|
sender: Sender<PacketBatch>,
|
||||||
shred_version: u16,
|
shred_version: u16,
|
||||||
bank_forks: Arc<RwLock<BankForks>>,
|
bank_forks: Arc<RwLock<BankForks>>,
|
||||||
|
@ -202,13 +203,55 @@ impl ShredFetchStage {
|
||||||
tvu_threads.extend(repair_receiver);
|
tvu_threads.extend(repair_receiver);
|
||||||
tvu_threads.push(tvu_filter);
|
tvu_threads.push(tvu_filter);
|
||||||
tvu_threads.push(repair_handler);
|
tvu_threads.push(repair_handler);
|
||||||
|
// Repair shreds fetched over QUIC protocol.
|
||||||
|
{
|
||||||
|
let (packet_sender, packet_receiver) = unbounded();
|
||||||
|
let bank_forks = bank_forks.clone();
|
||||||
|
let recycler = recycler.clone();
|
||||||
|
let exit = exit.clone();
|
||||||
|
let sender = sender.clone();
|
||||||
|
let turbine_disabled = turbine_disabled.clone();
|
||||||
|
tvu_threads.extend([
|
||||||
|
Builder::new()
|
||||||
|
.name("solTvuRecvRpr".to_string())
|
||||||
|
.spawn(|| {
|
||||||
|
receive_repair_quic_packets(
|
||||||
|
repair_quic_endpoint_receiver,
|
||||||
|
packet_sender,
|
||||||
|
recycler,
|
||||||
|
exit,
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.unwrap(),
|
||||||
|
Builder::new()
|
||||||
|
.name("solTvuFetchRpr".to_string())
|
||||||
|
.spawn(move || {
|
||||||
|
Self::modify_packets(
|
||||||
|
packet_receiver,
|
||||||
|
sender,
|
||||||
|
&bank_forks,
|
||||||
|
shred_version,
|
||||||
|
"shred_fetch_repair_quic",
|
||||||
|
PacketFlags::REPAIR,
|
||||||
|
None, // repair_context; no ping packets!
|
||||||
|
turbine_disabled,
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.unwrap(),
|
||||||
|
]);
|
||||||
|
}
|
||||||
|
// Turbine shreds fetched over QUIC protocol.
|
||||||
let (packet_sender, packet_receiver) = unbounded();
|
let (packet_sender, packet_receiver) = unbounded();
|
||||||
tvu_threads.extend([
|
tvu_threads.extend([
|
||||||
Builder::new()
|
Builder::new()
|
||||||
.name("solTvuRecvQuic".to_string())
|
.name("solTvuRecvQuic".to_string())
|
||||||
.spawn(|| {
|
.spawn(|| {
|
||||||
receive_quic_datagrams(quic_endpoint_receiver, packet_sender, recycler, exit)
|
receive_quic_datagrams(
|
||||||
|
turbine_quic_endpoint_receiver,
|
||||||
|
packet_sender,
|
||||||
|
recycler,
|
||||||
|
exit,
|
||||||
|
)
|
||||||
})
|
})
|
||||||
.unwrap(),
|
.unwrap(),
|
||||||
Builder::new()
|
Builder::new()
|
||||||
|
@ -241,14 +284,14 @@ impl ShredFetchStage {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn receive_quic_datagrams(
|
fn receive_quic_datagrams(
|
||||||
quic_endpoint_receiver: Receiver<(Pubkey, SocketAddr, Bytes)>,
|
turbine_quic_endpoint_receiver: Receiver<(Pubkey, SocketAddr, Bytes)>,
|
||||||
sender: Sender<PacketBatch>,
|
sender: Sender<PacketBatch>,
|
||||||
recycler: PacketBatchRecycler,
|
recycler: PacketBatchRecycler,
|
||||||
exit: Arc<AtomicBool>,
|
exit: Arc<AtomicBool>,
|
||||||
) {
|
) {
|
||||||
const RECV_TIMEOUT: Duration = Duration::from_secs(1);
|
const RECV_TIMEOUT: Duration = Duration::from_secs(1);
|
||||||
while !exit.load(Ordering::Relaxed) {
|
while !exit.load(Ordering::Relaxed) {
|
||||||
let entry = match quic_endpoint_receiver.recv_timeout(RECV_TIMEOUT) {
|
let entry = match turbine_quic_endpoint_receiver.recv_timeout(RECV_TIMEOUT) {
|
||||||
Ok(entry) => entry,
|
Ok(entry) => entry,
|
||||||
Err(RecvTimeoutError::Timeout) => continue,
|
Err(RecvTimeoutError::Timeout) => continue,
|
||||||
Err(RecvTimeoutError::Disconnected) => return,
|
Err(RecvTimeoutError::Disconnected) => return,
|
||||||
|
@ -260,7 +303,7 @@ fn receive_quic_datagrams(
|
||||||
};
|
};
|
||||||
let deadline = Instant::now() + PACKET_COALESCE_DURATION;
|
let deadline = Instant::now() + PACKET_COALESCE_DURATION;
|
||||||
let entries = std::iter::once(entry).chain(
|
let entries = std::iter::once(entry).chain(
|
||||||
std::iter::repeat_with(|| quic_endpoint_receiver.recv_deadline(deadline).ok())
|
std::iter::repeat_with(|| turbine_quic_endpoint_receiver.recv_deadline(deadline).ok())
|
||||||
.while_some(),
|
.while_some(),
|
||||||
);
|
);
|
||||||
let size = entries
|
let size = entries
|
||||||
|
@ -285,6 +328,51 @@ fn receive_quic_datagrams(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn receive_repair_quic_packets(
|
||||||
|
repair_quic_endpoint_receiver: Receiver<(SocketAddr, Vec<u8>)>,
|
||||||
|
sender: Sender<PacketBatch>,
|
||||||
|
recycler: PacketBatchRecycler,
|
||||||
|
exit: Arc<AtomicBool>,
|
||||||
|
) {
|
||||||
|
const RECV_TIMEOUT: Duration = Duration::from_secs(1);
|
||||||
|
while !exit.load(Ordering::Relaxed) {
|
||||||
|
let entry = match repair_quic_endpoint_receiver.recv_timeout(RECV_TIMEOUT) {
|
||||||
|
Ok(entry) => entry,
|
||||||
|
Err(RecvTimeoutError::Timeout) => continue,
|
||||||
|
Err(RecvTimeoutError::Disconnected) => return,
|
||||||
|
};
|
||||||
|
let mut packet_batch =
|
||||||
|
PacketBatch::new_with_recycler(&recycler, PACKETS_PER_BATCH, "receive_quic_datagrams");
|
||||||
|
unsafe {
|
||||||
|
packet_batch.set_len(PACKETS_PER_BATCH);
|
||||||
|
};
|
||||||
|
let deadline = Instant::now() + PACKET_COALESCE_DURATION;
|
||||||
|
let entries = std::iter::once(entry).chain(
|
||||||
|
std::iter::repeat_with(|| repair_quic_endpoint_receiver.recv_deadline(deadline).ok())
|
||||||
|
.while_some(),
|
||||||
|
);
|
||||||
|
let size = entries
|
||||||
|
.filter(|(_, bytes)| bytes.len() <= PACKET_DATA_SIZE)
|
||||||
|
.zip(packet_batch.iter_mut())
|
||||||
|
.map(|((addr, bytes), packet)| {
|
||||||
|
*packet.meta_mut() = Meta {
|
||||||
|
size: bytes.len(),
|
||||||
|
addr: addr.ip(),
|
||||||
|
port: addr.port(),
|
||||||
|
flags: PacketFlags::REPAIR,
|
||||||
|
};
|
||||||
|
packet.buffer_mut()[..bytes.len()].copy_from_slice(&bytes);
|
||||||
|
})
|
||||||
|
.count();
|
||||||
|
if size > 0 {
|
||||||
|
packet_batch.truncate(size);
|
||||||
|
if sender.send(packet_batch).is_err() {
|
||||||
|
return; // The receiver end of the channel is disconnected.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use {
|
use {
|
||||||
|
|
|
@ -15,7 +15,7 @@ use {
|
||||||
cost_update_service::CostUpdateService,
|
cost_update_service::CostUpdateService,
|
||||||
drop_bank_service::DropBankService,
|
drop_bank_service::DropBankService,
|
||||||
ledger_cleanup_service::LedgerCleanupService,
|
ledger_cleanup_service::LedgerCleanupService,
|
||||||
repair::repair_service::RepairInfo,
|
repair::{quic_endpoint::LocalRequest, repair_service::RepairInfo},
|
||||||
replay_stage::{ReplayStage, ReplayStageConfig},
|
replay_stage::{ReplayStage, ReplayStageConfig},
|
||||||
rewards_recorder_service::RewardsRecorderSender,
|
rewards_recorder_service::RewardsRecorderSender,
|
||||||
shred_fetch_stage::ShredFetchStage,
|
shred_fetch_stage::ShredFetchStage,
|
||||||
|
@ -138,6 +138,7 @@ impl Tvu {
|
||||||
banking_tracer: Arc<BankingTracer>,
|
banking_tracer: Arc<BankingTracer>,
|
||||||
turbine_quic_endpoint_sender: AsyncSender<(SocketAddr, Bytes)>,
|
turbine_quic_endpoint_sender: AsyncSender<(SocketAddr, Bytes)>,
|
||||||
turbine_quic_endpoint_receiver: Receiver<(Pubkey, SocketAddr, Bytes)>,
|
turbine_quic_endpoint_receiver: Receiver<(Pubkey, SocketAddr, Bytes)>,
|
||||||
|
repair_quic_endpoint_sender: AsyncSender<LocalRequest>,
|
||||||
) -> Result<Self, String> {
|
) -> Result<Self, String> {
|
||||||
let TvuSockets {
|
let TvuSockets {
|
||||||
repair: repair_socket,
|
repair: repair_socket,
|
||||||
|
@ -151,10 +152,13 @@ impl Tvu {
|
||||||
let repair_socket = Arc::new(repair_socket);
|
let repair_socket = Arc::new(repair_socket);
|
||||||
let ancestor_hashes_socket = Arc::new(ancestor_hashes_socket);
|
let ancestor_hashes_socket = Arc::new(ancestor_hashes_socket);
|
||||||
let fetch_sockets: Vec<Arc<UdpSocket>> = fetch_sockets.into_iter().map(Arc::new).collect();
|
let fetch_sockets: Vec<Arc<UdpSocket>> = fetch_sockets.into_iter().map(Arc::new).collect();
|
||||||
|
let (repair_quic_endpoint_response_sender, repair_quic_endpoint_response_receiver) =
|
||||||
|
unbounded();
|
||||||
let fetch_stage = ShredFetchStage::new(
|
let fetch_stage = ShredFetchStage::new(
|
||||||
fetch_sockets,
|
fetch_sockets,
|
||||||
turbine_quic_endpoint_receiver,
|
turbine_quic_endpoint_receiver,
|
||||||
repair_socket.clone(),
|
repair_socket.clone(),
|
||||||
|
repair_quic_endpoint_response_receiver,
|
||||||
fetch_sender,
|
fetch_sender,
|
||||||
tvu_config.shred_version,
|
tvu_config.shred_version,
|
||||||
bank_forks.clone(),
|
bank_forks.clone(),
|
||||||
|
@ -209,6 +213,8 @@ impl Tvu {
|
||||||
retransmit_sender,
|
retransmit_sender,
|
||||||
repair_socket,
|
repair_socket,
|
||||||
ancestor_hashes_socket,
|
ancestor_hashes_socket,
|
||||||
|
repair_quic_endpoint_sender,
|
||||||
|
repair_quic_endpoint_response_sender,
|
||||||
exit.clone(),
|
exit.clone(),
|
||||||
repair_info,
|
repair_info,
|
||||||
leader_schedule_cache.clone(),
|
leader_schedule_cache.clone(),
|
||||||
|
@ -401,6 +407,8 @@ pub mod tests {
|
||||||
let (turbine_quic_endpoint_sender, _turbine_quic_endpoint_receiver) =
|
let (turbine_quic_endpoint_sender, _turbine_quic_endpoint_receiver) =
|
||||||
tokio::sync::mpsc::channel(/*capacity:*/ 128);
|
tokio::sync::mpsc::channel(/*capacity:*/ 128);
|
||||||
let (_turbine_quic_endpoint_sender, turbine_quic_endpoint_receiver) = unbounded();
|
let (_turbine_quic_endpoint_sender, turbine_quic_endpoint_receiver) = unbounded();
|
||||||
|
let (repair_quic_endpoint_sender, _repair_quic_endpoint_receiver) =
|
||||||
|
tokio::sync::mpsc::channel(/*buffer:*/ 128);
|
||||||
//start cluster_info1
|
//start cluster_info1
|
||||||
let cluster_info1 =
|
let cluster_info1 =
|
||||||
ClusterInfo::new(target1.info.clone(), keypair, SocketAddrSpace::Unspecified);
|
ClusterInfo::new(target1.info.clone(), keypair, SocketAddrSpace::Unspecified);
|
||||||
|
@ -484,6 +492,7 @@ pub mod tests {
|
||||||
BankingTracer::new_disabled(),
|
BankingTracer::new_disabled(),
|
||||||
turbine_quic_endpoint_sender,
|
turbine_quic_endpoint_sender,
|
||||||
turbine_quic_endpoint_receiver,
|
turbine_quic_endpoint_receiver,
|
||||||
|
repair_quic_endpoint_sender,
|
||||||
)
|
)
|
||||||
.expect("assume success");
|
.expect("assume success");
|
||||||
exit.store(true, Ordering::Relaxed);
|
exit.store(true, Ordering::Relaxed);
|
||||||
|
|
|
@ -16,7 +16,7 @@ use {
|
||||||
},
|
},
|
||||||
ledger_metric_report_service::LedgerMetricReportService,
|
ledger_metric_report_service::LedgerMetricReportService,
|
||||||
poh_timing_report_service::PohTimingReportService,
|
poh_timing_report_service::PohTimingReportService,
|
||||||
repair::{serve_repair::ServeRepair, serve_repair_service::ServeRepairService},
|
repair::{self, serve_repair::ServeRepair, serve_repair_service::ServeRepairService},
|
||||||
rewards_recorder_service::{RewardsRecorderSender, RewardsRecorderService},
|
rewards_recorder_service::{RewardsRecorderSender, RewardsRecorderService},
|
||||||
sample_performance_service::SamplePerformanceService,
|
sample_performance_service::SamplePerformanceService,
|
||||||
sigverify,
|
sigverify,
|
||||||
|
@ -132,6 +132,7 @@ use {
|
||||||
},
|
},
|
||||||
strum::VariantNames,
|
strum::VariantNames,
|
||||||
strum_macros::{Display, EnumString, EnumVariantNames, IntoStaticStr},
|
strum_macros::{Display, EnumString, EnumVariantNames, IntoStaticStr},
|
||||||
|
tokio::runtime::Runtime as TokioRuntime,
|
||||||
};
|
};
|
||||||
|
|
||||||
const MAX_COMPLETED_DATA_SETS_IN_CHANNEL: usize = 100_000;
|
const MAX_COMPLETED_DATA_SETS_IN_CHANNEL: usize = 100_000;
|
||||||
|
@ -463,8 +464,11 @@ pub struct Validator {
|
||||||
accounts_background_service: AccountsBackgroundService,
|
accounts_background_service: AccountsBackgroundService,
|
||||||
accounts_hash_verifier: AccountsHashVerifier,
|
accounts_hash_verifier: AccountsHashVerifier,
|
||||||
turbine_quic_endpoint: Endpoint,
|
turbine_quic_endpoint: Endpoint,
|
||||||
turbine_quic_endpoint_runtime: Option<tokio::runtime::Runtime>,
|
turbine_quic_endpoint_runtime: Option<TokioRuntime>,
|
||||||
turbine_quic_endpoint_join_handle: solana_turbine::quic_endpoint::AsyncTryJoinHandle,
|
turbine_quic_endpoint_join_handle: solana_turbine::quic_endpoint::AsyncTryJoinHandle,
|
||||||
|
repair_quic_endpoint: Endpoint,
|
||||||
|
repair_quic_endpoint_runtime: Option<TokioRuntime>,
|
||||||
|
repair_quic_endpoint_join_handle: repair::quic_endpoint::AsyncTryJoinHandle,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Validator {
|
impl Validator {
|
||||||
|
@ -1048,7 +1052,7 @@ impl Validator {
|
||||||
serve_repair,
|
serve_repair,
|
||||||
// Incoming UDP repair requests are adapted into RemoteRequest
|
// Incoming UDP repair requests are adapted into RemoteRequest
|
||||||
// and also sent through the same channel.
|
// and also sent through the same channel.
|
||||||
repair_quic_endpoint_sender,
|
repair_quic_endpoint_sender.clone(),
|
||||||
repair_quic_endpoint_receiver,
|
repair_quic_endpoint_receiver,
|
||||||
blockstore.clone(),
|
blockstore.clone(),
|
||||||
node.sockets.serve_repair,
|
node.sockets.serve_repair,
|
||||||
|
@ -1149,7 +1153,7 @@ impl Validator {
|
||||||
) = solana_turbine::quic_endpoint::new_quic_endpoint(
|
) = solana_turbine::quic_endpoint::new_quic_endpoint(
|
||||||
turbine_quic_endpoint_runtime
|
turbine_quic_endpoint_runtime
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.map(tokio::runtime::Runtime::handle)
|
.map(TokioRuntime::handle)
|
||||||
.unwrap_or_else(|| current_runtime_handle.as_ref().unwrap()),
|
.unwrap_or_else(|| current_runtime_handle.as_ref().unwrap()),
|
||||||
&identity_keypair,
|
&identity_keypair,
|
||||||
node.sockets.tvu_quic,
|
node.sockets.tvu_quic,
|
||||||
|
@ -1161,6 +1165,30 @@ impl Validator {
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
// Repair quic endpoint.
|
||||||
|
let repair_quic_endpoint_runtime = current_runtime_handle.is_err().then(|| {
|
||||||
|
tokio::runtime::Builder::new_multi_thread()
|
||||||
|
.enable_all()
|
||||||
|
.thread_name("solRepairQuic")
|
||||||
|
.build()
|
||||||
|
.unwrap()
|
||||||
|
});
|
||||||
|
let (repair_quic_endpoint, repair_quic_endpoint_sender, repair_quic_endpoint_join_handle) =
|
||||||
|
repair::quic_endpoint::new_quic_endpoint(
|
||||||
|
repair_quic_endpoint_runtime
|
||||||
|
.as_ref()
|
||||||
|
.map(TokioRuntime::handle)
|
||||||
|
.unwrap_or_else(|| current_runtime_handle.as_ref().unwrap()),
|
||||||
|
&identity_keypair,
|
||||||
|
node.sockets.serve_repair_quic,
|
||||||
|
node.info
|
||||||
|
.serve_repair(Protocol::QUIC)
|
||||||
|
.expect("Operator must spin up node with valid QUIC serve-repair address")
|
||||||
|
.ip(),
|
||||||
|
repair_quic_endpoint_sender,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let (replay_vote_sender, replay_vote_receiver) = unbounded();
|
let (replay_vote_sender, replay_vote_receiver) = unbounded();
|
||||||
let tvu = Tvu::new(
|
let tvu = Tvu::new(
|
||||||
vote_account,
|
vote_account,
|
||||||
|
@ -1213,6 +1241,7 @@ impl Validator {
|
||||||
banking_tracer.clone(),
|
banking_tracer.clone(),
|
||||||
turbine_quic_endpoint_sender.clone(),
|
turbine_quic_endpoint_sender.clone(),
|
||||||
turbine_quic_endpoint_receiver,
|
turbine_quic_endpoint_receiver,
|
||||||
|
repair_quic_endpoint_sender,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let tpu = Tpu::new(
|
let tpu = Tpu::new(
|
||||||
|
@ -1301,6 +1330,9 @@ impl Validator {
|
||||||
turbine_quic_endpoint,
|
turbine_quic_endpoint,
|
||||||
turbine_quic_endpoint_runtime,
|
turbine_quic_endpoint_runtime,
|
||||||
turbine_quic_endpoint_join_handle,
|
turbine_quic_endpoint_join_handle,
|
||||||
|
repair_quic_endpoint,
|
||||||
|
repair_quic_endpoint_runtime,
|
||||||
|
repair_quic_endpoint_join_handle,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1410,9 +1442,14 @@ impl Validator {
|
||||||
}
|
}
|
||||||
|
|
||||||
self.gossip_service.join().expect("gossip_service");
|
self.gossip_service.join().expect("gossip_service");
|
||||||
|
repair::quic_endpoint::close_quic_endpoint(&self.repair_quic_endpoint);
|
||||||
self.serve_repair_service
|
self.serve_repair_service
|
||||||
.join()
|
.join()
|
||||||
.expect("serve_repair_service");
|
.expect("serve_repair_service");
|
||||||
|
self.repair_quic_endpoint_runtime
|
||||||
|
.map(|runtime| runtime.block_on(self.repair_quic_endpoint_join_handle))
|
||||||
|
.transpose()
|
||||||
|
.unwrap();
|
||||||
self.stats_reporter_service
|
self.stats_reporter_service
|
||||||
.join()
|
.join()
|
||||||
.expect("stats_reporter_service");
|
.expect("stats_reporter_service");
|
||||||
|
|
|
@ -7,6 +7,7 @@ use {
|
||||||
completed_data_sets_service::CompletedDataSetsSender,
|
completed_data_sets_service::CompletedDataSetsSender,
|
||||||
repair::{
|
repair::{
|
||||||
ancestor_hashes_service::AncestorHashesReplayUpdateReceiver,
|
ancestor_hashes_service::AncestorHashesReplayUpdateReceiver,
|
||||||
|
quic_endpoint::LocalRequest,
|
||||||
repair_response,
|
repair_response,
|
||||||
repair_service::{
|
repair_service::{
|
||||||
DumpedSlotsReceiver, OutstandingShredRepairs, PopularPrunedForksSender, RepairInfo,
|
DumpedSlotsReceiver, OutstandingShredRepairs, PopularPrunedForksSender, RepairInfo,
|
||||||
|
@ -39,6 +40,7 @@ use {
|
||||||
thread::{self, Builder, JoinHandle},
|
thread::{self, Builder, JoinHandle},
|
||||||
time::{Duration, Instant},
|
time::{Duration, Instant},
|
||||||
},
|
},
|
||||||
|
tokio::sync::mpsc::Sender as AsyncSender,
|
||||||
};
|
};
|
||||||
|
|
||||||
type ShredPayload = Vec<u8>;
|
type ShredPayload = Vec<u8>;
|
||||||
|
@ -325,6 +327,8 @@ impl WindowService {
|
||||||
retransmit_sender: Sender<Vec<ShredPayload>>,
|
retransmit_sender: Sender<Vec<ShredPayload>>,
|
||||||
repair_socket: Arc<UdpSocket>,
|
repair_socket: Arc<UdpSocket>,
|
||||||
ancestor_hashes_socket: Arc<UdpSocket>,
|
ancestor_hashes_socket: Arc<UdpSocket>,
|
||||||
|
repair_quic_endpoint_sender: AsyncSender<LocalRequest>,
|
||||||
|
repair_quic_endpoint_response_sender: Sender<(SocketAddr, Vec<u8>)>,
|
||||||
exit: Arc<AtomicBool>,
|
exit: Arc<AtomicBool>,
|
||||||
repair_info: RepairInfo,
|
repair_info: RepairInfo,
|
||||||
leader_schedule_cache: Arc<LeaderScheduleCache>,
|
leader_schedule_cache: Arc<LeaderScheduleCache>,
|
||||||
|
@ -344,6 +348,8 @@ impl WindowService {
|
||||||
exit.clone(),
|
exit.clone(),
|
||||||
repair_socket,
|
repair_socket,
|
||||||
ancestor_hashes_socket,
|
ancestor_hashes_socket,
|
||||||
|
repair_quic_endpoint_sender,
|
||||||
|
repair_quic_endpoint_response_sender,
|
||||||
repair_info,
|
repair_info,
|
||||||
verified_vote_receiver,
|
verified_vote_receiver,
|
||||||
outstanding_requests.clone(),
|
outstanding_requests.clone(),
|
||||||
|
|
Loading…
Reference in New Issue