From 269db1710ea2b1935ed5f2ed0a40127ab5249297 Mon Sep 17 00:00:00 2001 From: Michael Vines Date: Fri, 22 May 2020 14:33:01 -0700 Subject: [PATCH] Retry a couple times before declaring a UDP port unreachable (#10181) --- net-utils/src/lib.rs | 97 ++++++++++++++++++++++++++----------------- validator/src/main.rs | 8 ++-- 2 files changed, 64 insertions(+), 41 deletions(-) diff --git a/net-utils/src/lib.rs b/net-utils/src/lib.rs index a3220b6abe..a57d3e2b94 100644 --- a/net-utils/src/lib.rs +++ b/net-utils/src/lib.rs @@ -90,25 +90,18 @@ pub fn get_public_ip_addr(ip_echo_server_addr: &SocketAddr) -> Result, udp_sockets: &[&UdpSocket], -) { - let udp: Vec<(_, _)> = udp_sockets +) -> bool { + let udp_ports: Vec<_> = udp_sockets .iter() - .map(|udp_socket| { - ( - udp_socket.local_addr().unwrap().port(), - udp_socket.try_clone().expect("Unable to clone udp socket"), - ) - }) + .map(|udp_socket| udp_socket.local_addr().unwrap().port()) .collect(); - let udp_ports: Vec<_> = udp.iter().map(|x| x.0).collect(); - info!( "Checking that tcp ports {:?} and udp ports {:?} are reachable from {:?}", tcp_listeners, udp_ports, ip_echo_server_addr @@ -121,6 +114,8 @@ pub fn verify_reachable_ports( ) .map_err(|err| warn!("ip_echo_server request failed: {}", err)); + let mut ok = true; + // Wait for a connection to open on each TCP port for (port, tcp_listener) in tcp_listeners { let (sender, receiver) = channel(); @@ -129,38 +124,64 @@ pub fn verify_reachable_ports( let _ = tcp_listener.incoming().next().expect("tcp incoming failed"); sender.send(()).expect("send failure"); }); - receiver - .recv_timeout(Duration::from_secs(5)) - .unwrap_or_else(|err| { + match receiver.recv_timeout(Duration::from_secs(5)) { + Ok(_) => { + info!("tcp/{} is reachable", port); + } + Err(err) => { error!( "Received no response at tcp/{}, check your port configuration: {}", port, err ); - std::process::exit(1); - }); - info!("tcp/{} is reachable", port); + ok = false; + } + } } - // Wait for a datagram to arrive at each UDP port - for (port, udp_socket) in udp { - let (sender, receiver) = channel(); - std::thread::spawn(move || { - let mut buf = [0; 1]; - debug!("Waiting for incoming datagram on udp/{}", port); - let _ = udp_socket.recv(&mut buf).expect("udp recv failure"); - sender.send(()).expect("send failure"); - }); - receiver - .recv_timeout(Duration::from_secs(5)) - .unwrap_or_else(|err| { - error!( - "Received no response at udp/{}, check your port configuration: {}", - port, err - ); - std::process::exit(1); - }); - info!("udp/{} is reachable", port); + if !ok { + // No retries for TCP, abort on the first failure + return ok; } + + for _udp_retries in 0..5 { + // Wait for a datagram to arrive at each UDP port + for udp_socket in udp_sockets { + let port = udp_socket.local_addr().unwrap().port(); + let udp_socket = udp_socket.try_clone().expect("Unable to clone udp socket"); + let (sender, receiver) = channel(); + std::thread::spawn(move || { + let mut buf = [0; 1]; + debug!("Waiting for incoming datagram on udp/{}", port); + let _ = udp_socket.recv(&mut buf).expect("udp recv failure"); + sender.send(()).expect("send failure"); + }); + match receiver.recv_timeout(Duration::from_secs(5)) { + Ok(_) => { + info!("udp/{} is reachable", port); + } + Err(err) => { + error!( + "Received no response at udp/{}, check your port configuration: {}", + port, err + ); + ok = false; + } + } + } + if ok { + break; + } + ok = true; + + // Might have lost a UDP packet, retry a couple times + let _ = ip_echo_server_request( + ip_echo_server_addr, + IpEchoServerMessage::new(&[], &udp_ports), + ) + .map_err(|err| warn!("ip_echo_server request failed: {}", err)); + } + + ok } pub fn parse_port_or_addr(optstr: Option<&str>, default_addr: SocketAddr) -> SocketAddr { @@ -499,10 +520,10 @@ mod tests { parse_host("127.0.0.1"), ); - verify_reachable_ports( + assert!(verify_reachable_ports( &ip_echo_server_addr, vec![(client_port, client_tcp_listener)], &[&client_udp_socket], - ); + )); } } diff --git a/validator/src/main.rs b/validator/src/main.rs index 4df88b7ca7..074f0355a2 100644 --- a/validator/src/main.rs +++ b/validator/src/main.rs @@ -1100,7 +1100,7 @@ pub fn main() { TcpListener::bind(&SocketAddr::from((rpc_bind_address, *port))) .unwrap_or_else(|err| { error!("Unable to bind to tcp/{} for {}: {}", port, purpose, err); - std::process::exit(1); + exit(1); }), )); } @@ -1112,11 +1112,13 @@ pub fn main() { tcp_listeners.push((node.info.gossip.port(), ip_echo)); } - solana_net_utils::verify_reachable_ports( + if !solana_net_utils::verify_reachable_ports( &cluster_entrypoint.gossip, tcp_listeners, &udp_sockets, - ); + ) { + exit(1); + } if !no_genesis_fetch { let (cluster_info, gossip_exit_flag, gossip_service) = start_gossip_node( &identity_keypair,