#![allow(clippy::arithmetic_side_effects)] use { clap::{crate_description, crate_name, crate_version, Arg, ArgMatches, Command}, rand::{thread_rng, Rng}, serde::{Deserialize, Serialize}, std::{fs, io, path::PathBuf}, }; #[derive(Deserialize, Serialize, Debug)] struct NetworkInterconnect { pub a: u8, pub b: u8, pub config: String, } #[derive(Deserialize, Serialize, Debug)] struct NetworkTopology { pub partitions: Vec, pub interconnects: Vec, } impl Default for NetworkTopology { fn default() -> Self { Self { partitions: vec![100], interconnects: vec![], } } } impl NetworkTopology { pub fn verify(&self) -> bool { let sum: u8 = self.partitions.iter().sum(); if sum != 100 { return false; } for x in self.interconnects.iter() { if x.a as usize > self.partitions.len() || x.b as usize > self.partitions.len() { return false; } } true } pub fn new_from_stdin() -> Self { let mut input = String::new(); println!("Configure partition map (must add up to 100, e.g. [70, 20, 10]):"); let partitions_str = match io::stdin().read_line(&mut input) { Ok(_) => input, Err(error) => panic!("error: {error}"), }; let partitions: Vec = serde_json::from_str(&partitions_str) .expect("Failed to parse input. It must be a JSON string"); let mut interconnects: Vec = vec![]; for i in 0..partitions.len() - 1 { for j in i + 1..partitions.len() { println!("Configure interconnect ({i} <-> {j}):"); let mut input = String::new(); let mut interconnect_config = match io::stdin().read_line(&mut input) { Ok(_) => input, Err(error) => panic!("error: {error}"), }; if interconnect_config.ends_with('\n') { interconnect_config.pop(); if interconnect_config.ends_with('\r') { interconnect_config.pop(); } } if !interconnect_config.is_empty() { let interconnect = NetworkInterconnect { a: i as u8, b: j as u8, config: interconnect_config.clone(), }; interconnects.push(interconnect); let interconnect = NetworkInterconnect { a: j as u8, b: i as u8, config: interconnect_config, }; interconnects.push(interconnect); } } } Self { partitions, interconnects, } } fn new_random(max_partitions: usize, max_packet_drop: u8, max_packet_delay: u32) -> Self { let mut rng = thread_rng(); let num_partitions = rng.gen_range(0..max_partitions + 1); if num_partitions == 0 { return NetworkTopology::default(); } let mut partitions = vec![]; let mut used_partition = 0; for i in 0..num_partitions { let partition = if i == num_partitions - 1 { 100 - used_partition } else { rng.gen_range(0..100 - used_partition - num_partitions + i) }; used_partition += partition; partitions.push(partition as u8); } let mut interconnects: Vec = vec![]; for i in 0..partitions.len() - 1 { for j in i + 1..partitions.len() { let drop_config = if max_packet_drop > 0 { let packet_drop = rng.gen_range(0..max_packet_drop + 1); format!("loss {packet_drop}% 25% ") } else { String::default() }; let config = if max_packet_delay > 0 { let packet_delay = rng.gen_range(0..max_packet_delay + 1); format!("{drop_config}delay {packet_delay}ms 10ms") } else { drop_config }; let interconnect = NetworkInterconnect { a: i as u8, b: j as u8, config: config.clone(), }; interconnects.push(interconnect); let interconnect = NetworkInterconnect { a: j as u8, b: i as u8, config, }; interconnects.push(interconnect); } } Self { partitions, interconnects, } } } fn run( cmd: &str, args: &[&str], launch_err_msg: &str, status_err_msg: &str, ignore_err: bool, ) -> bool { println!("Running {:?}", std::process::Command::new(cmd).args(args)); let output = std::process::Command::new(cmd) .args(args) .output() .expect(launch_err_msg); if ignore_err { return true; } if !output.status.success() { eprintln!( "{} command failed with exit code: {}", status_err_msg, output.status ); use std::str::from_utf8; println!("stdout: {}", from_utf8(&output.stdout).unwrap_or("?")); println!("stderr: {}", from_utf8(&output.stderr).unwrap_or("?")); false } else { true } } fn insert_iptables_rule(tos: u8) -> bool { let my_tos = tos.to_string(); // iptables -t mangle -A PREROUTING -p udp -j TOS --set-tos run( "iptables", &[ "-t", "mangle", "-A", "OUTPUT", "-p", "udp", "-j", "TOS", "--set-tos", my_tos.as_str(), ], "Failed to add iptables rule", "iptables", false, ) } fn flush_iptables_rule() { run( "iptables", &["-F", "-t", "mangle"], "Failed to flush iptables", "iptables flush", true, ); } fn setup_ifb(interface: &str) -> bool { // modprobe ifb numifbs=1 run( "modprobe", &[ "ifb", "numifbs=1", ], "Failed to load ifb module", "modprobe ifb numifbs=1", false ) && // ip link set dev ifb0 up run( "ip", &[ "link", "set", "dev", "ifb0", "up" ], "Failed to bring ifb0 online", "ip link set dev ifb0 up", false ) && // tc qdisc add dev handle ffff: ingress run( "tc", &[ "qdisc", "add", "dev", interface, "handle", "ffff:", "ingress" ], "Failed to setup ingress qdisc", "tc qdisc add dev handle ffff: ingress", false ) && // tc filter add dev parent ffff: protocol ip u32 match u32 0 0 action mirred egress redirect dev ifb0 run( "tc", &[ "filter", "add", "dev", interface, "parent", "ffff:", "protocol", "ip", "u32", "match", "u32", "0", "0", "action", "mirred", "egress", "redirect", "dev", "ifb0" ], "Failed to redirect ingress traffc", "tc filter add dev parent ffff: protocol ip u32 match u32 0 0 action mirred egress redirect dev ifb0", false ) } fn delete_ifb(interface: &str) -> bool { run( "tc", &[ "qdisc", "delete", "dev", interface, "handle", "ffff:", "ingress", ], "Failed to setup ingress qdisc", "tc qdisc delete dev handle ffff: ingress", true, ) && run( "modprobe", &["ifb", "--remove"], "Failed to delete ifb module", "modprobe ifb --remove", true, ) } fn insert_tc_ifb_root(num_bands: &str) -> bool { // tc qdisc add dev ifb0 root handle 1: prio bands run( "tc", &[ "qdisc", "add", "dev", "ifb0", "root", "handle", "1:", "prio", "bands", num_bands, ], "Failed to add root ifb qdisc", "tc qdisc add dev ifb0 root handle 1: prio bands ", false, ) } fn insert_tc_ifb_netem(class: &str, handle: &str, filter: &str) -> bool { let mut filters: Vec<&str> = filter.split(' ').collect(); let mut args = vec![ "qdisc", "add", "dev", "ifb0", "parent", class, "handle", handle, "netem", ]; args.append(&mut filters); // tc qdisc add dev ifb0 parent handle netem run("tc", &args, "Failed to add tc child", "tc add child", false) } fn insert_tos_ifb_filter(class: &str, tos: &str) -> bool { // tc filter add dev ifb0 protocol ip parent 1: prio 1 u32 match ip tos 0xff flowid run( "tc", &[ "filter", "add", "dev", "ifb0", "protocol", "ip", "parent", "1:", "prio", "1", "u32", "match", "ip", "tos", tos, "0xff", "flowid", class, ], "Failed to add tos filter", "tc filter add dev ifb0 protocol ip parent 1: prio 1 u32 match ip tos 0xff flowid ", false, ) } fn insert_default_ifb_filter(class: &str) -> bool { // tc filter add dev ifb0 parent 1: protocol all prio 2 u32 match u32 0 0 flowid 1: run( "tc", &[ "filter", "add", "dev", "ifb0", "parent", "1:", "protocol", "all", "prio", "2", "u32", "match", "u32", "0", "0", "flowid", class, ], "Failed to add catch-all filter", "tc filter add dev ifb0 parent 1: protocol all prio 2 u32 match u32 0 0 flowid 1:", false, ) } fn delete_all_filters(interface: &str) { // tc filter delete dev run( "tc", &["filter", "delete", "dev", interface], "Failed to delete all filters", "tc delete all filters", true, ); } fn identify_my_partition(partitions: &[u8], index: u64, size: u64) -> usize { let mut my_partition = 0; let mut watermark = 0; for (i, p) in partitions.iter().enumerate() { watermark += *p; if u64::from(watermark) >= index * 100 / size { my_partition = i; break; } } my_partition } fn partition_id_to_tos(partition: usize) -> u8 { if partition < 4 { 2u8.pow(partition as u32 + 1) } else { 0 } } fn shape_network(matches: &ArgMatches) { let config_path = PathBuf::from(matches.value_of_t_or_exit::("file")); let config = fs::read_to_string(config_path).expect("Unable to read config file"); let topology: NetworkTopology = serde_json::from_str(&config).expect("Failed to parse log as JSON"); let interface: String = matches.value_of_t_or_exit("iface"); let network_size: u64 = matches.value_of_t_or_exit("size"); let my_index: u64 = matches.value_of_t_or_exit("position"); if !shape_network_steps(&topology, &interface, network_size, my_index) { delete_ifb(interface.as_str()); flush_iptables_rule(); } } fn shape_network_steps( topology: &NetworkTopology, interface: &str, network_size: u64, my_index: u64, ) -> bool { // Integrity checks assert!(topology.verify(), "Failed to verify the configuration file"); assert!(my_index < network_size); // Figure out partition we belong in let my_partition = identify_my_partition(&topology.partitions, my_index + 1, network_size); // Clear any lingering state println!( "my_index: {}, network_size: {}, partitions: {:?}", my_index, network_size, topology.partitions ); println!("My partition is {my_partition}"); cleanup_network(interface); // Mark egress packets with our partition id if !insert_iptables_rule(partition_id_to_tos(my_partition)) { return false; } let num_bands = topology.partitions.len() + 1; let default_filter_class = format!("1:{num_bands}"); if !topology.interconnects.is_empty() { let num_bands_str = num_bands.to_string(); // Redirect ingress traffic to the virtual interface ifb0 so we can // apply egress rules if !setup_ifb(interface) // Setup root qdisc on ifb0 || !insert_tc_ifb_root(num_bands_str.as_str()) // Catch all so regular traffic/traffic within the same partition // is not filtered out || !insert_default_ifb_filter(default_filter_class.as_str()) { return false; } } println!("Setting up interconnects"); for i in &topology.interconnects { if i.b as usize == my_partition { println!("interconnects: {i:#?}"); let tos = partition_id_to_tos(i.a as usize); if tos == 0 { println!("Incorrect value of TOS/Partition in config {}", i.a); return false; } let tos_string = tos.to_string(); // First valid class is 1:1 let class = format!("1:{}", i.a + 1); if !insert_tc_ifb_netem(class.as_str(), tos_string.as_str(), i.config.as_str()) { return false; } if !insert_tos_ifb_filter(class.as_str(), tos_string.as_str()) { return false; } } } true } fn parse_interface(interfaces: &str) -> &str { for line in interfaces.lines() { if line != "ifb0" { return line; } } panic!("No valid interfaces"); } fn cleanup_network(interface: &str) { delete_all_filters("ifb0"); delete_ifb(interface); flush_iptables_rule(); } fn configure(matches: &ArgMatches) { let config = if !matches.is_present("random") { NetworkTopology::new_from_stdin() } else { let max_partitions: usize = matches.value_of_t("max-partitions").unwrap_or(4); let max_drop: u8 = matches.value_of_t("max-drop").unwrap_or(100); let max_delay: u32 = matches.value_of_t("max-delay").unwrap_or(50); NetworkTopology::new_random(max_partitions, max_drop, max_delay) }; assert!(config.verify(), "Failed to verify the configuration"); let topology = serde_json::to_string(&config).expect("Failed to write as JSON"); println!("{topology}"); } fn main() { solana_logger::setup(); let matches = Command::new(crate_name!()) .about(crate_description!()) .version(crate_version!()) .subcommand( Command::new("shape") .about("Shape the network using config file") .arg( Arg::new("file") .short('f') .long("file") .value_name("config file") .takes_value(true) .required(true) .help("Location of the network config file"), ) .arg( Arg::new("size") .short('s') .long("size") .value_name("network size") .takes_value(true) .required(true) .help("Number of nodes in the network"), ) .arg( Arg::new("iface") .short('i') .long("iface") .value_name("network interface name") .takes_value(true) .required(true) .help("Name of network interface"), ) .arg( Arg::new("position") .short('p') .long("position") .value_name("position of node") .takes_value(true) .required(true) .help("Position of current node in the network"), ), ) .subcommand( Command::new("cleanup") .about("Remove the network filters using config file") .arg( Arg::new("file") .short('f') .long("file") .value_name("config file") .takes_value(true) .required(true) .help("Location of the network config file"), ) .arg( Arg::new("size") .short('s') .long("size") .value_name("network size") .takes_value(true) .required(true) .help("Number of nodes in the network"), ) .arg( Arg::new("iface") .short('i') .long("iface") .value_name("network interface name") .takes_value(true) .required(true) .help("Name of network interface"), ) .arg( Arg::new("position") .short('p') .long("position") .value_name("position of node") .takes_value(true) .required(true) .help("Position of current node in the network"), ), ) .subcommand( Command::new("configure") .about("Generate a config file") .arg( Arg::new("random") .short('r') .long("random") .required(false) .help("Generate a random config file"), ) .arg( Arg::new("max-partitions") .short('p') .long("max-partitions") .value_name("count") .takes_value(true) .required(false) .help("Maximum number of partitions. Used only with random configuration generation"), ) .arg( Arg::new("max-drop") .short('d') .long("max-drop") .value_name("percentage") .takes_value(true) .required(false) .help("Maximum amount of packet drop. Used only with random configuration generation"), ) .arg( Arg::new("max-delay") .short('y') .long("max-delay") .value_name("ms") .takes_value(true) .required(false) .help("Maximum amount of packet delay. Used only with random configuration generation"), ), ) .get_matches(); match matches.subcommand() { Some(("shape", args_matches)) => shape_network(args_matches), Some(("cleanup", args_matches)) => { let interfaces: String = args_matches.value_of_t_or_exit("iface"); let iface = parse_interface(&interfaces); cleanup_network(iface) } Some(("configure", args_matches)) => configure(args_matches), _ => {} }; }