Add replicator support to multinode demo (#4221)

automerge
This commit is contained in:
Sagar Dhawan 2019-05-09 13:43:39 -07:00 committed by Grimes
parent df43e721e3
commit a031b09190
6 changed files with 158 additions and 47 deletions

View File

@ -7,6 +7,7 @@ use crate::cluster_info::FULLNODE_PORT_RANGE;
use crate::contact_info::ContactInfo;
use crate::service::Service;
use crate::streamer;
use rand::{thread_rng, Rng};
use solana_client::thin_client::{create_client, ThinClient};
use solana_sdk::pubkey::Pubkey;
use solana_sdk::signature::{Keypair, KeypairUtil};
@ -107,6 +108,7 @@ pub fn discover(
))
}
/// Creates a ThinClient per valid node
pub fn get_clients(nodes: &[ContactInfo]) -> Vec<ThinClient> {
nodes
.iter()
@ -115,6 +117,16 @@ pub fn get_clients(nodes: &[ContactInfo]) -> Vec<ThinClient> {
.collect()
}
/// Creates a ThinClient by selecting a valid node at random
///
/// Only nodes for which `ContactInfo::valid_client_facing_addr` returns
/// `Some` are candidates; one candidate is then picked using the
/// thread-local RNG.
///
/// # Panics
///
/// Panics if none of `nodes` has a valid client-facing address (this includes
/// an empty `nodes` slice), because there is nothing to connect to.
pub fn get_client(nodes: &[ContactInfo]) -> ThinClient {
    let candidates: Vec<_> = nodes
        .iter()
        .filter_map(ContactInfo::valid_client_facing_addr)
        .collect();
    // `gen_range(0, 0)` would panic with an opaque range error from inside
    // `rand`; fail here instead with a message that names the real problem.
    assert!(
        !candidates.is_empty(),
        "get_client: no node with a valid client-facing address"
    );
    let select = thread_rng().gen_range(0, candidates.len());
    create_client(candidates[select], FULLNODE_PORT_RANGE)
}
fn spy(
spy_ref: Arc<RwLock<ClusterInfo>>,
num_nodes: Option<usize>,

View File

@ -206,8 +206,9 @@ impl Replicator {
&exit,
);
info!("Looking for leader at {:?}", cluster_entrypoint);
crate::gossip_service::discover_nodes(&cluster_entrypoint.gossip, 1)?;
info!("Connecting to the cluster via {:?}", cluster_entrypoint);
let nodes = crate::gossip_service::discover_nodes(&cluster_entrypoint.gossip, 1)?;
let client = crate::gossip_service::get_client(&nodes);
let (storage_blockhash, storage_slot) = Self::poll_for_blockhash_and_slot(&cluster_info)?;
@ -242,30 +243,33 @@ impl Replicator {
&Hash::default(),
);
let client = create_client(cluster_entrypoint.client_facing_addr(), FULLNODE_PORT_RANGE);
Self::setup_mining_account(&client, &keypair, &storage_keypair)?;
let mut thread_handles =
create_request_processor(node.sockets.storage.unwrap(), &exit, slot);
// receive blobs from retransmit and drop them.
let exit2 = exit.clone();
let t_retransmit = spawn(move || loop {
let _ = retransmit_receiver.recv_timeout(Duration::from_secs(1));
if exit2.load(Ordering::Relaxed) {
break;
}
});
let t_retransmit = {
let exit = exit.clone();
spawn(move || loop {
let _ = retransmit_receiver.recv_timeout(Duration::from_secs(1));
if exit.load(Ordering::Relaxed) {
break;
}
})
};
thread_handles.push(t_retransmit);
let exit3 = exit.clone();
let blocktree1 = blocktree.clone();
let t_replicate = spawn(move || loop {
Self::wait_for_ledger_download(slot, &blocktree1, &exit3, &node_info, &cluster_info);
if exit3.load(Ordering::Relaxed) {
break;
}
});
let t_replicate = {
let exit = exit.clone();
let blocktree = blocktree.clone();
spawn(move || loop {
Self::wait_for_ledger_download(slot, &blocktree, &exit, &node_info, &cluster_info);
if exit.load(Ordering::Relaxed) {
break;
}
})
};
//always push this last
thread_handles.push(t_replicate);
Ok(Self {
@ -291,6 +295,8 @@ impl Replicator {
}
pub fn run(&mut self) {
info!("waiting for ledger download");
self.thread_handles.pop().unwrap().join().unwrap();
self.encrypt_ledger()
.expect("ledger encrypt not successful");
loop {
@ -310,7 +316,7 @@ impl Replicator {
node_info: &ContactInfo,
cluster_info: &Arc<RwLock<ClusterInfo>>,
) {
info!("window created, waiting for ledger download done");
info!("window created, waiting for ledger download");
let mut _received_so_far = 0;
let mut current_slot = start_slot;

View File

@ -36,6 +36,9 @@ else
program=${BASH_REMATCH[1]}
features+="cuda,"
fi
if [[ $program = replicator ]]; then
features+="chacha,"
fi
if [[ -r "$SOLANA_ROOT/$program"/Cargo.toml ]]; then
maybe_package="--package solana-$program"
@ -60,6 +63,7 @@ solana_gossip=$(solana_program gossip)
solana_keygen=$(solana_program keygen)
solana_ledger_tool=$(solana_program ledger-tool)
solana_wallet=$(solana_program wallet)
solana_replicator=$(solana_program replicator)
export RUST_LOG=${RUST_LOG:-solana=info} # if RUST_LOG is unset, default to info
export RUST_BACKTRACE=1

View File

@ -15,9 +15,11 @@ fullnode_usage() {
echo
fi
cat <<EOF
Fullnode Usage:
usage: $0 [--blockstream PATH] [--init-complete-file FILE] [--label LABEL] [--stake LAMPORTS] [--no-voting] [--rpc-port port] [rsync network path to bootstrap leader configuration] [cluster entry point]
Start a full node
Start a full node or a replicator
--blockstream PATH - open blockstream at this unix domain socket location
--init-complete-file FILE - create this file, if it doesn't already exist, once node initialization is complete
@ -106,9 +108,22 @@ ledger_not_setup() {
exit 1
}
# Fund a replicator's account with an airdrop, at most once per keypair.
#
#   $1 - IP address of the cluster entrypoint (its RPC port 8899 is used)
#   $2 - path to the replicator's keypair file
#   $3 - number of lamports to request
#
# A "<keypair path>.configured" marker file records that the airdrop already
# succeeded, so later calls skip it. If the airdrop fails, its exit status is
# returned and no marker is written.
setup_replicator_account() {
  declare entrypoint_ip=$1
  declare node_keypair_path=$2
  declare stake=$3

  declare configured_marker="$node_keypair_path".configured

  if [[ -f "$configured_marker" ]]; then
    echo "Replicator account has already been configured"
    return
  fi

  # $solana_wallet is intentionally unquoted: it may expand to a command
  # followed by arguments rather than a single word.
  $solana_wallet --keypair "$node_keypair_path" --url "http://$entrypoint_ip:8899" airdrop "$stake" || return $?
  touch "$configured_marker"
}
args=()
bootstrap_leader=false
stake=42 # number of lamports to assign as stake by default
node_type=validator
stake=42 # number of lamports to assign as stake
poll_for_new_genesis_block=0
label=
fullnode_keypair_path=
@ -120,7 +135,10 @@ while [[ -n $1 ]]; do
label="-$2"
shift 2
elif [[ $1 = --bootstrap-leader ]]; then
bootstrap_leader=true
node_type=bootstrap_leader
shift
elif [[ $1 = --replicator ]]; then
node_type=replicator
shift
elif [[ $1 = --poll-for-new-genesis-block ]]; then
poll_for_new_genesis_block=1
@ -169,7 +187,7 @@ while [[ -n $1 ]]; do
fi
done
if $bootstrap_leader; then
if [[ $node_type = bootstrap_leader ]]; then
if [[ ${#positional_args[@]} -ne 0 ]]; then
fullnode_usage "Unknown argument: ${positional_args[0]}"
fi
@ -187,6 +205,32 @@ if $bootstrap_leader; then
default_arg --rpc-port 8899
default_arg --rpc-drone-address 127.0.0.1:9900
default_arg --gossip-port 8001
elif [[ $node_type = replicator ]]; then
if [[ ${#positional_args[@]} -gt 2 ]]; then
fullnode_usage "Unknown arguments for replicator"
fi
read -r entrypoint entrypoint_address shift < <(find_entrypoint "${positional_args[@]}")
shift "$shift"
replicator_keypair_path=$SOLANA_CONFIG_DIR/replicator-id.json
replicator_storage_keypair_path="$SOLANA_CONFIG_DIR"/replicator-vote-id.json
ledger_config_dir=$SOLANA_CONFIG_DIR/replicator-ledger
mkdir -p "$SOLANA_CONFIG_DIR"
[[ -r "$replicator_keypair_path" ]] || $solana_keygen -o "$replicator_keypair_path"
[[ -r "$replicator_storage_keypair_path" ]] || $solana_keygen -o "$replicator_storage_keypair_path"
replicator_keypair=$($solana_keygen pubkey "$replicator_keypair_path")
replicator_storage_keypair=$($solana_keygen pubkey "$replicator_storage_keypair_path")
default_arg --entrypoint "$entrypoint_address"
default_arg --identity "$replicator_keypair_path"
default_arg --storage_id "$replicator_storage_keypair_path"
default_arg --ledger "$ledger_config_dir"
else
if [[ ${#positional_args[@]} -gt 2 ]]; then
fullnode_usage "$@"
@ -208,10 +252,24 @@ else
default_arg --rpc-drone-address "${entrypoint_address%:*}:9900"
fi
fullnode_keypair=$($solana_keygen pubkey "$fullnode_keypair_path")
fullnode_vote_keypair=$($solana_keygen pubkey "$fullnode_vote_keypair_path")
cat <<EOF
if [[ $node_type = replicator ]]; then
cat <<EOF
======================[ Replicator configuration ]======================
replicator pubkey: $replicator_keypair
storage pubkey: $replicator_storage_keypair
ledger: $ledger_config_dir
======================================================================
EOF
program=$solana_replicator
else
fullnode_keypair=$($solana_keygen pubkey "$fullnode_keypair_path")
fullnode_vote_keypair=$($solana_keygen pubkey "$fullnode_vote_keypair_path")
cat <<EOF
======================[ Fullnode configuration ]======================
node pubkey: $fullnode_keypair
vote pubkey: $fullnode_vote_keypair
@ -220,29 +278,33 @@ accounts: $accounts_config_dir
======================================================================
EOF
default_arg --identity "$fullnode_keypair_path"
default_arg --voting-keypair "$fullnode_vote_keypair_path"
default_arg --vote-account "$fullnode_vote_keypair"
default_arg --ledger "$ledger_config_dir"
default_arg --accounts "$accounts_config_dir"
if [[ -n $SOLANA_CUDA ]]; then
program=$solana_fullnode_cuda
else
program=$solana_fullnode
fi
fi
if [[ -z $CI ]]; then # Skip in CI
# shellcheck source=scripts/tune-system.sh
source "$here"/../scripts/tune-system.sh
fi
default_arg --identity "$fullnode_keypair_path"
default_arg --voting-keypair "$fullnode_vote_keypair_path"
default_arg --vote-account "$fullnode_vote_keypair"
default_arg --ledger "$ledger_config_dir"
default_arg --accounts "$accounts_config_dir"
if [[ -n $SOLANA_CUDA ]]; then
program=$solana_fullnode_cuda
else
program=$solana_fullnode
fi
set -e
secs_to_next_genesis_poll=0
PS4="$(basename "$0"): "
while true; do
if [[ ! -d "$SOLANA_RSYNC_CONFIG_DIR"/ledger ]]; then
if $bootstrap_leader; then
if [[ $node_type = bootstrap_leader ]]; then
ledger_not_setup "$SOLANA_RSYNC_CONFIG_DIR/ledger does not exist"
fi
rsync_entrypoint_url=$(rsync_url "$entrypoint")
@ -256,8 +318,10 @@ while true; do
trap '[[ -n $pid ]] && kill "$pid" >/dev/null 2>&1 && wait "$pid"' INT TERM ERR
if ! $bootstrap_leader && ((stake)); then
if [[ $node_type = validator ]] && ((stake)); then
setup_vote_account "${entrypoint_address%:*}" "$fullnode_keypair_path" "$fullnode_vote_keypair_path" "$stake"
elif [[ $node_type = replicator ]] && ((stake)); then
setup_replicator_account "${entrypoint_address%:*}" "$replicator_keypair_path" "$stake"
fi
echo "$PS4$program ${args[*]}"
@ -265,7 +329,7 @@ while true; do
pid=$!
oom_score_adj "$pid" 1000
if $bootstrap_leader; then
if [[ $node_type = bootstrap_leader ]]; then
wait "$pid"
sleep 1
else
@ -286,10 +350,10 @@ while true; do
done
echo "############## New genesis detected, restarting fullnode ##############"
echo "############## New genesis detected, restarting $node_type ##############"
kill "$pid" || true
wait "$pid" || true
rm -rf "$ledger_config_dir" "$accounts_config_dir" "$fullnode_vote_keypair_path".configured
# Drop per-genesis state. The replicator's ".configured" marker is created by
# setup_replicator_account next to the replicator *identity* keypair, so that
# is the marker that must be removed for the account to be re-funded.
rm -rf "$ledger_config_dir" "$accounts_config_dir" "$fullnode_vote_keypair_path".configured "$replicator_keypair_path".configured
sleep 60 # give the network time to come back up
fi

View File

@ -0,0 +1,9 @@
#!/usr/bin/env bash
#
# Start a replicator
#
# Thin wrapper: hands control to the fullnode.sh located next to this script,
# in --replicator mode, forwarding every command-line argument unchanged.
#
script_dir=$(dirname "$0")
exec "$script_dir/fullnode.sh" --replicator "$@"

View File

@ -39,6 +39,15 @@ fn main() {
.required(true)
.help("use DIR as persistent ledger location"),
)
.arg(
Arg::with_name("storage_keypair")
.short("s")
.long("storage_id")
.value_name("DIR")
.takes_value(true)
.required(true)
.help("File containing the storage account keypair"),
)
.get_matches();
let ledger_path = matches.value_of("ledger").unwrap();
@ -51,6 +60,14 @@ fn main() {
} else {
Keypair::new()
};
let storage_keypair = if let Some(storage_keypair) = matches.value_of("storage_keypair") {
read_keypair(storage_keypair).unwrap_or_else(|err| {
eprintln!("{}: Unable to open keypair file: {}", err, storage_keypair);
exit(1);
})
} else {
Keypair::new()
};
let entrypoint_addr = matches
.value_of("entrypoint")
@ -74,13 +91,12 @@ fn main() {
);
let entrypoint_info = ContactInfo::new_gossip_entry_point(&entrypoint_addr);
let storage_keypair = Arc::new(Keypair::new());
let mut replicator = Replicator::new(
ledger_path,
node,
entrypoint_info,
Arc::new(keypair),
storage_keypair,
Arc::new(storage_keypair),
)
.unwrap();