Start validator nodes in parallel

- This speeds up overall network startup time
This commit is contained in:
Pankaj Garg 2018-07-04 04:25:45 -07:00 committed by Greg Fitzgerald
parent e76bf1438b
commit 8c08e614b7
1 changed file with 45 additions and 25 deletions

View File

@@ -42,12 +42,10 @@ ssh_command_prefix='export PATH="$HOME/.cargo/bin:$PATH"; cd solana; USE_INSTALL
echo "Deployment started at $(date)" echo "Deployment started at $(date)"
SECONDS=0 SECONDS=0
count=0 count=0
leader= leader_ip=
for ip_addr in "${ip_addr_array[@]}"; do
echo "$ip_addr"
ssh-keygen -R "$ip_addr" common_setup() {
ssh-keyscan "$ip_addr" >>~/.ssh/known_hosts ip_addr=$1
ssh -n -f "$remote_user@$ip_addr" 'mkdir -p ~/.ssh ~/solana ~/.cargo/bin' ssh -n -f "$remote_user@$ip_addr" 'mkdir -p ~/.ssh ~/solana ~/.cargo/bin'
@@ -68,40 +66,62 @@ for ip_addr in "${ip_addr_array[@]}"; do
# Stop current nodes # Stop current nodes
ssh "$remote_user@$ip_addr" 'pkill -9 solana-' ssh "$remote_user@$ip_addr" 'pkill -9 solana-'
}
if [[ -n $leader ]]; then leader() {
echo "Adding known hosts for $ip_addr" common_setup "$1"
ssh -n -f "$remote_user@$ip_addr" "ssh-keygen -R $leader"
ssh -n -f "$remote_user@$ip_addr" "ssh-keyscan $leader >> ~/.ssh/known_hosts"
ssh -n -f "$remote_user@$ip_addr" "rsync -vPrz ""$remote_user@$leader"":~/.cargo/bin/solana* ~/.cargo/bin/" rsync -vPrz ~/.cargo/bin/solana* "$remote_user@$ip_addr":~/.cargo/bin/ # Deploy build and scripts to remote node
ssh -n -f "$remote_user@$ip_addr" "rsync -vPrz ""$remote_user@$leader"":~/solana/multinode-demo ~/solana/" rsync -vPrz ./multinode-demo "$remote_user@$ip_addr":~/solana/
ssh -n -f "$remote_user@$ip_addr" "rsync -vPrz ""$remote_user@$leader"":~/solana/fetch-perf-libs.sh ~/solana/" rsync -vPrz ./fetch-perf-libs.sh "$remote_user@$ip_addr":~/solana/
else
# Deploy build and scripts to remote node
rsync -vPrz ~/.cargo/bin/solana* "$remote_user@$ip_addr":~/.cargo/bin/
rsync -vPrz ./multinode-demo "$remote_user@$ip_addr":~/solana/
rsync -vPrz ./fetch-perf-libs.sh "$remote_user@$ip_addr":~/solana/
fi
# Run setup # Run setup
ssh "$remote_user@$ip_addr" "$ssh_command_prefix"' ./multinode-demo/setup.sh -p "$ip_addr"' ssh "$remote_user@$ip_addr" "$ssh_command_prefix"' ./multinode-demo/setup.sh -p "$ip_addr"'
echo "Starting leader node $ip_addr"
ssh -n -f "$remote_user@$ip_addr" 'cd solana; ./fetch-perf-libs.sh'
ssh -n -f "$remote_user@$ip_addr" "$ssh_command_prefix"' SOLANA_CUDA=1 ./multinode-demo/leader.sh > leader.log 2>&1'
ssh -n -f "$remote_user@$ip_addr" "$ssh_command_prefix"' ./multinode-demo/drone.sh > drone.log 2>&1'
leader_ip=${ip_addr_array[0]}
}
#######################################
# Prepare one validator node and launch the validator process on it.
# The node pulls the solana binaries and demo scripts from the leader
# (rather than from this machine) via rsync executed on the node itself.
# Globals:
#   remote_user, leader_ip, ssh_command_prefix (read)
#   ip_addr (read; common_setup assigns it from its argument)
# Arguments:
#   $1 - address of the validator node, forwarded to common_setup
# Outputs:
#   progress messages on stdout, plus whatever the ssh commands print
#######################################
validator() {
  common_setup "$1"

  # Derived only after common_setup has run, since that is what sets ip_addr.
  local node="$remote_user@$ip_addr"
  local leader_login="$remote_user@$leader_ip"
  local pull

  echo "Adding known hosts for $ip_addr"
  # Refresh the leader's host key in the node's known_hosts so the
  # node-to-leader rsync calls below are not stopped by a host-key prompt.
  ssh "$node" "ssh-keygen -R $leader_ip"
  ssh "$node" "ssh-keyscan $leader_ip >> ~/.ssh/known_hosts"

  # Each entry is "<path on leader> <destination on node>". The tildes are
  # kept literal here on purpose; they are expanded on the remote side.
  for pull in \
    '~/.cargo/bin/solana* ~/.cargo/bin/' \
    '~/solana/multinode-demo ~/solana/' \
    '~/solana/fetch-perf-libs.sh ~/solana/'; do
    ssh "$node" "rsync -vPrz ${leader_login}:${pull}"
  done

  # Run setup
  # The trailing part is single-quoted, so "$ip_addr" is NOT expanded locally;
  # the remote shell expands it instead.
  # NOTE(review): confirm the remote environment defines ip_addr, or that
  # setup.sh tolerates an empty argument after -p.
  ssh "$node" "$ssh_command_prefix"' ./multinode-demo/setup.sh -p "$ip_addr"'

  echo "Starting validator node $ip_addr"
  # -n detaches stdin and -f backgrounds ssh, so this call does not block
  # on the long-running validator; its output goes to validator.log remotely.
  ssh -n -f "$node" \
    "${ssh_command_prefix} ./multinode-demo/validator.sh ${leader_login}:~/solana ${leader_ip} > validator.log 2>&1"
}
for ip_addr in "${ip_addr_array[@]}"; do
echo "$ip_addr"
ssh-keygen -R "$ip_addr"
ssh-keyscan "$ip_addr" >>~/.ssh/known_hosts
if ((!count)); then if ((!count)); then
# Start the leader on the first node # Start the leader on the first node
echo "Starting leader node $ip_addr" leader "$ip_addr"
ssh -n -f "$remote_user@$ip_addr" 'cd solana; ./fetch-perf-libs.sh'
ssh -n -f "$remote_user@$ip_addr" "$ssh_command_prefix"' SOLANA_CUDA=1 ./multinode-demo/leader.sh > leader.log 2>&1'
ssh -n -f "$remote_user@$ip_addr" "$ssh_command_prefix"' ./multinode-demo/drone.sh > drone.log 2>&1'
leader=${ip_addr_array[0]}
else else
# Start validator on all other nodes # Start validator on all other nodes
echo "Starting validator node $ip_addr" validator "$ip_addr" &
ssh -n -f "$remote_user@$ip_addr" "$ssh_command_prefix"" ./multinode-demo/validator.sh $remote_user@$leader:~/solana $leader > validator.log 2>&1" # TBD: Remove the sleep or reduce time once GCP login quota is increased
sleep 2
fi fi
((count++)) ((count++))
done done
wait
echo "Deployment finished at $(date)" echo "Deployment finished at $(date)"
echo "Deployment took $SECONDS seconds" echo "Deployment took $SECONDS seconds"