Split start_nodes script ssh commands to individual scripts (#642)

This commit is contained in:
pgarg66 2018-07-16 16:21:32 -07:00 committed by GitHub
parent 015b7a1ddb
commit af40ab0c04
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 73 additions and 45 deletions

16
multinode-demo/remote_leader.sh Executable file
View File

@ -0,0 +1,16 @@
#!/bin/bash -e
[[ -n $FORCE ]] || exit
mkdir -p ~/.ssh ~/solana ~/.cargo/bin
sudo apt-get --assume-yes install rsync libssl-dev
chmod 600 ~/.ssh/authorized_keys ~/.ssh/id_rsa
PATH="$HOME"/.cargo/bin:"$PATH"
./fetch-perf-libs.sh
# Run setup
USE_INSTALL=1 ./multinode-demo/setup.sh -p
USE_INSTALL=1 SOLANA_CUDA=1 ./multinode-demo/leader.sh >leader.log 2>&1 &
USE_INSTALL=1 ./multinode-demo/drone.sh >drone.log 2>&1 &

View File

@ -0,0 +1,19 @@
#!/bin/bash -e
[[ -n $FORCE ]] || exit
mkdir -p ~/.ssh ~/solana ~/.cargo/bin
sudo apt-get --assume-yes install rsync libssl-dev
chmod 600 ~/.ssh/authorized_keys ~/.ssh/id_rsa
PATH="$HOME"/.cargo/bin:"$PATH"
ssh-keygen -R "$1"
ssh-keyscan "$1" >>~/.ssh/known_hosts 2>/dev/null
rsync -vPrz "$1":~/.cargo/bin/solana* ~/.cargo/bin/
rsync -vPrz "$1":~/solana/fetch-perf-libs.sh ~/solana/
# Run setup
USE_INSTALL=1 ./multinode-demo/setup.sh -p
USE_INSTALL=1 ./multinode-demo/validator.sh "$1":~/solana "$1" >validator.log 2>&1

View File

@ -29,6 +29,7 @@ SECONDS=0
PATH="$HOME"/.cargo/bin:"$PATH" PATH="$HOME"/.cargo/bin:"$PATH"
cargo install --force cargo install --force
build_time=$SECONDS
echo "Build took $SECONDS seconds" echo "Build took $SECONDS seconds"
ip_addr_array=() ip_addr_array=()
@ -36,83 +37,71 @@ ip_addr_array=()
# shellcheck source=/dev/null # shellcheck source=/dev/null
source "$ip_addr_file" source "$ip_addr_file"
# shellcheck disable=SC2089,SC2016
ssh_command_prefix='export PATH="$HOME/.cargo/bin:$PATH"; cd solana; USE_INSTALL=1'
echo "Deployment started at $(date)" echo "Deployment started at $(date)"
SECONDS=0 SECONDS=0
count=0 count=0
leader_ip= leader_ip=
leader_time=
mkdir -p log
common_setup() { common_setup() {
ip_addr=$1 ip_addr=$1
ssh -n -f "$remote_user@$ip_addr" 'mkdir -p ~/.ssh ~/solana ~/.cargo/bin'
# Killing sshguard for now. TODO: Find a better solution # Killing sshguard for now. TODO: Find a better solution
# sshguard is blacklisting IP address after ssh-keyscan and ssh login attempts # sshguard is blacklisting IP address after ssh-keyscan and ssh login attempts
ssh -n -f "$remote_user@$ip_addr" "sudo service sshguard stop" ssh -n -f "$remote_user@$ip_addr" "
ssh -n -f "$remote_user@$ip_addr" 'sudo apt-get --assume-yes install rsync libssl-dev' set -ex; \
sudo service sshguard stop; \
sudo apt-get --assume-yes install rsync libssl-dev; \
pkill -9 solana-; \
pkill -9 validator; \
pkill -9 leader; \
" >log/"$ip_addr".log
# If provided, deploy SSH keys # If provided, deploy SSH keys
if [[ -z $ssh_keys ]]; then if [[ -n $ssh_keys ]]; then
echo "skip copying the ssh keys" {
else rsync -vPrz "$ssh_keys"/id_rsa "$remote_user@$ip_addr":~/.ssh/
rsync -vPrz "$ssh_keys"/id_rsa "$remote_user@$ip_addr":~/.ssh/ rsync -vPrz "$ssh_keys"/id_rsa.pub "$remote_user@$ip_addr":~/.ssh/
rsync -vPrz "$ssh_keys"/id_rsa.pub "$remote_user@$ip_addr":~/.ssh/ rsync -vPrz "$ssh_keys"/id_rsa.pub "$remote_user@$ip_addr":~/.ssh/authorized_keys
rsync -vPrz "$ssh_keys"/id_rsa.pub "$remote_user@$ip_addr":~/.ssh/authorized_keys } >>log/"$ip_addr".log
ssh -n -f "$remote_user@$ip_addr" 'chmod 600 ~/.ssh/authorized_keys ~/.ssh/id_rsa'
fi fi
# Stop current nodes
ssh "$remote_user@$ip_addr" 'pkill -9 solana-'
} }
leader() { leader() {
common_setup "$1" common_setup "$1"
rsync -vPrz ~/.cargo/bin/solana* "$remote_user@$ip_addr":~/.cargo/bin/ # Deploy build and scripts to remote node {
rsync -vPrz ./multinode-demo "$remote_user@$ip_addr":~/solana/ rsync -vPrz ~/.cargo/bin/solana* "$remote_user@$ip_addr":~/.cargo/bin/
rsync -vPrz ./fetch-perf-libs.sh "$remote_user@$ip_addr":~/solana/ rsync -vPrz ./multinode-demo "$remote_user@$ip_addr":~/solana/
rsync -vPrz ./fetch-perf-libs.sh "$remote_user@$ip_addr":~/solana/
ssh -n -f "$remote_user@$ip_addr" 'cd solana; FORCE=1 ./multinode-demo/remote_leader.sh'
} >>log/"$1".log
# Run setup leader_ip=$1
ssh "$remote_user@$ip_addr" "$ssh_command_prefix"' ./multinode-demo/setup.sh -p "$ip_addr"' leader_time=$SECONDS
SECONDS=0
echo "Starting leader node $ip_addr"
ssh -n -f "$remote_user@$ip_addr" 'cd solana; ./fetch-perf-libs.sh'
ssh -n -f "$remote_user@$ip_addr" "$ssh_command_prefix"' SOLANA_CUDA=1 ./multinode-demo/leader.sh > leader.log 2>&1'
ssh -n -f "$remote_user@$ip_addr" "$ssh_command_prefix"' ./multinode-demo/drone.sh > drone.log 2>&1'
leader_ip=${ip_addr_array[0]}
} }
validator() { validator() {
common_setup "$1" common_setup "$1"
echo "Adding known hosts for $ip_addr" ssh "$remote_user@$ip_addr" "rsync -vPrz ""$remote_user@$leader_ip"":~/solana/multinode-demo ~/solana/" >>log/"$1".log
ssh "$remote_user@$ip_addr" "ssh-keygen -R ""$leader_ip" ssh -n -f "$remote_user@$ip_addr" "cd solana; FORCE=1 ./multinode-demo/remote_validator.sh $leader_ip" >>log/"$1".log
ssh "$remote_user@$ip_addr" "ssh-keyscan ""$leader_ip >> ~/.ssh/known_hosts"
ssh "$remote_user@$ip_addr" "rsync -vPrz ""$remote_user@$leader_ip"":~/.cargo/bin/solana* ~/.cargo/bin/"
ssh "$remote_user@$ip_addr" "rsync -vPrz ""$remote_user@$leader_ip"":~/solana/multinode-demo ~/solana/"
ssh "$remote_user@$ip_addr" "rsync -vPrz ""$remote_user@$leader_ip"":~/solana/fetch-perf-libs.sh ~/solana/"
# Run setup
ssh "$remote_user@$ip_addr" "$ssh_command_prefix"' ./multinode-demo/setup.sh -p "$ip_addr"'
echo "Starting validator node $ip_addr"
ssh -n -f "$remote_user@$ip_addr" "$ssh_command_prefix"" ./multinode-demo/validator.sh $remote_user@$leader_ip:~/solana $leader_ip > validator.log 2>&1"
} }
for ip_addr in "${ip_addr_array[@]}"; do for ip_addr in "${ip_addr_array[@]}"; do
echo "$ip_addr" ssh-keygen -R "$ip_addr" >log/local.log
ssh-keygen -R "$ip_addr" ssh-keyscan "$ip_addr" >>~/.ssh/known_hosts 2>/dev/null
ssh-keyscan "$ip_addr" >>~/.ssh/known_hosts
if ((!count)); then if ((!count)); then
# Start the leader on the first node # Start the leader on the first node
echo "Leader node $ip_addr, killing previous instance and restarting"
leader "$ip_addr" leader "$ip_addr"
else else
# Start validator on all other nodes # Start validator on all other nodes
echo "Validator[$count] node $ip_addr, killing previous instance and restarting"
validator "$ip_addr" & validator "$ip_addr" &
# TBD: Remove the sleep or reduce time once GCP login quota is increased # TBD: Remove the sleep or reduce time once GCP login quota is increased
sleep 2 sleep 2
@ -123,5 +112,9 @@ done
wait wait
((validator_count = count - 1))
echo "Deployment finished at $(date)" echo "Deployment finished at $(date)"
echo "Deployment took $SECONDS seconds" echo "Build took $build_time seconds"
echo "Leader deployment too $leader_time seconds"
echo "$validator_count Validator deployment took $SECONDS seconds"