Remote multinode scripts cleanup (#666)
- Also added support for stopping nodes
This commit is contained in:
parent
3d45b04da8
commit
f0c39cc84d
|
@ -2,8 +2,6 @@
|
||||||
|
|
||||||
[[ -n $FORCE ]] || exit
|
[[ -n $FORCE ]] || exit
|
||||||
|
|
||||||
mkdir -p ~/.ssh ~/solana ~/.cargo/bin
|
|
||||||
sudo apt-get --assume-yes install rsync libssl-dev
|
|
||||||
chmod 600 ~/.ssh/authorized_keys ~/.ssh/id_rsa
|
chmod 600 ~/.ssh/authorized_keys ~/.ssh/id_rsa
|
||||||
|
|
||||||
PATH="$HOME"/.cargo/bin:"$PATH"
|
PATH="$HOME"/.cargo/bin:"$PATH"
|
||||||
|
|
|
@ -2,8 +2,6 @@
|
||||||
|
|
||||||
[[ -n $FORCE ]] || exit
|
[[ -n $FORCE ]] || exit
|
||||||
|
|
||||||
mkdir -p ~/.ssh ~/solana ~/.cargo/bin
|
|
||||||
sudo apt-get --assume-yes install rsync libssl-dev
|
|
||||||
chmod 600 ~/.ssh/authorized_keys ~/.ssh/id_rsa
|
chmod 600 ~/.ssh/authorized_keys ~/.ssh/id_rsa
|
||||||
|
|
||||||
PATH="$HOME"/.cargo/bin:"$PATH"
|
PATH="$HOME"/.cargo/bin:"$PATH"
|
||||||
|
@ -12,7 +10,6 @@ ssh-keygen -R "$1"
|
||||||
ssh-keyscan "$1" >>~/.ssh/known_hosts 2>/dev/null
|
ssh-keyscan "$1" >>~/.ssh/known_hosts 2>/dev/null
|
||||||
|
|
||||||
rsync -vPrz "$1":~/.cargo/bin/solana* ~/.cargo/bin/
|
rsync -vPrz "$1":~/.cargo/bin/solana* ~/.cargo/bin/
|
||||||
rsync -vPrz "$1":~/solana/fetch-perf-libs.sh ~/solana/
|
|
||||||
|
|
||||||
# Run setup
|
# Run setup
|
||||||
USE_INSTALL=1 ./multinode-demo/setup.sh -p
|
USE_INSTALL=1 ./multinode-demo/setup.sh -p
|
||||||
|
|
|
@ -1,62 +1,89 @@
|
||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
ip_addr_file=$1
|
command=$1
|
||||||
remote_user=$2
|
ip_addr_file=
|
||||||
ssh_keys=$3
|
remote_user=
|
||||||
|
ssh_keys=
|
||||||
|
|
||||||
|
shift
|
||||||
|
|
||||||
usage() {
|
usage() {
|
||||||
echo -e "\\tUsage: $0 <IP Address array> <username> [path to ssh keys]\\n"
|
exitcode=0
|
||||||
echo -e "\\t <IP Address array>: A bash script that exports an array of IP addresses, ip_addr_array. Elements of the array are public IP address of remote nodes."
|
if [[ -n "$1" ]]; then
|
||||||
echo -e "\\t <username>: The username for logging into remote nodes."
|
exitcode=1
|
||||||
echo -e "\\t [path to ssh keys]: The public/private key pair that remote nodes can use to perform rsync and ssh among themselves. Must contain pub, priv and authorized_keys.\\n"
|
echo "Error: $*"
|
||||||
exit 1
|
fi
|
||||||
|
cat <<EOF
|
||||||
|
usage: $0 <start|stop> <-f IP Addr Array file> <-u username> [-k ssh-keys]
|
||||||
|
|
||||||
|
Manage a GCE multinode network
|
||||||
|
|
||||||
|
start|stop - Create or delete the network
|
||||||
|
-f file - A bash script that exports an array of IP addresses, ip_addr_array.
|
||||||
|
Elements of the array are public IP address of remote nodes.
|
||||||
|
-u username - The username for logging into remote nodes.
|
||||||
|
-k ssh-keys - Path to public/private key pair that remote nodes can use to perform
|
||||||
|
rsync and ssh among themselves. Must contain pub, and priv keys.
|
||||||
|
|
||||||
|
EOF
|
||||||
|
exit $exitcode
|
||||||
}
|
}
|
||||||
|
|
||||||
|
while getopts "h?f:u:k:" opt; do
|
||||||
|
case $opt in
|
||||||
|
h | \?)
|
||||||
|
usage
|
||||||
|
;;
|
||||||
|
f)
|
||||||
|
ip_addr_file=$OPTARG
|
||||||
|
;;
|
||||||
|
u)
|
||||||
|
remote_user=$OPTARG
|
||||||
|
;;
|
||||||
|
k)
|
||||||
|
ssh_keys=$OPTARG
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
usage "Error: unhandled option: $opt"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
# Sample IP Address array file contents
|
# Sample IP Address array file contents
|
||||||
# ip_addr_array=(192.168.1.1 192.168.1.5 192.168.2.2)
|
# ip_addr_array=(192.168.1.1 192.168.1.5 192.168.2.2)
|
||||||
|
|
||||||
if [[ -z "$ip_addr_file" ]]; then
|
[[ -n $command ]] || usage "Need a command (start|stop)"
|
||||||
usage
|
[[ -n $ip_addr_file ]] || usage "Need a file with IP address array"
|
||||||
fi
|
[[ -n $remote_user ]] || usage "Need the username for remote nodes"
|
||||||
|
|
||||||
if [[ -z "$remote_user" ]]; then
|
|
||||||
usage
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Build started at $(date)"
|
|
||||||
SECONDS=0
|
|
||||||
# Build and install locally
|
|
||||||
PATH="$HOME"/.cargo/bin:"$PATH"
|
|
||||||
cargo install --force
|
|
||||||
|
|
||||||
build_time=$SECONDS
|
|
||||||
echo "Build took $SECONDS seconds"
|
|
||||||
|
|
||||||
ip_addr_array=()
|
ip_addr_array=()
|
||||||
# Get IP address array
|
# Get IP address array
|
||||||
# shellcheck source=/dev/null
|
# shellcheck source=/dev/null
|
||||||
source "$ip_addr_file"
|
source "$ip_addr_file"
|
||||||
|
|
||||||
echo "Deployment started at $(date)"
|
build_project() {
|
||||||
SECONDS=0
|
echo "Build started at $(date)"
|
||||||
count=0
|
SECONDS=0
|
||||||
leader_ip=
|
|
||||||
leader_time=
|
|
||||||
|
|
||||||
mkdir -p log
|
# Build and install locally
|
||||||
|
PATH="$HOME"/.cargo/bin:"$PATH"
|
||||||
|
cargo install --force
|
||||||
|
|
||||||
common_setup() {
|
echo "Build took $SECONDS seconds"
|
||||||
|
}
|
||||||
|
|
||||||
|
common_start_setup() {
|
||||||
ip_addr=$1
|
ip_addr=$1
|
||||||
|
|
||||||
# Killing sshguard for now. TODO: Find a better solution
|
# Killing sshguard for now. TODO: Find a better solution
|
||||||
# sshguard is blacklisting IP address after ssh-keyscan and ssh login attempts
|
# sshguard is blacklisting IP address after ssh-keyscan and ssh login attempts
|
||||||
ssh -n -f "$remote_user@$ip_addr" "
|
ssh -n -f "$remote_user@$ip_addr" " \
|
||||||
set -ex; \
|
set -ex; \
|
||||||
sudo service sshguard stop; \
|
sudo service sshguard stop; \
|
||||||
sudo apt-get --assume-yes install rsync libssl-dev; \
|
sudo apt-get --assume-yes install rsync libssl-dev; \
|
||||||
pkill -9 solana-; \
|
mkdir -p ~/.ssh ~/solana ~/.cargo/bin; \
|
||||||
pkill -9 validator; \
|
|
||||||
pkill -9 leader; \
|
|
||||||
" >log/"$ip_addr".log
|
" >log/"$ip_addr".log
|
||||||
|
|
||||||
# If provided, deploy SSH keys
|
# If provided, deploy SSH keys
|
||||||
|
@ -69,8 +96,8 @@ common_setup() {
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
leader() {
|
start_leader() {
|
||||||
common_setup "$1"
|
common_start_setup "$1"
|
||||||
|
|
||||||
{
|
{
|
||||||
rsync -vPrz ~/.cargo/bin/solana* "$remote_user@$ip_addr":~/.cargo/bin/
|
rsync -vPrz ~/.cargo/bin/solana* "$remote_user@$ip_addr":~/.cargo/bin/
|
||||||
|
@ -84,37 +111,76 @@ leader() {
|
||||||
SECONDS=0
|
SECONDS=0
|
||||||
}
|
}
|
||||||
|
|
||||||
validator() {
|
start_validator() {
|
||||||
common_setup "$1"
|
common_start_setup "$1"
|
||||||
|
|
||||||
ssh "$remote_user@$ip_addr" "rsync -vPrz ""$remote_user@$leader_ip"":~/solana/multinode-demo ~/solana/" >>log/"$1".log
|
ssh "$remote_user@$ip_addr" "rsync -vPrz ""$remote_user@$leader_ip"":~/solana/multinode-demo ~/solana/" >>log/"$1".log
|
||||||
ssh -n -f "$remote_user@$ip_addr" "cd solana; FORCE=1 ./multinode-demo/remote_validator.sh $leader_ip" >>log/"$1".log
|
ssh -n -f "$remote_user@$ip_addr" "cd solana; FORCE=1 ./multinode-demo/remote_validator.sh $leader_ip" >>log/"$1".log
|
||||||
}
|
}
|
||||||
|
|
||||||
for ip_addr in "${ip_addr_array[@]}"; do
|
start_all_nodes() {
|
||||||
|
echo "Deployment started at $(date)"
|
||||||
|
SECONDS=0
|
||||||
|
count=0
|
||||||
|
leader_ip=
|
||||||
|
leader_time=
|
||||||
|
|
||||||
|
mkdir -p log
|
||||||
|
|
||||||
|
for ip_addr in "${ip_addr_array[@]}"; do
|
||||||
ssh-keygen -R "$ip_addr" >log/local.log
|
ssh-keygen -R "$ip_addr" >log/local.log
|
||||||
ssh-keyscan "$ip_addr" >>~/.ssh/known_hosts 2>/dev/null
|
ssh-keyscan "$ip_addr" >>~/.ssh/known_hosts 2>/dev/null
|
||||||
|
|
||||||
if ((!count)); then
|
if ((!count)); then
|
||||||
# Start the leader on the first node
|
# Start the leader on the first node
|
||||||
echo "Leader node $ip_addr, killing previous instance and restarting"
|
echo "Leader node $ip_addr, killing previous instance and restarting"
|
||||||
leader "$ip_addr"
|
start_leader "$ip_addr"
|
||||||
else
|
else
|
||||||
# Start validator on all other nodes
|
# Start validator on all other nodes
|
||||||
echo "Validator[$count] node $ip_addr, killing previous instance and restarting"
|
echo "Validator[$count] node $ip_addr, killing previous instance and restarting"
|
||||||
validator "$ip_addr" &
|
start_validator "$ip_addr" &
|
||||||
# TBD: Remove the sleep or reduce time once GCP login quota is increased
|
# TBD: Remove the sleep or reduce time once GCP login quota is increased
|
||||||
sleep 2
|
sleep 2
|
||||||
fi
|
fi
|
||||||
|
|
||||||
((count++))
|
((count = count + 1))
|
||||||
done
|
done
|
||||||
|
|
||||||
wait
|
wait
|
||||||
|
|
||||||
((validator_count = count - 1))
|
((validator_count = count - 1))
|
||||||
|
|
||||||
echo "Deployment finished at $(date)"
|
echo "Deployment finished at $(date)"
|
||||||
echo "Build took $build_time seconds"
|
echo "Leader deployment too $leader_time seconds"
|
||||||
echo "Leader deployment too $leader_time seconds"
|
echo "$validator_count Validator deployment took $SECONDS seconds"
|
||||||
echo "$validator_count Validator deployment took $SECONDS seconds"
|
}
|
||||||
|
|
||||||
|
stop_all_nodes() {
|
||||||
|
SECONDS=0
|
||||||
|
local count=0
|
||||||
|
for ip_addr in "${ip_addr_array[@]}"; do
|
||||||
|
echo "Stopping node[$count] $ip_addr. Remote user $remote_user"
|
||||||
|
|
||||||
|
ssh -n -f "$remote_user@$ip_addr" " \
|
||||||
|
set -ex; \
|
||||||
|
sudo service sshguard stop; \
|
||||||
|
pkill -9 solana-; \
|
||||||
|
pkill -9 validator; \
|
||||||
|
pkill -9 leader; \
|
||||||
|
"
|
||||||
|
sleep 2
|
||||||
|
((count = count + 1))
|
||||||
|
echo "Stopped node[$count] $ip_addr"
|
||||||
|
done
|
||||||
|
echo "Stopping $count nodes took $SECONDS seconds"
|
||||||
|
}
|
||||||
|
|
||||||
|
if [[ $command == "start" ]]; then
|
||||||
|
#build_project
|
||||||
|
stop_all_nodes
|
||||||
|
start_all_nodes
|
||||||
|
elif [[ $command == "stop" ]]; then
|
||||||
|
stop_all_nodes
|
||||||
|
else
|
||||||
|
usage "Unknown command: $command"
|
||||||
|
fi
|
||||||
|
|
Loading…
Reference in New Issue