Morph gce_multinode-based scripts into net/
This commit is contained in:
parent
ffb72136c8
commit
399caf343c
|
@ -1,109 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
here=$(dirname "$0")
|
||||
# shellcheck source=scripts/gcloud.sh
|
||||
source "$here"/../scripts/gcloud.sh
|
||||
|
||||
command=$1
|
||||
prefix=
|
||||
num_nodes=
|
||||
out_file=
|
||||
image_name="ubuntu-16-04-cuda-9-2-new"
|
||||
internalNetwork=false
|
||||
zone="us-west1-b"
|
||||
|
||||
shift
|
||||
|
||||
usage() {
|
||||
exitcode=0
|
||||
if [[ -n "$1" ]]; then
|
||||
exitcode=1
|
||||
echo "Error: $*"
|
||||
fi
|
||||
cat <<EOF
|
||||
usage: $0 <create|delete> <-p prefix> <-n num_nodes> <-o file> [-i image-name]
|
||||
|
||||
Manage a GCE multinode network
|
||||
|
||||
create|delete - Create or delete the network
|
||||
-p prefix - A common prefix for node names, to avoid collision
|
||||
-n num_nodes - Number of nodes
|
||||
-P - Use IP addresses on GCE internal/private network
|
||||
-z - GCP Zone for the nodes (default $zone)
|
||||
-o out_file - Used for create option. Outputs an array of IP addresses
|
||||
of new nodes to the file
|
||||
-i image_name - Existing image on GCE (default $image_name)
|
||||
|
||||
EOF
|
||||
exit $exitcode
|
||||
}
|
||||
|
||||
while getopts "h?p:Pi:n:z:o:" opt; do
|
||||
case $opt in
|
||||
h | \?)
|
||||
usage
|
||||
;;
|
||||
p)
|
||||
prefix=$OPTARG
|
||||
;;
|
||||
P)
|
||||
internalNetwork=true
|
||||
;;
|
||||
i)
|
||||
image_name=$OPTARG
|
||||
;;
|
||||
o)
|
||||
out_file=$OPTARG
|
||||
;;
|
||||
n)
|
||||
num_nodes=$OPTARG
|
||||
;;
|
||||
z)
|
||||
zone=$OPTARG
|
||||
;;
|
||||
*)
|
||||
usage "Error: unhandled option: $opt"
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
set -e
|
||||
|
||||
[[ -n $command ]] || usage "Need a command (create|delete)"
|
||||
|
||||
[[ -n $prefix ]] || usage "Need a prefix for GCE instance names"
|
||||
|
||||
|
||||
if [[ $command == "create" ]]; then
|
||||
[[ -n $num_nodes ]] || usage "Need number of nodes"
|
||||
[[ -n $out_file ]] || usage "Need an outfile to store IP Addresses"
|
||||
|
||||
gcloud_CreateInstances "$prefix" "$num_nodes" "$zone" "$image_name"
|
||||
gcloud_FindInstances "name~^$prefix"
|
||||
|
||||
echo "ip_addr_array=()" > "$out_file"
|
||||
recordPublicIp() {
|
||||
declare name="$1"
|
||||
declare publicIp="$3"
|
||||
declare privateIp="$4"
|
||||
|
||||
if $internalNetwork; then
|
||||
echo "ip_addr_array+=($privateIp) # $name" >> "$out_file"
|
||||
else
|
||||
echo "ip_addr_array+=($publicIp) # $name" >> "$out_file"
|
||||
fi
|
||||
}
|
||||
gcloud_ForEachInstance recordPublicIp
|
||||
|
||||
echo "Instance ip addresses recorded in $out_file"
|
||||
elif [[ $command == "delete" ]]; then
|
||||
gcloud_FindInstances "name~^$prefix"
|
||||
|
||||
if [[ ${#instances[@]} -eq 0 ]]; then
|
||||
echo "No instances found matching '^$prefix'"
|
||||
exit 0
|
||||
fi
|
||||
gcloud_DeleteInstances
|
||||
else
|
||||
usage "Unknown command: $command"
|
||||
fi
|
|
@ -1,14 +0,0 @@
|
|||
#!/bin/bash -e
|
||||
|
||||
[[ -n $FORCE ]] || exit
|
||||
|
||||
chmod 600 ~/.ssh/authorized_keys ~/.ssh/id_rsa
|
||||
|
||||
PATH="$HOME"/.cargo/bin:"$PATH"
|
||||
|
||||
./fetch-perf-libs.sh
|
||||
|
||||
# Run setup
|
||||
USE_INSTALL=1 ./multinode-demo/setup.sh
|
||||
USE_INSTALL=1 ./multinode-demo/drone.sh >drone.log 2>&1 &
|
||||
USE_INSTALL=1 SOLANA_CUDA=1 ./multinode-demo/leader.sh >leader.log 2>&1 &
|
|
@ -1,185 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
command=$1
|
||||
ip_addr_file=
|
||||
remote_user=
|
||||
ssh_keys=
|
||||
|
||||
shift
|
||||
|
||||
usage() {
|
||||
exitcode=0
|
||||
if [[ -n "$1" ]]; then
|
||||
exitcode=1
|
||||
echo "Error: $*"
|
||||
fi
|
||||
cat <<EOF
|
||||
usage: $0 <start|stop> <-f IP Addr Array file> <-u username> [-k ssh-keys]
|
||||
|
||||
Manage a GCE multinode network
|
||||
|
||||
start|stop - Create or delete the network
|
||||
-f file - A bash script that exports an array of IP addresses, ip_addr_array.
|
||||
Elements of the array are public IP address of remote nodes.
|
||||
-u username - The username for logging into remote nodes.
|
||||
-k ssh-keys - Path to public/private key pair that remote nodes can use to perform
|
||||
rsync and ssh among themselves. Must contain pub, and priv keys.
|
||||
|
||||
EOF
|
||||
exit $exitcode
|
||||
}
|
||||
|
||||
while getopts "h?f:u:k:" opt; do
|
||||
case $opt in
|
||||
h | \?)
|
||||
usage
|
||||
;;
|
||||
f)
|
||||
ip_addr_file=$OPTARG
|
||||
;;
|
||||
u)
|
||||
remote_user=$OPTARG
|
||||
;;
|
||||
k)
|
||||
ssh_keys=$OPTARG
|
||||
;;
|
||||
*)
|
||||
usage "Error: unhandled option: $opt"
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
set -e
|
||||
|
||||
# Sample IP Address array file contents
|
||||
# ip_addr_array=(192.168.1.1 192.168.1.5 192.168.2.2)
|
||||
|
||||
[[ -n $command ]] || usage "Need a command (start|stop)"
|
||||
[[ -n $ip_addr_file ]] || usage "Need a file with IP address array"
|
||||
[[ -n $remote_user ]] || usage "Need the username for remote nodes"
|
||||
|
||||
ip_addr_array=()
|
||||
# Get IP address array
|
||||
# shellcheck source=/dev/null
|
||||
source "$ip_addr_file"
|
||||
|
||||
build_project() {
|
||||
echo "Build started at $(date)"
|
||||
SECONDS=0
|
||||
|
||||
# Build and install locally
|
||||
PATH="$HOME"/.cargo/bin:"$PATH"
|
||||
cargo install --force
|
||||
|
||||
echo "Build took $SECONDS seconds"
|
||||
}
|
||||
|
||||
common_start_setup() {
|
||||
ip_addr=$1
|
||||
|
||||
# Killing sshguard for now. TODO: Find a better solution
|
||||
# sshguard is blacklisting IP address after ssh-keyscan and ssh login attempts
|
||||
ssh "$remote_user@$ip_addr" " \
|
||||
set -ex; \
|
||||
sudo service sshguard stop; \
|
||||
sudo apt-get --assume-yes install rsync libssl-dev; \
|
||||
mkdir -p ~/.ssh ~/solana ~/.cargo/bin; \
|
||||
" >log/"$ip_addr".log
|
||||
|
||||
# If provided, deploy SSH keys
|
||||
if [[ -n $ssh_keys ]]; then
|
||||
{
|
||||
rsync -vPrz "$ssh_keys"/id_rsa "$remote_user@$ip_addr":~/.ssh/
|
||||
rsync -vPrz "$ssh_keys"/id_rsa.pub "$remote_user@$ip_addr":~/.ssh/
|
||||
rsync -vPrz "$ssh_keys"/id_rsa.pub "$remote_user@$ip_addr":~/.ssh/authorized_keys
|
||||
rsync -vPrz ./multinode-demo "$remote_user@$ip_addr":~/solana/
|
||||
} >>log/"$ip_addr".log
|
||||
fi
|
||||
}
|
||||
|
||||
start_leader() {
|
||||
common_start_setup "$1"
|
||||
|
||||
{
|
||||
rsync -vPrz ~/.cargo/bin/solana* "$remote_user@$ip_addr":~/.cargo/bin/
|
||||
rsync -vPrz ./fetch-perf-libs.sh "$remote_user@$ip_addr":~/solana/
|
||||
ssh -n -f "$remote_user@$ip_addr" 'cd solana; FORCE=1 ./multinode-demo/remote_leader.sh'
|
||||
} >>log/"$1".log
|
||||
|
||||
leader_ip=$1
|
||||
leader_time=$SECONDS
|
||||
SECONDS=0
|
||||
}
|
||||
|
||||
start_validator() {
|
||||
common_start_setup "$1"
|
||||
|
||||
ssh -n -f "$remote_user@$ip_addr" "cd solana; FORCE=1 ./multinode-demo/remote_validator.sh $leader_ip" >>log/"$1".log
|
||||
}
|
||||
|
||||
start_all_nodes() {
|
||||
echo "Deployment started at $(date)"
|
||||
SECONDS=0
|
||||
count=0
|
||||
leader_ip=
|
||||
leader_time=
|
||||
|
||||
mkdir -p log
|
||||
|
||||
for ip_addr in "${ip_addr_array[@]}"; do
|
||||
if ((!count)); then
|
||||
# Start the leader on the first node
|
||||
echo "Leader node $ip_addr, killing previous instance and restarting"
|
||||
start_leader "$ip_addr"
|
||||
else
|
||||
# Start validator on all other nodes
|
||||
echo "Validator[$count] node $ip_addr, killing previous instance and restarting"
|
||||
start_validator "$ip_addr" &
|
||||
# TBD: Remove the sleep or reduce time once GCP login quota is increased
|
||||
sleep 2
|
||||
fi
|
||||
|
||||
((count = count + 1))
|
||||
done
|
||||
|
||||
wait
|
||||
|
||||
((validator_count = count - 1))
|
||||
|
||||
echo "Deployment finished at $(date)"
|
||||
echo "Leader deployment too $leader_time seconds"
|
||||
echo "$validator_count Validator deployment took $SECONDS seconds"
|
||||
}
|
||||
|
||||
stop_all_nodes() {
|
||||
SECONDS=0
|
||||
local count=0
|
||||
for ip_addr in "${ip_addr_array[@]}"; do
|
||||
ssh-keygen -R "$ip_addr" >log/local.log
|
||||
ssh-keyscan "$ip_addr" >>~/.ssh/known_hosts 2>/dev/null
|
||||
|
||||
echo "Stopping node[$count] $ip_addr. Remote user $remote_user"
|
||||
|
||||
ssh -n -f "$remote_user@$ip_addr" " \
|
||||
set -ex; \
|
||||
sudo service sshguard stop; \
|
||||
pkill -9 solana-; \
|
||||
pkill -9 validator; \
|
||||
pkill -9 leader; \
|
||||
"
|
||||
sleep 2
|
||||
((count = count + 1))
|
||||
echo "Stopped node[$count] $ip_addr"
|
||||
done
|
||||
echo "Stopping $count nodes took $SECONDS seconds"
|
||||
}
|
||||
|
||||
if [[ $command == "start" ]]; then
|
||||
build_project
|
||||
stop_all_nodes
|
||||
start_all_nodes
|
||||
elif [[ $command == "stop" ]]; then
|
||||
stop_all_nodes
|
||||
else
|
||||
usage "Unknown command: $command"
|
||||
fi
|
|
@ -1,17 +0,0 @@
|
|||
#!/bin/bash -e
|
||||
|
||||
[[ -n $FORCE ]] || exit
|
||||
|
||||
chmod 600 ~/.ssh/authorized_keys ~/.ssh/id_rsa
|
||||
|
||||
PATH="$HOME"/.cargo/bin:"$PATH"
|
||||
|
||||
touch ~/.ssh/known_hosts
|
||||
ssh-keygen -R "$1" 2>/dev/null
|
||||
ssh-keyscan "$1" >>~/.ssh/known_hosts 2>/dev/null
|
||||
|
||||
rsync -vPrz "$1":~/.cargo/bin/solana* ~/.cargo/bin/
|
||||
|
||||
# Run setup
|
||||
USE_INSTALL=1 ./multinode-demo/setup.sh
|
||||
USE_INSTALL=1 ./multinode-demo/validator.sh "$1":~/solana "$1" >validator.log 2>&1
|
|
@ -0,0 +1,2 @@
|
|||
/config/
|
||||
/log/
|
|
@ -0,0 +1,29 @@
|
|||
|
||||
# Network Management
|
||||
This directory contains scripts useful for working with a test network. It's
|
||||
intended to be both dev and CD friendly.
|
||||
|
||||
### User Account Prerequisites
|
||||
|
||||
Log in to GCP with:
|
||||
```bash
|
||||
$ gcloud auth login
|
||||
```
|
||||
|
||||
Also ensure that `$(whoami)` is the name of an InfluxDB user account with enough
|
||||
access to create a new database.
|
||||
|
||||
These scripts currently must be run from a Linux system (TODO: remove this restriction)
|
||||
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
$ cd net/
|
||||
|
||||
$ ./gce.sh create -n 5 -c 1 #<-- Create a GCE testnet with 5 validators, 1 client (billing starts here)
|
||||
$ ./init-metrics $(whoami) #<-- Configure a metrics database for the testnet
|
||||
$ ./net.sh start #<-- Deploy the network from the local workspace
|
||||
$ ./ssh.sh #<-- Details on how to ssh into any testnet node
|
||||
$ ./gce.sh delete #<-- Dispose of the network (billing stops here)
|
||||
```
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
# |source| this file
|
||||
#
|
||||
# Common utilities shared by other scripts in this directory
|
||||
#
|
||||
# The following directive disable complaints about unused variables in this
|
||||
# file:
|
||||
# shellcheck disable=2034
|
||||
#
|
||||
|
||||
netConfigDir="$(dirname "${BASH_SOURCE[0]}")"/config
|
||||
netLogDir="$(dirname "${BASH_SOURCE[0]}")"/log
|
||||
mkdir -p "$netConfigDir" "$netLogDir"
|
||||
|
||||
configFile="$netConfigDir/config.sh"
|
||||
|
||||
clientIpList=()
|
||||
leaderIp=
|
||||
sshPrivateKey=
|
||||
sshUsername=
|
||||
sshOptions=()
|
||||
validatorIpList=()
|
||||
|
||||
loadConfigFile() {
|
||||
[[ -r $configFile ]] || usage "Config file unreadable: $configFile"
|
||||
|
||||
# shellcheck source=/dev/null
|
||||
source "$configFile"
|
||||
[[ -n "$leaderIp" ]] || usage "Config file invalid, leaderIp unspecified: $configFile"
|
||||
[[ ${#validatorIpList[@]} -gt 0 ]] || usage "Config file invalid, validatorIpList unspecified: $configFile"
|
||||
[[ -n $sshUsername ]] || usage "Config file invalid, sshUsername unspecified: $configFile"
|
||||
[[ -n $sshPrivateKey ]] || usage "Config file invalid, sshPrivateKey unspecified: $configFile"
|
||||
|
||||
sshOptions=(
|
||||
-o "BatchMode=yes"
|
||||
-o "StrictHostKeyChecking=no"
|
||||
-o "UserKnownHostsFile=/dev/null"
|
||||
-o "User=$sshUsername"
|
||||
-o "IdentityFile=$sshPrivateKey"
|
||||
-o "LogLevel=ERROR"
|
||||
)
|
||||
}
|
|
@ -0,0 +1,172 @@
|
|||
#!/bin/bash -e
|
||||
|
||||
here=$(dirname "$0")
|
||||
# shellcheck source=scripts/gcloud.sh
|
||||
source "$here"/../scripts/gcloud.sh
|
||||
# shellcheck source=net/common.sh
|
||||
source "$here"/common.sh
|
||||
|
||||
prefix=testnet-dev-$(whoami | sed -e s/[^a-z0-9].*//)
|
||||
validatorNodeCount=
|
||||
clientNodeCount=
|
||||
|
||||
imageName="ubuntu-16-04-cuda-9-2-new"
|
||||
internalNetwork=false
|
||||
zone="us-west1-b"
|
||||
|
||||
usage() {
|
||||
exitcode=0
|
||||
if [[ -n "$1" ]]; then
|
||||
exitcode=1
|
||||
echo "Error: $*"
|
||||
fi
|
||||
cat <<EOF
|
||||
usage: $0 [create|config|delete] [common options] [command-specific options]
|
||||
|
||||
Manage a GCE-based testnet
|
||||
|
||||
create - create a new testnet (implies 'config')
|
||||
config - configure the testnet and write a config file describing it
|
||||
delete - delete the testnet
|
||||
|
||||
common options:
|
||||
-p prefix - Optional common prefix for instance names to avoid collisions
|
||||
(default: $prefix)
|
||||
|
||||
create-specific options:
|
||||
-n number - Number of validator nodes
|
||||
-c number - Number of client nodes
|
||||
-P - Use GCE internal/private network
|
||||
-z - GCP Zone for the nodes (default: $zone)
|
||||
-i imageName - Existing image on GCE (default: $imageName)
|
||||
|
||||
config-specific options:
|
||||
none
|
||||
|
||||
delete-specific options:
|
||||
none
|
||||
|
||||
EOF
|
||||
exit $exitcode
|
||||
}
|
||||
|
||||
|
||||
command=$1
|
||||
[[ -n $command ]] || usage
|
||||
shift
|
||||
[[ $command = create || $command = config || $command = delete ]] || usage "Invalid command: $command"
|
||||
|
||||
while getopts "h?p:Pi:n:c:z:" opt; do
|
||||
case $opt in
|
||||
h | \?)
|
||||
usage
|
||||
;;
|
||||
p)
|
||||
prefix=$OPTARG
|
||||
;;
|
||||
P)
|
||||
internalNetwork=true
|
||||
;;
|
||||
i)
|
||||
imageName=$OPTARG
|
||||
;;
|
||||
n)
|
||||
validatorNodeCount=$OPTARG
|
||||
;;
|
||||
c)
|
||||
clientNodeCount=$OPTARG
|
||||
;;
|
||||
z)
|
||||
zone=$OPTARG
|
||||
;;
|
||||
*)
|
||||
usage "Error: unhandled option: $opt"
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
|
||||
writeConfigFile() {
|
||||
echo "# autogenerated at $(date)" >> "$configFile"
|
||||
|
||||
declare sshPrivateKey="$netConfigDir/id_$prefix"
|
||||
rm -rf "$sshPrivateKey"{,.pub}
|
||||
(
|
||||
set -x
|
||||
ssh-keygen -t ecdsa -N '' -f "$sshPrivateKey"
|
||||
)
|
||||
echo "sshPrivateKey=$sshPrivateKey" >> "$configFile"
|
||||
|
||||
recordInstanceIp() {
|
||||
declare name="$1"
|
||||
declare publicIp="$3"
|
||||
declare privateIp="$4"
|
||||
|
||||
declare arrayName="$6"
|
||||
|
||||
if $internalNetwork; then
|
||||
echo "$arrayName+=($privateIp) # $name" >> "$configFile"
|
||||
else
|
||||
echo "$arrayName+=($publicIp) # $name" >> "$configFile"
|
||||
fi
|
||||
}
|
||||
|
||||
gcloud_FindInstances "name=$prefix-leader" show
|
||||
[[ ${#instances[@]} -eq 1 ]] || {
|
||||
echo "Unable to start leader"
|
||||
exit 1
|
||||
}
|
||||
gcloud_FigureRemoteUsername "${instances[0]}"
|
||||
echo "sshUsername=$gcloud_username" >> "$configFile"
|
||||
gcloud_PrepInstancesForSsh "$gcloud_username" "$sshPrivateKey"
|
||||
|
||||
echo "leaderIp=()" >> "$configFile"
|
||||
gcloud_ForEachInstance recordInstanceIp leaderIp
|
||||
|
||||
gcloud_FindInstances "name~^$prefix-validator" show
|
||||
[[ ${#instances[@]} -gt 0 ]] || {
|
||||
echo "Unable to start validators"
|
||||
exit 1
|
||||
}
|
||||
echo "validatorIpList=()" >> "$configFile"
|
||||
gcloud_PrepInstancesForSsh "$gcloud_username" "$sshPrivateKey"
|
||||
gcloud_ForEachInstance recordInstanceIp validatorIpList
|
||||
|
||||
echo "clientIpList=()" >> "$configFile"
|
||||
gcloud_FindInstances "name~^$prefix-client" show
|
||||
if [[ ${#instances[@]} -gt 0 ]]; then
|
||||
gcloud_PrepInstancesForSsh "$gcloud_username" "$sshPrivateKey"
|
||||
gcloud_ForEachInstance recordInstanceIp clientIpList
|
||||
fi
|
||||
|
||||
echo "Wrote $configFile"
|
||||
}
|
||||
|
||||
case $command in
|
||||
delete)
|
||||
gcloud_FindInstances "name~^$prefix-"
|
||||
|
||||
if [[ ${#instances[@]} -eq 0 ]]; then
|
||||
echo "No instances found matching '^$prefix-'"
|
||||
exit 0
|
||||
fi
|
||||
gcloud_DeleteInstances
|
||||
;;
|
||||
|
||||
create)
|
||||
[[ -n $validatorNodeCount ]] || usage "Need number of nodes"
|
||||
|
||||
gcloud_CreateInstances "$prefix-leader" 1 "$zone" "$imageName"
|
||||
gcloud_CreateInstances "$prefix-validator" "$validatorNodeCount" "$zone" "$imageName"
|
||||
if [[ -n $clientNodeCount ]]; then
|
||||
gcloud_CreateInstances "$prefix-client" "$clientNodeCount" "$zone" "$imageName"
|
||||
fi
|
||||
writeConfigFile
|
||||
;;
|
||||
|
||||
config)
|
||||
writeConfigFile
|
||||
;;
|
||||
*)
|
||||
usage "Unknown command: $command"
|
||||
esac
|
|
@ -0,0 +1,76 @@
|
|||
#!/bin/bash -e
|
||||
|
||||
here=$(dirname "$0")
|
||||
# shellcheck source=net/common.sh
|
||||
source "$here"/common.sh
|
||||
|
||||
# Prints the help text for this script and exits.
#
# Arguments:
#   $@ - optional error message.  When the first argument is non-empty the
#        message is printed with an "Error: " prefix and the exit code is 1;
#        otherwise the exit code is 0.
#
# Never returns: always terminates the script via |exit|.
usage() {
  exitcode=0
  if [[ -n "$1" ]]; then
    exitcode=1
    echo "Error: $*"
  fi
  cat <<EOF
usage: $0 [-d] [username] [optional database name]

Creates a testnet dev metrics database

  username        InfluxDB user with access to create a new database
  database        Uncommon. Optional database suffix to follow the mandatory
                  'testnet-dev-[username]' database name prefix

  -d              Delete the database instead of creating it

EOF
  exit $exitcode
}
|
||||
|
||||
|
||||
delete=false
|
||||
while getopts "hd" opt; do
|
||||
case $opt in
|
||||
h|\?)
|
||||
usage
|
||||
exit 0
|
||||
;;
|
||||
d)
|
||||
delete=true;
|
||||
;;
|
||||
*)
|
||||
usage "Error: unhandled option: $opt"
|
||||
;;
|
||||
esac
|
||||
done
|
||||
shift $((OPTIND - 1))
|
||||
|
||||
username=$1
|
||||
[[ -n "$username" ]] || usage "username not specified"
|
||||
database="testnet-dev-$username"
|
||||
|
||||
if [[ -n "$2" ]]; then
|
||||
database="$database-$2"
|
||||
fi
|
||||
|
||||
read -rs -p "InfluxDB password for $username: " password
|
||||
[[ -n $password ]] || { echo "Password not specified"; exit 1; }
|
||||
echo
|
||||
|
||||
query() {
|
||||
echo "$*"
|
||||
curl -XPOST \
|
||||
"https://metrics.solana.com:8086/query?u=${username}&p=${password}" \
|
||||
--data-urlencode "q=$*"
|
||||
}
|
||||
|
||||
query "DROP DATABASE \"$database\""
|
||||
! $delete || exit 0
|
||||
query "CREATE DATABASE \"$database\""
|
||||
query "ALTER RETENTION POLICY autogen ON \"$database\" DURATION 7d"
|
||||
query "GRANT READ ON \"$database\" TO \"ro\""
|
||||
query "GRANT WRITE ON \"$database\" TO \"scratch_writer\""
|
||||
|
||||
echo "export \
|
||||
SOLANA_METRICS_CONFIG=\"db=$database,u=scratch_writer,p=topsecret\" \
|
||||
" >> "$configFile"
|
||||
|
||||
exit 0
|
|
@ -0,0 +1,197 @@
|
|||
#!/bin/bash -e
|
||||
|
||||
here=$(dirname "$0")
|
||||
SOLANA_ROOT="$(cd "$here"/..; pwd)"
|
||||
|
||||
# shellcheck source=net/common.sh
|
||||
source "$here"/common.sh
|
||||
|
||||
usage() {
|
||||
exitcode=0
|
||||
if [[ -n "$1" ]]; then
|
||||
exitcode=1
|
||||
echo "Error: $*"
|
||||
fi
|
||||
cat <<EOF
|
||||
usage: $0 [start|stop]
|
||||
|
||||
Manage a multinode network
|
||||
|
||||
start|stop - Start or stop the network
|
||||
EOF
|
||||
exit $exitcode
|
||||
}
|
||||
|
||||
command=$1
|
||||
[[ -n $command ]] || usage
|
||||
shift
|
||||
[[ $command = start || $command = stop ]] || usage "Invalid command: $command"
|
||||
|
||||
while getopts "h?" opt; do
|
||||
case $opt in
|
||||
h | \?)
|
||||
usage
|
||||
;;
|
||||
*)
|
||||
usage "Error: unhandled option: $opt"
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
loadConfigFile
|
||||
|
||||
build() {
|
||||
if [[ $(uname) != Linux ]]; then
|
||||
echo "Unable to build, this isn't a Linux system"
|
||||
exit 1
|
||||
fi
|
||||
SECONDS=0
|
||||
(
|
||||
cd "$SOLANA_ROOT"
|
||||
echo "****************"
|
||||
echo "Build started at $(date)"
|
||||
|
||||
# Build and install locally
|
||||
PATH="$HOME"/.cargo/bin:"$PATH"
|
||||
cargo install --force
|
||||
)
|
||||
echo "Build took $SECONDS seconds"
|
||||
}
|
||||
|
||||
common_start_setup() {
|
||||
declare ipAddress=$1
|
||||
declare logFile="$2"
|
||||
|
||||
(
|
||||
set -x
|
||||
|
||||
ssh "${sshOptions[@]}" "$ipAddress" "
|
||||
set -ex;
|
||||
sudo systemctl disable apt-daily.service # disable run when system boot
|
||||
sudo systemctl disable apt-daily.timer # disable timer run
|
||||
sudo apt-get --assume-yes install rsync libssl-dev;
|
||||
mkdir -p ~/solana ~/.cargo/bin;
|
||||
"
|
||||
|
||||
test -d "$SOLANA_ROOT"
|
||||
rsync -vPrz -e "ssh ${sshOptions[*]}" \
|
||||
"$SOLANA_ROOT"/{fetch-perf-libs.sh,scripts,net,multinode-demo} \
|
||||
"$ipAddress":~/solana/
|
||||
) >> "$logFile"
|
||||
}
|
||||
|
||||
startLeader() {
|
||||
declare ipAddress=$1
|
||||
declare logFile="$2"
|
||||
echo "****************"
|
||||
echo "Starting leader: $leaderIp"
|
||||
|
||||
common_start_setup "$ipAddress" "$logFile"
|
||||
|
||||
(
|
||||
set -x
|
||||
rsync -vPrz -e "ssh ${sshOptions[*]}" ~/.cargo/bin/solana* "$ipAddress":~/.cargo/bin/
|
||||
ssh "${sshOptions[@]}" -f "$ipAddress" \
|
||||
"./solana/net/remote/remote_leader.sh"
|
||||
) >> "$logFile"
|
||||
}
|
||||
|
||||
# Prepares a remote node and launches a validator on it.
#
# Arguments:
#   $1 - IP address of the node that will run the validator
#   $2 - local log file; output of the launch step is appended to it
#
# Globals:
#   sshOptions (read) - ssh option array used for the remote command
#   leaderIp   (read) - forwarded as the argument to remote_validator.sh
startValidator() {
  declare ipAddress=$1
  declare logFile="$2"
  echo "*******************"
  # Report the node being started, not the leader it will connect to
  echo "Starting validator: $ipAddress"
  common_start_setup "$ipAddress" "$logFile"

  (
    set -x
    ssh "${sshOptions[@]}" -f "$ipAddress" \
      "./solana/net/remote/remote_validator.sh $leaderIp"
  ) >> "$logFile"
}
|
||||
|
||||
# Prepares a remote node and launches a client on it.
#
# Arguments:
#   $1 - IP address of the node that will run the client
#   $2 - local log file; output of the launch step is appended to it
#
# Globals:
#   sshOptions (read) - ssh option array used for the remote command
#   leaderIp   (read) - forwarded as the argument to remote_client.sh
startClient() {
  declare ipAddress=$1
  declare logFile="$2"
  echo "****************"
  # Report the node being started, not the leader it will connect to
  echo "Starting client: $ipAddress"
  common_start_setup "$ipAddress" "$logFile"

  ssh "${sshOptions[@]}" -f "$ipAddress" \
    "./solana/net/remote/remote_client.sh $leaderIp" >> "$logFile"
}
|
||||
|
||||
start() {
|
||||
echo "Deployment started at $(date)"
|
||||
SECONDS=0
|
||||
leaderDeployTime=
|
||||
|
||||
startLeader "$leaderIp" "$netLogDir/leader-$leaderIp.log"
|
||||
leaderDeployTime=$SECONDS
|
||||
SECONDS=0
|
||||
|
||||
for ipAddress in "${validatorIpList[@]}"; do
|
||||
startValidator "$ipAddress" "$netLogDir/validator-$ipAddress.log" &
|
||||
done
|
||||
|
||||
wait
|
||||
validatorDeployTime=$SECONDS
|
||||
SECONDS=0
|
||||
|
||||
for ipAddress in "${clientIpList[@]}"; do
|
||||
startClient "$ipAddress" "$netLogDir/client-$ipAddress.log"
|
||||
done
|
||||
|
||||
clientDeployTime=$SECONDS
|
||||
SECONDS=0
|
||||
wait
|
||||
|
||||
echo
|
||||
echo "================================================================="
|
||||
echo "Deployment finished at $(date)"
|
||||
echo "Leader deployment took $leaderDeployTime seconds"
|
||||
echo "Validator deployment (${#validatorIpList[@]} instances) took $validatorDeployTime seconds"
|
||||
echo "Client deployment (${#clientIpList[@]} instances) took $clientDeployTime seconds"
|
||||
echo "Logs in $netLogDir:"
|
||||
ls -l "$netLogDir"
|
||||
}
|
||||
|
||||
|
||||
stop_node() {
|
||||
local ipAddress=$1
|
||||
echo "**************"
|
||||
echo "Stopping node: $ipAddress"
|
||||
(
|
||||
set -x
|
||||
ssh "${sshOptions[@]}" "$ipAddress" "
|
||||
set -x;
|
||||
pkill -9 solana-;
|
||||
pkill -9 validator;
|
||||
pkill -9 leader;
|
||||
"
|
||||
) || true
|
||||
}
|
||||
|
||||
stop() {
|
||||
SECONDS=0
|
||||
|
||||
stop_node "$leaderIp"
|
||||
|
||||
for ipAddress in "${validatorIpList[@]}" "${clientIpList[@]}"; do
|
||||
stop_node "$ipAddress"
|
||||
done
|
||||
|
||||
echo "Stopping nodes took $SECONDS seconds"
|
||||
}
|
||||
|
||||
mkdir -p log
|
||||
|
||||
if [[ $command == "start" ]]; then
|
||||
build
|
||||
stop
|
||||
start
|
||||
elif [[ $command == "stop" ]]; then
|
||||
stop
|
||||
else
|
||||
usage "Unknown command: $command"
|
||||
fi
|
|
@ -0,0 +1 @@
|
|||
Scripts that run on the remote testnet nodes
|
|
@ -0,0 +1,15 @@
|
|||
#!/bin/bash -e
|
||||
|
||||
[[ -n $1 ]] || exit
|
||||
|
||||
cd "$(dirname "$0")"/../..
|
||||
source net/common.sh
|
||||
loadConfigFile
|
||||
|
||||
PATH="$HOME"/.cargo/bin:"$PATH"
|
||||
rsync -vPrz "$1":~/.cargo/bin/solana* ~/.cargo/bin/
|
||||
|
||||
numNodes=1 # TODO: Pass this in
|
||||
export USE_INSTALL=1
|
||||
|
||||
multinode-demo/client.sh "$1":~/solana $numNodes --loop -s 600 --sustained >client.log 2>&1 &
|
|
@ -0,0 +1,15 @@
|
|||
#!/bin/bash -e
|
||||
|
||||
cd "$(dirname "$0")"/../..
|
||||
source net/common.sh
|
||||
loadConfigFile
|
||||
|
||||
PATH="$HOME"/.cargo/bin:"$PATH"
|
||||
|
||||
export USE_INSTALL=1
|
||||
export SOLANA_CUDA=1
|
||||
|
||||
./fetch-perf-libs.sh
|
||||
./multinode-demo/setup.sh
|
||||
./multinode-demo/drone.sh >drone.log 2>&1 &
|
||||
./multinode-demo/leader.sh >leader.log 2>&1 &
|
|
@ -0,0 +1,15 @@
|
|||
#!/bin/bash -e
|
||||
|
||||
[[ -n $1 ]] || exit
|
||||
|
||||
cd "$(dirname "$0")"/../..
|
||||
source net/common.sh
|
||||
loadConfigFile
|
||||
|
||||
PATH="$HOME"/.cargo/bin:"$PATH"
|
||||
|
||||
rsync -vPrz "$1":~/.cargo/bin/solana* ~/.cargo/bin/
|
||||
|
||||
export USE_INSTALL=1
|
||||
./multinode-demo/setup.sh
|
||||
./multinode-demo/validator.sh "$1":~/solana "$1" >validator.log 2>&1 &
|
|
@ -0,0 +1,62 @@
|
|||
#!/bin/bash
|
||||
|
||||
here=$(dirname "$0")
|
||||
# shellcheck source=net/common.sh
|
||||
source "$here"/common.sh
|
||||
|
||||
usage() {
|
||||
exitcode=0
|
||||
if [[ -n "$1" ]]; then
|
||||
exitcode=1
|
||||
echo "Error: $*"
|
||||
fi
|
||||
cat <<EOF
|
||||
usage: $0 [ipAddress]
|
||||
|
||||
ssh into a node
|
||||
|
||||
ipAddress - IP address of the desired node.
|
||||
|
||||
If ipAddress is unspecified, a list of available nodes will be displayed.
|
||||
|
||||
EOF
|
||||
exit $exitcode
|
||||
}
|
||||
|
||||
while getopts "h?" opt; do
|
||||
case $opt in
|
||||
h | \?)
|
||||
usage
|
||||
;;
|
||||
*)
|
||||
usage "Error: unhandled option: $opt"
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
loadConfigFile
|
||||
|
||||
ipAddress=$1
|
||||
if [[ -n "$ipAddress" ]]; then
|
||||
set -x
|
||||
exec ssh "${sshOptions[@]}" "$ipAddress"
|
||||
fi
|
||||
|
||||
echo Leader:
|
||||
echo " $0 $leaderIp"
|
||||
echo
|
||||
echo Validators:
|
||||
for ipAddress in "${validatorIpList[@]}"; do
|
||||
echo " $0 $ipAddress"
|
||||
done
|
||||
echo
|
||||
echo Clients:
|
||||
if [[ ${#clientIpList[@]} -eq 0 ]]; then
|
||||
echo " None"
|
||||
else
|
||||
for ipAddress in "${clientIpList[@]}"; do
|
||||
echo " $0 $ipAddress"
|
||||
done
|
||||
fi
|
||||
|
||||
exit 0
|
|
@ -33,7 +33,7 @@ gcloud_FindInstances() {
|
|||
continue
|
||||
fi
|
||||
if [[ $options = show ]]; then
|
||||
printf "%-20s | zone=%-10s publicIp=%-16s privateIp=%s" "$name" "$zone" "$publicIp" "$privateIp"
|
||||
printf "%-30s | %-16s publicIp=%-16s privateIp=%s\n" "$name" "$zone" "$publicIp" "$privateIp"
|
||||
fi
|
||||
|
||||
instances+=("$name:$zone:$publicIp:$privateIp")
|
||||
|
@ -93,7 +93,11 @@ gcloud_CreateInstances() {
|
|||
declare imageName="$4"
|
||||
|
||||
declare nodes
|
||||
read -ra nodes <<<$(seq -f "${namePrefix}%g" 1 "$numNodes")
|
||||
if [[ $numNodes = 1 ]]; then
|
||||
nodes=("$namePrefix")
|
||||
else
|
||||
read -ra nodes <<<$(seq -f "${namePrefix}%g" 1 "$numNodes")
|
||||
fi
|
||||
|
||||
(
|
||||
set -x
|
||||
|
@ -110,10 +114,20 @@ gcloud_CreateInstances() {
|
|||
# Deletes all the instances listed in the `instances` array
#
# Globals:
#   instances (read) - entries of the form "name:zone:publicIp:privateIp"
#
# Prints a notice and returns without calling gcloud when the array is empty.
#
gcloud_DeleteInstances() {
  # Test the element count; ${#instances[0]} would instead measure the string
  # length of the first entry
  if [[ ${#instances[@]} -eq 0 ]]; then
    echo No instances to delete
    return
  fi
  # Strip everything from the first ':' onwards, leaving only instance names
  declare names=("${instances[@]/:*/}")

  # Assume all instances are in the same zone
  # TODO: One day this assumption will be invalid
  declare zone
  IFS=: read -r _ zone _ < <(echo "${instances[0]}")

  (
    set -x
    gcloud beta compute instances delete --zone "$zone" "${names[@]}"
  )
}
|
||||
|
||||
|
@ -137,13 +151,20 @@ gcloud_FigureRemoteUsername() {
|
|||
fi
|
||||
|
||||
declare instanceInfo="$1"
|
||||
declare name zone
|
||||
IFS=: read -r name zone _ < <(echo "$instanceInfo")
|
||||
declare name zone publicIp
|
||||
IFS=: read -r name zone publicIp _ < <(echo "$instanceInfo")
|
||||
|
||||
echo "Detecting remote username using $zone in $zone:"
|
||||
|
||||
|
||||
# Figure the gcp ssh username
|
||||
(
|
||||
set -x
|
||||
|
||||
# Try to ping the machine first. There can be a delay between when the
|
||||
# instance is reported as RUNNING and when it's reachable over the network
|
||||
timeout 30s bash -c "set -o pipefail; until ping -c 3 $publicIp | tr - _; do echo .; done"
|
||||
|
||||
gcloud compute ssh "$name" --zone "$zone" -- "echo whoami \$(whoami)" | tee whoami
|
||||
)
|
||||
|
||||
|
@ -152,7 +173,7 @@ gcloud_FigureRemoteUsername() {
|
|||
exit 1
|
||||
}
|
||||
gcloud_username="${BASH_REMATCH[1]}"
|
||||
echo "Remote username: $gcloud_username--"
|
||||
echo "Remote username: $gcloud_username"
|
||||
}
|
||||
|
||||
#
|
||||
|
@ -163,13 +184,12 @@ gcloud_FigureRemoteUsername() {
|
|||
# use plain |ssh| instead.
|
||||
#
|
||||
# username - gcp ssh username as computed by gcloud_FigureRemoteUsername
|
||||
# publicKey - public key to install on all the instances
|
||||
# privateKey - matching private key, used to verify ssh access
|
||||
# privateKey - private key to install on all the instances
|
||||
#
|
||||
gcloud_PrepInstancesForSsh() {
|
||||
declare username="$1"
|
||||
declare publicKey="$2"
|
||||
declare privateKey="$3"
|
||||
declare privateKey="$2"
|
||||
declare publicKey="$privateKey".pub
|
||||
[[ -r $publicKey ]] || {
|
||||
echo "Unable to read public key: $publicKey"
|
||||
exit 1
|
||||
|
@ -186,12 +206,28 @@ gcloud_PrepInstancesForSsh() {
|
|||
(
|
||||
set -x
|
||||
|
||||
# TODO: stomping on the authorized_keys isn't great, maybe do something
|
||||
# clever with |ssh-copy-id| one day
|
||||
gcloud compute scp --zone "$zone" "$publicKey" "$name":.ssh/authorized_keys
|
||||
# Try to ping the machine first. There can be a delay between when the
|
||||
# instance is reported as RUNNING and when it's reachable over the network
|
||||
timeout 30s bash -c "set -o pipefail; until ping -c 3 $publicIp | tr - _; do echo .; done"
|
||||
|
||||
# Confirm normal ssh now works
|
||||
ssh -i "$privateKey" "$username@$publicIp" uptime
|
||||
gcloud compute ssh --zone "$zone" "$name" -- "
|
||||
set -x;
|
||||
rm -rf .ssh;
|
||||
mkdir -p .ssh;
|
||||
echo \"$(cat "$publicKey")\" > .ssh/authorized_keys;
|
||||
echo \"
|
||||
Host *
|
||||
BatchMode yes
|
||||
IdentityFile ~/.ssh/id_testnet
|
||||
StrictHostKeyChecking no
|
||||
\" > .ssh/config;
|
||||
"
|
||||
#gcloud compute scp --zone "$zone" "$publicKey" "$name":.ssh/authorized_keys
|
||||
scp \
|
||||
-o StrictHostKeyChecking=no \
|
||||
-o UserKnownHostsFile=/dev/null \
|
||||
-i "$privateKey" \
|
||||
"$privateKey" "$username@$publicIp:.ssh/id_testnet"
|
||||
)
|
||||
done
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue