net/ can now deploy Snaps

This commit is contained in:
Michael Vines 2018-09-03 18:15:55 -10:00
parent 449d7042f0
commit fa07c49cc9
9 changed files with 308 additions and 120 deletions

View File

@ -11,25 +11,20 @@ netConfigDir="$(dirname "${BASH_SOURCE[0]}")"/config
netLogDir="$(dirname "${BASH_SOURCE[0]}")"/log
mkdir -p "$netConfigDir" "$netLogDir"
# shellcheck source=scripts/configure-metrics.sh
source "$(dirname "${BASH_SOURCE[0]}")"/../scripts/configure-metrics.sh
configFile="$netConfigDir/config"
clientIpList=()
leaderIp=
netBasename=
sshOptions=()
sshPrivateKey=
sshUsername=
sshOptions=()
validatorIpList=()
loadConfigFile() {
[[ -r $configFile ]] || usage "Config file unreadable: $configFile"
# shellcheck source=/dev/null
source "$configFile"
[[ -n "$leaderIp" ]] || usage "Config file invalid, leaderIp unspecified: $configFile"
[[ ${#validatorIpList[@]} -gt 0 ]] || usage "Config file invalid, validatorIpList unspecified: $configFile"
[[ -n $sshUsername ]] || usage "Config file invalid, sshUsername unspecified: $configFile"
[[ -n $sshPrivateKey ]] || usage "Config file invalid, sshPrivateKey unspecified: $configFile"
buildSshOptions() {
sshOptions=(
-o "BatchMode=yes"
-o "StrictHostKeyChecking=no"
@ -39,3 +34,18 @@ loadConfigFile() {
-o "LogLevel=ERROR"
)
}
loadConfigFile() {
  # Sources $configFile into the current shell and checks that every setting
  # this function requires is present.  Each failed check is reported through
  # usage (not defined in this block; presumably provided by the calling
  # script).  When all checks pass, the ssh options are rebuilt and metrics
  # are configured.
  if [[ ! -r $configFile ]]; then
    usage "Config file unreadable: $configFile"
  fi

  # shellcheck source=/dev/null
  . "$configFile"

  if [[ -z "$netBasename" ]]; then
    usage "Config file invalid, netBasename unspecified: $configFile"
  fi
  if [[ -z "$leaderIp" ]]; then
    usage "Config file invalid, leaderIp unspecified: $configFile"
  fi
  # At least one validator address must have been recorded.
  if [[ ${#validatorIpList[@]} -le 0 ]]; then
    usage "Config file invalid, validatorIpList unspecified: $configFile"
  fi
  if [[ -z $sshUsername ]]; then
    usage "Config file invalid, sshUsername unspecified: $configFile"
  fi
  if [[ -z $sshPrivateKey ]]; then
    usage "Config file invalid, sshPrivateKey unspecified: $configFile"
  fi

  buildSshOptions
  configureMetrics
}

View File

@ -29,7 +29,7 @@ usage() {
cat <<EOF
usage: $0 [create|config|delete] [common options] [command-specific options]
Manage a GCE-based testnet
Configure a GCE-based testnet
create - create a new testnet (implies 'config')
config - configure the testnet and write a config file describing it
@ -92,9 +92,11 @@ while getopts "h?p:Pi:n:c:z:" opt; do
done
writeConfigFile() {
prepareInstancesAndWriteConfigFile() {
echo "# autogenerated at $(date)" >> "$configFile"
echo "netBasename=$prefix" >> "$configFile"
declare sshPrivateKey="$netConfigDir/id_$prefix"
rm -rf "$sshPrivateKey"{,.pub}
(
@ -117,17 +119,36 @@ writeConfigFile() {
fi
}
prepareInstance() {
  # Runs a short provisioning script on one instance over ssh: disables the
  # apt-daily units, installs rsync and libssl-dev, and creates ~/solana and
  # ~/.cargo/bin.
  #   $1 - instance name (accepted but not used here)
  #   $3 - address handed to ssh
  # Uses the sshOptions array, which is set outside this function.
  local instanceName="$1"
  local sshTarget="$3"

  # TODO: Make the following a requirement of $imageName
  # instead of a manual install
  local provisioningScript="
set -ex;
sudo systemctl disable apt-daily.service # disable run when system boot
sudo systemctl disable apt-daily.timer # disable timer run
sudo apt-get --assume-yes install rsync libssl-dev;
mkdir -p ~/solana ~/.cargo/bin;
"
  ssh "${sshOptions[@]}" "$sshTarget" "$provisioningScript"
}
gcloud_FindInstances "name=$prefix-leader" show
[[ ${#instances[@]} -eq 1 ]] || {
echo "Unable to start leader"
exit 1
}
gcloud_FigureRemoteUsername "${instances[0]}"
echo "sshUsername=$gcloud_username" >> "$configFile"
sshUsername=$gcloud_username
echo "sshUsername=$sshUsername" >> "$configFile"
buildSshOptions
gcloud_PrepInstancesForSsh "$gcloud_username" "$sshPrivateKey"
echo "leaderIp=()" >> "$configFile"
gcloud_ForEachInstance recordInstanceIp leaderIp
gcloud_ForEachInstance prepareInstance
gcloud_FindInstances "name~^$prefix-validator" show
[[ ${#instances[@]} -gt 0 ]] || {
@ -137,12 +158,14 @@ writeConfigFile() {
echo "validatorIpList=()" >> "$configFile"
gcloud_PrepInstancesForSsh "$gcloud_username" "$sshPrivateKey"
gcloud_ForEachInstance recordInstanceIp validatorIpList
gcloud_ForEachInstance prepareInstance
echo "clientIpList=()" >> "$configFile"
gcloud_FindInstances "name~^$prefix-client" show
if [[ ${#instances[@]} -gt 0 ]]; then
gcloud_PrepInstancesForSsh "$gcloud_username" "$sshPrivateKey"
gcloud_ForEachInstance recordInstanceIp clientIpList
gcloud_ForEachInstance prepareInstance
fi
echo "Wrote $configFile"
@ -177,11 +200,11 @@ create)
"$zone" "$imageName" "$clientMachineType" "$clientAccelerator"
fi
writeConfigFile
prepareInstancesAndWriteConfigFile
;;
config)
writeConfigFile
prepareInstancesAndWriteConfigFile
;;
*)
usage "Unknown command: $command"

View File

@ -11,20 +11,18 @@ usage() {
echo "Error: $*"
fi
cat <<EOF
usage: $0 [-d] [username] [optional database name]
usage: $0 [-d] [username]
Creates a testnet dev metrics database
username InfluxDB user with access to create a new database
database Uncommon. Optional database suffix to follow the mandiatory
'testnet-dev-[username]' database name prefix
-d Delete the database instead of creating it
EOF
exit $exitcode
}
loadConfigFile
delete=false
while getopts "hd" opt; do
@ -34,7 +32,7 @@ while getopts "hd" opt; do
exit 0
;;
d)
delete=true;
delete=true
;;
*)
usage "Error: unhandled option: $opt"
@ -45,11 +43,6 @@ shift $((OPTIND - 1))
username=$1
[[ -n "$username" ]] || usage "username not specified"
database="testnet-dev-$username"
if [[ -n "$2" ]]; then
database="$database-$2"
fi
read -rs -p "InfluxDB password for $username: " password
[[ -n $password ]] || { echo "Password not specified"; exit 1; }
@ -62,15 +55,15 @@ query() {
--data-urlencode "q=$*"
}
query "DROP DATABASE \"$database\""
query "DROP DATABASE \"$netBasename\""
! $delete || exit 0
query "CREATE DATABASE \"$database\""
query "ALTER RETENTION POLICY autogen ON \"$database\" DURATION 7d"
query "GRANT READ ON \"$database\" TO \"ro\""
query "GRANT WRITE ON \"$database\" TO \"scratch_writer\""
query "CREATE DATABASE \"$netBasename\""
query "ALTER RETENTION POLICY autogen ON \"$netBasename\" DURATION 7d"
query "GRANT READ ON \"$netBasename\" TO \"ro\""
query "GRANT WRITE ON \"$netBasename\" TO \"scratch_writer\""
echo "export \
SOLANA_METRICS_CONFIG=\"db=$database,u=scratch_writer,p=topsecret\" \
SOLANA_METRICS_CONFIG=\"db=$netBasename,u=scratch_writer,p=topsecret\" \
" >> "$configFile"
exit 0

View File

@ -15,23 +15,60 @@ usage() {
cat <<EOF
usage: $0 [start|stop]
Manage a multinode network
Operate a configured testnet
start - Start the network
stop - Stop the network
start-specific options:
-S snapFilename - Deploy the specified Snap file
-s edge|beta|stable - Deploy the latest Snap on the specified Snap release channel
-a "setup args" - Optional additional arguments for ./multinode-demo/setup.sh
Note: if RUST_LOG is set in the environment it will be propagated into the
network nodes.
stop-specific options:
none
start|stop - Start or stop the network
EOF
exit $exitcode
}
snapChannel=
snapFilename=
nodeSetupArgs=
deployMethod=local
command=$1
[[ -n $command ]] || usage
shift
[[ $command = start || $command = stop ]] || usage "Invalid command: $command"
while getopts "h?" opt; do
while getopts "h?S:s:a:" opt; do
case $opt in
h | \?)
usage
;;
S)
  # -S <file>: deploy the given Snap file.  Only accepted with 'start'.
  # The message names -S (this option), not the unrelated -s channel option.
  [[ $command = start ]] || usage "-S is only valid with the 'start' command"
  snapFilename=$OPTARG
  [[ -f $snapFilename ]] || usage "Snap not readable: $snapFilename"
  deployMethod=snap
  ;;
s)
  # -s <channel>: deploy the latest Snap from the named release channel.
  case $OPTARG in
  edge|beta|stable)
    snapChannel=$OPTARG
    # start() only looks at snapChannel inside its 'snap' deploy branch, so
    # the deploy method has to be switched here or -s would be ignored and a
    # local build deployed instead.
    deployMethod=snap
    ;;
  *)
    usage "Invalid snap channel: $OPTARG"
    ;;
  esac
  ;;
a)
nodeSetupArgs="$OPTARG"
;;
*)
usage "Error: unhandled option: $opt"
;;
@ -64,15 +101,6 @@ common_start_setup() {
(
set -x
ssh "${sshOptions[@]}" "$ipAddress" "
set -ex;
sudo systemctl disable apt-daily.service # disable run when system boot
sudo systemctl disable apt-daily.timer # disable timer run
sudo apt-get --assume-yes install rsync libssl-dev;
mkdir -p ~/solana ~/.cargo/bin;
"
test -d "$SOLANA_ROOT"
rsync -vPrz -e "ssh ${sshOptions[*]}" \
"$SOLANA_ROOT"/{fetch-perf-libs.sh,scripts,net,multinode-demo} \
@ -88,11 +116,24 @@ startLeader() {
common_start_setup "$ipAddress" "$logFile"
# Deploy local binaries to leader. Validators and clients later fetch the
# binaries from the leader.
(
set -x
rsync -vPrz -e "ssh ${sshOptions[*]}" "$SOLANA_ROOT"/farf/bin/* "$ipAddress":~/.cargo/bin/
case $deployMethod in
snap)
rsync -vPrz -e "ssh ${sshOptions[*]}" "$snapFilename" "$ipAddress:~/solana/solana.snap"
;;
local)
rsync -vPrz -e "ssh ${sshOptions[*]}" "$SOLANA_ROOT"/farf/bin/* "$ipAddress:~/.cargo/bin/"
;;
*)
usage "Internal error: invalid deployMethod: $deployMethod"
;;
esac
ssh "${sshOptions[@]}" -f "$ipAddress" \
"./solana/net/remote/remote_leader.sh"
"./solana/net/remote/remote_node.sh $deployMethod leader $leaderIp \"$nodeSetupArgs\" \"$RUST_LOG\""
) >> "$logFile"
}
@ -106,7 +147,7 @@ startValidator() {
(
set -x
ssh "${sshOptions[@]}" -f "$ipAddress" \
"./solana/net/remote/remote_validator.sh $leaderIp"
"./solana/net/remote/remote_node.sh $deployMethod validator $leaderIp \"$nodeSetupArgs\" \"$RUST_LOG\""
) >> "$logFile"
}
@ -117,35 +158,67 @@ startClient() {
echo "Starting client: $leaderIp"
common_start_setup "$ipAddress" "$logFile"
declare expectedNodeCount=$((${#validatorIpList[@]} + 1))
ssh "${sshOptions[@]}" -f "$ipAddress" \
"./solana/net/remote/remote_client.sh $leaderIp" >> "$logFile"
"./solana/net/remote/remote_client.sh $deployMethod $leaderIp $expectedNodeCount \"$RUST_LOG\"" >> "$logFile"
}
start() {
echo "Deployment started at $(date)"
SECONDS=0
leaderDeployTime=
[[ $command = "start" ]] || return
case $deployMethod in
snap)
if [[ -n $snapChannel ]]; then
if [[ $(uname) != Linux ]]; then
echo Error: snap channel deployment only supported in Linux
exit 1
fi
usage "TODO: the snap download command below is probably wrong..."
snap download --"$snapChannel" solana
snapFilename=solana.snap
fi
;;
local)
build
;;
*)
usage "Internal error: invalid deployMethod: $deployMethod"
;;
esac
echo "Deployment started at $(date)"
SECONDS=0
declare leaderDeployTime=
declare networkVersion=unknown
startLeader "$leaderIp" "$netLogDir/leader-$leaderIp.log"
leaderDeployTime=$SECONDS
SECONDS=0
SECONDS=0
for ipAddress in "${validatorIpList[@]}"; do
startValidator "$ipAddress" "$netLogDir/validator-$ipAddress.log" &
done
wait
validatorDeployTime=$SECONDS
SECONDS=0
SECONDS=0
for ipAddress in "${clientIpList[@]}"; do
startClient "$ipAddress" "$netLogDir/client-$ipAddress.log"
done
clientDeployTime=$SECONDS
SECONDS=0
wait
if [[ $deployMethod = "snap" ]]; then
IFS=\ read -r _ networkVersion _ < <(
ssh "${sshOptions[@]}" "$leaderIp" \
"snap info solana | grep \"^installed:\""
)
networkVersion=${networkVersion/0+git./}
fi
$metricsWriteDatapoint "testnet-deploy,name=$netBasename start=1,version=\"$networkVersion\""
echo
echo "================================================================="
echo "Deployment finished at $(date)"
@ -165,7 +238,11 @@ stop_node() {
set -x
ssh "${sshOptions[@]}" "$ipAddress" "
set -x;
pkill -9 solana- remote_ oom-monitor
if snap list solana; then
sudo snap set solana mode=;
sudo snap remove solana;
fi; \
pkill -9 solana- remote_ oom-monitor;
"
) || true
}
@ -173,6 +250,8 @@ stop_node() {
stop() {
SECONDS=0
$metricsWriteDatapoint "testnet-deploy,name=$netBasename stop=1"
stop_node "$leaderIp"
for ipAddress in "${validatorIpList[@]}" "${clientIpList[@]}"; do
@ -182,14 +261,6 @@ stop() {
echo "Stopping nodes took $SECONDS seconds"
}
mkdir -p log
if [[ $command == "start" ]]; then
build
stop
start
elif [[ $command == "stop" ]]; then
stop
else
usage "Unknown command: $command"
fi

View File

@ -1,18 +1,56 @@
#!/bin/bash -e
[[ -n $1 ]] || exit
deployMethod="$1"
netEntrypoint="$2"
numNodes="$3"
RUST_LOG="$4"
[[ -n $deployMethod ]] || exit
[[ -n $netEntrypoint ]] || exit
[[ -n $numNodes ]] || exit
cd "$(dirname "$0")"/../..
source net/common.sh
loadConfigFile
PATH="$HOME"/.cargo/bin:"$PATH"
rsync -vPrz "$1":~/.cargo/bin/solana* ~/.cargo/bin/
numNodes=1 # TODO: Pass this in
threadCount=$(nproc)
if [[ $threadCount -gt 4 ]]; then
threadCount=4
fi
./script/install-earlyoom.sh
# Install the client binaries according to the requested deploy method and
# pick the bench-tps entry point (solana_bench_tps) used by the loop below.
case $deployMethod in
snap)
  sudo snap install solana.snap --devmode --dangerous
  rm solana.snap
  sudo snap set solana metrics-config="$SOLANA_METRICS_CONFIG" rust-log="$RUST_LOG"
  solana_bench_tps=/snap/bin/solana.bench-tps
  ;;
local)
  PATH="$HOME"/.cargo/bin:"$PATH"
  export USE_INSTALL=1
  export RUST_LOG
  # Fetch the binaries from the entrypoint host before netEntrypoint is
  # rewritten below.
  rsync -vPrz "$netEntrypoint:~/.cargo/bin/solana*" ~/.cargo/bin/
  solana_bench_tps=multinode-demo/client.sh
  # client.sh is given an rsync-style "<host>:~/solana" location.  The
  # previous "$:~/solana" expanded to a literal '$' and lost the host.
  netEntrypoint="$netEntrypoint:~/solana"
  ;;
*)
  echo "Unknown deployment method: $deployMethod"
  exit 1
  ;;
esac
./scripts/oom-monitor.sh > oom-monitor.log 2>&1 &
export USE_INSTALL=1
multinode-demo/client.sh "$1":~/solana $numNodes --loop -s 600 --sustained > client.log 2>&1 &
# Run bench-tps and log each (unexpected) exit to client.log and to metrics.
# NOTE(review): this script's shebang enables -e, so a non-zero exit status
# from the client command ends the script instead of looping - confirm that
# is the intended behaviour.
while true; do
  echo "=== Client start: $(date)" >> client.log
  # -t takes the thread count computed earlier in this script; it was
  # previously passed as the literal word "threadCount".
  clientCommand="$solana_bench_tps $netEntrypoint $numNodes --loop -s 600 --sustained -t $threadCount"
  echo "$ $clientCommand" >> client.log

  # Intentionally unquoted: clientCommand is a whitespace-separated command line
  $clientCommand >> client.log 2>&1

  $metricsWriteDatapoint "testnet-deploy,name=$netBasename clientexit=1"
  echo Error: bench-tps should never exit | tee -a client.log
done

View File

@ -1,18 +0,0 @@
#!/bin/bash -e
cd "$(dirname "$0")"/../..
source net/common.sh
loadConfigFile
PATH="$HOME"/.cargo/bin:"$PATH"
./fetch-perf-libs.sh
./script/install-earlyoom.sh
./scripts/oom-monitor.sh > oom-monitor.log 2>&1 &
export USE_INSTALL=1
export SOLANA_CUDA=1
./multinode-demo/setup.sh
./multinode-demo/drone.sh > drone.log 2>&1 &
./multinode-demo/leader.sh > leader.log 2>&1 &

83
net/remote/remote_node.sh Executable file
View File

@ -0,0 +1,83 @@
#!/bin/bash -e
#
# Starts a leader or validator node on the machine this script runs on.
#
# usage: remote_node.sh deployMethod nodeType netEntrypoint [setupArgs] [RUST_LOG]
#
#   deployMethod  - "snap" or "local"
#   nodeType      - "leader" or "validator" (in snap mode anything other than
#                   "leader" is configured as a validator)
#   netEntrypoint - leader address; used as the snap leader-address and as the
#                   rsync source host for validators in local mode
#   setupArgs     - extra arguments for multinode-demo/setup.sh (may be empty)
#   RUST_LOG      - log filter to hand to the node (may be empty)
#
# Exits non-zero without a message when one of the first three arguments is
# missing.
#

deployMethod="$1"
nodeType="$2"
netEntrypoint="$3"
setupArgs="$4"
RUST_LOG="$5"
[[ -n $deployMethod ]] || exit
[[ -n $nodeType ]] || exit
[[ -n $netEntrypoint ]] || exit

cd "$(dirname "$0")"/../..
source net/common.sh
loadConfigFile

# NOTE(review): the other helpers invoked below live under ./scripts/ -
# confirm that ./script/ is the correct directory for this one.
./script/install-earlyoom.sh

case $deployMethod in
snap)
  SECONDS=0
  sudo snap install solana.snap --devmode --dangerous
  rm solana.snap

  # Build the snap configuration as an array so that each key=value pair
  # stays a single argument even when the value (e.g. setupArgs) contains
  # whitespace.
  nodeConfig=(
    "rust-log=$RUST_LOG"
    "metrics-config=$SOLANA_METRICS_CONFIG"
    "setup-args=$setupArgs"
    "enable-cuda=1"
  )
  if [[ $nodeType = leader ]]; then
    nodeConfig=("mode=leader+drone" "${nodeConfig[@]}")
  else
    nodeConfig=("mode=validator" "leader-address=$netEntrypoint" "${nodeConfig[@]}")
  fi

  # Drop a unique marker into syslog so that everything logged after this
  # point can be extracted below.
  logmarker="solana deploy $(date)/$RANDOM"
  logger "$logmarker"

  sudo snap set solana "${nodeConfig[@]}"
  snap info solana
  sudo snap get solana
  echo Slight delay to get more syslog output
  sleep 2
  sudo grep -Pzo "$logmarker(.|\\n)*" /var/log/syslog

  echo "Succeeded in ${SECONDS} seconds"
  ;;
local)
  PATH="$HOME"/.cargo/bin:"$PATH"
  export USE_INSTALL=1
  export SOLANA_CUDA=1
  # Export the value received as $5 rather than overwriting it with "1".
  export RUST_LOG

  ./fetch-perf-libs.sh
  ./scripts/oom-monitor.sh > oom-monitor.log 2>&1 &

  case $nodeType in
  leader)
    # shellcheck disable=SC2086 # Don't want to double quote "$setupArgs"
    ./multinode-demo/setup.sh -t leader -p $setupArgs
    ./multinode-demo/drone.sh > drone.log 2>&1 &
    ./multinode-demo/leader.sh > leader.log 2>&1 &
    ;;
  validator)
    # Validators copy their binaries from the entrypoint host.
    rsync -vPrz "$netEntrypoint:~/.cargo/bin/solana*" ~/.cargo/bin/

    # shellcheck disable=SC2086 # Don't want to double quote "$setupArgs"
    ./multinode-demo/setup.sh -t validator -p $setupArgs
    ./multinode-demo/validator.sh "$netEntrypoint":~/solana "$netEntrypoint" >validator.log 2>&1 &
    ;;
  *)
    echo "Error: unknown node type: $nodeType"
    exit 1
    ;;
  esac
  ;;
*)
  echo "Unknown deployment method: $deployMethod"
  exit 1
  ;;
esac

View File

@ -1,18 +0,0 @@
#!/bin/bash -e
[[ -n $1 ]] || exit
cd "$(dirname "$0")"/../..
source net/common.sh
loadConfigFile
PATH="$HOME"/.cargo/bin:"$PATH"
rsync -vPrz "$1":~/.cargo/bin/solana* ~/.cargo/bin/
./script/install-earlyoom.sh
./scripts/oom-monitor.sh > oom-monitor.log 2>&1 &
export USE_INSTALL=1
./multinode-demo/setup.sh
./multinode-demo/validator.sh "$1":~/solana "$1" >validator.log 2>&1 &

View File

@ -6,12 +6,18 @@
# Example:
# export SOLANA_METRICS_CONFIG="host=<metrics host>,db=<database name>,u=<username>,p=<password>"
#
configure_metrics() {
# The following directive disable complaints about unused variables in this
# file:
# shellcheck disable=2034
#
metricsWriteDatapoint="$(dirname "${BASH_SOURCE[0]}")"/metrics-write-datapoint.sh
configureMetrics() {
[[ -n $SOLANA_METRICS_CONFIG ]] || return 0
declare metrics_params
IFS=',' read -r -a metrics_params <<< "$SOLANA_METRICS_CONFIG"
for param in "${metrics_params[@]}"; do
declare metricsParams
IFS=',' read -r -a metricsParams <<< "$SOLANA_METRICS_CONFIG"
for param in "${metricsParams[@]}"; do
IFS='=' read -r -a pair <<< "$param"
if [[ ${#pair[@]} != 2 ]]; then
echo Error: invalid metrics parameter: "$param" >&2
@ -42,4 +48,4 @@ configure_metrics() {
fi
done
}
configure_metrics
configureMetrics