2018-07-15 09:23:35 -07:00
|
|
|
#!/bin/bash -e
#
# Deploys the Solana software running on the testnet full nodes
#
# This script must be run by a user/machine that has successfully authenticated
# with GCP and has sufficient permission.
#
# Environment variables read by this script:
#   SOLANA_METRICS_CONFIG        - required, forwarded to every node
#   SOLANA_SNAP_CHANNEL          - optional, snap channel to install
#   SOLANA_NET_URL               - optional, network entrypoint URL
#   SOLANA_CUDA                  - optional, non-empty enables CUDA on the leader
#   RUST_LOG                     - optional, forwarded to every node
#   SOLANA_DEFAULT_METRICS_RATE  - optional, forwarded to every node
#

# The -e on the shebang line is ignored when the script is started as
# |bash <script>|, so enable errexit explicitly as well.
set -e

# All relative paths below are resolved from the parent of this script's
# directory
cd "$(dirname "$0")/.."

# TODO: Switch over to rolling updates
ROLLING_UPDATE=false
#ROLLING_UPDATE=true
|
|
|
|
|
2018-07-12 19:47:07 -07:00
|
|
|
# Refuse to deploy without a metrics configuration: its value is written into
# the snap configuration of every node further down.
[[ -n $SOLANA_METRICS_CONFIG ]] || {
  echo Error: SOLANA_METRICS_CONFIG environment variable is unset
  exit 1
}
|
|
|
|
|
2018-07-16 09:42:54 -07:00
|
|
|
# Default to edge channel.  To select the beta channel:
#   export SOLANA_SNAP_CHANNEL=beta
if [[ -z $SOLANA_SNAP_CHANNEL ]]; then
  SOLANA_SNAP_CHANNEL=edge
fi

# Select default network URL based on SOLANA_SNAP_CHANNEL if SOLANA_NET_URL is
# unspecified.  An explicitly provided SOLANA_NET_URL is used as-is, in which
# case the channel name is not validated here.
if [[ -z $SOLANA_NET_URL ]]; then
  case "$SOLANA_SNAP_CHANNEL" in
  edge)
    SOLANA_NET_URL=master.testnet.solana.com
    ;;
  beta)
    SOLANA_NET_URL=testnet.solana.com
    ;;
  *)
    # Quoted so an unexpected value is reported verbatim instead of being
    # word-split or glob-expanded by the shell
    echo "Error: Unknown SOLANA_SNAP_CHANNEL=$SOLANA_SNAP_CHANNEL"
    exit 1
    ;;
  esac
fi
|
|
|
|
|
|
|
|
echo "+++ Configuration"
publicUrl="$SOLANA_NET_URL"
if [[ $publicUrl = testnet.solana.com ]]; then
  publicIp="" # Use default value
else
  # First line of the DNS answer for the entrypoint; it is later handed to the
  # validators as their leader-address
  publicIp=$(dig +short "$publicUrl" | head -n1)
  if [[ -z $publicIp ]]; then
    # An empty publicIp is also how "use the default leader" is expressed
    # above, so a failed lookup must not be allowed to pass silently
    echo "Error: unable to resolve $publicUrl"
    exit 1
  fi
fi

echo "Network entrypoint URL: $publicUrl ($publicIp)"
echo "Snap channel: $SOLANA_SNAP_CHANNEL"

# VM names are derived from the entrypoint URL with every '.' replaced by '-'
leaderName=${publicUrl//./-}

# "name:zone" entries of the nodes to deploy to, filled in by findVms
vmlist=()
|
|
|
|
|
|
|
|
#
# Lists the GCP instances matching a filter and records the running ones.
#
# Arguments: $1 - filter expression passed to |gcloud compute instances list|
# Globals:   vmlist (appended) - one "name:zone" entry per RUNNING instance
# Outputs:   the gcloud listing, plus a warning for every instance that is
#            not RUNNING
#
findVms() {
  declare filter="$1"
  # Keep the loop variables local so that reading the listing does not clobber
  # the caller's vmName/vmZone/status
  declare vmName vmZone status

  # First pass: default gcloud output, purely for the log
  gcloud compute instances list --filter="$filter"

  # Second pass: name, zone and status of each instance, one per line
  while read -r vmName vmZone status; do
    if [[ $status != RUNNING ]]; then
      echo "Warning: $vmName is not RUNNING, ignoring it."
      continue
    fi
    vmlist+=("$vmName:$vmZone")
  done < <(gcloud compute instances list --filter="$filter" --format 'value(name,zone,status)')
}
|
2018-07-12 20:08:35 -07:00
|
|
|
|
2018-07-17 19:39:43 -07:00
|
|
|
#
# Blocks until the background job |pid| finishes, then prints its log.
#
# Arguments: $1 - pid of a background job started by this shell; its output
#                 is expected in log-<pid>.txt in the current directory
# Outputs:   the contents of the log file, followed by a "^^^ +++" line when
#            the job failed
# Exits the whole script with status 1 when the job failed.
#
wait_for_node() {
  declare nodePid=$1
  declare rc=0

  wait "$nodePid" || rc=$?

  # The log is shown regardless of the outcome
  cat "log-$nodePid.txt"

  if [[ $rc -ne 0 ]]; then
    echo "^^^ +++"
    exit 1
  fi
}
|
|
|
|
|
2018-07-20 08:50:08 -07:00
|
|
|
# The leader must be the one and only entry in vmlist at this point; it stays
# at index 0 so that it is refreshed first
echo "Leader node:"
findVms "name=$leaderName"
if [[ ${#vmlist[@]} != 1 ]]; then
  echo "Unable to find $leaderName"
  exit 1
fi

# The client is optional.  When one was found it is pulled back out of vmlist
# and remembered separately in clientVm
echo "Client node:"
findVms "name=$leaderName-client"
clientVm=
if ((${#vmlist[@]} == 2)); then
  clientVm=${vmlist[1]}
  unset 'vmlist[1]'
fi

# Every running validator is appended after the leader
echo "Validator nodes:"
findVms "name~^$leaderName-validator-"
|
|
|
|
|
2018-07-18 20:52:14 -07:00
|
|
|
# Unless a rolling update was requested, remove the solana snap from every
# node in vmlist up front.  The removals run in parallel as background jobs.
if ! $ROLLING_UPDATE; then
  count=1
  for info in "${vmlist[@]}"; do
    vmName=${info%:*}
    vmZone=${info#*:}
    nodePosition="($count/${#vmlist[*]})"
    echo "--- Shutting down $vmName in zone $vmZone $nodePosition"
    gcloud compute ssh "$vmName" \
      --zone "$vmZone" \
      --ssh-flag="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null" \
      --command="sudo snap remove solana" &

    # Slow down deployment to avoid triggering GCP login
    # quota limits (each |ssh| counts as a login): pause after every 5th node
    if ((count % 5 == 0)); then
      sleep 3
    fi

    count=$((count + 1))
  done

  # Barrier: continue only once every removal has finished
  wait
fi
|
2018-07-17 19:39:43 -07:00
|
|
|
|
2018-07-20 17:01:36 -07:00
|
|
|
|
|
|
|
#
# Runs a command on the client VM over |gcloud compute ssh|.
#
# Arguments: $1 - text for the log header ("--- <message> <vm> in zone <zone>")
#            $2 - command line to execute on the client VM
# Globals:   clientVm (read) - "name:zone" of the client, empty when there is
#            no client
# Returns:   0 without doing anything when there is no client, otherwise the
#            status of gcloud
#
client_run() {
  declare message=$1
  declare cmd=$2
  # Local, so that calling this function does not overwrite the vmName/vmZone
  # used by the deployment loops
  declare vmName vmZone

  [[ -n $clientVm ]] || return 0
  vmName=${clientVm%:*}
  vmZone=${clientVm#*:}
  echo "--- $message $vmName in zone $vmZone"
  gcloud compute ssh "$vmName" --zone "$vmZone" \
    --ssh-flag="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null" \
    --command="$cmd"
}
|
|
|
|
|
|
|
|
# Stop the client (if there is one) before the nodes are refreshed: show the
# tmux sessions and what the "solana" session currently displays, kill that
# session and remove the snap.
clientShutdownCmd="\
set -x;
tmux list-sessions; \
tmux capture-pane -t solana -p; \
tmux kill-session -t solana; \
sudo snap remove solana; \
"
client_run "Shutting down" "$clientShutdownCmd"
|
|
|
|
|
2018-07-20 08:50:08 -07:00
|
|
|
# Refresh every node in vmlist.  The first entry is the leader: it is
# refreshed synchronously, then the validators are refreshed in parallel and
# collected at the end.
echo "--- Refreshing leader"
leader=true
pids=()
count=1
for info in "${vmlist[@]}"; do
  nodePosition="($count/${#vmlist[*]})"

  vmName=${info%:*}
  vmZone=${info#*:}
  echo "Starting refresh for $vmName $nodePosition"

  # The redirection is attached to the surrounding group, not to the
  # backgrounded subshell, so the log file is opened by this shell *before*
  # the job is forked.  That guarantees the file exists when it is renamed
  # below; the job keeps writing to it through the inherited descriptors.
  {
    (
      SECONDS=0
      echo "--- $vmName in zone $vmZone $nodePosition"

      # Settings shared by the leader and the validators
      commonNodeConfig="\
rust-log=$RUST_LOG \
default-metrics-rate=$SOLANA_DEFAULT_METRICS_RATE \
metrics-config=$SOLANA_METRICS_CONFIG \
"
      if $leader; then
        nodeConfig="mode=leader+drone $commonNodeConfig"
        if [[ -n $SOLANA_CUDA ]]; then
          nodeConfig="$nodeConfig enable-cuda=1"
        fi
      else
        nodeConfig="mode=validator leader-address=$publicIp $commonNodeConfig"
      fi

      set -x
      # Remote steps: drop a unique marker into syslog, reinstall and
      # configure the snap, then print everything syslog recorded from the
      # marker onwards.  $(date), $RANDOM, the channel and $nodeConfig are
      # expanded locally; the escaped \$logmarker is expanded on the node.
      gcloud compute ssh "$vmName" --zone "$vmZone" \
        --ssh-flag="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -t" \
        --command="\
set -ex; \
logmarker='solana deploy $(date)/$RANDOM'; \
sudo snap remove solana; \
logger \$logmarker; \
sudo snap install solana --$SOLANA_SNAP_CHANNEL --devmode; \
sudo snap set solana $nodeConfig; \
snap info solana; \
echo Slight delay to get more syslog output; \
sleep 2; \
sudo grep -Pzo \"\$logmarker(.|\\n)*\" /var/log/syslog \
"
      echo "Succeeded in ${SECONDS} seconds"
    ) &
  } > "log-$vmName.txt" 2>&1
  pid=$!
  # Rename log file so it can be discovered later by $pid
  mv "log-$vmName.txt" "log-$pid.txt"

  if $leader; then
    echo Waiting for leader...
    # Wait for the leader to initialize before starting the validators
    # TODO: Remove this limitation eventually.
    wait_for_node "$pid"

    echo "--- Refreshing validators"
  else
    # Slow down deployment to ~20 machines a minute to avoid triggering GCP login
    # quota limits (each |ssh| counts as a login)
    sleep 3

    pids+=("$pid")
  fi
  leader=false
  count=$((count + 1))
done

# Collect the validators in the order they were started; wait_for_node prints
# each log and aborts the script on the first failure
echo --- Waiting for validators
for pid in "${pids[@]}"; do
  wait_for_node "$pid"
done
|
|
|
|
|
2018-07-16 09:42:54 -07:00
|
|
|
# Run the sanity script against the entrypoint, passing the number of nodes
# in vmlist.  The subshell keeps |set -x| from leaking into the rest of the
# script.
echo "--- $publicUrl sanity test"
(
  set -x
  # Arguments are quoted so each is always passed as exactly one word
  USE_SNAP=1 ci/testnet-sanity.sh "$publicUrl" "${#vmlist[@]}"
)
|
2018-07-13 22:13:13 -07:00
|
|
|
|
2018-07-20 17:01:36 -07:00
|
|
|
# Start the client (if there is one): install the snap, then launch bench-tps
# against the network inside a detached tmux session named "solana", with its
# output also written to /tmp/solana.log.  Should bench-tps ever exit, the
# session prints an error and falls through to a shell.  Finally show what the
# session and the log contain after two seconds.
clientStartCmd="\
set -x;
sudo snap install solana --$SOLANA_SNAP_CHANNEL --devmode; \
snap info solana; \
tmux new -s solana -d \" \
/snap/bin/solana.bench-tps $SOLANA_NET_URL ${#vmlist[@]} --loop -s 3600 2>&1 | tee /tmp/solana.log; \
echo Error: bench-tps should never exit; \
bash \
\"; \
sleep 2; \
tmux capture-pane -t solana -p -S -100; \
tail /tmp/solana.log; \
"
client_run "Starting client on " "$clientStartCmd"

exit 0
|