solana/ci/refresh-testnet.sh

110 lines
2.9 KiB
Bash
Raw Normal View History

#!/bin/bash
#
# Refreshes the Solana software running on the Testnet full nodes
#
# This script must be run by a user/machine that has successfully authenticated
# with GCP and has sufficient permission.
#
# Required environment:
#   SOLANA_METRICS_CONFIG - forwarded to every node as its `metrics-config`
#                           snap setting
#

# Enable errexit here rather than in the shebang so it still applies when the
# script is started as `bash refresh-testnet.sh`.
set -e

if [[ -z ${SOLANA_METRICS_CONFIG:-} ]]; then
  echo "Error: SOLANA_METRICS_CONFIG environment variable is unset" >&2
  exit 1
fi
# Default to edge channel.  To select the beta channel:
#   export SOLANA_SNAP_CHANNEL=beta
: "${SOLANA_SNAP_CHANNEL:=edge}"

# Each snap channel maps to its own resource name prefix.
case $SOLANA_SNAP_CHANNEL in
edge)
  resourcePrefix=master-testnet-solana-com
  ;;
beta)
  resourcePrefix=testnet-solana-com
  ;;
*)
  echo "Error: Unknown SOLANA_SNAP_CHANNEL=$SOLANA_SNAP_CHANNEL" >&2
  exit 1
  ;;
esac

# publicUrl is the resource prefix with every '-' replaced by '.'
publicUrl=${resourcePrefix//-/.}
# Entries are "name:zone" pairs
vmlist=("$resourcePrefix":us-west1-b) # Leader is hard coded as the first entry
validatorNamePrefix=$resourcePrefix-validator-

echo "--- Available validators for $publicUrl"
filter="name~^$validatorNamePrefix"
# Human-readable listing for the log
gcloud compute instances list --filter="$filter"

# Capture the machine-readable listing before parsing it.  When gcloud is run
# inside a process substitution its exit status is never seen by the script, so
# a failed listing would silently leave only the leader in vmlist.
if ! validators=$(gcloud compute instances list --filter="$filter" \
  --format 'value(name,zone,status)'); then
  echo "Error: unable to list validator instances matching $filter" >&2
  exit 1
fi

# Append every RUNNING validator to vmlist as "name:zone"
while read -r vmName vmZone status; do
  # An empty listing still feeds one blank line through the here-string
  [[ -n $vmName ]] || continue
  if [[ $status != RUNNING ]]; then
    echo "Warning: $vmName is not RUNNING, ignoring it."
    continue
  fi
  vmlist+=("$vmName:$vmZone")
done <<<"$validators"

# Refresh every machine in vmlist ("name:zone" entries, leader first).
#
# Each machine is refreshed by a background subshell that generates a small
# install script, copies it to the machine and runs it there over ssh; all of
# the subshell's output goes to log-<name>.txt.  The leader is refreshed
# synchronously, the validators in parallel.  The exit status of every
# background job is checked so that a failed refresh fails this script instead
# of being silently ignored.
echo "--- Refreshing leader for $publicUrl"
leader=true
logfiles=() # validator log files, in start order
pids=()     # validator job PIDs, index-aligned with logfiles
for info in "${vmlist[@]}"; do
  vmName=${info%:*}
  vmZone=${info#*:}
  logfile="log-$vmName.txt"
  echo "Starting refresh for $vmName"

  (
    # Stop at the first failing step so the job's exit status is meaningful
    # even if errexit was not inherited from the parent shell.
    set -e
    SECONDS=0
    echo "--- $vmName in zone $vmZone"
    if $leader; then
      nodeConfig="mode=leader+drone enable-cuda=1 metrics-config=$SOLANA_METRICS_CONFIG"
    else
      nodeConfig="mode=validator metrics-config=$SOLANA_METRICS_CONFIG"
    fi

    # The here-doc is expanded locally: the channel and node configuration are
    # baked into the generated script before it is copied to the machine.
    # NOTE(review): the remote `set -x` echoes the metrics config into the log
    # that is later printed -- confirm it contains nothing sensitive.
    cat > "autogen-refresh-$vmName.sh" <<EOF
set -x
sudo snap remove solana
sudo snap install solana --$SOLANA_SNAP_CHANNEL --devmode
sudo snap set solana $nodeConfig
snap info solana
sudo snap logs solana -n200
EOF

    set -x
    gcloud compute scp --zone "$vmZone" "autogen-refresh-$vmName.sh" "$vmName":
    gcloud compute ssh "$vmName" --zone "$vmZone" \
      --ssh-flag="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -t" \
      --command="bash ./autogen-refresh-$vmName.sh"
    echo "Succeeded in ${SECONDS} seconds"
  ) > "$logfile" 2>&1 &
  pid=$!

  if $leader; then
    echo Waiting for leader...
    # Wait for the leader to initialize before starting the validators
    # TODO: Remove this limitation eventually.
    rc=0
    wait "$pid" || rc=$?
    cat "$logfile"
    if ((rc != 0)); then
      echo "Error: refresh of leader $vmName failed with exit code $rc" >&2
      exit "$rc"
    fi
    echo "--- Refreshing validators"
  else
    # Slow down deployment to ~30 machines a minute to avoid triggering GCP login
    # quota limits (the previous |scp| and |ssh| each count as a login)
    sleep 2
    logfiles+=("$logfile")
    pids+=("$pid")
  fi
  leader=false
done

echo "--- Waiting for validators"
failed=()
for i in "${!pids[@]}"; do
  # Wait on each PID individually: a bare `wait` always returns 0
  wait "${pids[i]}" || failed+=("${logfiles[i]}")
done

# Print every log, including those of failed jobs, before deciding the outcome
for log in "${logfiles[@]}"; do
  cat "$log"
done

if ((${#failed[@]} > 0)); then
  echo "Error: ${#failed[@]} validator refresh(es) failed, see: ${failed[*]}" >&2
  exit 1
fi

echo "--- $publicUrl sanity test"
USE_SNAP=1 ./multinode-demo/test/wallet-sanity.sh "$publicUrl" || exit

exit 0