Rewrite ci/testnet-{deploy,sanity}.sh in terms of net/ primitives
This commit is contained in:
parent e175c9dea9
commit 66ff602659
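
For orientation, the rewritten ci/testnet-deploy.sh drives the whole deploy through the net/ helpers instead of managing VMs over ssh itself. A condensed sketch of that flow, lifted from the new script in the diff below (the comments are paraphrases added here, not part of the commit):

  time net/gce.sh delete -p "$netName" -y            # tear down any existing network with this name
  time net/gce.sh create "${gce_create_args[@]}"     # recreate it (-a/-c/-g/-p/-z assembled from the CLI flags)
  net/init-metrics.sh -e                             # configure metrics for the new network
  time net/net.sh start -s "$snapChannel"            # start the network from the requested Snap channel
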
ci/testnet-deploy.sh
@@ -1,484 +1,103 @@
-#!/bin/bash -e
-#
-# Deploys the Solana software running on the testnet full nodes
-#
-# This script must be run by a user/machine that has successfully authenticated
-# with GCP and has sufficient permission.
-#
-
-here=$(dirname "$0")
-metrics_write_datapoint="$here"/../scripts/metrics-write-datapoint.sh
-
-# shellcheck source=scripts/gcloud.sh
-source "$here"/../scripts/gcloud.sh
-
-# TODO: Switch over to rolling updates
-ROLLING_UPDATE=false
-#ROLLING_UPDATE=true
-
-if [[ -z $SOLANA_METRICS_CONFIG ]]; then
-  echo Error: SOLANA_METRICS_CONFIG environment variable is unset
-  exit 1
-fi
-
-# shellcheck source=scripts/configure-metrics.sh
-source "$here"/../scripts/configure-metrics.sh
-
-# Default to edge channel. To select the beta channel:
-#   export SOLANA_SNAP_CHANNEL=beta
-if [[ -z $SOLANA_SNAP_CHANNEL ]]; then
-  SOLANA_SNAP_CHANNEL=edge
-fi
-
-# Select default network URL based on SOLANA_SNAP_CHANNEL if SOLANA_NET_ENTRYPOINT is
-# unspecified
-if [[ -z $SOLANA_NET_ENTRYPOINT ]]; then
-  case $SOLANA_SNAP_CHANNEL in
-  edge)
-    SOLANA_NET_ENTRYPOINT=master.testnet.solana.com
-    unset SOLANA_NET_NAME
-    ;;
-  beta)
-    SOLANA_NET_ENTRYPOINT=testnet.solana.com
-    unset SOLANA_NET_NAME
-    ;;
-  *)
-    echo Error: Unknown SOLANA_SNAP_CHANNEL=$SOLANA_SNAP_CHANNEL
-    exit 1
-    ;;
-  esac
-fi
-
-if [[ -z $SOLANA_NET_NAME ]]; then
-  SOLANA_NET_NAME=${SOLANA_NET_ENTRYPOINT//./-}
-fi
-
-: ${SOLANA_NET_NAME:?$SOLANA_NET_ENTRYPOINT}
-netBasename=${SOLANA_NET_NAME/-testnet-solana-com/}
-if [[ $netBasename != testnet ]]; then
-  netBasename="testnet-$netBasename"
-fi
-
-# Figure installation command
-SNAP_INSTALL_CMD="\
-  for i in {1..3}; do \
-    sudo snap install solana --$SOLANA_SNAP_CHANNEL --devmode && break;
-    sleep 1; \
-  done \
-"
-LOCAL_SNAP=$1
-if [[ -n $LOCAL_SNAP ]]; then
-  if [[ ! -f $LOCAL_SNAP ]]; then
-    echo "Error: $LOCAL_SNAP is not a file"
-    exit 1
-  fi
-  SNAP_INSTALL_CMD="sudo snap install ~/solana_local.snap --devmode --dangerous"
-fi
-SNAP_INSTALL_CMD="sudo snap remove solana; $SNAP_INSTALL_CMD"
-
-EARLYOOM_INSTALL_CMD="\
-  wget --retry-connrefused --waitretry=1 \
-    --read-timeout=20 --timeout=15 --tries=5 \
-    -O install-earlyoom.sh \
-    https://raw.githubusercontent.com/solana-labs/solana/master/scripts/install-earlyoom.sh; \
-  bash install-earlyoom.sh \
-"
-SNAP_INSTALL_CMD="$EARLYOOM_INSTALL_CMD; $SNAP_INSTALL_CMD"
-
-# `export SKIP_INSTALL=1` to reset the network without reinstalling the snap
-if [[ -n $SKIP_INSTALL ]]; then
-  SNAP_INSTALL_CMD="echo Install skipped"
-fi
-
-echo "+++ Configuration for $netBasename"
-publicUrl="$SOLANA_NET_ENTRYPOINT"
-if [[ $publicUrl = testnet.solana.com ]]; then
-  publicIp="" # Use default value
-else
-  publicIp=$(dig +short $publicUrl | head -n1)
-fi
-
-echo "Network name: $SOLANA_NET_NAME"
-echo "Network entry point URL: $publicUrl ($publicIp)"
-echo "Snap channel: $SOLANA_SNAP_CHANNEL"
-echo "Install command: $SNAP_INSTALL_CMD"
-echo "Setup args: $SOLANA_SETUP_ARGS"
-[[ -z $LOCAL_SNAP ]] || echo "Local snap: $LOCAL_SNAP"
-
-vmlist=() # Each array element is formatted as "class:vmName:vmZone:vmPublicIp"
-
-vm_exec() {
-  declare vmName=$1
-  declare vmZone=$2
-  declare vmPublicIp=$3
-  declare message=$4
-  declare cmd=$5
-
-  echo "--- $message $vmName in zone $vmZone ($vmPublicIp)"
-  ssh -o BatchMode=yes -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-    testnet-deploy@"$vmPublicIp" "$cmd"
-}
-
-#
-# vm_foreach [cmd] [extra args to cmd]
-# where
-#   cmd - the command to execute on each VM
-#         The command will receive three fixed arguments, followed by any
-#         additionl arguments supplied to vm_foreach:
-#             vmName - GCP name of the VM
-#             vmZone - The GCP zone the VM is located in
-#             vmPublicIp - The public IP address of this VM
-#             vmClass - The 'class' of this VM
-#             count - Monotonically increasing count for each
-#                     invocation of cmd, starting at 1
-#             ... - Extra args to cmd..
-#
-#
-vm_foreach() {
-  declare cmd=$1
-  shift
-
-  declare count=1
-  for info in "${vmlist[@]}"; do
-    declare vmClass vmName vmZone vmPublicIp
-    IFS=: read -r vmClass vmName vmZone vmPublicIp _ < <(echo "$info")
-
-    eval "$cmd" "$vmName" "$vmZone" "$vmPublicIp" "$vmClass" "$count" "$@"
-    count=$((count + 1))
-  done
-}
-
-#
-# vm_foreach_in_class [class] [cmd]
-# where
-#   class - the desired VM class to operate on
-#   cmd - the command to execute on each VM in the desired class.
-#         The command will receive three arguments:
-#             vmName - GCP name of the VM
-#             vmZone - The GCP zone the VM is located in
-#             vmPublicIp - The public IP address of this VM
-#             count - Monotonically increasing count for each
-#                     invocation of cmd, starting at 1
-#
-#
-_run_cmd_if_class() {
-  declare vmName=$1
-  declare vmZone=$2
-  declare vmPublicIp=$3
-  declare vmClass=$4
-  declare count=$5
-  declare class=$6
-  declare cmd=$7
-  if [[ $class = "$vmClass" ]]; then
-    eval "$cmd" "$vmName" "$vmZone" "$vmPublicIp" "$count"
-  fi
-}
-
-vm_foreach_in_class() {
-  declare class=$1
-  declare cmd=$2
-  vm_foreach _run_cmd_if_class "$1" "$2"
-}
-
-#
-# Load all VMs matching the specified filter and tag them with the specified
-# class into the `vmlist` array.
-findVms() {
-  declare class="$1"
-  declare filter="$2"
-
-  gcloud_FindInstances "$filter" show
-  vmlist+=("${instances[@]/#/$class:}")
-}
-
-wait_for_pids() {
-  echo "--- Waiting for $*"
-  for pid in "${pids[@]}"; do
-    declare ok=true
-    wait "$pid" || ok=false
-    cat "log-$pid.txt"
-    if ! $ok; then
-      echo ^^^ +++
-      exit 1
-    fi
-    rm "log-$pid.txt"
-  done
-}
-
-delete_unreachable_validators() {
-  declare vmName=$1
-  declare vmZone=$2
-  declare vmPublicIp=$3
-
-  touch "log-$vmName.txt"
-  (
-    SECONDS=0
-    if ! vm_exec "$vmName" "$vmZone" "$vmPublicIp" "Checking $vmName" uptime; then
-      echo "^^^ +++"
-
-      # Validators are managed by a Compute Engine Instance Group, so deleting
-      # one will just cause a new one to be spawned.
-      echo "Warning: $vmName is unreachable, deleting it"
-      gcloud_DeleteInstances "$vmName"
-    fi
-    echo "validator checked in ${SECONDS} seconds"
-  ) >> "log-$vmName.txt" 2>&1 &
-  declare pid=$!
-
-  # Rename log file so it can be discovered later by $pid
-  mv "log-$vmName.txt" "log-$pid.txt"
-  pids+=("$pid")
-}
-
-
-echo "Validator nodes (unverified):"
-findVms validator "name~^$SOLANA_NET_NAME-validator-"
-pids=()
-vm_foreach_in_class validator delete_unreachable_validators
-wait_for_pids validator sanity check
-vmlist=()
-
-echo "Leader node:"
-findVms leader "name=$SOLANA_NET_NAME"
-[[ ${#vmlist[@]} = 1 ]] || {
-  echo "Unable to find $SOLANA_NET_NAME"
-  exit 1
-}
-
-echo "Client node(s):"
-findVms client "name~^$SOLANA_NET_NAME-client"
-
-echo "Validator nodes:"
-findVms validator "name~^$SOLANA_NET_NAME-validator-"
-
-fullnode_count=0
-inc_fullnode_count() {
-  fullnode_count=$((fullnode_count + 1))
-}
-vm_foreach_in_class leader inc_fullnode_count
-vm_foreach_in_class validator inc_fullnode_count
-
-# Add "network stopping" datapoint
-$metrics_write_datapoint "testnet-deploy,name=$netBasename stop=1"
-
-client_start() {
-  declare vmName=$1
-  declare vmZone=$2
-  declare vmPublicIp=$3
-  declare count=$4
-
-  nodeConfig="\
-    rust-log=$RUST_LOG \
-    default-metrics-rate=$SOLANA_DEFAULT_METRICS_RATE \
-    metrics-config=$SOLANA_METRICS_CONFIG \
-    setup-args=$SOLANA_SETUP_ARGS \
-    leader-ip=$publicIp \
-  "
-
-  vm_exec "$vmName" "$vmZone" "$vmPublicIp" \
-    "Starting client $count:" \
-    "\
-      set -x;
-      sudo snap set solana $nodeConfig; \
-      snap info solana; \
-      sudo snap get solana; \
-      threadCount=\$(nproc); \
-      if [[ \$threadCount -gt 4 ]]; then threadCount=4; fi; \
-      tmux kill-session -t solana; \
-      tmux new -s solana -d \" \
-        set -x; \
-        sudo rm /tmp/solana.log; \
-        while : ; do \
-          /snap/bin/solana.bench-tps --num-nodes $fullnode_count --seconds 600 --sustained --threads \$threadCount 2>&1 | tee -a /tmp/solana.log; \
-          echo 'https://metrics.solana.com:8086/write?db=${INFLUX_DATABASE}&u=${INFLUX_USERNAME}&p=${INFLUX_PASSWORD}' \
-            | xargs curl --max-time 5 -XPOST --data-binary 'testnet-deploy,name=$netBasename clientexit=1'; \
-          echo Error: bench-tps should never exit | tee -a /tmp/solana.log; \
-        done; \
-        bash \
-      \"; \
-      sleep 2; \
-      tmux capture-pane -t solana -p -S -100; \
-      tail /tmp/solana.log; \
-    "
-}
-
-client_stop() {
-  declare vmName=$1
-  declare vmZone=$2
-  declare vmPublicIp=$3
-  declare count=$4
-
-  touch "log-$vmName.txt"
-  (
-    SECONDS=0
-    vm_exec "$vmName" "$vmZone" "$vmPublicIp" \
-      "Stopping client $vmName ($count):" \
-      "\
-        set -x;
-        tmux list-sessions; \
-        tmux capture-pane -t solana -p; \
-        tmux kill-session -t solana; \
-        $SNAP_INSTALL_CMD; \
-        sudo snap set solana metrics-config=$SOLANA_METRICS_CONFIG \
-          rust-log=$RUST_LOG \
-          default-metrics-rate=$SOLANA_DEFAULT_METRICS_RATE \
-          ; \
-      "
-    echo "Client stopped in ${SECONDS} seconds"
-  ) >> "log-$vmName.txt" 2>&1 &
-  declare pid=$!
-
-  # Rename log file so it can be discovered later by $pid
-  mv "log-$vmName.txt" "log-$pid.txt"
-  pids+=("$pid")
-}
-
-fullnode_start() {
-  declare class=$1
-  declare vmName=$2
-  declare vmZone=$3
-  declare vmPublicIp=$4
-  declare count=$5
-
-  touch "log-$vmName.txt"
-  (
-    SECONDS=0
-    commonNodeConfig="\
-      rust-log=$RUST_LOG \
-      default-metrics-rate=$SOLANA_DEFAULT_METRICS_RATE \
-      metrics-config=$SOLANA_METRICS_CONFIG \
-      setup-args=$SOLANA_SETUP_ARGS \
-      leader-ip=$publicIp \
-    "
-    if [[ $class = leader ]]; then
-      nodeConfig="mode=leader+drone $commonNodeConfig"
-      if [[ -n $SOLANA_CUDA ]]; then
-        nodeConfig="$nodeConfig enable-cuda=1"
-      fi
-    else
-      nodeConfig="mode=validator $commonNodeConfig"
-    fi
-
-    vm_exec "$vmName" "$vmZone" "$vmPublicIp" "Starting $class $count:" \
-      "\
-        set -ex; \
-        logmarker='solana deploy $(date)/$RANDOM'; \
-        logger \"\$logmarker\"; \
-        $SNAP_INSTALL_CMD; \
-        sudo snap set solana $nodeConfig; \
-        snap info solana; \
-        sudo snap get solana; \
-        echo Slight delay to get more syslog output; \
-        sleep 2; \
-        sudo grep -Pzo \"\$logmarker(.|\\n)*\" /var/log/syslog \
-      "
-    echo "Succeeded in ${SECONDS} seconds"
-  ) >> "log-$vmName.txt" 2>&1 &
-  declare pid=$!
-
-  # Rename log file so it can be discovered later by $pid
-  mv "log-$vmName.txt" "log-$pid.txt"
-
-  pids+=("$pid")
-}
-
-leader_start() {
-  fullnode_start leader "$@"
-}
-
-validator_start() {
-  fullnode_start validator "$@"
-}
-
-fullnode_stop() {
-  declare vmName=$1
-  declare vmZone=$2
-  declare vmPublicIp=$3
-  declare count=$4
-
-  touch "log-$vmName.txt"
-  (
-    SECONDS=0
-    # Try to ping the machine first. When a machine (validator) is restarted,
-    # there can be a delay between when the instance is reported as RUNNING and when
-    # it's reachable over the network
-    timeout 30s bash -c "set -o pipefail; until ping -c 3 $vmPublicIp | tr - _; do echo .; done"
-    vm_exec "$vmName" "$vmZone" "$vmPublicIp" "Shutting down" "\
-      if snap list solana; then \
-        sudo snap set solana mode=; \
-      fi"
-    echo "Succeeded in ${SECONDS} seconds"
-  ) >> "log-$vmName.txt" 2>&1 &
-  declare pid=$!
-
-  # Rename log file so it can be discovered later by $pid
-  mv "log-$vmName.txt" "log-$pid.txt"
-
-  pids+=("$pid")
-}
-
-if [[ -n $LOCAL_SNAP ]]; then
-  echo "--- Transferring $LOCAL_SNAP to node(s)"
-
-  transfer_local_snap() {
-    declare vmName=$1
-    declare vmZone=$2
-    declare vmPublicIp=$3
-    declare vmClass=$4
-    declare count=$5
-
-    echo "--- $vmName in zone $vmZone ($count)"
-    SECONDS=0
-    scp -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
-      "$LOCAL_SNAP" testnet-deploy@"$vmPublicIp":solana_local.snap
-    echo "Succeeded in ${SECONDS} seconds"
-  }
-  vm_foreach transfer_local_snap
-fi
-
-echo "--- Stopping client node(s)"
-pids=()
-vm_foreach_in_class client client_stop
-client_stop_pids=("${pids[@]}")
-
-if ! $ROLLING_UPDATE; then
-  pids=()
-  echo "--- Shutting down all full nodes"
-  vm_foreach_in_class leader fullnode_stop
-  vm_foreach_in_class validator fullnode_stop
-  wait_for_pids fullnode shutdown
-fi
-
-pids=()
-echo --- Starting leader node
-vm_foreach_in_class leader leader_start
-wait_for_pids leader
-
-pids=()
-echo --- Starting validator nodes
-vm_foreach_in_class validator validator_start
-wait_for_pids validators
-
-echo "--- $publicUrl sanity test"
-if [[ -z $CI ]]; then
-  # TODO: ssh into a node and run testnet-sanity.sh there. It's not safe to
-  # assume the correct Snap is installed on the current non-CI machine
-  echo Skipped for non-CI deploy
-  snapVersion=unknown
-else
-  (
-    set -x
-    USE_SNAP=1 ci/testnet-sanity.sh $publicUrl $fullnode_count
-  )
-  IFS=\ read -r _ snapVersion _ < <(snap info solana | grep "^installed:")
-  snapVersion=${snapVersion/0+git./}
-fi
-
-pids=("${client_stop_pids[@]}")
-wait_for_pids client shutdown
-vm_foreach_in_class client client_start
-
-# Add "network started" datapoint
-$metrics_write_datapoint "testnet-deploy,name=$netBasename start=1,version=\"$snapVersion\""
-
-exit 0
+#!/bin/bash -e
+
+cd "$(dirname "$0")"/..
+
+zone=
+leaderAddress=
+clientNodeCount=0
+publicNetwork=false
+snapChannel=edge
+delete=false
+
+usage() {
+  exitcode=0
+  if [[ -n "$1" ]]; then
+    exitcode=1
+    echo "Error: $*"
+  fi
+  cat <<EOF
+usage: $0 [name] [zone] [options...]
+
+Deploys a CD testnet
+
+  name  - name of the network
+  zone  - GCE to deploy the network into
+
+  options:
+   -s edge|beta|stable  - Deploy the specified Snap release channel
+                          (default: $snapChannel)
+   -c [number]          - Number of client nodes (default: $clientNodeCount)
+   -P                   - Use public network IP addresses (default: $publicNetwork)
+   -a [address]         - Set the leader node's external IP address to this GCE address
+   -d                   - Delete the network
+
+Note: the SOLANA_METRICS_CONFIG environment variable is used to configure
+      metrics
+EOF
+  exit $exitcode
+}
+
+netName=$1
+zone=$2
+[[ -n $netName ]] || usage
+[[ -n $zone ]] || usage "Zone not specified"
+shift 2
+
+while getopts "h?p:Pc:s:a:d" opt; do
+  case $opt in
+  h | \?)
+    usage
+    ;;
+  P)
+    publicNetwork=true
+    ;;
+  c)
+    clientNodeCount=$OPTARG
+    ;;
+  s)
+    case $OPTARG in
+    edge|beta|stable)
+      snapChannel=$OPTARG
+      ;;
+    *)
+      usage "Invalid snap channel: $OPTARG"
+      ;;
+    esac
+    ;;
+  a)
+    leaderAddress=$OPTARG
+    ;;
+  d)
+    delete=true
+    ;;
+  *)
+    usage "Error: unhandled option: $opt"
+    ;;
+  esac
+done
+
+gce_create_args=(
+  -a "$leaderAddress"
+  -c "$clientNodeCount"
+  -g
+  -p "$netName"
+  -z "$zone"
+)
+
+if $publicNetwork; then
+  gce_create_args+=(-P)
+fi
+
+set -x
+
+time net/gce.sh delete -p "$netName" -y
+if $delete; then
+  exit 0
+fi
+
+time net/gce.sh create "${gce_create_args[@]}"
+net/init-metrics.sh -e
+time net/net.sh start -s "$snapChannel"
+
+exit 0
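
A usage sketch for the rewritten script (the network name and zone below are illustrative placeholders, not values taken from the commit):

  # Deploy a CD testnet named "mytestnet" into an example zone, from the beta
  # Snap channel, with two client nodes:
  ci/testnet-deploy.sh mytestnet us-east1-b -s beta -c 2

  # Tear the same network down:
  ci/testnet-deploy.sh mytestnet us-east1-b -d
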
ci/testnet-sanity.sh
@@ -1,78 +1,32 @@
-#!/bin/bash -e
-#
-# Perform a quick sanity test on the specific testnet
-#
-
-cd "$(dirname "$0")/.."
-source scripts/configure-metrics.sh
-
-NET_URL=$1
-if [[ -z $NET_URL ]]; then
-  NET_URL=testnet.solana.com
-fi
-
-EXPECTED_NODE_COUNT=$2
-if [[ -z $EXPECTED_NODE_COUNT ]]; then
-  EXPECTED_NODE_COUNT=50
-fi
-
-echo "--- $NET_URL: verify ledger"
-if [[ -z $NO_LEDGER_VERIFY ]]; then
-  if [[ -d /var/snap/solana/current/config/ledger ]]; then
-    # Note: here we assume this script is actually running on the leader node...
-    (
-      set -x
-      rm -rf /var/tmp/ledger-verify
-      cp -r /var/snap/solana/current/config/ledger /var/tmp/ledger-verify
-      solana.ledger-tool --ledger /var/tmp/ledger-verify verify
-    )
-  else
-    echo "^^^ +++"
-    echo "Ledger verify skipped"
-  fi
-else
-  echo "^^^ +++"
-  echo "Ledger verify skipped (NO_LEDGER_VERIFY defined)"
-fi
-
-echo "--- $NET_URL: wallet sanity"
-(
-  set -x
-  multinode-demo/test/wallet-sanity.sh $NET_URL
-)
-
-echo "--- $NET_URL: node count"
-if [[ -n "$USE_SNAP" ]]; then
-  # TODO: Merge client.sh functionality into solana-bench-tps proper and
-  # remove this USE_SNAP case
-  cmd=solana.bench-tps
-else
-  cmd=multinode-demo/client.sh
-fi
-
-(
-  set -x
-  $cmd --num-nodes "$EXPECTED_NODE_COUNT" --converge-only
-)
-
-echo "--- $NET_URL: validator sanity"
-if [[ -z $NO_VALIDATOR_SANITY ]]; then
-  (
-    ./multinode-demo/setup.sh -t validator
-    set -e pipefail
-    timeout 10s ./multinode-demo/validator.sh "$NET_URL" 2>&1 | tee validator.log
-  )
-  wc -l validator.log
-  if grep -C100 panic validator.log; then
-    echo "^^^ +++"
-    echo "Panic observed"
-    exit 1
-  else
-    echo "Validator log looks ok"
-  fi
-else
-  echo "^^^ +++"
-  echo "Validator sanity disabled (NO_VALIDATOR_SANITY defined)"
-fi
-
-exit 0
+#!/bin/bash -e
+
+cd "$(dirname "$0")/.."
+
+usage() {
+  exitcode=0
+  if [[ -n "$1" ]]; then
+    exitcode=1
+    echo "Error: $*"
+  fi
+  cat <<EOF
+usage: $0 [name]
+
+Sanity check a CD testnet
+
+  name  - name of the network
+
+Note: the SOLANA_METRICS_CONFIG environment variable is used to configure
+      metrics
+EOF
+  exit $exitcode
+}
+
+netName=$1
+[[ -n $netName ]] || usage ""
+
+set -x
+net/gce.sh config -p "$netName"
+net/init-metrics.sh -e
+net/net.sh sanity ${NO_LEDGER_VERIFY:+-o noLedgerVerify}
+
+exit 0
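
In the new sanity script above, the old NO_LEDGER_VERIFY escape hatch survives as an optional flag forwarded to net/net.sh through bash's ${parameter:+word} expansion. A standalone illustration of that idiom (not part of the commit):

  unset NO_LEDGER_VERIFY
  echo net/net.sh sanity ${NO_LEDGER_VERIFY:+-o noLedgerVerify}
  # prints: net/net.sh sanity

  NO_LEDGER_VERIFY=1
  echo net/net.sh sanity ${NO_LEDGER_VERIFY:+-o noLedgerVerify}
  # prints: net/net.sh sanity -o noLedgerVerify
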
@@ -215,7 +215,7 @@ create)
 gcloud_CreateInstances "$prefix-validator" "$validatorNodeCount" "$zone" \
   "$imageName" "$validatorMachineType" "$validatorBootDiskSize" "$validatorAccelerator" \
   "$here/remote/remote-startup.sh" ""
-if [[ -n $clientNodeCount ]]; then
+if [[ $clientNodeCount -gt 0 ]]; then
   gcloud_CreateInstances "$prefix-client" "$clientNodeCount" "$zone" \
     "$imageName" "$clientMachineType" "$clientBootDiskSize" "$clientAccelerator" \
     "$here/remote/remote-startup.sh" ""
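
This last hunk (presumably the GCE helper that owns the gcloud_CreateInstances calls, i.e. net/gce.sh; the file is not named in this view) tightens the client-instance guard. It matters because the rewritten deploy script always supplies -c with a numeric count that defaults to 0, and the string "0" is non-empty, so the old test would still create client instances when none were requested. A quick illustration:

  clientNodeCount=0
  [[ -n $clientNodeCount ]] && echo "old guard fires"      # "0" is non-empty, so this still runs
  [[ $clientNodeCount -gt 0 ]] && echo "new guard fires"   # correctly skipped when the count is zero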