solana/ci/testnet-deploy.sh

329 lines
7.3 KiB
Bash
Raw Normal View History

#!/usr/bin/env bash
#
# Deploys, reconfigures, stops or deletes a CD testnet by driving the
# net/<cloud>.sh and net/net.sh scripts. Run with -h for the option list.
#
set -e

# This script lives in ci/; work from its parent directory so the relative
# ci/ and net/ paths used below resolve.
cd "$(dirname "$0")"/..

# Presumably defines upload-ci-artifact, which the exit handler calls.
source ci/upload-ci-artifact.sh

# Option defaults. The trailing comment names the command-line flag that
# overrides each one.
zone=()                              # -z (repeatable); kept as an array
bootstrapFullNodeAddress=            # -a
bootstrapFullNodeMachineType=        # -G
clientNodeCount=0                    # -c
additionalFullNodeCount=10           # -n
publicNetwork=false                  # -P
stopNetwork=false                    # -S
reuseLedger=false                    # -r
skipCreate=false                     # -e
skipStart=false                      # -s
externalNode=false                   # -x
failOnValidatorBootupFailure=true    # -f sets this to false
tarChannelOrTag=edge                 # -t
delete=false                         # -D
enableGpu=false                      # -g or -G
bootDiskType=""                      # -d
leaderRotation=true                  # -b sets this to false
blockstreamer=false                  # -u
deployUpdateManifest=true            # -w sets this to false
fetchLogs=true                       # -w sets this to false

#######################################
# Prints the help text to stdout and exits; never returns.
# Globals:   reads the option defaults interpolated into the help text
#            (tarChannelOrTag, additionalFullNodeCount, clientNodeCount,
#            blockstreamer, publicNetwork, enableGpu)
# Arguments: $* - optional error message. When $1 is non-empty the message is
#                 printed first as "Error: <message>".
# Exits:     0 when called without a message, 1 when a message was given
#######################################
usage() {
  local exitcode=0
  if [[ -n "$1" ]]; then
    exitcode=1
    echo "Error: $*"
  fi
  cat <<EOF
usage: $0 -p network-name -C cloud -z zone1 [-z zone2] ... [-z zoneN] [options...]

Deploys a CD testnet

 mandatory arguments:
   -p [network-name]  - name of the network
   -C [cloud]         - cloud provider to use (gce, ec2)
   -z [zone]          - cloud provider zone to deploy the network into.  Must specify at least one zone

 options:
   -t edge|beta|stable|vX.Y.Z
                      - Deploy the latest tarball release for the
                        specified release channel (edge|beta|stable) or release tag
                        (vX.Y.Z)
                        (default: $tarChannelOrTag)
   -n [number]        - Number of additional full nodes (default: $additionalFullNodeCount)
   -c [number]        - Number of client bencher nodes (default: $clientNodeCount)
   -u                 - Include a Blockstreamer (default: $blockstreamer)
   -P                 - Use public network IP addresses (default: $publicNetwork)
   -G                 - Enable GPU, and set count/type of GPUs to use (e.g n1-standard-16 --accelerator count=4,type=nvidia-tesla-k80)
   -g                 - Enable GPU (default: $enableGpu)
   -b                 - Disable leader rotation
   -a [address]       - Set the bootstrap fullnode's external IP address to this GCE address
   -d [disk-type]     - Specify a boot disk type (default None) Use pd-ssd to get ssd on GCE.
   -D                 - Delete the network
   -r                 - Reuse existing node/ledger configuration from a
                        previous |start| (ie, don't run ./multinode-demo/setup.sh).
   -x                 - External node.  Default: false
   -e                 - Skip create.  Assume the nodes have already been created
   -s                 - Skip start.  Nodes will still be created or configured, but network software will not be started.
   -S                 - Stop network software without tearing down nodes.
   -f                 - Discard validator nodes that didn't bootup successfully
   -w                 - Skip time-consuming "bells and whistles" that are
                        unnecessary for a high-node count demo testnet

 Note: the SOLANA_METRICS_CONFIG environment variable is used to configure
       metrics
EOF
  exit $exitcode
}

zone=()

#######################################
# Parses the command-line flags into the script's global option variables.
# Globals:   writes netName, cloudProvider, zone (one element appended per
#            -z) and the option variable matching each flag given
# Arguments: "$@" - the script's command-line arguments
# Exits:     through usage on -h, on a flag getopts rejects, or on a -t
#            value that is not edge, beta, stable or v*
#######################################
parse_args() {
  # A local OPTIND makes every call start scanning at the first argument.
  local opt OPTIND=1
  while getopts "h?p:Pn:c:t:gG:a:Dbd:rusxz:C:Sfew" opt; do
    case $opt in
    h | \?)
      usage
      ;;
    p)
      netName=$OPTARG
      ;;
    C)
      cloudProvider=$OPTARG
      ;;
    z)
      zone+=("$OPTARG")
      ;;
    P)
      publicNetwork=true
      ;;
    n)
      additionalFullNodeCount=$OPTARG
      ;;
    c)
      clientNodeCount=$OPTARG
      ;;
    t)
      case $OPTARG in
      edge | beta | stable | v*)
        tarChannelOrTag=$OPTARG
        ;;
      *)
        usage "Invalid release channel: $OPTARG"
        ;;
      esac
      ;;
    b)
      leaderRotation=false
      ;;
    g)
      enableGpu=true
      ;;
    G)
      # -G implies -g and additionally records the machine type.
      enableGpu=true
      bootstrapFullNodeMachineType=$OPTARG
      ;;
    a)
      bootstrapFullNodeAddress=$OPTARG
      ;;
    d)
      bootDiskType=$OPTARG
      ;;
    D)
      delete=true
      ;;
    r)
      reuseLedger=true
      ;;
    e)
      skipCreate=true
      ;;
    s)
      skipStart=true
      ;;
    x)
      externalNode=true
      ;;
    f)
      failOnValidatorBootupFailure=false
      ;;
    u)
      blockstreamer=true
      ;;
    S)
      stopNetwork=true
      ;;
    w)
      fetchLogs=false
      deployUpdateManifest=false
      ;;
    *)
      # usage adds the "Error: " prefix itself.
      usage "unhandled option: $opt"
      ;;
    esac
  done
}

parse_args "$@"

# -p, -C and at least one -z are mandatory. Each check passes a message so
# that usage reports the problem and exits non-zero.
[[ -n $netName ]] || usage "Network name not specified"
[[ -n $cloudProvider ]] || usage "Cloud provider not specified"
[[ -n ${zone[*]} ]] || usage "At least one zone must be specified"

#######################################
# EXIT/INT handler: publishes the logs the run left in net/log, then exits
# with the status that was current when the handler was entered.
# Globals:   none written
# Arguments: none
# Side effects: renames net/log to net/log-deploy; for every regular file in
#            it, calls upload-ci-artifact and prints the file's tail
# Exits:     with the status captured on entry (never returns)
#######################################
shutdown() {
  local exitcode=$? # must come first, before any other command resets $?
  local logfile

  # Disarm both traps: the exit below would otherwise fire the EXIT trap and
  # run this handler a second time when it was entered via INT.
  trap - EXIT INT

  # Log collection is best-effort; keep going if a step fails.
  set +e

  if [[ -d net/log ]]; then
    mv net/log net/log-deploy
    for logfile in net/log-deploy/*; do
      if [[ -f $logfile ]]; then
        upload-ci-artifact "$logfile"
        tail "$logfile"
      fi
    done
  fi
  exit $exitcode
}

# Start clean: remove logs left by a previous run, both the live directory and
# the net/log-deploy copy that shutdown renames it to. A stale net/log-deploy
# would make that rename nest the new logs inside the old directory.
rm -rf net/log net/log-deploy
trap shutdown EXIT INT

set -x

# Build the zone options to pass to $cloudProvider.sh: "-z zone1 -z zone2 ..."
# Each element holds a complete "-z zone" pair, so the array has to be
# expanded unquoted wherever it is used to split into separate words.
zone_args=()
for val in "${zone[@]}"; do
  zone_args+=("-z $val")
done

# Stopping acts on nodes that already exist, so skip creation.
if $stopNetwork; then
  skipCreate=true
fi

# Deletion is carried out inside the create branch below, so -D turns
# creation back on regardless of -e or -S.
if $delete; then
  skipCreate=false
fi
# Create the network, or, when creation is skipped, only regenerate the
# configuration for nodes that already exist.
if ! $skipCreate; then
  # Always delete first so that create starts from nothing.
  echo "--- $cloudProvider.sh delete"
  delete_args=(-p "$netName")
  # externalNode is always "true" or "false", never empty, so it has to be
  # evaluated as a command; a ${externalNode:+-x} expansion would add -x
  # unconditionally.
  if $externalNode; then
    delete_args+=(-x)
  fi
  # shellcheck disable=SC2068 # zone_args elements must word-split into "-z" "zone"
  time net/"$cloudProvider".sh delete ${zone_args[@]} "${delete_args[@]}"

  # With -D the job is done once the delete has run.
  if $delete; then
    exit 0
  fi

  echo "--- $cloudProvider.sh create"
  create_args=(
    -p "$netName"
    -a "$bootstrapFullNodeAddress"
    -c "$clientNodeCount"
    -n "$additionalFullNodeCount"
  )
  # shellcheck disable=SC2206 # zone_args elements must word-split into "-z" "zone"
  create_args+=(${zone_args[@]})

  if $blockstreamer; then
    create_args+=(-u)
  fi

  if [[ -n $bootDiskType ]]; then
    create_args+=(-d "$bootDiskType")
  fi

  if $enableGpu; then
    # Plain -g unless -G supplied an explicit machine type.
    if [[ -z $bootstrapFullNodeMachineType ]]; then
      create_args+=(-g)
    else
      create_args+=(-G "$bootstrapFullNodeMachineType")
    fi
  fi

  if ! $leaderRotation; then
    create_args+=(-b)
  fi

  if $publicNetwork; then
    create_args+=(-P)
  fi

  if $externalNode; then
    create_args+=(-x)
  fi

  if ! $failOnValidatorBootupFailure; then
    create_args+=(-f)
  fi

  time net/"$cloudProvider".sh create "${create_args[@]}"
else
  echo "--- $cloudProvider.sh config"
  config_args=(
    -p "$netName"
  )
  # shellcheck disable=SC2206 # zone_args elements must word-split into "-z" "zone"
  config_args+=(${zone_args[@]})

  if $publicNetwork; then
    config_args+=(-P)
  fi

  if $externalNode; then
    config_args+=(-x)
  fi

  if ! $failOnValidatorBootupFailure; then
    config_args+=(-f)
  fi

  time net/"$cloudProvider".sh config "${config_args[@]}"
fi
net/init-metrics.sh -e

echo "+++ $cloudProvider.sh info"
net/"$cloudProvider".sh info

# -S: stop the network software and finish; nothing gets started.
if $stopNetwork; then
  echo --- net.sh stop
  time net/net.sh stop
  exit 0
fi

ok=true
if ! $skipStart; then
  # Run the start in a subshell and record a failure in $ok instead of
  # aborting, so the logs can still be fetched afterwards.
  (
    if $skipCreate; then
      # TODO: Enable rolling updates
      #op=update
      op=restart
    else
      op=start
    fi
    echo "--- net.sh $op"
    args=("$op" -t "$tarChannelOrTag")

    if ! $publicNetwork; then
      args+=(-o rejectExtraNodes)
    fi
    if [[ -n $NO_VALIDATOR_SANITY ]]; then
      args+=(-o noValidatorSanity)
    fi
    if [[ -n $NO_LEDGER_VERIFY ]]; then
      args+=(-o noLedgerVerify)
    fi

    if $reuseLedger; then
      args+=(-r)
    fi

    if ! $failOnValidatorBootupFailure; then
      args+=(-F)
    fi

    # The update-manifest keypair is a secret. Pause command tracing while it
    # is tested and written out so its value never appears in the trace log,
    # then restore tracing if it was on.
    xtraceWasOn=false
    if [[ $- == *x* ]]; then
      xtraceWasOn=true
      set +x
    fi
    # shellcheck disable=SC2154 # SOLANA_INSTALL_UPDATE_MANIFEST_KEYPAIR_x86_64_unknown_linux_gnu comes from .buildkite/env/
    if $deployUpdateManifest && [[ -n $SOLANA_INSTALL_UPDATE_MANIFEST_KEYPAIR_x86_64_unknown_linux_gnu ]]; then
      printf '%s\n' "$SOLANA_INSTALL_UPDATE_MANIFEST_KEYPAIR_x86_64_unknown_linux_gnu" > update_manifest_keypair.json
      args+=(-i update_manifest_keypair.json)
    fi
    if $xtraceWasOn; then
      set -x
    fi

    time net/net.sh "${args[@]}"
  ) || ok=false

  if $fetchLogs; then
    net/net.sh logs
  fi
fi

# The script's exit status reflects whether the start succeeded.
$ok