Add on-demand functionality to all testnets (#4003)

* Add on-demand functionality to all testnets
This commit is contained in:
Dan Albert 2019-04-26 10:02:23 -06:00 committed by GitHub
parent 1684a7bd18
commit f5f5281f85
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 148 additions and 26 deletions

View File

@ -53,6 +53,9 @@ Deploys a CD testnet
-D - Delete the network -D - Delete the network
-r - Reuse existing node/ledger configuration from a -r - Reuse existing node/ledger configuration from a
previous |start| (ie, don't run ./multinode-demo/setup.sh). previous |start| (ie, don't run ./multinode-demo/setup.sh).
-x - External node. Default: false
-s - Skip start. Nodes will still be created or configured, but network software will not be started.
-S - Stop network software without tearing down nodes.
Note: the SOLANA_METRICS_CONFIG environment variable is used to configure Note: the SOLANA_METRICS_CONFIG environment variable is used to configure
metrics metrics
@ -62,7 +65,7 @@ EOF
zone=() zone=()
while getopts "h?p:Pn:c:t:gG:a:Dbd:rusxz:p:C:" opt; do while getopts "h?p:Pn:c:t:gG:a:Dbd:rusxz:p:C:S" opt; do
case $opt in case $opt in
h | \?) h | \?)
usage usage
@ -126,6 +129,9 @@ while getopts "h?p:Pn:c:t:gG:a:Dbd:rusxz:p:C:" opt; do
u) u)
blockstreamer=true blockstreamer=true
;; ;;
S)
stopNetwork=true
;;
*) *)
usage "Error: unhandled option: $opt" usage "Error: unhandled option: $opt"
;; ;;
@ -162,6 +168,11 @@ for val in "${zone[@]}"; do
zone_args+=("-z $val") zone_args+=("-z $val")
done done
if $stopNetwork; then
skipSetup=true
fi
# Create the network
if ! $skipSetup; then if ! $skipSetup; then
echo "--- $cloudProvider.sh delete" echo "--- $cloudProvider.sh delete"
# shellcheck disable=SC2068 # shellcheck disable=SC2068
@ -227,6 +238,12 @@ net/init-metrics.sh -e
echo "+++ $cloudProvider.sh info" echo "+++ $cloudProvider.sh info"
net/"$cloudProvider".sh info net/"$cloudProvider".sh info
if $stopNetwork; then
echo --- net.sh stop
time net/net.sh stop
exit 0
fi
echo --- net.sh start echo --- net.sh start
maybeRejectExtraNodes= maybeRejectExtraNodes=
if ! $publicNetwork; then if ! $publicNetwork; then

View File

@ -42,20 +42,28 @@ steps:
value: "testnet-beta" value: "testnet-beta"
- label: "testnet-beta-perf" - label: "testnet-beta-perf"
value: "testnet-beta-perf" value: "testnet-beta-perf"
- label: "testnet-demo"
value: "testnet-demo"
- select: "Operation" - select: "Operation"
key: "testnet-operation" key: "testnet-operation"
default: "sanity-or-restart" default: "sanity-or-restart"
options: options:
- label: "Sanity check. Restart network on failure" - label: "Create new testnet nodes and then start network software. If nodes are already created, they will be deleted and then re-created."
value: "sanity-or-restart" value: "create-and-start"
- label: "Start (or restart) the network" - label: "Create new testnet nodes, but do not start network software. If nodes are already created, they will be deleted and then re-created."
value: "create"
- label: "Start network software on already-created testnet nodes. If software is already running, it will be restarted."
value: "start" value: "start"
- label: "Update the network software. Restart network on failure" - label: "Stop network software without deleting testnet nodes"
value: "update-or-restart"
- label: "Stop the network"
value: "stop" value: "stop"
- label: "Update the network software. Restart network software on failure"
value: "update-or-restart"
- label: "Sanity check. Restart network software on failure"
value: "sanity-or-restart"
- label: "Sanity check only" - label: "Sanity check only"
value: "sanity" value: "sanity"
- label: "Delete all nodes on a testnet. Network software will be stopped first if it is running"
value: "delete"
- command: "ci/$(basename "$0")" - command: "ci/$(basename "$0")"
agents: agents:
- "queue=$BUILDKITE_AGENT_META_DATA_QUEUE" - "queue=$BUILDKITE_AGENT_META_DATA_QUEUE"
@ -88,6 +96,10 @@ testnet|testnet-perf)
CHANNEL_BRANCH=$STABLE_CHANNEL CHANNEL_BRANCH=$STABLE_CHANNEL
: "${TESTNET_DB_HOST:=https://clocktower-f1d56615.influxcloud.net:8086}" : "${TESTNET_DB_HOST:=https://clocktower-f1d56615.influxcloud.net:8086}"
;; ;;
testnet-demo)
CHANNEL_OR_TAG=beta
CHANNEL_BRANCH=$BETA_CHANNEL
;;
*) *)
echo "Error: Invalid TESTNET=$TESTNET" echo "Error: Invalid TESTNET=$TESTNET"
exit 1 exit 1
@ -105,6 +117,7 @@ source scripts/configure-metrics.sh
if [[ -n $TESTNET_TAG ]]; then if [[ -n $TESTNET_TAG ]]; then
CHANNEL_OR_TAG=$TESTNET_TAG CHANNEL_OR_TAG=$TESTNET_TAG
else else
if [[ $BUILDKITE_BRANCH != "$CHANNEL_BRANCH" ]]; then if [[ $BUILDKITE_BRANCH != "$CHANNEL_BRANCH" ]]; then
( (
cat <<EOF cat <<EOF
@ -197,15 +210,37 @@ sanity() {
esac esac
} }
deploy() {
declare maybeCreate=$1
declare maybeStart=$2
declare maybeStop=$3
declare maybeDelete=$4
start() { # Create or recreate the nodes
declare maybeDelete=$1 if [[ -z $maybeCreate ]]; then
if [[ -z $maybeDelete ]]; then skipCreate=skip
echo "--- start $TESTNET"
else else
skipCreate=""
echo "--- create $TESTNET"
fi
# Start or restart the network software on the nodes
if [[ -z $maybeStart ]]; then
skipStart=skip
else
skipStart=""
echo "--- start $TESTNET"
fi
# Stop the nodes
if [[ -n $maybeStop ]]; then
echo "--- stop $TESTNET" echo "--- stop $TESTNET"
fi fi
declare maybeReuseLedger=$2
# Delete the nodes
if [[ -n $maybeDelete ]]; then
echo "--- delete $TESTNET"
fi
case $TESTNET in case $TESTNET in
testnet-edge) testnet-edge)
@ -213,7 +248,9 @@ start() {
set -x set -x
ci/testnet-deploy.sh -p edge-testnet-solana-com -C ec2 -z us-west-1a \ ci/testnet-deploy.sh -p edge-testnet-solana-com -C ec2 -z us-west-1a \
-t "$CHANNEL_OR_TAG" -n 3 -c 0 -u -P -a eipalloc-0ccd4f2239886fa94 \ -t "$CHANNEL_OR_TAG" -n 3 -c 0 -u -P -a eipalloc-0ccd4f2239886fa94 \
${maybeReuseLedger:+-r} \ ${skipCreate:+-r} \
${skipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D} ${maybeDelete:+-D}
) )
;; ;;
@ -226,7 +263,9 @@ start() {
ci/testnet-deploy.sh -p edge-perf-testnet-solana-com -C ec2 -z us-west-2b \ ci/testnet-deploy.sh -p edge-perf-testnet-solana-com -C ec2 -z us-west-2b \
-g -t "$CHANNEL_OR_TAG" -c 2 \ -g -t "$CHANNEL_OR_TAG" -c 2 \
-b \ -b \
${maybeReuseLedger:+-r} \ ${skipCreate:+-r} \
${skipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D} ${maybeDelete:+-D}
) )
;; ;;
@ -246,19 +285,26 @@ start() {
done done
if [[ -n $EC2_NODE_COUNT ]]; then if [[ -n $EC2_NODE_COUNT ]]; then
if [[ -n $GCE_NODE_COUNT ]] || [[ -n $skipStart ]]; then
maybeSkipStart="skip"
fi
# shellcheck disable=SC2068 # shellcheck disable=SC2068
ci/testnet-deploy.sh -p beta-testnet-solana-com -C ec2 ${EC2_ZONE_ARGS[@]} \ ci/testnet-deploy.sh -p beta-testnet-solana-com -C ec2 ${EC2_ZONE_ARGS[@]} \
-t "$CHANNEL_OR_TAG" -n "$EC2_NODE_COUNT" -c 0 -u -P -a eipalloc-0f286cf8a0771ce35 \ -t "$CHANNEL_OR_TAG" -n "$EC2_NODE_COUNT" -c 0 -u -P -a eipalloc-0f286cf8a0771ce35 \
${maybeReuseLedger:+-r} \ ${skipCreate:+-r} \
${maybeDelete:+-D} \ ${maybeSkipStart:+-s} \
${GCE_NODE_COUNT:+-s} ${maybeStop:+-S} \
${maybeDelete:+-D}
fi fi
if [[ -n $GCE_NODE_COUNT ]]; then if [[ -n $GCE_NODE_COUNT ]]; then
# shellcheck disable=SC2068 # shellcheck disable=SC2068
ci/testnet-deploy.sh -p beta-testnet-solana-com -C gce ${GCE_ZONE_ARGS[@]} \ ci/testnet-deploy.sh -p beta-testnet-solana-com -C gce ${GCE_ZONE_ARGS[@]} \
-t "$CHANNEL_OR_TAG" -n "$GCE_NODE_COUNT" -c 0 -P \ -t "$CHANNEL_OR_TAG" -n "$GCE_NODE_COUNT" -c 0 -P \
${maybeReuseLedger:+-r} \ ${skipCreate:+-r} \
${skipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D} \ ${maybeDelete:+-D} \
${EC2_NODE_COUNT:+-x} ${EC2_NODE_COUNT:+-x}
fi fi
@ -273,7 +319,9 @@ start() {
ci/testnet-deploy.sh -p beta-perf-testnet-solana-com -C ec2 -z us-west-2b \ ci/testnet-deploy.sh -p beta-perf-testnet-solana-com -C ec2 -z us-west-2b \
-g -t "$CHANNEL_OR_TAG" -c 2 \ -g -t "$CHANNEL_OR_TAG" -c 2 \
-b \ -b \
${maybeReuseLedger:+-r} \ ${skipCreate:+-r} \
${skipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D} ${maybeDelete:+-D}
) )
;; ;;
@ -284,7 +332,9 @@ start() {
ci/testnet-deploy.sh -p testnet-solana-com -C ec2 -z us-west-1a \ ci/testnet-deploy.sh -p testnet-solana-com -C ec2 -z us-west-1a \
-t "$CHANNEL_OR_TAG" -n 3 -c 0 -u -P -a eipalloc-0fa502bf95f6f18b2 \ -t "$CHANNEL_OR_TAG" -n 3 -c 0 -u -P -a eipalloc-0fa502bf95f6f18b2 \
-b \ -b \
${maybeReuseLedger:+-r} \ ${skipCreate:+-r} \
${skipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D} ${maybeDelete:+-D}
#ci/testnet-deploy.sh -p testnet-solana-com -C gce -z us-east1-c \ #ci/testnet-deploy.sh -p testnet-solana-com -C gce -z us-east1-c \
# -t "$CHANNEL_OR_TAG" -n 3 -c 0 -P -a testnet-solana-com \ # -t "$CHANNEL_OR_TAG" -n 3 -c 0 -P -a testnet-solana-com \
@ -303,7 +353,9 @@ start() {
-t "$CHANNEL_OR_TAG" -c 2 \ -t "$CHANNEL_OR_TAG" -c 2 \
-b \ -b \
-d pd-ssd \ -d pd-ssd \
${maybeReuseLedger:+-r} \ ${skipCreate:+-r} \
${skipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D} ${maybeDelete:+-D}
#ci/testnet-deploy.sh -p perf-testnet-solana-com -C ec2 -z us-east-1a \ #ci/testnet-deploy.sh -p perf-testnet-solana-com -C ec2 -z us-east-1a \
# -g \ # -g \
@ -312,6 +364,13 @@ start() {
# ${maybeDelete:+-D} # ${maybeDelete:+-D}
) )
;; ;;
testnet-demo)
(
set -x
echo "Demo net not yet implemented!"
exit 1
)
;;
*) *)
echo "Error: Invalid TESTNET=$TESTNET" echo "Error: Invalid TESTNET=$TESTNET"
exit 1 exit 1
@ -319,13 +378,49 @@ start() {
esac esac
} }
# Marker files kept in $HOME, one pair per testnet name.
# The *.is_created marker is touched after a create and is what start()
# checks for; the *.is_started marker is touched after a start.
CREATED_LOCKFILE=$HOME/$TESTNET.is_created
STARTED_LOCKFILE=$HOME/$TESTNET.is_started
# Recreate the nodes for $TESTNET and launch the network software on them.
# Both marker files are cleared before deploy runs and touched again after
# it returns.
create-and-start() {
  declare marker

  for marker in "${CREATED_LOCKFILE}" "${STARTED_LOCKFILE}"; do
    rm -f "${marker}"
  done

  deploy create start

  for marker in "${CREATED_LOCKFILE}" "${STARTED_LOCKFILE}"; do
    touch "${marker}"
  done
}
# Recreate the nodes for $TESTNET without starting the network software.
# Any previous created/started markers are dropped first; only the created
# marker is written back after deploy returns.
create() {
  declare stale

  for stale in "${CREATED_LOCKFILE}" "${STARTED_LOCKFILE}"; do
    rm -f "${stale}"
  done

  deploy create
  touch "${CREATED_LOCKFILE}"
}
# Start (or restart) the network software on nodes that already exist.
# Without the created marker there is nothing to start on, so the script
# prints a hint and exits with status 1. Otherwise the started marker is
# cleared, deploy is invoked with only its start argument set, and the
# marker is touched again.
start() {
  if [[ ! -f ${CREATED_LOCKFILE} ]]; then
    echo "Unable to start ${TESTNET}. Are the nodes created?
Re-run ci/testnet-manager.sh with \$TESTNET_OP=create or \$TESTNET_OP=create-and-start"
    exit 1
  fi

  rm -f "${STARTED_LOCKFILE}"
  deploy "" start
  touch "${STARTED_LOCKFILE}"
}
# Stop the network software on $TESTNET without deleting its nodes.
#
# deploy takes (maybeCreate, maybeStart, maybeStop, maybeDelete) and only
# forwards -S to ci/testnet-deploy.sh when its third argument is non-empty.
# The create and start arguments stay empty so that deploy passes -r and -s
# (no node re-creation, no software start); the third argument must be set,
# otherwise the stop request never reaches testnet-deploy.sh.
# The started marker is removed afterwards; the created marker is left alone
# because the nodes still exist.
stop() {
  deploy "" "" stop
  rm -f "${STARTED_LOCKFILE}"
}
# Delete every node of $TESTNET.
# Only deploy's fourth positional argument (maybeDelete) is non-empty, which
# deploy forwards to ci/testnet-deploy.sh as -D. The empty create and start
# arguments make deploy set skipCreate/skipStart, forwarded as -r and -s.
# Both marker files are removed once deploy has returned.
delete() {
deploy "" "" "" delete
rm -f "${CREATED_LOCKFILE}"
rm -f "${STARTED_LOCKFILE}"
} }
case $TESTNET_OP in case $TESTNET_OP in
sanity) create-and-start)
sanity create-and-start
;;
create)
create
;; ;;
start) start)
start start
@ -333,8 +428,14 @@ start)
stop) stop)
stop stop
;; ;;
sanity)
sanity
;;
delete)
delete
;;
update-or-restart) update-or-restart)
if start "" update; then if start; then
echo Update successful echo Update successful
else else
echo "+++ Update failed, restarting the network" echo "+++ Update failed, restarting the network"
@ -352,7 +453,7 @@ sanity-or-restart)
# TODO: Restore attempt to restart the cluster before recreating it # TODO: Restore attempt to restart the cluster before recreating it
# See https://github.com/solana-labs/solana/issues/3774 # See https://github.com/solana-labs/solana/issues/3774
if false; then if false; then
if start "" update; then if start; then
echo Update successful echo Update successful
else else
echo "+++ Update failed, restarting the network" echo "+++ Update failed, restarting the network"
@ -364,6 +465,10 @@ sanity-or-restart)
fi fi
fi fi
;; ;;
*)
echo "Error: Invalid TESTNET_OP=$TESTNET_OP"
exit 1
;;
esac esac
echo --- fin echo --- fin