From 0a3972271957304e8f0dfc66fe60bd66e0e20777 Mon Sep 17 00:00:00 2001
From: Pankaj Garg
Date: Fri, 5 Oct 2018 16:32:05 -0700
Subject: [PATCH] Add support to trigger testnet from a PR (#1434)

* Add support for different node counts
* Update variable names
* Delete network even after failures
* Add array for node counts
* Changed number of nodes to a space separated string of numbers
* Adjust number of nodes
* Snap will not be published if the env variable DO_NOT_PUBLISH_SNAP is set
* Address review comments
* Replaced influx db URL
---
  Reviewer notes (text between the three-dash line and the diff is not part
  of the commit message):

  What the patch does
    - ci/snap.sh skips the "snapcraft push" step when DO_NOT_PUBLISH_SNAP
      is set to a non-empty value.
    - ci/solana-testnet.yml is a three-step pipeline: build the snap, run
      ci/testnet-automation.sh, then run ci/testnet-automation-cleanup.sh
      behind a wait step that has continue_on_failure enabled.
    - ci/testnet-automation.sh calls launchTestnet once for every entry of
      the space separated NUMBER_OF_NODES list (default "10 25 50 100").
      Each call creates the testnet, starts it from the downloaded snap,
      sleeps ITERATION_WAIT seconds (default 300), queries InfluxDB for
      mean/max TPS and mean/max/99th percentile finality, and uploads the
      response as TPS<nodeCount>.log.
    - net/net.sh prints the validator's own address instead of $leaderIp
      in the "Starting validator" banner.

  Changes made during review
    - The patch had been flattened onto three physical lines. Line
      structure was rebuilt from the hunk headers and the diffstat.
      Indentation (two spaces assumed) and the position of the blank
      context lines in the ci/snap.sh and net/net.sh hunks are
      reconstructions; confirm against the upstream commit before applying.
    - ci/testnet-automation.sh: the five InfluxDB queries used a fixed
      "now() - 300s" window although the run length is the configurable
      ITERATION_WAIT. The window now follows ITERATION_WAIT so a shorter
      run does not pick up samples recorded before it started and a longer
      run is not truncated. ITERATION_WAIT must be a plain number of
      seconds. The file still has 75 lines; its post-image blob id on the
      "index" line was not regenerated.

 ci/snap.sh                       |  6 ++-
 ci/solana-testnet.yml            | 18 ++++++++
 ci/testnet-automation-cleanup.sh |  9 ++++
 ci/testnet-automation.sh         | 75 ++++++++++++++++++++++++++++++++
 net/net.sh                       |  2 +-
 5 files changed, 107 insertions(+), 3 deletions(-)
 create mode 100755 ci/solana-testnet.yml
 create mode 100755 ci/testnet-automation-cleanup.sh
 create mode 100755 ci/testnet-automation.sh

diff --git a/ci/snap.sh b/ci/snap.sh
index e2088d4fb..b44aaa0e9 100755
--- a/ci/snap.sh
+++ b/ci/snap.sh
@@ -57,5 +57,7 @@ snapcraft
 source ci/upload_ci_artifact.sh
 upload_ci_artifact solana_*.snap
 
-echo --- publish: $CHANNEL channel
-$DRYRUN snapcraft push solana_*.snap --release $CHANNEL
+if [[ -z $DO_NOT_PUBLISH_SNAP ]]; then
+  echo --- publish: $CHANNEL channel
+  $DRYRUN snapcraft push solana_*.snap --release $CHANNEL
+fi
diff --git a/ci/solana-testnet.yml b/ci/solana-testnet.yml
new file mode 100755
index 000000000..a24347349
--- /dev/null
+++ b/ci/solana-testnet.yml
@@ -0,0 +1,18 @@
+steps:
+  - command: "ci/snap.sh"
+    label: "create snap"
+
+  - wait
+
+  - command: "ci/testnet-automation.sh"
+    label: "run testnet"
+    agents:
+      - "queue=metrics"
+
+  - wait: ~
+    continue_on_failure: true
+
+  - command: "ci/testnet-automation-cleanup.sh"
+    label: "delete testnet"
+    agents:
+      - "queue=metrics"
diff --git a/ci/testnet-automation-cleanup.sh b/ci/testnet-automation-cleanup.sh
new file mode 100755
index 000000000..67307634e
--- /dev/null
+++ b/ci/testnet-automation-cleanup.sh
@@ -0,0 +1,9 @@
+#!/bin/bash -e
+
+cd "$(dirname "$0")/.."
+
+echo --- find testnet configuration
+net/gce.sh config -p testnet-automation
+
+echo --- delete testnet
+net/gce.sh delete -p testnet-automation
diff --git a/ci/testnet-automation.sh b/ci/testnet-automation.sh
new file mode 100755
index 000000000..3638452f3
--- /dev/null
+++ b/ci/testnet-automation.sh
@@ -0,0 +1,75 @@
+#!/bin/bash -e
+
+cd "$(dirname "$0")/.."
+
+echo --- downloading snap from build artifacts
+buildkite-agent artifact download "solana_*.snap" .
+
+# shellcheck disable=SC1091
+source ci/upload_ci_artifact.sh
+
+[[ -n $ITERATION_WAIT ]] || ITERATION_WAIT=300
+[[ -n $NUMBER_OF_NODES ]] || NUMBER_OF_NODES="10 25 50 100"
+
+launchTestnet() {
+  declare nodeCount=$1
+  echo --- setup "$nodeCount" node test
+  net/gce.sh create \
+    -n "$nodeCount" -c 2 \
+    -G "n1-standard-16 --accelerator count=2,type=nvidia-tesla-v100" \
+    -p testnet-automation -z us-west1-b
+
+  echo --- configure database
+  net/init-metrics.sh -e
+
+  echo --- start "$nodeCount" node test
+  net/net.sh start -o noValidatorSanity -S solana_*.snap
+
+  echo --- wait "$ITERATION_WAIT" seconds to complete test
+  sleep "$ITERATION_WAIT"
+
+  declare q_mean_tps='
+    SELECT round(mean("sum_count")) FROM (
+      SELECT sum("count") AS "sum_count"
+        FROM "testnet-automation"."autogen"."counter-banking_stage-process_transactions"
+        WHERE time > now() - '"$ITERATION_WAIT"'s GROUP BY time(1s)
+    )'
+
+  declare q_max_tps='
+    SELECT round(max("sum_count")) FROM (
+      SELECT sum("count") AS "sum_count"
+        FROM "testnet-automation"."autogen"."counter-banking_stage-process_transactions"
+        WHERE time > now() - '"$ITERATION_WAIT"'s GROUP BY time(1s)
+    )'
+
+  declare q_mean_finality='
+    SELECT round(mean("duration_ms"))
+      FROM "testnet-automation"."autogen"."leader-finality"
+      WHERE time > now() - '"$ITERATION_WAIT"'s'
+
+  declare q_max_finality='
+    SELECT round(max("duration_ms"))
+      FROM "testnet-automation"."autogen"."leader-finality"
+      WHERE time > now() - '"$ITERATION_WAIT"'s'
+
+  declare q_99th_finality='
+    SELECT round(percentile("duration_ms", 99))
+      FROM "testnet-automation"."autogen"."leader-finality"
+      WHERE time > now() - '"$ITERATION_WAIT"'s'
+
+  curl -G "https://metrics.solana.com:8086/query?u=${INFLUX_USERNAME}&p=${INFLUX_PASSWORD}" \
+    --data-urlencode "db=$INFLUX_DATABASE" \
+    --data-urlencode "q=$q_mean_tps;$q_max_tps;$q_mean_finality;$q_max_finality;$q_99th_finality" \
+    >>TPS"$nodeCount".log
+
+  upload_ci_artifact TPS"$nodeCount".log
+}
+
+# This is needed, because buildkite doesn't let us define an array of numbers.
+# The array is defined as a space separated string of numbers
+# shellcheck disable=SC2206
+nodes_count_array=($NUMBER_OF_NODES)
+
+for n in "${nodes_count_array[@]}"; do
+  launchTestnet "$n"
+done
diff --git a/net/net.sh b/net/net.sh
index 19868fc99..1499f0087 100755
--- a/net/net.sh
+++ b/net/net.sh
@@ -159,7 +159,7 @@ startValidator() {
   declare ipAddress=$1
   declare logFile="$netLogDir/validator-$ipAddress.log"
 
-  echo "--- Starting validator: $leaderIp"
+  echo "--- Starting validator: $ipAddress"
   echo "start log: $logFile"
   (
     set -x