Push perf test results to slack app (#6371)

* Add script to publish testnet results to slack

* Obscure webhook URL

* fixup

* Replace read with cat redirection

* Turn back on net restart

* Pick nits

* Make symlink before trying to delete its contents

* Display test config in Slack and pick Trent's nit to avoid a possible rm -rf /*

* Clean up results print

* Minor nits

* Turn the test settings back up to 11

* typo

* Shellcheck

* Just a few more fields

* fix payload formatting

* Del clear-config.sh

* Mount secondary

* Add commit SHA link and Grafana time range URL

* Add fancy buttons instead of text URLs

* Tighten up test config display

* Fixup display nits

* chellsheck

* Rebase and fix typo
This commit is contained in:
Dan Albert 2019-10-21 20:00:17 -04:00 committed by GitHub
parent d1b18a5060
commit 00809a67c0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 167 additions and 27 deletions

View File

@ -113,11 +113,14 @@ clear_config_dir() {
SECONDARY_DISK_MOUNT_POINT=/mnt/extra-disk
# Relocate the config directory onto the secondary disk when one is available.
#
# If $SECONDARY_DISK_MOUNT_POINT is an existing, writable directory, create
# <mount>/config, remove whatever currently sits at $SOLANA_CONFIG_DIR and
# replace it with a symlink to <mount>/config.  Otherwise nothing is touched.
#
# Globals:  SECONDARY_DISK_MOUNT_POINT (read), SOLANA_CONFIG_DIR (read)
# Returns:  exit status of the subshell (0 when there is no usable mount).
setup_secondary_mount() {
  # Run in a subshell so the `set -x` tracing does not leak into the caller.
  (
    set -x
    if [[ -d "$SECONDARY_DISK_MOUNT_POINT" && -w "$SECONDARY_DISK_MOUNT_POINT" ]]; then
      mkdir -p "$SECONDARY_DISK_MOUNT_POINT/config"
      # ${VAR:?} aborts the subshell rather than handing rm -rf an empty path.
      rm -rf "${SOLANA_CONFIG_DIR:?}"
      # -T: treat the link name as a file, never descend into an existing dir.
      ln -sfT "$SECONDARY_DISK_MOUNT_POINT/config" "$SOLANA_CONFIG_DIR"
    fi
  )
}

View File

@ -152,6 +152,7 @@ EOF
set -x
if [[ $skipSetup != true ]]; then
clear_config_dir "$SOLANA_CONFIG_DIR"
setup_secondary_mount
if [[ -n $internalNodesLamports ]]; then
echo "---" >> config/fullnode-balances.yml
@ -248,6 +249,7 @@ EOF
fi
if [[ $skipSetup != true ]]; then
clear_config_dir "$SOLANA_CONFIG_DIR"
setup_secondary_mount
[[ -z $internalNodesLamports ]] || net/scripts/rsync-retry.sh -vPrc \
"$entrypointIp":~/solana/config/fullnode-"$nodeIndex"-identity.json config/fullnode-identity.json
fi

View File

@ -2,13 +2,14 @@ steps:
- command: "system-test/testnet-performance/testnet-automation.sh"
label: "COLO performance testnet GPU enabled"
env:
UPLOAD_RESULTS_TO_SLACK: "true"
CLOUD_PROVIDER: "colo"
TESTNET_TAG: "colo-edge-perf-gpu-enabled"
RAMP_UP_TIME: 60
TEST_DURATION: 300
RAMP_UP_TIME: 0
TEST_DURATION: 600
NUMBER_OF_VALIDATOR_NODES: 4
NUMBER_OF_CLIENT_NODES: 2
CLIENT_OPTIONS: "bench-tps=2=--tx_count 80000 --thread-batch-sleep-ms 1000"
CLIENT_OPTIONS: "bench-tps=2=--tx_count 15000 --thread-batch-sleep-ms 250"
ADDITIONAL_FLAGS: ""
agents:
- "queue=colo-deploy"

View File

@ -2,6 +2,7 @@ steps:
- command: "system-test/testnet-performance/testnet-automation.sh"
label: "GCE performance testnets CPU ONLY"
env:
UPLOAD_RESULTS_TO_SLACK: "true"
CLOUD_PROVIDER: "gce"
TESTNET_TAG: "gce-edge-perf-cpu-only"
RAMP_UP_TIME: 60

View File

@ -2,14 +2,15 @@ steps:
- command: "system-test/testnet-performance/testnet-automation.sh"
label: "GCE performance testnets GPU ENABLED"
env:
UPLOAD_RESULTS_TO_SLACK: "true"
CLOUD_PROVIDER: "gce"
TESTNET_TAG: "gce-edge-perf-gpu-enabled"
RAMP_UP_TIME: 60
TEST_DURATION: 300
NUMBER_OF_VALIDATOR_NODES: 10
RAMP_UP_TIME: 0
TEST_DURATION: 600
NUMBER_OF_VALIDATOR_NODES: 50
VALIDATOR_NODE_MACHINE_TYPE: "--machine-type n1-standard-16 --accelerator count=2,type=nvidia-tesla-v100"
NUMBER_OF_CLIENT_NODES: 1
CLIENT_OPTIONS: "bench-tps=1=--tx_count 80000 --thread-batch-sleep-ms 1000"
NUMBER_OF_CLIENT_NODES: 2
CLIENT_OPTIONS: "bench-tps=2=--tx_count 15000 --thread-batch-sleep-ms 250"
TESTNET_ZONES: "us-west1-a,us-west1-b,us-central1-a,europe-west4-a"
ADDITIONAL_FLAGS: ""
agents:

View File

@ -2,6 +2,9 @@
import sys, json


def format_results(data):
    """Turn a decoded query response into printable summary lines.

    data is the JSON object read from stdin.  For every entry of
    data['results'] that carries a 'series', one "<column name>: <value>"
    line is produced from the second column and the first row of the first
    series.  Entries without a 'series' (for example a statement that
    returned no points or reported an error) produce a diagnostic line
    instead of raising KeyError.  When the 'results' key is missing
    altogether, the single "No results returned" line is returned.
    """
    if 'results' not in data:
        return ["No results returned from CURL request"]

    lines = []
    for result in data['results']:
        series = result.get('series')
        if not series:
            lines.append('query %s: %s' % (result.get('statement_id', '?'),
                                           result.get('error', 'no data returned')))
            continue
        first = series[0]
        # %-formatting avoids the py2-only `unicode.encode() + str` concatenation.
        lines.append('%s: %s' % (first['columns'][1], first['values'][0][1]))
    return lines


def main():
    """Read the JSON response from stdin and print one summary line per result."""
    for line in format_results(json.load(sys.stdin)):
        # Single-argument print(...) behaves the same on Python 2 and Python 3.
        print(line)


if __name__ == '__main__':
    main()

View File

@ -7,10 +7,9 @@ set -e
# TODO: Remove all default values, force explicitness in the testcase definition
[[ -n $TEST_DURATION ]] || TEST_DURATION=300
[[ -n $RAMP_UP_TIME ]] || RAMP_UP_TIME=60
[[ -n $RAMP_UP_TIME ]] || RAMP_UP_TIME=0
[[ -n $NUMBER_OF_VALIDATOR_NODES ]] || NUMBER_OF_VALIDATOR_NODES=2
[[ -n $NUMBER_OF_CLIENT_NODES ]] || NUMBER_OF_CLIENT_NODES=1
[[ -n $TESTNET_ZONES ]] || TESTNET_ZONES="us-west1-a"
function collect_logs {
echo --- collect logs from remote nodes
@ -26,6 +25,11 @@ function collect_logs {
}
function cleanup_testnet {
FINISH_UNIX_MSECS="$(($(date +%s%N)/1000000))"
if [[ -n $UPLOAD_RESULTS_TO_SLACK ]] ; then
upload_results_to_slack
fi
(
set +e
collect_logs
@ -101,9 +105,9 @@ launchTestnet() {
echo --- start "$NUMBER_OF_VALIDATOR_NODES" node test
if [[ -n $CHANNEL ]]; then
net/net.sh start -t "$CHANNEL" "$maybeClientOptions" "$CLIENT_OPTIONS"
net/net.sh restart -t "$CHANNEL" "$maybeClientOptions" "$CLIENT_OPTIONS"
else
net/net.sh start -T solana-release*.tar.bz2 "$maybeClientOptions" "$CLIENT_OPTIONS"
net/net.sh restart -T solana-release*.tar.bz2 "$maybeClientOptions" "$CLIENT_OPTIONS"
fi
echo --- wait "$RAMP_UP_TIME" seconds for network throughput to stabilize
@ -128,27 +132,27 @@ launchTestnet() {
)'
declare q_mean_confirmation='
SELECT round(mean("duration_ms")) as "mean_confirmation"
SELECT round(mean("duration_ms")) as "mean_confirmation_ms"
FROM "'$TESTNET_TAG'"."autogen"."validator-confirmation"
WHERE time > now() - '"$TEST_DURATION"'s'
declare q_max_confirmation='
SELECT round(max("duration_ms")) as "max_confirmation"
SELECT round(max("duration_ms")) as "max_confirmation_ms"
FROM "'$TESTNET_TAG'"."autogen"."validator-confirmation"
WHERE time > now() - '"$TEST_DURATION"'s'
declare q_99th_confirmation='
SELECT round(percentile("duration_ms", 99)) as "99th_confirmation"
SELECT round(percentile("duration_ms", 99)) as "99th_percentile_confirmation_ms"
FROM "'$TESTNET_TAG'"."autogen"."validator-confirmation"
WHERE time > now() - '"$TEST_DURATION"'s'
RESULTS_FILE="$TESTNET_TAG"_SUMMARY_STATS_"$NUMBER_OF_VALIDATOR_NODES".log
curl -G "${INFLUX_HOST}/query?u=ro&p=topsecret" \
--data-urlencode "db=${TESTNET_TAG}" \
--data-urlencode "q=$q_mean_tps;$q_max_tps;$q_mean_confirmation;$q_max_confirmation;$q_99th_confirmation" |
python system-test/testnet-performance/testnet-automation-json-parser.py >>"$RESULTS_FILE"
python system-test/testnet-performance/testnet-automation-json-parser.py >>"$RESULT_FILE"
upload-ci-artifact "$RESULTS_FILE"
RESULT_DETAILS=$(<"$RESULT_FILE")
upload-ci-artifact "$RESULT_FILE"
}
cd "$(dirname "$0")/../.."
@ -169,10 +173,33 @@ fi
# shellcheck disable=SC1091
source ci/upload-ci-artifact.sh
source system-test/testnet-performance/upload_results_to_slack.sh
# Each "maybe" variable expands to its flag only when the corresponding
# option string is set and non-empty; otherwise it is empty.
maybeClientOptions=${CLIENT_OPTIONS:+"-c"}
maybeMachineType=${VALIDATOR_NODE_MACHINE_TYPE:+"-G"}

# Split the comma-separated zone list into an array.
IFS=, read -r -a TESTNET_CLOUD_ZONES <<<"${TESTNET_ZONES}"

# Start from a clean results file; quoted and `--`-guarded so a tag with
# whitespace, glob characters or a leading dash cannot change what rm sees.
RESULT_FILE="$TESTNET_TAG"_SUMMARY_STATS_"$NUMBER_OF_VALIDATOR_NODES".log
rm -f -- "$RESULT_FILE"

# Placeholder reported when the run never gets far enough to replace it.
RESULT_DETAILS="Test failed to finish"

# Names of the settings to include in the displayed test configuration.
TEST_PARAMS_TO_DISPLAY=(
  CLOUD_PROVIDER
  NUMBER_OF_VALIDATOR_NODES
  VALIDATOR_NODE_MACHINE_TYPE
  NUMBER_OF_CLIENT_NODES
  CLIENT_OPTIONS
  TESTNET_ZONES
  TEST_DURATION
  ADDITIONAL_FLAGS
)

# Build "NAME = value | " for every listed setting that is non-empty;
# ${!i} is indirect expansion: the value of the variable named by $i.
TEST_CONFIGURATION=
for i in "${TEST_PARAMS_TO_DISPLAY[@]}"; do
  if [[ -n ${!i} ]]; then
    TEST_CONFIGURATION+="${i} = ${!i} | "
  fi
done
START_UNIX_MSECS="$(($(date +%s%N)/1000000))"
launchTestnet

View File

@ -0,0 +1,102 @@
# Escape a string so it can be embedded inside a JSON string literal.
# Handles backslash, double quote, newline, carriage return and tab; other
# control characters are passed through unchanged.
_slack_json_escape() {
  local text=$1
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "$text"
}

# Post a summary of the test run to the Slack webhook.
#
# Globals read:
#   SLACK_WEBHOOK_URL   (required) webhook the payload is POSTed to
#   BUILDKITE_MESSAGE, BUILDKITE_COMMIT, BUILDKITE_BUILD_URL
#   CHANNEL, TESTNET_TAG, START_UNIX_MSECS, FINISH_UNIX_MSECS
#   RESULT_DETAILS, TEST_CONFIGURATION
# Globals written (same names as before, kept for compatibility):
#   BUILDKITE_MESSAGE, BUILDKITE_BUILD_URL, RESULT_DETAILS, TEST_CONFIGURATION
#   (filled with placeholders when empty), COMMIT_BUTTON_TEXT, COMMIT_URL,
#   BUILD_BUTTON_TEXT, GRAFANA_URL, payLoad
# Exits:
#   1 when SLACK_WEBHOOK_URL is empty.
#   NOTE(review): this is `exit`, not `return`, so it ends the calling script
#   too — confirm callers that run further cleanup afterwards can tolerate it.
upload_results_to_slack() {
  echo --- Uploading results to Slack Performance Results App

  if [[ -z $SLACK_WEBHOOK_URL ]]; then
    echo "SLACK_WEBHOOK_URL undefined" >&2
    exit 1
  fi

  [[ -n $BUILDKITE_MESSAGE ]] || BUILDKITE_MESSAGE="Message not defined"

  if [[ -n $BUILDKITE_COMMIT ]]; then
    # Button label is the abbreviated (8 character) commit SHA.
    COMMIT_BUTTON_TEXT=${BUILDKITE_COMMIT:0:8}
    COMMIT_URL="https://github.com/solana-labs/solana/commit/${BUILDKITE_COMMIT}"
  else
    COMMIT_BUTTON_TEXT="Commit not defined"
    COMMIT_URL="https://github.com/solana-labs/solana/commits/master"
  fi

  if [[ -n $BUILDKITE_BUILD_URL ]]; then
    BUILD_BUTTON_TEXT="Build Kite Job"
  else
    BUILD_BUTTON_TEXT="Build URL not defined"
    BUILDKITE_BUILD_URL="https://buildkite.com/solana-labs/"
  fi

  # Dashboard link restricted to the time window of this run.
  GRAFANA_URL="https://metrics.solana.com:3000/d/testnet-${CHANNEL:-edge}/testnet-monitor-${CHANNEL:-edge}?var-testnet=${TESTNET_TAG:-testnet-automation}&from=${START_UNIX_MSECS:-0}&to=${FINISH_UNIX_MSECS:-0}"

  [[ -n $RESULT_DETAILS ]] || RESULT_DETAILS="Undefined"
  [[ -n $TEST_CONFIGURATION ]] || TEST_CONFIGURATION="Undefined"

  # Everything interpolated into the payload is JSON-escaped first: the commit
  # message may contain quotes/backslashes and the result details span
  # several lines, either of which would otherwise yield invalid JSON.
  local message_json commit_text_json commit_url_json build_text_json
  local build_url_json grafana_url_json config_json details_json
  message_json=$(_slack_json_escape "$BUILDKITE_MESSAGE")
  commit_text_json=$(_slack_json_escape "$COMMIT_BUTTON_TEXT")
  commit_url_json=$(_slack_json_escape "$COMMIT_URL")
  build_text_json=$(_slack_json_escape "$BUILD_BUTTON_TEXT")
  build_url_json=$(_slack_json_escape "$BUILDKITE_BUILD_URL")
  grafana_url_json=$(_slack_json_escape "$GRAFANA_URL")
  config_json=$(_slack_json_escape "$TEST_CONFIGURATION")
  details_json=$(_slack_json_escape "$RESULT_DETAILS")

  payLoad="$(cat <<EOF
{
  "blocks": [
    {
      "type": "section",
      "text": {
        "type": "mrkdwn",
        "text": "*New Build: $message_json*"
      }
    },
    {
      "type": "actions",
      "elements": [
        {
          "type": "button",
          "text": {
            "type": "plain_text",
            "text": "$commit_text_json",
            "emoji": true
          },
          "url": "$commit_url_json"
        },
        {
          "type": "button",
          "text": {
            "type": "plain_text",
            "text": "$build_text_json",
            "emoji": true
          },
          "url": "$build_url_json"
        },
        {
          "type": "button",
          "text": {
            "type": "plain_text",
            "text": "Grafana",
            "emoji": true
          },
          "url": "$grafana_url_json"
        }
      ]
    },
    {
      "type": "divider"
    },
    {
      "type": "section",
      "text": {
        "type": "mrkdwn",
        "text": "Test Configuration: \n\`\`\`$config_json\`\`\`"
      }
    },
    {
      "type": "divider"
    },
    {
      "type": "section",
      "text": {
        "type": "mrkdwn",
        "text": "Result Details: \n\`\`\`$details_json\`\`\`"
      }
    }
  ]
}
EOF
)"

  curl -X POST \
    -H 'Content-type: application/json' \
    --data "$payLoad" \
    "$SLACK_WEBHOOK_URL"
}