Add more metrics

This commit is contained in:
Michael Vines 2018-09-06 13:00:01 -07:00
parent abfff66d53
commit 03e87e4169
3 changed files with 20 additions and 15 deletions

View File

@ -110,7 +110,7 @@ done
prepareInstancesAndWriteConfigFile() { prepareInstancesAndWriteConfigFile() {
$metricsWriteDatapoint "testnet-deploy net-config-start=1" $metricsWriteDatapoint "testnet-deploy net-config-begin=1"
cat >> "$configFile" <<EOF cat >> "$configFile" <<EOF
# autogenerated at $(date) # autogenerated at $(date)
@ -183,15 +183,15 @@ EOF
case $command in case $command in
delete) delete)
$metricsWriteDatapoint "testnet-deploy net-delete-start=1" $metricsWriteDatapoint "testnet-deploy net-delete-begin=1"
gcloud_FindInstances "name~^$prefix-" gcloud_FindInstances "name~^$prefix-"
if [[ ${#instances[@]} -eq 0 ]]; then if [[ ${#instances[@]} -eq 0 ]]; then
echo "No instances found matching '^$prefix-'" echo "No instances found matching '^$prefix-'"
exit 0 else
gcloud_DeleteInstances "$yes"
fi fi
gcloud_DeleteInstances "$yes"
rm -f "$configFile" rm -f "$configFile"
$metricsWriteDatapoint "testnet-deploy net-delete-complete=1" $metricsWriteDatapoint "testnet-deploy net-delete-complete=1"
@ -200,7 +200,7 @@ delete)
create) create)
[[ -n $validatorNodeCount ]] || usage "Need number of nodes" [[ -n $validatorNodeCount ]] || usage "Need number of nodes"
$metricsWriteDatapoint "testnet-deploy net-create=1" $metricsWriteDatapoint "testnet-deploy net-create-begin=1"
echo "Network composition:" echo "Network composition:"
echo "Leader = $leaderMachineType (GPU=${leaderAccelerator:-none})" echo "Leader = $leaderMachineType (GPU=${leaderAccelerator:-none})"

View File

@ -188,13 +188,20 @@ startClient() {
sanity() { sanity() {
declare expectedNodeCount=$((${#validatorIpList[@]} + 1)) declare expectedNodeCount=$((${#validatorIpList[@]} + 1))
declare ok=true
echo "--- Sanity" echo "--- Sanity"
$metricsWriteDatapoint "testnet-deploy net-sanity-begin=1"
( (
set -x set -x
# shellcheck disable=SC2029 # remote-client.sh args are expanded on client side intentionally # shellcheck disable=SC2029 # remote-client.sh args are expanded on client side intentionally
ssh "${sshOptions[@]}" "$leaderIp" \ ssh "${sshOptions[@]}" "$leaderIp" \
"./solana/net/remote/remote-sanity.sh $sanityExtraArgs" "./solana/net/remote/remote-sanity.sh $sanityExtraArgs"
) || exit 1 ) || ok=false
$metricsWriteDatapoint "testnet-deploy net-sanity-complete=1"
$ok || exit 1
} }
start() { start() {
@ -231,6 +238,7 @@ start() {
esac esac
echo "Deployment started at $(date)" echo "Deployment started at $(date)"
$metricsWriteDatapoint "testnet-deploy net-start-begin=1"
SECONDS=0 SECONDS=0
declare leaderDeployTime= declare leaderDeployTime=
@ -263,7 +271,7 @@ start() {
startClient "$ipAddress" "$netLogDir/client-$ipAddress.log" startClient "$ipAddress" "$netLogDir/client-$ipAddress.log"
done done
clientDeployTime=$SECONDS clientDeployTime=$SECONDS
$metricsWriteDatapoint "testnet-deploy start=1" $metricsWriteDatapoint "testnet-deploy net-start-complete=1"
if [[ $deployMethod = "snap" ]]; then if [[ $deployMethod = "snap" ]]; then
IFS=\ read -r _ networkVersion _ < <( IFS=\ read -r _ networkVersion _ < <(
@ -303,8 +311,7 @@ stop_node() {
stop() { stop() {
SECONDS=0 SECONDS=0
$metricsWriteDatapoint "testnet-deploy net-stop-begin=1"
$metricsWriteDatapoint "testnet-deploy stop=1"
stop_node "$leaderIp" stop_node "$leaderIp"
@ -312,6 +319,7 @@ stop() {
stop_node "$ipAddress" stop_node "$ipAddress"
done done
$metricsWriteDatapoint "testnet-deploy net-stop-complete=1"
echo "Stopping nodes took $SECONDS seconds" echo "Stopping nodes took $SECONDS seconds"
} }

View File

@ -54,16 +54,13 @@ esac
scripts/oom-monitor.sh > oom-monitor.log 2>&1 & scripts/oom-monitor.sh > oom-monitor.log 2>&1 &
set +e
while true; do while true; do
echo "=== Client start: $(date)" >> client.log echo "=== Client start: $(date)" >> client.log
$metricsWriteDatapoint "testnet-deploy client-begin=1"
clientCommand="$solana_bench_tps --num-nodes $numNodes --seconds 600 --sustained --threads $threadCount" clientCommand="$solana_bench_tps --num-nodes $numNodes --seconds 600 --sustained --threads $threadCount"
echo "$ $clientCommand" >> client.log echo "$ $clientCommand" >> client.log
set +e
$clientCommand >> client.log 2>&1 $clientCommand >> client.log 2>&1
set -e $metricsWriteDatapoint "testnet-deploy client-complete=1"
$metricsWriteDatapoint "testnet-deploy,name=$netBasename clientexit=1"
echo Error: bench-tps should never exit | tee -a client.log
done done