From 7021e1c5842e4467a267040a0ab90edc4f595f0f Mon Sep 17 00:00:00 2001 From: Trent Nelson Date: Thu, 25 Jun 2020 23:48:58 -0600 Subject: [PATCH] net.sh: Refactor node initialization wait (#10819) * remote-node.sh: Factor out init wait to own script * remote-node.sh: Allow nodes to initialize asynchronously * testnet-automation: Plumb --async-node-init --- net/net.sh | 19 +++++++++++++++++++ net/remote/remote-node-wait-init.sh | 27 +++++++++++++++++++++++++++ net/remote/remote-node.sh | 28 ++++++++++------------------ system-test/testnet-automation.sh | 7 ++++++- 4 files changed, 62 insertions(+), 19 deletions(-) create mode 100755 net/remote/remote-node-wait-init.sh diff --git a/net/net.sh b/net/net.sh index 81364c8830..733bab2a5e 100755 --- a/net/net.sh +++ b/net/net.sh @@ -286,6 +286,7 @@ startBootstrapLeader() { \"$gpuMode\" \ \"$GEOLOCATION_API_KEY\" \ \"$maybeWarpSlot\" \ + \"$waitForNodeInit\" \ " ) >> "$logFile" 2>&1 || { @@ -356,6 +357,7 @@ startNode() { \"$gpuMode\" \ \"$GEOLOCATION_API_KEY\" \ \"$maybeWarpSlot\" \ + \"$waitForNodeInit\" \ " ) >> "$logFile" 2>&1 & declare pid=$! @@ -591,6 +593,19 @@ deploy() { fi done + if ! 
$waitForNodeInit; then + # Handle async init + declare startTime=$SECONDS + for ipAddress in "${validatorIpList[@]}" "${blockstreamerIpList[@]}"; do + declare timeWaited=$((SECONDS - startTime)) + if [[ $timeWaited -gt 600 ]]; then + break + fi + ssh "${sshOptions[@]}" -n "$ipAddress" \ + "./solana/net/remote/remote-node-wait-init.sh $((600 - timeWaited))" + done + fi + $metricsWriteDatapoint "testnet-deploy net-validators-started=1" additionalNodeDeployTime=$SECONDS @@ -748,6 +763,7 @@ netemCommand="add" clientDelayStart=0 netLogDir= maybeWarpSlot= +waitForNodeInit=true command=$1 [[ -n $command ]] || usage @@ -854,6 +870,9 @@ while [[ -n $1 ]]; do elif [[ $1 == --warp-slot ]]; then maybeWarpSlot="$1 $2" shift 2 + elif [[ $1 == --async-node-init ]]; then + waitForNodeInit=false + shift 1 else usage "Unknown long option: $1" fi diff --git a/net/remote/remote-node-wait-init.sh b/net/remote/remote-node-wait-init.sh new file mode 100755 index 0000000000..6134a34ab0 --- /dev/null +++ b/net/remote/remote-node-wait-init.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +set -e +set -x + +initCompleteFile=init-complete-node.log +waitTime=${1:-600} + +waitForNodeToInit() { + declare hostname + hostname=$(hostname) + echo "--- waiting for $hostname to boot up" + declare startTime=$SECONDS + while [[ ! -r $initCompleteFile ]]; do + declare timeWaited=$((SECONDS - startTime)) + if [[ $timeWaited -ge $waitTime ]]; then + echo "^^^ +++" + echo "Error: $initCompleteFile not found in $timeWaited seconds" + exit 1 + fi + echo "Waiting for $initCompleteFile ($timeWaited) on $hostname..." 
+ sleep 5 + done + echo "$hostname booted up" +} + +cd ~/solana +waitForNodeToInit diff --git a/net/remote/remote-node.sh b/net/remote/remote-node.sh index b062ef867d..17407341bc 100755 --- a/net/remote/remote-node.sh +++ b/net/remote/remote-node.sh @@ -27,6 +27,7 @@ extraNodeArgs="${18}" gpuMode="${19:-auto}" GEOLOCATION_API_KEY="${20}" maybeWarpSlot="${21}" +waitForNodeInit="${22}" set +x missing() { @@ -92,22 +93,6 @@ case "$gpuMode" in ;; esac -waitForNodeToInit() { - hostname=$(hostname) - echo "--- waiting for $hostname to boot up" - SECONDS= - while [[ ! -r $initCompleteFile ]]; do - if [[ $SECONDS -ge 600 ]]; then - echo "^^^ +++" - echo "Error: $initCompleteFile not found in $SECONDS seconds" - exit 1 - fi - echo "Waiting for $initCompleteFile ($SECONDS) on $hostname..." - sleep 5 - done - echo "$hostname booted up" -} - case $deployMethod in local|tar|skip) PATH="$HOME"/.cargo/bin:"$PATH" @@ -258,7 +243,11 @@ cat >> ~/solana/on-reboot <> ~/solana/on-reboot <