net.sh: Refactor node initialization wait (#10819)

* remote-node.sh: Factor out init wait to own script

* remote-node.sh: Allow nodes to initialize asynchronously

* testnet-automation: Plumb --async-node-init
This commit is contained in:
Trent Nelson 2020-06-25 23:48:58 -06:00 committed by GitHub
parent 50b3fa83a0
commit 7021e1c584
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 62 additions and 19 deletions

View File

@ -286,6 +286,7 @@ startBootstrapLeader() {
\"$gpuMode\" \
\"$GEOLOCATION_API_KEY\" \
\"$maybeWarpSlot\" \
\"$waitForNodeInit\" \
"
) >> "$logFile" 2>&1 || {
@ -356,6 +357,7 @@ startNode() {
\"$gpuMode\" \
\"$GEOLOCATION_API_KEY\" \
\"$maybeWarpSlot\" \
\"$waitForNodeInit\" \
"
) >> "$logFile" 2>&1 &
declare pid=$!
@ -591,6 +593,19 @@ deploy() {
fi
done
if ! $waitForNodeInit; then
# Handle async init
declare startTime=$SECONDS
for ipAddress in "${validatorIpList[@]}" "${blockstreamerIpList[@]}"; do
declare timeWaited=$((SECONDS - startTime))
if [[ $timeWaited -gt 600 ]]; then
break
fi
ssh "${sshOptions[@]}" -n "$ipAddress" \
"./solana/net/remote/remote-node-wait-init.sh $((600 - timeWaited))"
done
fi
$metricsWriteDatapoint "testnet-deploy net-validators-started=1"
additionalNodeDeployTime=$SECONDS
@ -748,6 +763,7 @@ netemCommand="add"
clientDelayStart=0
netLogDir=
maybeWarpSlot=
waitForNodeInit=true
command=$1
[[ -n $command ]] || usage
@ -854,6 +870,9 @@ while [[ -n $1 ]]; do
elif [[ $1 == --warp-slot ]]; then
maybeWarpSlot="$1 $2"
shift 2
elif [[ $1 == --async-node-init ]]; then
waitForNodeInit=false
shift 1
else
usage "Unknown long option: $1"
fi

View File

@ -0,0 +1,27 @@
#!/usr/bin/env bash
#
# Waits for the node's init-complete marker file to become readable.
#
# Usage: $0 [waitTime]
#   waitTime - maximum number of seconds to wait; 600 when omitted or empty.
#
set -e
set -x
# Marker file polled for; relative path, resolved against the working directory.
initCompleteFile=init-complete-node.log
# Must be `:-` rather than `:=`: bash refuses to assign to a positional
# parameter through expansion ("$1: cannot assign in this way"), which would
# abort the script whenever the argument is missing or empty.
waitTime=${1:-600}
# Polls every 5 seconds until $initCompleteFile is readable.
#
# Globals read:
#   initCompleteFile - path of the marker file to wait for
#   waitTime         - timeout in seconds
# Outputs: progress messages on stdout.
# Exits the shell with status 1 once $waitTime seconds have elapsed without the
# marker file becoming readable.
waitForNodeToInit() {
  declare nodeName
  nodeName=$(hostname)
  echo "--- waiting for $nodeName to boot up"

  # Measure elapsed time relative to function entry instead of resetting SECONDS.
  declare beganAt=$SECONDS
  declare elapsed
  until [[ -r $initCompleteFile ]]; do
    elapsed=$((SECONDS - beganAt))
    if [[ $elapsed -ge $waitTime ]]; then
      echo "^^^ +++"
      echo "Error: $initCompleteFile not found in $elapsed seconds"
      exit 1
    fi
    echo "Waiting for $initCompleteFile ($elapsed) on $nodeName..."
    sleep 5
  done

  echo "$nodeName booted up"
}
# initCompleteFile is a relative path, so it is looked up inside ~/solana.
cd ~/solana
# Block until the marker file is readable, or exit 1 after waitTime seconds.
waitForNodeToInit

View File

@ -27,6 +27,7 @@ extraNodeArgs="${18}"
gpuMode="${19:-auto}"
GEOLOCATION_API_KEY="${20}"
maybeWarpSlot="${21}"
waitForNodeInit="${22}"
set +x
missing() {
@ -92,22 +93,6 @@ case "$gpuMode" in
;;
esac
waitForNodeToInit() {
hostname=$(hostname)
echo "--- waiting for $hostname to boot up"
SECONDS=
while [[ ! -r $initCompleteFile ]]; do
if [[ $SECONDS -ge 600 ]]; then
echo "^^^ +++"
echo "Error: $initCompleteFile not found in $SECONDS seconds"
exit 1
fi
echo "Waiting for $initCompleteFile ($SECONDS) on $hostname..."
sleep 5
done
echo "$hostname booted up"
}
case $deployMethod in
local|tar|skip)
PATH="$HOME"/.cargo/bin:"$PATH"
@ -258,7 +243,11 @@ cat >> ~/solana/on-reboot <<EOF
disown
EOF
~/solana/on-reboot
waitForNodeToInit
if $waitForNodeInit; then
net/remote/remote-node-wait-init.sh 600
fi
;;
validator|blockstreamer)
if [[ $deployMethod != skip ]]; then
@ -377,7 +366,10 @@ cat >> ~/solana/on-reboot <<EOF
disown
EOF
~/solana/on-reboot
waitForNodeToInit
if $waitForNodeInit; then
net/remote/remote-node-wait-init.sh 600
fi
if [[ $skipSetup != true && $nodeType != blockstreamer ]]; then
# Wait for the validator to catch up to the bootstrap validator before

View File

@ -129,11 +129,16 @@ function launch_testnet() {
maybeWarpSlot="--warp-slot $WARP_SLOT"
fi
declare maybeAsyncNodeInit
if [[ "$ASYNC_NODE_INIT" = "true" ]]; then
maybeAsyncNodeInit="--async-node-init"
fi
# shellcheck disable=SC2068
# shellcheck disable=SC2086
"${REPO_ROOT}"/net/net.sh start $version_args \
-c idle=$NUMBER_OF_CLIENT_NODES $maybeStartAllowBootFailures \
--gpu-mode $startGpuMode $maybeWarpSlot
--gpu-mode $startGpuMode $maybeWarpSlot $maybeAsyncNodeInit
execution_step "Waiting for bootstrap validator's stake to fall below ${BOOTSTRAP_VALIDATOR_MAX_STAKE_THRESHOLD}%"
wait_for_bootstrap_validator_stake_drop "$BOOTSTRAP_VALIDATOR_MAX_STAKE_THRESHOLD"