net.sh: Refactor node initialization wait (#10819)
* remote-node.sh: Factor out init wait to own script
* remote-node.sh: Allow nodes to initialize asynchronously
* testnet-automation: Plumb --async-node-init
This commit is contained in:
parent
50b3fa83a0
commit
7021e1c584
19
net/net.sh
19
net/net.sh
|
@ -286,6 +286,7 @@ startBootstrapLeader() {
|
|||
\"$gpuMode\" \
|
||||
\"$GEOLOCATION_API_KEY\" \
|
||||
\"$maybeWarpSlot\" \
|
||||
\"$waitForNodeInit\" \
|
||||
"
|
||||
|
||||
) >> "$logFile" 2>&1 || {
|
||||
|
@ -356,6 +357,7 @@ startNode() {
|
|||
\"$gpuMode\" \
|
||||
\"$GEOLOCATION_API_KEY\" \
|
||||
\"$maybeWarpSlot\" \
|
||||
\"$waitForNodeInit\" \
|
||||
"
|
||||
) >> "$logFile" 2>&1 &
|
||||
declare pid=$!
|
||||
|
@ -591,6 +593,19 @@ deploy() {
|
|||
fi
|
||||
done
|
||||
|
||||
if ! $waitForNodeInit; then
|
||||
# Handle async init
|
||||
declare startTime=$SECONDS
|
||||
for ipAddress in "${validatorIpList[@]}" "${blockstreamerIpList[@]}"; do
|
||||
declare timeWaited=$((SECONDS - startTime))
|
||||
if [[ $timeWaited -gt 600 ]]; then
|
||||
break
|
||||
fi
|
||||
ssh "${sshOptions[@]}" -n "$ipAddress" \
|
||||
"./solana/net/remote/remote-node-wait-init.sh $((600 - timeWaited))"
|
||||
done
|
||||
fi
|
||||
|
||||
$metricsWriteDatapoint "testnet-deploy net-validators-started=1"
|
||||
additionalNodeDeployTime=$SECONDS
|
||||
|
||||
|
@ -748,6 +763,7 @@ netemCommand="add"
|
|||
clientDelayStart=0
|
||||
netLogDir=
|
||||
maybeWarpSlot=
|
||||
waitForNodeInit=true
|
||||
|
||||
command=$1
|
||||
[[ -n $command ]] || usage
|
||||
|
@ -854,6 +870,9 @@ while [[ -n $1 ]]; do
|
|||
elif [[ $1 == --warp-slot ]]; then
|
||||
maybeWarpSlot="$1 $2"
|
||||
shift 2
|
||||
elif [[ $1 == --async-node-init ]]; then
|
||||
waitForNodeInit=false
|
||||
shift 1
|
||||
else
|
||||
usage "Unknown long option: $1"
|
||||
fi
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
#!/usr/bin/env bash
|
||||
set -e
|
||||
set -x
|
||||
|
||||
initCompleteFile=init-complete-node.log
|
||||
# Maximum number of seconds to wait for the init-complete marker. The first
# positional argument overrides the 600-second default.
# Use ${1:-...} rather than ${1:=...}: the ":=" form tries to assign to the
# positional parameter, which bash rejects ("$1: cannot assign in this way")
# whenever the script is invoked without an argument.
waitTime=${1:-600}
|
||||
|
||||
# Block until the init-complete marker file becomes readable.
#
# Globals read:
#   initCompleteFile - path of the marker file that is polled
#   waitTime         - timeout in seconds
# Output: a banner, one progress line per 5-second poll, and a final
#   success or error message, all on stdout.
# Exit: terminates the script with status 1 once waitTime seconds have
#   elapsed without the marker appearing; otherwise returns normally.
waitForNodeToInit() {
  local host began elapsed
  host=$(hostname)
  echo "--- waiting for $host to boot up"
  began=$SECONDS
  until [[ -r $initCompleteFile ]]; do
    elapsed=$((SECONDS - began))
    if [[ $elapsed -lt $waitTime ]]; then
      # Still inside the time budget: report progress and poll again.
      echo "Waiting for $initCompleteFile ($elapsed) on $host..."
      sleep 5
      continue
    fi
    # Budget exhausted and the marker is still missing.
    echo "^^^ +++"
    echo "Error: $initCompleteFile not found in $elapsed seconds"
    exit 1
  done
  echo "$host booted up"
}
|
||||
|
||||
cd ~/solana
|
||||
waitForNodeToInit
|
|
@ -27,6 +27,7 @@ extraNodeArgs="${18}"
|
|||
gpuMode="${19:-auto}"
|
||||
GEOLOCATION_API_KEY="${20}"
|
||||
maybeWarpSlot="${21}"
|
||||
waitForNodeInit="${22}"
|
||||
set +x
|
||||
|
||||
missing() {
|
||||
|
@ -92,22 +93,6 @@ case "$gpuMode" in
|
|||
;;
|
||||
esac
|
||||
|
||||
waitForNodeToInit() {
|
||||
hostname=$(hostname)
|
||||
echo "--- waiting for $hostname to boot up"
|
||||
SECONDS=
|
||||
while [[ ! -r $initCompleteFile ]]; do
|
||||
if [[ $SECONDS -ge 600 ]]; then
|
||||
echo "^^^ +++"
|
||||
echo "Error: $initCompleteFile not found in $SECONDS seconds"
|
||||
exit 1
|
||||
fi
|
||||
echo "Waiting for $initCompleteFile ($SECONDS) on $hostname..."
|
||||
sleep 5
|
||||
done
|
||||
echo "$hostname booted up"
|
||||
}
|
||||
|
||||
case $deployMethod in
|
||||
local|tar|skip)
|
||||
PATH="$HOME"/.cargo/bin:"$PATH"
|
||||
|
@ -258,7 +243,11 @@ cat >> ~/solana/on-reboot <<EOF
|
|||
disown
|
||||
EOF
|
||||
~/solana/on-reboot
|
||||
waitForNodeToInit
|
||||
|
||||
if $waitForNodeInit; then
|
||||
net/remote/remote-node-wait-init.sh 600
|
||||
fi
|
||||
|
||||
;;
|
||||
validator|blockstreamer)
|
||||
if [[ $deployMethod != skip ]]; then
|
||||
|
@ -377,7 +366,10 @@ cat >> ~/solana/on-reboot <<EOF
|
|||
disown
|
||||
EOF
|
||||
~/solana/on-reboot
|
||||
waitForNodeToInit
|
||||
|
||||
if $waitForNodeInit; then
|
||||
net/remote/remote-node-wait-init.sh 600
|
||||
fi
|
||||
|
||||
if [[ $skipSetup != true && $nodeType != blockstreamer ]]; then
|
||||
# Wait for the validator to catch up to the bootstrap validator before
|
||||
|
|
|
@ -129,11 +129,16 @@ function launch_testnet() {
|
|||
maybeWarpSlot="--warp-slot $WARP_SLOT"
|
||||
fi
|
||||
|
||||
declare maybeAsyncNodeInit
|
||||
if [[ "$ASYNC_NODE_INIT" = "true" ]]; then
|
||||
maybeAsyncNodeInit="--async-node-init"
|
||||
fi
|
||||
|
||||
# shellcheck disable=SC2068
|
||||
# shellcheck disable=SC2086
|
||||
"${REPO_ROOT}"/net/net.sh start $version_args \
|
||||
-c idle=$NUMBER_OF_CLIENT_NODES $maybeStartAllowBootFailures \
|
||||
--gpu-mode $startGpuMode $maybeWarpSlot
|
||||
--gpu-mode $startGpuMode $maybeWarpSlot $maybeAsyncNodeInit
|
||||
|
||||
execution_step "Waiting for bootstrap validator's stake to fall below ${BOOTSTRAP_VALIDATOR_MAX_STAKE_THRESHOLD}%"
|
||||
wait_for_bootstrap_validator_stake_drop "$BOOTSTRAP_VALIDATOR_MAX_STAKE_THRESHOLD"
|
||||
|
|
Loading…
Reference in New Issue