net.sh: Refactor node initialization wait (#10819)
* remote-node.sh: Factor out init wait to own script
* remote-node.sh: Allow nodes to initialize asynchronously
* testnet-automation: Plumb --async-node-init
This commit is contained in:
parent
50b3fa83a0
commit
7021e1c584
19
net/net.sh
19
net/net.sh
|
@ -286,6 +286,7 @@ startBootstrapLeader() {
|
||||||
\"$gpuMode\" \
|
\"$gpuMode\" \
|
||||||
\"$GEOLOCATION_API_KEY\" \
|
\"$GEOLOCATION_API_KEY\" \
|
||||||
\"$maybeWarpSlot\" \
|
\"$maybeWarpSlot\" \
|
||||||
|
\"$waitForNodeInit\" \
|
||||||
"
|
"
|
||||||
|
|
||||||
) >> "$logFile" 2>&1 || {
|
) >> "$logFile" 2>&1 || {
|
||||||
|
@ -356,6 +357,7 @@ startNode() {
|
||||||
\"$gpuMode\" \
|
\"$gpuMode\" \
|
||||||
\"$GEOLOCATION_API_KEY\" \
|
\"$GEOLOCATION_API_KEY\" \
|
||||||
\"$maybeWarpSlot\" \
|
\"$maybeWarpSlot\" \
|
||||||
|
\"$waitForNodeInit\" \
|
||||||
"
|
"
|
||||||
) >> "$logFile" 2>&1 &
|
) >> "$logFile" 2>&1 &
|
||||||
declare pid=$!
|
declare pid=$!
|
||||||
|
@ -591,6 +593,19 @@ deploy() {
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
|
if ! $waitForNodeInit; then
|
||||||
|
# Handle async init
|
||||||
|
declare startTime=$SECONDS
|
||||||
|
for ipAddress in "${validatorIpList[@]}" "${blockstreamerIpList[@]}"; do
|
||||||
|
declare timeWaited=$((SECONDS - startTime))
|
||||||
|
if [[ $timeWaited -gt 600 ]]; then
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
ssh "${sshOptions[@]}" -n "$ipAddress" \
|
||||||
|
"./solana/net/remote/remote-node-wait-init.sh $((600 - timeWaited))"
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
$metricsWriteDatapoint "testnet-deploy net-validators-started=1"
|
$metricsWriteDatapoint "testnet-deploy net-validators-started=1"
|
||||||
additionalNodeDeployTime=$SECONDS
|
additionalNodeDeployTime=$SECONDS
|
||||||
|
|
||||||
|
@ -748,6 +763,7 @@ netemCommand="add"
|
||||||
clientDelayStart=0
|
clientDelayStart=0
|
||||||
netLogDir=
|
netLogDir=
|
||||||
maybeWarpSlot=
|
maybeWarpSlot=
|
||||||
|
waitForNodeInit=true
|
||||||
|
|
||||||
command=$1
|
command=$1
|
||||||
[[ -n $command ]] || usage
|
[[ -n $command ]] || usage
|
||||||
|
@ -854,6 +870,9 @@ while [[ -n $1 ]]; do
|
||||||
elif [[ $1 == --warp-slot ]]; then
|
elif [[ $1 == --warp-slot ]]; then
|
||||||
maybeWarpSlot="$1 $2"
|
maybeWarpSlot="$1 $2"
|
||||||
shift 2
|
shift 2
|
||||||
|
elif [[ $1 == --async-node-init ]]; then
|
||||||
|
waitForNodeInit=false
|
||||||
|
shift 1
|
||||||
else
|
else
|
||||||
usage "Unknown long option: $1"
|
usage "Unknown long option: $1"
|
||||||
fi
|
fi
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
set -e
|
||||||
|
set -x
|
||||||
|
|
||||||
|
initCompleteFile=init-complete-node.log
|
||||||
|
# Maximum number of seconds to wait, taken from the first argument; defaults
# to 600 when the argument is missing or empty. The ":-" form is required here:
# bash cannot assign to a positional parameter, so "${1:=600}" aborts with
# "cannot assign in this way" whenever no argument is supplied.
waitTime=${1:-600}
|
||||||
|
|
||||||
|
# Polls every 5 seconds until $initCompleteFile is readable, printing a
# progress line on each iteration. Exits the script with status 1 once
# $waitTime seconds have elapsed without the file appearing.
waitForNodeToInit() {
|
||||||
|
declare hostname
|
||||||
|
hostname=$(hostname)
|
||||||
|
echo "--- waiting for $hostname to boot up"
|
||||||
|
declare startTime=$SECONDS
|
||||||
|
while [[ ! -r $initCompleteFile ]]; do
|
||||||
|
declare timeWaited=$((SECONDS - startTime))
|
||||||
|
if [[ $timeWaited -ge $waitTime ]]; then
|
||||||
|
echo "^^^ +++"
|
||||||
|
echo "Error: $initCompleteFile not found in $timeWaited seconds"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
echo "Waiting for $initCompleteFile ($timeWaited) on $hostname..."
|
||||||
|
sleep 5
|
||||||
|
done
|
||||||
|
echo "$hostname booted up"
|
||||||
|
}
|
||||||
|
|
||||||
|
cd ~/solana
|
||||||
|
waitForNodeToInit
|
|
@ -27,6 +27,7 @@ extraNodeArgs="${18}"
|
||||||
gpuMode="${19:-auto}"
|
gpuMode="${19:-auto}"
|
||||||
GEOLOCATION_API_KEY="${20}"
|
GEOLOCATION_API_KEY="${20}"
|
||||||
maybeWarpSlot="${21}"
|
maybeWarpSlot="${21}"
|
||||||
|
waitForNodeInit="${22}"
|
||||||
set +x
|
set +x
|
||||||
|
|
||||||
missing() {
|
missing() {
|
||||||
|
@ -92,22 +93,6 @@ case "$gpuMode" in
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
waitForNodeToInit() {
|
|
||||||
hostname=$(hostname)
|
|
||||||
echo "--- waiting for $hostname to boot up"
|
|
||||||
SECONDS=
|
|
||||||
while [[ ! -r $initCompleteFile ]]; do
|
|
||||||
if [[ $SECONDS -ge 600 ]]; then
|
|
||||||
echo "^^^ +++"
|
|
||||||
echo "Error: $initCompleteFile not found in $SECONDS seconds"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
echo "Waiting for $initCompleteFile ($SECONDS) on $hostname..."
|
|
||||||
sleep 5
|
|
||||||
done
|
|
||||||
echo "$hostname booted up"
|
|
||||||
}
|
|
||||||
|
|
||||||
case $deployMethod in
|
case $deployMethod in
|
||||||
local|tar|skip)
|
local|tar|skip)
|
||||||
PATH="$HOME"/.cargo/bin:"$PATH"
|
PATH="$HOME"/.cargo/bin:"$PATH"
|
||||||
|
@ -258,7 +243,11 @@ cat >> ~/solana/on-reboot <<EOF
|
||||||
disown
|
disown
|
||||||
EOF
|
EOF
|
||||||
~/solana/on-reboot
|
~/solana/on-reboot
|
||||||
waitForNodeToInit
|
|
||||||
|
if $waitForNodeInit; then
|
||||||
|
net/remote/remote-node-wait-init.sh 600
|
||||||
|
fi
|
||||||
|
|
||||||
;;
|
;;
|
||||||
validator|blockstreamer)
|
validator|blockstreamer)
|
||||||
if [[ $deployMethod != skip ]]; then
|
if [[ $deployMethod != skip ]]; then
|
||||||
|
@ -377,7 +366,10 @@ cat >> ~/solana/on-reboot <<EOF
|
||||||
disown
|
disown
|
||||||
EOF
|
EOF
|
||||||
~/solana/on-reboot
|
~/solana/on-reboot
|
||||||
waitForNodeToInit
|
|
||||||
|
if $waitForNodeInit; then
|
||||||
|
net/remote/remote-node-wait-init.sh 600
|
||||||
|
fi
|
||||||
|
|
||||||
if [[ $skipSetup != true && $nodeType != blockstreamer ]]; then
|
if [[ $skipSetup != true && $nodeType != blockstreamer ]]; then
|
||||||
# Wait for the validator to catch up to the bootstrap validator before
|
# Wait for the validator to catch up to the bootstrap validator before
|
||||||
|
|
|
@ -129,11 +129,16 @@ function launch_testnet() {
|
||||||
maybeWarpSlot="--warp-slot $WARP_SLOT"
|
maybeWarpSlot="--warp-slot $WARP_SLOT"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
declare maybeAsyncNodeInit
|
||||||
|
if [[ "$ASYNC_NODE_INIT" = "true" ]]; then
|
||||||
|
maybeAsyncNodeInit="--async-node-init"
|
||||||
|
fi
|
||||||
|
|
||||||
# shellcheck disable=SC2068
|
# shellcheck disable=SC2068
|
||||||
# shellcheck disable=SC2086
|
# shellcheck disable=SC2086
|
||||||
"${REPO_ROOT}"/net/net.sh start $version_args \
|
"${REPO_ROOT}"/net/net.sh start $version_args \
|
||||||
-c idle=$NUMBER_OF_CLIENT_NODES $maybeStartAllowBootFailures \
|
-c idle=$NUMBER_OF_CLIENT_NODES $maybeStartAllowBootFailures \
|
||||||
--gpu-mode $startGpuMode $maybeWarpSlot
|
--gpu-mode $startGpuMode $maybeWarpSlot $maybeAsyncNodeInit
|
||||||
|
|
||||||
execution_step "Waiting for bootstrap validator's stake to fall below ${BOOTSTRAP_VALIDATOR_MAX_STAKE_THRESHOLD}%"
|
execution_step "Waiting for bootstrap validator's stake to fall below ${BOOTSTRAP_VALIDATOR_MAX_STAKE_THRESHOLD}%"
|
||||||
wait_for_bootstrap_validator_stake_drop "$BOOTSTRAP_VALIDATOR_MAX_STAKE_THRESHOLD"
|
wait_for_bootstrap_validator_stake_drop "$BOOTSTRAP_VALIDATOR_MAX_STAKE_THRESHOLD"
|
||||||
|
|
Loading…
Reference in New Issue