diff --git a/multinode-demo/fullnode.sh b/multinode-demo/fullnode.sh index ef7b82951f..d9f10f6e1e 100755 --- a/multinode-demo/fullnode.sh +++ b/multinode-demo/fullnode.sh @@ -220,6 +220,9 @@ while [[ -n $1 ]]; do elif [[ $1 = --no-voting ]]; then args+=("$1") shift + elif [[ $1 = --skip-ledger-verify ]]; then + args+=("$1") + shift elif [[ $1 = --no-sigverify ]]; then args+=("$1") shift diff --git a/net/net.sh b/net/net.sh index 7fb094a31f..dfbda6ba88 100755 --- a/net/net.sh +++ b/net/net.sh @@ -24,6 +24,8 @@ Operate a configured testnet restart - Shortcut for stop then start update - Live update all network nodes logs - Fetch remote logs from each network node + startnode- Start an individual node (previously stopped with stopNode) + stopnode - Stop an individual node start/update-specific options: -T [tarFilename] - Deploy the specified release tarball @@ -62,6 +64,10 @@ Operate a configured testnet - A YML file with a list of account pubkeys and corresponding stakes for external nodes --no-snapshot - If set, disables booting validators from a snapshot + --skip-ledger-verify + - If set, validators will skip verifying + the ledger they already have saved to disk at + boot (results in a much faster boot) sanity/start/update-specific options: -F - Discard validator nodes that didn't bootup successfully -o noLedgerVerify - Skip ledger verification @@ -75,6 +81,9 @@ Operate a configured testnet logs-specific options: none + startnode/stopnode-specific options: + -i [ip address] - IP Address of the node to start or stop + Note: if RUST_LOG is set in the environment it will be propogated into the network nodes. EOF @@ -89,6 +98,7 @@ skipSetup=false updateNodes=false customPrograms= updatePlatforms= +nodeAddress= numBenchTpsClients=0 numBenchExchangeClients=0 benchTpsExtraArgs= @@ -100,6 +110,7 @@ externalPrimordialAccountsFile= remoteExternalPrimordialAccountsFile= stakeNodesInGenesisBlock= maybeNoSnapshot="" +maybeSkipLedgerVerify="" command=$1 [[ -n $command ]] || usage @@ -120,6 +131,9 @@ while [[ -n $1 ]]; do elif [[ $1 = --no-snapshot ]]; then maybeNoSnapshot="$1" shift 1 + elif [[ $1 = --skip-ledger-verify ]]; then + maybeSkipLedgerVerify="$1" + shift 1 elif [[ $1 = --deploy-update ]]; then updatePlatforms="$updatePlatforms $2" shift 2 @@ -320,8 +334,8 @@ startCommon() { startBootstrapLeader() { declare ipAddress=$1 - declare logFile="$2" - declare nodeIndex="$3" + declare nodeIndex="$2" + declare logFile="$3" echo "--- Starting bootstrap leader: $ipAddress" echo "start log: $logFile" @@ -344,6 +358,7 @@ startBootstrapLeader() { ;; esac + # shellcheck disable=SC2086 # Don't want to double quote "$maybeNoSnapshot $maybeSkipLedgerVerify" ssh "${sshOptions[@]}" -n "$ipAddress" \ "./solana/net/remote/remote-node.sh \ $deployMethod \ @@ -359,7 +374,7 @@ startBootstrapLeader() { $numBenchTpsClients \"$benchTpsExtraArgs\" \ $numBenchExchangeClients \"$benchExchangeExtraArgs\" \ \"$genesisOptions\" \ - $maybeNoSnapshot \ + "$maybeNoSnapshot $maybeSkipLedgerVerify" \ " ) >> "$logFile" 2>&1 || { cat "$logFile" @@ -374,6 +389,16 @@ startNode() { declare nodeIndex="$3" declare logFile="$netLogDir/fullnode-$ipAddress.log" + if [[ -z $nodeType ]]; then + echo nodeType not specified + exit 1 + fi + + if [[ -z $nodeIndex ]]; then + echo nodeIndex not specified + exit 1 + fi + echo "--- Starting $nodeType: $ipAddress" echo "start log: $logFile" ( @@ -408,8 +433,10 @@ startNode() { \"$remoteExternalPrimordialAccountsFile\" \ \"$stakeNodesInGenesisBlock\" \ $nodeIndex \ + $numBenchTpsClients \"$benchTpsExtraArgs\" \ + $numBenchExchangeClients \"$benchExchangeExtraArgs\" \ \"$genesisOptions\" \ - $maybeNoSnapshot \ + \"$maybeNoSnapshot $maybeSkipLedgerVerify\" \ " ) >> "$logFile" 2>&1 & declare pid=$! @@ -495,7 +522,35 @@ deployUpdate() { ) || ok=false $ok || exit 1 done +} +getNodeType() { + echo "getNodeType: $nodeAddress" + [[ -n $nodeAddress ]] || { + echo "Error: nodeAddress not set" + exit 1 + } + nodeIndex=0 # <-- global + nodeType=validator # <-- global + + for ipAddress in "${fullnodeIpList[@]}" b "${blockstreamerIpList[@]}" r "${replicatorIpList[@]}"; do + if [[ $ipAddress = b ]]; then + nodeType=blockstreamer + continue + elif [[ $ipAddress = r ]]; then + nodeType=replicator + continue + fi + + if [[ $ipAddress = "$nodeAddress" ]]; then + echo "getNodeType: $nodeType ($nodeIndex)" + return + fi + ((nodeIndex = nodeIndex + 1)) + done + + echo "Error: Unknown node: $nodeAddress" + exit 1 } start() { @@ -539,23 +594,14 @@ start() { fi declare bootstrapLeader=true - declare nodeType=validator - declare loopCount=0 - for ipAddress in "${fullnodeIpList[@]}" b "${blockstreamerIpList[@]}" r "${replicatorIpList[@]}"; do - if [[ $ipAddress = b ]]; then - nodeType=blockstreamer - continue - elif [[ $ipAddress = r ]]; then - nodeType=replicator - continue - fi - if $updateNodes; then - stopNode "$ipAddress" true - fi + for nodeAddress in "${fullnodeIpList[@]}" "${blockstreamerIpList[@]}" "${replicatorIpList[@]}"; do + nodeType= + nodeIndex= + getNodeType if $bootstrapLeader; then SECONDS=0 declare bootstrapNodeDeployTime= - startBootstrapLeader "$ipAddress" "$netLogDir/bootstrap-leader-$ipAddress.log" $loopCount + startBootstrapLeader "$nodeAddress" $nodeIndex "$netLogDir/bootstrap-leader-$ipAddress.log" bootstrapNodeDeployTime=$SECONDS $metricsWriteDatapoint "testnet-deploy net-bootnode-leader-started=1" @@ -563,15 +609,18 @@ start() { SECONDS=0 pids=() else - startNode "$ipAddress" $nodeType $loopCount + startNode "$ipAddress" $nodeType $nodeIndex # Stagger additional node start time. If too many nodes start simultaneously # the bootstrap node gets more rsync requests from the additional nodes than # it can handle. - ((loopCount++ % 2 == 0)) && sleep 2 + if ((nodeIndex % 2 == 0)); then + sleep 2 + fi fi done + for pid in "${pids[@]}"; do declare ok=true wait "$pid" || ok=false @@ -723,10 +772,21 @@ stop) stop ;; stopnode) + if [[ -z $nodeAddress ]]; then + usage "node address (-i) not specified" + exit 1 + fi stopNode "$nodeAddress" true ;; startnode) - startNode "$nodeAddress" validator + if [[ -z $nodeAddress ]]; then + usage "node address (-i) not specified" + exit 1 + fi + nodeType= + nodeIndex= + getNodeType + startNode "$nodeAddress" $nodeType $nodeIndex ;; logs) fetchRemoteLog() { diff --git a/net/remote/remote-node.sh b/net/remote/remote-node.sh index d75d889654..3f3dafa70e 100755 --- a/net/remote/remote-node.sh +++ b/net/remote/remote-node.sh @@ -19,7 +19,7 @@ benchTpsExtraArgs="${12}" numBenchExchangeClients="${13}" benchExchangeExtraArgs="${14}" genesisOptions="${15}" -noSnapshot="${16}" +extraNodeArgs="${16}" set +x export RUST_LOG @@ -171,7 +171,8 @@ local|tar) args+=(--no-airdrop) fi args+=(--init-complete-file "$initCompleteFile") - args+=("$noSnapshot") + # shellcheck disable=SC2206 # Don't want to double quote $extraNodeArgs + args+=($extraNodeArgs) nohup ./multinode-demo/validator.sh --bootstrap-leader "${args[@]}" > fullnode.log 2>&1 & waitForNodeToInit ;; @@ -249,7 +250,8 @@ local|tar) fi args+=(--init-complete-file "$initCompleteFile") - args+=("$noSnapshot") + # shellcheck disable=SC2206 # Don't want to double quote $extraNodeArgs + args+=($extraNodeArgs) nohup ./multinode-demo/validator.sh "${args[@]}" > fullnode.log 2>&1 & waitForNodeToInit ;; @@ -267,7 +269,8 @@ local|tar) if [[ $skipSetup != true ]]; then ./multinode-demo/clear-config.sh fi - args+=("$noSnapshot") + # shellcheck disable=SC2206 # Don't want to double quote $extraNodeArgs + args+=($extraNodeArgs) nohup ./multinode-demo/replicator.sh "${args[@]}" > fullnode.log 2>&1 & sleep 1 ;;