#!/usr/bin/env bash
#
# Drives a set of remote nodes over ssh: starts, stops, live-updates and
# sanity-checks them and fetches their logs. The action is selected by
# $command in the case statement at the bottom of the file.
#
# NOTE(review): this file arrived with its line structure collapsed and with a
# span missing in the middle of usage() (see the note there). The following
# names are used below but are not defined anywhere in the visible text:
# build, startCommon, startBootstrapLeader, annotateBlockexplorerUrl, and the
# variables command, deployMethod, releaseChannel, tarballFilename,
# updateManifestKeypairFile, skipSetup, updateNodes, numClients,
# numClientsRequested, numBenchTpsClients, sanityExtraArgs, genesisOptions,
# failOnValidatorBootupFailure, benchTpsExtraArgs, benchExchangeExtraArgs.
# They presumably come from the missing span and/or common.sh -- confirm before
# merging.
set -e

here=$(dirname "$0")
SOLANA_ROOT="$(cd "$here"/..; pwd)"

# shellcheck source=net/common.sh
source "$here"/common.sh

# Prints "Error: <message>" when a message is given. exitcode is set to 1 when
# a message is supplied and to 0 otherwise.
#
# NOTE(review): everything from `cat <` onward is extraction damage. The
# original almost certainly had a heredoc with the help text here, and the
# `> "$logFile" 2>&1 || { ... }` tail looks like the end of a different
# function whose beginning was lost. The visible tokens are kept unchanged
# rather than guessed at; restore this region from version control.
usage() {
  exitcode=0
  if [[ -n "$1" ]]; then
    exitcode=1
    echo "Error: $*"
  fi
  cat <> "$logFile" 2>&1 || {
    cat "$logFile"
    echo "^^^ +++"
    exit 1
  }
}

# Starts remote-node.sh on a node in the background.
#
# Arguments: $1 - IP address of the node
#            $2 - node type passed through to remote-node.sh
# Globals:   appends the background PID to pids; reads netLogDir, sshOptions
#            and the deployment settings forwarded on the remote command line
# Outputs:   progress lines on stdout; remote output is appended to
#            $netLogDir/fullnode-<ip>.log
startNode() {
  declare ipAddress=$1
  declare nodeType=$2
  declare logFile="$netLogDir/fullnode-$ipAddress.log"

  echo "--- Starting $nodeType: $ipAddress"
  echo "start log: $logFile"
  (
    set -x
    startCommon "$ipAddress"
    # shellcheck disable=SC2029 # remote-node.sh args are expanded on client side intentionally
    ssh "${sshOptions[@]}" -n "$ipAddress" \
      "./solana/net/remote/remote-node.sh \
         $deployMethod \
         $nodeType \
         $entrypointIp \
         $((${#fullnodeIpList[@]} + ${#blockstreamerIpList[@]})) \
         \"$RUST_LOG\" \
         $skipSetup \
         $failOnValidatorBootupFailure \
         \"$genesisOptions\" \
      "
  ) >> "$logFile" 2>&1 &
  declare pid=$!

  # The PID-named symlink lets the wait loop in start() find this node's log
  # knowing only the PID of the job that failed.
  ln -sf "fullnode-$ipAddress.log" "$netLogDir/fullnode-$pid.log"
  pids+=("$pid")
}

# Starts remote-client.sh on a client machine and exits the script with
# status 1 (after dumping the log) if that fails.
#
# Arguments: $1 - IP address of the client machine
#            $2 - client program to run (also used in the log file name)
startClient() {
  declare ipAddress=$1
  declare clientToRun="$2"
  declare logFile="$netLogDir/client-$clientToRun-$ipAddress.log"

  echo "--- Starting client: $ipAddress - $clientToRun"
  echo "start log: $logFile"
  (
    set -x
    startCommon "$ipAddress"
    # shellcheck disable=SC2029 # remote-client.sh args are expanded on client side intentionally
    ssh "${sshOptions[@]}" -f "$ipAddress" \
      "./solana/net/remote/remote-client.sh $deployMethod $entrypointIp \
      $clientToRun \"$RUST_LOG\" \"$benchTpsExtraArgs\" \"$benchExchangeExtraArgs\""
  ) >> "$logFile" 2>&1 || {
    cat "$logFile"
    echo "^^^ +++"
    exit 1
  }
}

# Runs remote-sanity.sh on the first fullnode and, when at least one
# blockstreamer is configured, a reduced check on the first blockstreamer.
# Exits the script with status 1 as soon as either check fails.
sanity() {
  $metricsWriteDatapoint "testnet-deploy net-sanity-begin=1"

  declare bootstrapLeader=${fullnodeIpList[0]}
  declare blockstreamer=${blockstreamerIpList[0]}

  annotateBlockexplorerUrl

  echo "--- Sanity: $bootstrapLeader"
  (
    set -x
    # shellcheck disable=SC2029 # remote-sanity.sh args are expanded on client side intentionally
    ssh "${sshOptions[@]}" "$bootstrapLeader" \
      "./solana/net/remote/remote-sanity.sh $sanityExtraArgs \"$RUST_LOG\""
  ) || exit 1

  if [[ -n $blockstreamer ]]; then
    # If there's a blockstreamer node run a reduced sanity check on it as well
    echo "--- Sanity: $blockstreamer"
    (
      set -x
      # shellcheck disable=SC2029 # remote-sanity.sh args are expanded on client side intentionally
      ssh "${sshOptions[@]}" "$blockstreamer" \
        "./solana/net/remote/remote-sanity.sh $sanityExtraArgs -o noLedgerVerify -o noValidatorSanity \"$RUST_LOG\""
    ) || exit 1
  fi

  $metricsWriteDatapoint "testnet-deploy net-sanity-complete=1"
}

# Copies the update manifest keypair to the first fullnode and runs
# remote-deploy-update.sh there. Does nothing when no keypair file is
# configured. Exits 1 when the deploy method is not "tar", when
# updateDownloadUrl is empty, or when the copy or the remote script fails.
deployUpdate() {
  if [[ -z $updateManifestKeypairFile ]]; then
    return
  fi

  # Preconditions; both are established by the tar branch of start().
  [[ $deployMethod = tar ]] || exit 1
  [[ -n $updateDownloadUrl ]] || exit 1

  declare bootstrapLeader=${fullnodeIpList[0]}

  echo "--- Deploying solana-install update: $updateDownloadUrl"
  (
    set -x
    timeout 30s scp "${sshOptions[@]}" \
      "$updateManifestKeypairFile" "$bootstrapLeader:solana/update_manifest_keypair.json"

    # shellcheck disable=SC2029 # remote-deploy-update.sh args are expanded on client side intentionally
    ssh "${sshOptions[@]}" "$bootstrapLeader" \
      "./solana/net/remote/remote-deploy-update.sh $updateDownloadUrl \"$RUST_LOG\""
  ) || exit 1
}

# Deploys and starts the whole network: prepares the release (tarball or local
# build), starts the first fullnode as bootstrap leader, then the remaining
# fullnodes and blockstreamers in the background, runs the sanity check,
# starts the clients, reports the deployed version and prints timing
# information. When updateNodes is true each node is stopped before it is
# restarted.
start() {
  case $deployMethod in
  tar)
    if [[ -n $releaseChannel ]]; then
      rm -f "$SOLANA_ROOT"/solana-release.tar.bz2
      updateDownloadUrl=http://release.solana.com/"$releaseChannel"/solana-release-x86_64-unknown-linux-gnu.tar.bz2
      (
        set -x
        # --fail: stop here on an HTTP error instead of saving the error page
        # as the tarball and failing later inside tar.
        curl --fail -o "$SOLANA_ROOT"/solana-release.tar.bz2 "$updateDownloadUrl"
      )
      tarballFilename="$SOLANA_ROOT"/solana-release.tar.bz2
    elif [[ -n $updateManifestKeypairFile ]]; then
      echo "Error: -i argument was provided but -t was not"
      exit 1
    fi
    (
      set -x
      rm -rf "$SOLANA_ROOT"/solana-release
      (cd "$SOLANA_ROOT"; tar jxv) < "$tarballFilename"
      cat "$SOLANA_ROOT"/solana-release/version.yml
    )
    ;;
  local)
    build
    ;;
  *)
    usage "Internal error: invalid deployMethod: $deployMethod"
    ;;
  esac

  echo "Deployment started at $(date)"
  if $updateNodes; then
    $metricsWriteDatapoint "testnet-deploy net-update-begin=1"
  else
    $metricsWriteDatapoint "testnet-deploy net-start-begin=1"
  fi

  declare bootstrapLeader=true
  declare nodeType=fullnode
  declare loopCount=0
  declare bootstrapNodeDeployTime=
  # The literal "-" separates the fullnode addresses from the blockstreamer
  # addresses so one loop can start both kinds of node.
  for ipAddress in "${fullnodeIpList[@]}" - "${blockstreamerIpList[@]}"; do
    if [[ $ipAddress = - ]]; then
      nodeType=blockstreamer
      continue
    fi
    if $updateNodes; then
      stopNode "$ipAddress" true
    fi
    if $bootstrapLeader; then
      # The first address is started in the foreground as the bootstrap leader
      # and timed separately from the rest.
      SECONDS=0
      startBootstrapLeader "$ipAddress" "$netLogDir/bootstrap-leader-$ipAddress.log"
      bootstrapNodeDeployTime=$SECONDS
      $metricsWriteDatapoint "testnet-deploy net-bootnode-leader-started=1"

      bootstrapLeader=false
      SECONDS=0
      pids=()
    else
      startNode "$ipAddress" "$nodeType"

      # Stagger additional node start time. If too many nodes start simultaneously
      # the bootstrap node gets more rsync requests from the additional nodes than
      # it can handle.
      if ((loopCount++ % 2 == 0)); then
        sleep 2
      fi
    fi
  done

  for pid in "${pids[@]}"; do
    if ! wait "$pid"; then
      echo "+++ fullnode failed to start"
      cat "$netLogDir/fullnode-$pid.log"
      if $failOnValidatorBootupFailure; then
        exit 1
      else
        echo "Failure is non-fatal"
      fi
    fi
  done

  $metricsWriteDatapoint "testnet-deploy net-fullnodes-started=1"
  additionalNodeDeployTime=$SECONDS

  annotateBlockexplorerUrl

  if $updateNodes; then
    for ipAddress in "${clientIpList[@]}"; do
      stopNode "$ipAddress" true
    done
  fi
  sanity

  SECONDS=0
  # The first numBenchTpsClients clients run bench-tps, the rest bench-exchange.
  for ((i = 0; i < numClients && i < numClientsRequested; i++)); do
    if ((i < numBenchTpsClients)); then
      startClient "${clientIpList[$i]}" "solana-bench-tps"
    else
      startClient "${clientIpList[$i]}" "solana-bench-exchange"
    fi
  done
  clientDeployTime=$SECONDS

  if $updateNodes; then
    $metricsWriteDatapoint "testnet-deploy net-update-complete=1"
  else
    $metricsWriteDatapoint "testnet-deploy net-start-complete=1"
  fi

  declare networkVersion=unknown
  case $deployMethod in
  tar)
    # Second space-separated field of the first "commit: " line of the
    # extracted version.yml; "tar-unknown" if the pipeline fails.
    networkVersion="$(
      (
        set -o pipefail
        grep "^commit: " "$SOLANA_ROOT"/solana-release/version.yml | head -n1 | cut -d' ' -f2
      ) || echo "tar-unknown"
    )"
    ;;
  local)
    networkVersion="$(git rev-parse HEAD || echo local-unknown)"
    ;;
  *)
    usage "Internal error: invalid deployMethod: $deployMethod"
    ;;
  esac
  # Only the first nine characters of the version are reported.
  $metricsWriteDatapoint "testnet-deploy version=\"${networkVersion:0:9}\""

  deployUpdate

  echo
  echo "+++ Deployment Successful"
  echo "Bootstrap leader deployment took $bootstrapNodeDeployTime seconds"
  echo "Additional fullnode deployment (${#fullnodeIpList[@]} full nodes, ${#blockstreamerIpList[@]} blockstreamer nodes) took $additionalNodeDeployTime seconds"
  echo "Client deployment (${#clientIpList[@]} instances) took $clientDeployTime seconds"
  echo "Network start logs in $netLogDir:"
  ls -l "$netLogDir"
}

# Stops everything this tooling runs on one machine: the tmux session, the
# process groups recorded in the net-stats/oom-monitor pid files, and any
# process matching node, solana- or remote-.
#
# Arguments: $1 - IP address of the machine
#            $2 - "true" to wait for the remote stop to finish before
#                 returning, "false" to leave it running in the background and
#                 append its PID to pids for the caller to wait on
#
# NOTE(review): in blocking mode a non-zero status from the remote script
# propagates through `wait` and, under `set -e`, aborts the script, whereas
# stop() ignores such failures with `|| true`. Confirm that difference is
# intended.
stopNode() {
  declare ipAddress=$1
  declare block=$2
  declare logFile="$netLogDir/stop-fullnode-$ipAddress.log"

  echo "--- Stopping node: $ipAddress"
  echo "stop log: $logFile"
  (
    set -x
    # shellcheck disable=SC2029 # It's desired that PS4 be expanded on the client side
    ssh "${sshOptions[@]}" "$ipAddress" "
      PS4=\"$PS4\"
      set -x
      ! tmux list-sessions || tmux kill-session
      for pid in solana/{net-stats,oom-monitor}.pid; do
        pgid=\$(ps opgid= \$(cat \$pid) | tr -d '[:space:]')
        sudo kill -- -\$pgid
      done
      for pattern in node solana- remote-; do
        pkill -9 \$pattern
      done
    "
  ) >> "$logFile" 2>&1 &
  declare pid=$!

  ln -sf "stop-fullnode-$ipAddress.log" "$netLogDir/stop-fullnode-$pid.log"
  if $block; then
    wait "$pid"
  else
    pids+=("$pid")
  fi
}

# Stops every fullnode, blockstreamer and client in parallel and waits for all
# of the stop jobs to finish; individual failures are ignored.
stop() {
  SECONDS=0
  $metricsWriteDatapoint "testnet-deploy net-stop-begin=1"

  declare loopCount=0
  pids=()
  for ipAddress in "${fullnodeIpList[@]}" "${blockstreamerIpList[@]}" "${clientIpList[@]}"; do
    stopNode "$ipAddress" false

    # Stagger additional node stop time to avoid too many concurrent ssh
    # sessions
    if ((loopCount++ % 4 == 0)); then
      sleep 2
    fi
  done

  echo "--- Waiting for nodes to finish stopping"
  for pid in "${pids[@]}"; do
    echo -n "$pid "
    wait "$pid" || true
  done
  echo

  $metricsWriteDatapoint "testnet-deploy net-stop-complete=1"
  echo "Stopping nodes took $SECONDS seconds"
}

case $command in
restart)
  stop
  start
  ;;
start)
  start
  ;;
update)
  # A live update reuses start() with remote setup skipped and with each node
  # stopped right before it is restarted.
  skipSetup=true
  updateNodes=true
  start
  ;;
sanity)
  sanity
  ;;
stop)
  stop
  ;;
logs)
  # Copies solana/<log>.log from a machine into $netLogDir; a failed copy is
  # reported and otherwise ignored.
  fetchRemoteLog() {
    declare ipAddress=$1
    declare log=$2
    echo "--- fetching $log from $ipAddress"
    (
      set -x
      timeout 30s scp "${sshOptions[@]}" \
        "$ipAddress":solana/"$log".log "$netLogDir"/remote-"$log"-"$ipAddress".log
    ) || echo "failed to fetch log"
  }
  fetchRemoteLog "${fullnodeIpList[0]}" drone
  for ipAddress in "${fullnodeIpList[@]}"; do
    fetchRemoteLog "$ipAddress" fullnode
  done
  for ipAddress in "${clientIpList[@]}"; do
    fetchRemoteLog "$ipAddress" client
  done
  for ipAddress in "${blockstreamerIpList[@]}"; do
    fetchRemoteLog "$ipAddress" fullnode
  done
  ;;
*)
  # Pass the message to usage, as the deployMethod error branches do: usage
  # sets exitcode=1 only when it is given a message. The explicit exit is a
  # backstop in case usage returns.
  usage "Internal error: Unknown command: $command"
  exit 1
  ;;
esac