#!/usr/bin/env bash
#
# Starts, stops, updates, sanity-checks and collects logs from a set of remote
# nodes over ssh.  The action is selected by $command (see the dispatch at the
# bottom of the file).
#
# NOTE(review): a span of this file was lost in the copy this version was
# restored from (see usage() below).  The following names are used here but are
# not defined in the visible text; presumably they come from common.sh or from
# the lost span -- confirm against version control:
#   build, startCommon, startBootstrapLeader, command, deployMethod,
#   snapChannel, releaseChannel, tarballFilename, publicNetwork, entrypointIp,
#   skipSetup, leaderRotation, updateNodes, sanityExtraArgs, sshOptions,
#   fullnodeIpList, clientIpList, netLogDir, metricsWriteDatapoint.
set -e

here=$(dirname "$0")
SOLANA_ROOT="$(cd "$here"/..; pwd)"

# shellcheck source=net/common.sh
source "$here"/common.sh

# Prints "Error: <args>" when called with a non-empty first argument and
# records the intended exit status in $exitcode (1 with a message, else 0).
#
# NOTE(review): the text between `cat <` and `> "$logFile"` below is missing
# from the source this file was restored from.  Presumably it held the usage
# here-document, the end of this function and further definitions whose tail
# (the log-dump-and-exit handler) is what remains.  The surviving tokens are
# kept exactly as found; restore the lost span from version control.
usage() {
  exitcode=0
  if [[ -n "$1" ]]; then
    exitcode=1
    echo "Error: $*"
  fi
  cat <> "$logFile" 2>&1 || {
    cat "$logFile"
    echo "^^^ +++"
    exit 1
  }
}

# Starts a fullnode on a remote machine in the background.
# Arguments: $1 - IP address of the machine
# Globals:   appends the background job's PID to the pids array
# Outputs:   job output is appended to $netLogDir/fullnode-<ip>.log
startNode() {
  declare ipAddress=$1
  declare logFile="$netLogDir/fullnode-$ipAddress.log"
  echo "--- Starting fullnode: $ipAddress"
  echo "start log: $logFile"
  (
    set -x
    startCommon "$ipAddress"
    ssh "${sshOptions[@]}" -n "$ipAddress" \
      "./solana/net/remote/remote-node.sh \
         $deployMethod \
         fullnode \
         $publicNetwork \
         $entrypointIp \
         ${#fullnodeIpList[@]} \
         \"$RUST_LOG\" \
         $skipSetup \
         $leaderRotation \
      "
  ) >> "$logFile" 2>&1 &
  declare pid=$!
  # start() only knows the PID when a background job fails, so expose the log
  # under a PID-based name as well.
  ln -sfT "fullnode-$ipAddress.log" "$netLogDir/fullnode-$pid.log"
  pids+=("$pid")
}

# Starts a client on a remote machine and waits for the ssh command to return.
# Arguments: $1 - IP address of the machine
#            $2 - local log file that the command output is appended to
# On failure the log is dumped to stdout and the script exits with status 1.
startClient() {
  declare ipAddress=$1
  declare logFile="$2"
  echo "--- Starting client: $ipAddress"
  echo "start log: $logFile"
  (
    set -x
    startCommon "$ipAddress"
    ssh "${sshOptions[@]}" -f "$ipAddress" \
      "./solana/net/remote/remote-client.sh $deployMethod $entrypointIp \"$RUST_LOG\""
  ) >> "$logFile" 2>&1 || {
    cat "$logFile"
    echo "^^^ +++"
    exit 1
  }
}

# Runs remote-sanity.sh on the first fullnode.  The begin and complete
# datapoints are both emitted whether or not the check passes; the script
# exits with status 1 afterwards if the check failed.
sanity() {
  declare ok=true

  echo "--- Sanity"
  $metricsWriteDatapoint "testnet-deploy net-sanity-begin=1"

  declare host=${fullnodeIpList[0]}
  (
    set -x
    # shellcheck disable=SC2029
    # remote-client.sh args are expanded on client side intentionally
    ssh "${sshOptions[@]}" "$host" \
      "./solana/net/remote/remote-sanity.sh $sanityExtraArgs \"$RUST_LOG\""
  ) || ok=false

  $metricsWriteDatapoint "testnet-deploy net-sanity-complete=1"
  $ok || exit 1
}

# Deploys the network: obtains the artifact for $deployMethod, starts the
# bootstrap leader on the first fullnode, starts the remaining fullnodes in
# the background, runs the sanity check, starts the clients, then reports the
# deployed version and how long each phase took.  When $updateNodes is true,
# each node is stopped before it is (re)started.
start() {
  case $deployMethod in
  snap)
    if [[ -n $snapChannel ]]; then
      rm -f "$SOLANA_ROOT"/solana_*.snap
      if [[ $(uname) != Linux ]]; then
        # No local snap command is assumed off Linux; download inside an
        # Ubuntu container instead.
        (
          set -x
          SOLANA_DOCKER_RUN_NOSETUID=1 "$SOLANA_ROOT"/ci/docker-run.sh ubuntu:18.04 bash -c "
            set -ex;
            apt-get -qq update;
            apt-get -qq -y install snapd;
            until snap download --channel=$snapChannel solana; do
              sleep 1;
            done
          "
        )
      else
        (
          cd "$SOLANA_ROOT"
          until snap download --channel="$snapChannel" solana; do
            sleep 1
          done
        )
      fi
      snapFilename="$(echo "$SOLANA_ROOT"/solana_*.snap)"
      [[ -r $snapFilename ]] || {
        echo "Error: Snap not readable: $snapFilename"
        exit 1
      }
    fi
    ;;
  tar)
    if [[ -n $releaseChannel ]]; then
      rm -f "$SOLANA_ROOT"/solana-release.tar.bz2
      (
        set -x
        curl -o "$SOLANA_ROOT"/solana-release.tar.bz2 http://solana-release.s3.amazonaws.com/"$releaseChannel"/solana-release.tar.bz2
      )
      tarballFilename="$SOLANA_ROOT"/solana-release.tar.bz2
    fi
    (
      set -x
      rm -rf "$SOLANA_ROOT"/solana-release
      (cd "$SOLANA_ROOT"; tar jxv) < "$tarballFilename"
      cat "$SOLANA_ROOT"/solana-release/version.txt
    )
    ;;
  local)
    build
    ;;
  *)
    usage "Internal error: invalid deployMethod: $deployMethod"
    ;;
  esac

  echo "Deployment started at $(date)"
  if $updateNodes; then
    $metricsWriteDatapoint "testnet-deploy net-update-begin=1"
  else
    $metricsWriteDatapoint "testnet-deploy net-start-begin=1"
  fi

  # The first entry of fullnodeIpList becomes the bootstrap leader and is
  # started synchronously; every later entry is started in the background.
  bootstrapLeader=true
  for ipAddress in "${fullnodeIpList[@]}"; do
    if $updateNodes; then
      stopNode "$ipAddress"
    fi
    if $bootstrapLeader; then
      SECONDS=0
      declare bootstrapNodeDeployTime=
      startBootstrapLeader "$ipAddress" "$netLogDir/bootstrap-leader-$ipAddress.log"
      bootstrapNodeDeployTime=$SECONDS
      $metricsWriteDatapoint "testnet-deploy net-bootnode-leader-started=1"

      bootstrapLeader=false
      SECONDS=0
      pids=()
      loopCount=0
    else
      startNode "$ipAddress"

      # Stagger additional node start time. If too many nodes start simultaneously
      # the bootstrap node gets more rsync requests from the additional nodes than
      # it can handle.
      ((loopCount++ % 2 == 0)) && sleep 2
    fi
  done

  # Wait for every background fullnode start; dump the log of the first one
  # that failed and abort.
  for pid in "${pids[@]}"; do
    declare ok=true
    wait "$pid" || ok=false
    if ! $ok; then
      cat "$netLogDir/fullnode-$pid.log"
      echo "^^^ +++"
      exit 1
    fi
  done

  $metricsWriteDatapoint "testnet-deploy net-fullnodes-started=1"
  additionalNodeDeployTime=$SECONDS

  if $updateNodes; then
    for ipAddress in "${clientIpList[@]}"; do
      stopNode "$ipAddress"
    done
  fi
  sanity

  SECONDS=0
  for ipAddress in "${clientIpList[@]}"; do
    startClient "$ipAddress" "$netLogDir/client-$ipAddress.log"
  done
  clientDeployTime=$SECONDS

  if $updateNodes; then
    $metricsWriteDatapoint "testnet-deploy net-update-complete=1"
  else
    $metricsWriteDatapoint "testnet-deploy net-start-complete=1"
  fi

  declare networkVersion=unknown
  case $deployMethod in
  snap)
    # Take the second space-separated field of the "installed:" line.
    IFS=' ' read -r _ networkVersion _ < <(
      ssh "${sshOptions[@]}" "${fullnodeIpList[0]}" \
        "snap info solana | grep \"^installed:\""
    )
    # Drop the first "0+git." occurrence from the reported version.
    networkVersion=${networkVersion/0+git./}
    ;;
  tar)
    networkVersion="$(
      tail -n1 "$SOLANA_ROOT"/solana-release/version.txt || echo "tar-unknown"
    )"
    ;;
  local)
    networkVersion="$(git rev-parse HEAD || echo local-unknown)"
    ;;
  *)
    usage "Internal error: invalid deployMethod: $deployMethod"
    ;;
  esac
  # Only the first nine characters of the version are reported.
  $metricsWriteDatapoint "testnet-deploy version=\"${networkVersion:0:9}\""

  echo
  echo "+++ Deployment Successful"
  echo "Bootstrap leader deployment took $bootstrapNodeDeployTime seconds"
  echo "Additional fullnode deployment (${#fullnodeIpList[@]} instances) took $additionalNodeDeployTime seconds"
  echo "Client deployment (${#clientIpList[@]} instances) took $clientDeployTime seconds"
  echo "Network start logs in $netLogDir:"
  ls -l "$netLogDir"
}

# Stops whatever is running on a remote machine.
# Arguments: $1 - IP address of the machine
# Best effort: a failure of the remote command is ignored (|| true).
stopNode() {
  local ipAddress=$1
  echo "--- Stopping node: $ipAddress"
  (
    set -x
    # shellcheck disable=SC2029
    # It's desired that PS4 be expanded on the client side
    ssh "${sshOptions[@]}" "$ipAddress" "
      PS4=\"$PS4\"
      set -x
      if snap list solana; then
        sudo snap set solana mode=
      fi
      ! tmux list-sessions || tmux kill-session
      for pid in solana/{net-stats,oom-monitor}.pid; do
        pgid=\$(ps opgid= \$(cat \$pid) | tr -d '[:space:]')
        sudo kill -- -\$pgid
      done
      for pattern in solana- remote-; do
        pkill -9 \$pattern
      done
    "
  ) || true
}

# Stops every fullnode and client and reports how long that took.
stop() {
  SECONDS=0
  $metricsWriteDatapoint "testnet-deploy net-stop-begin=1"

  for ipAddress in "${fullnodeIpList[@]}" "${clientIpList[@]}"; do
    stopNode "$ipAddress"
  done

  $metricsWriteDatapoint "testnet-deploy net-stop-complete=1"
  echo "Stopping nodes took $SECONDS seconds"
}

# Command dispatch.
case $command in
restart)
  stop
  start
  ;;
start)
  start
  ;;
update)
  $leaderRotation || {
    echo Warning: unable to update because leader rotation is disabled
    exit 1
  }
  skipSetup=true
  updateNodes=true
  start
  ;;
sanity)
  sanity
  ;;
stop)
  stop
  ;;
logs)
  # Copies solana/<log>.log from a machine into $netLogDir, giving up after
  # 30 seconds.  A failed fetch is reported but does not abort the script.
  # Arguments: $1 - IP address of the machine, $2 - log name without ".log"
  fetchRemoteLog() {
    declare ipAddress=$1
    declare log=$2
    echo "--- fetching $log from $ipAddress"
    (
      set -x
      timeout 30s scp "${sshOptions[@]}" \
        "$ipAddress":solana/"$log".log "$netLogDir"/remote-"$log"-"$ipAddress".log
    ) || echo "failed to fetch log"
  }
  fetchRemoteLog "${fullnodeIpList[0]}" drone
  for ipAddress in "${fullnodeIpList[@]}"; do
    fetchRemoteLog "$ipAddress" fullnode
  done
  for ipAddress in "${clientIpList[@]}"; do
    fetchRemoteLog "$ipAddress" client
  done
  ;;
*)
  echo "Internal error: Unknown command: $command"
  exit 1
  ;;
esac