#!/bin/bash
#
# Start, stop, restart, or sanity-check the nodes of a configured network.
# The leader is started first, then the validators (in parallel), then the
# clients; each remote step is driven over ssh.
#
# Exit-on-error is enabled with `set` rather than on the shebang line so that
# it also applies when the script is run as `bash <script>`.
set -e

here=$(dirname "$0")
SOLANA_ROOT="$(cd "$here"/..; pwd)"

# shellcheck source=net/common.sh
source "$here"/common.sh

# NOTE(review): this copy of the file appears to be damaged between the
# `cat <` that opens usage's output and the `> "$logFile"` redirection that
# follows it. Whatever originally sat in between is not visible here; that
# presumably includes the usage text, the code that sets $command,
# $deployMethod, $snapChannel, etc., and the build/startCommon/startLeader
# helpers that start() calls. The surviving tokens are kept verbatim below.
# Restore the missing section from version control before relying on usage().
usage() {
  exitcode=0
  if [[ -n "$1" ]]; then
    exitcode=1
    echo "Error: $*"
  fi
  cat <> "$logFile" 2>&1 || {
    cat "$logFile"
    echo "^^^ +++"
    exit 1
  }
}

# Start a validator on the given host in the background.
#
# Arguments: $1 - IP address of the validator host
# Globals:   netLogDir, sshOptions, deployMethod, publicNetwork, entrypointIp,
#            expectedNodeCount, RUST_LOG (read); pids (appended to)
#
# The remote output is appended to $netLogDir/validator-<ip>.log. A symlink
# named validator-<pid>.log is created next to it so that start(), which only
# knows the background PID, can locate the log of a failed validator.
startValidator() {
  declare ipAddress=$1
  declare logFile="$netLogDir/validator-$ipAddress.log"

  echo "--- Starting validator: $ipAddress"
  echo "start log: $logFile"
  (
    set -x
    startCommon "$ipAddress"
    ssh "${sshOptions[@]}" -n "$ipAddress" \
      "./solana/net/remote/remote-node.sh $deployMethod validator $publicNetwork $entrypointIp $expectedNodeCount \"$RUST_LOG\""
  ) >> "$logFile" 2>&1 &
  declare pid=$!

  ln -sfT "validator-$ipAddress.log" "$netLogDir/validator-$pid.log"
  pids+=("$pid")
}

# Start a client on the given host.
#
# Arguments: $1 - IP address of the client host
#            $2 - local log file that receives the start-up output
#
# On failure the log is dumped to stdout and the script exits with status 1.
startClient() {
  declare ipAddress=$1
  declare logFile="$2"

  echo "--- Starting client: $ipAddress"
  echo "start log: $logFile"
  (
    set -x
    startCommon "$ipAddress"
    ssh "${sshOptions[@]}" -f "$ipAddress" \
      "./solana/net/remote/remote-client.sh $deployMethod $entrypointIp $expectedNodeCount \"$RUST_LOG\""
  ) >> "$logFile" 2>&1 || {
    cat "$logFile"
    echo "^^^ +++"
    exit 1
  }
}

# Run the remote sanity script on the leader.
#
# The "complete" datapoint is written whether or not the check passed; the
# script then exits with status 1 if the check failed.
sanity() {
  declare expectedNodeCount=$((${#validatorIpList[@]} + 1))
  declare ok=true

  echo "--- Sanity"
  $metricsWriteDatapoint "testnet-deploy net-sanity-begin=1"
  (
    set -x
    # shellcheck disable=SC2029 # remote-sanity.sh args are expanded on the local side intentionally
    ssh "${sshOptions[@]}" "$leaderIp" \
      "./solana/net/remote/remote-sanity.sh $sanityExtraArgs"
  ) || ok=false
  $metricsWriteDatapoint "testnet-deploy net-sanity-complete=1"

  $ok || exit 1
}

# Deploy and start the whole network: leader, validators, sanity check, then
# clients. Prints per-phase timings and the log directory listing on success;
# exits with status 1 as soon as any phase fails.
start() {
  case $deployMethod in
  snap)
    if [[ -n $snapChannel ]]; then
      rm -f "$SOLANA_ROOT"/solana_*.snap
      if [[ $(uname) != Linux ]]; then
        # No local `snap` here: download the Snap from inside a container.
        (
          set -x
          SOLANA_DOCKER_RUN_NOSETUID=1 "$SOLANA_ROOT"/ci/docker-run.sh ubuntu:18.04 bash -c "
            set -ex;
            apt-get -qq update;
            apt-get -qq -y install snapd;
            snap download --channel=$snapChannel solana;
          "
        )
      else
        (
          cd "$SOLANA_ROOT"
          snap download --channel="$snapChannel" solana
        )
      fi
      snapFilename="$(echo "$SOLANA_ROOT"/solana_*.snap)"
      [[ -r $snapFilename ]] || {
        echo "Error: Snap not readable: $snapFilename"
        exit 1
      }
    fi
    ;;
  local)
    build
    ;;
  *)
    usage "Internal error: invalid deployMethod: $deployMethod"
    ;;
  esac

  echo "Deployment started at $(date)"
  $metricsWriteDatapoint "testnet-deploy net-start-begin=1"

  # SECONDS is reset before each phase and read after it to time the phase.
  SECONDS=0
  declare leaderDeployTime=
  startLeader "$leaderIp" "$netLogDir/leader-$leaderIp.log"
  leaderDeployTime=$SECONDS
  $metricsWriteDatapoint "testnet-deploy net-leader-started=1"

  SECONDS=0
  pids=()
  loopCount=0
  for ipAddress in "${validatorIpList[@]}"; do
    startValidator "$ipAddress"

    # Staggering validator startup time. If too many validators
    # bootup simultaneously, leader node gets more rsync requests
    # from the validators than it can handle.
    if ((loopCount++ % 2 == 0)); then
      sleep 2
    fi
  done

  # Reap every background validator start; the first failure dumps that
  # validator's log (found through the validator-<pid>.log symlink) and aborts.
  for pid in "${pids[@]}"; do
    if ! wait "$pid"; then
      cat "$netLogDir/validator-$pid.log"
      echo "^^^ +++"
      exit 1
    fi
  done

  $metricsWriteDatapoint "testnet-deploy net-validators-started=1"
  validatorDeployTime=$SECONDS

  sanity

  SECONDS=0
  for ipAddress in "${clientIpList[@]}"; do
    startClient "$ipAddress" "$netLogDir/client-$ipAddress.log"
  done
  clientDeployTime=$SECONDS
  $metricsWriteDatapoint "testnet-deploy net-start-complete=1"

  if [[ $deployMethod = "snap" ]]; then
    declare networkVersion=unknown
    # Split the "installed:" line on spaces and keep its second field.
    IFS=' ' read -r _ networkVersion _ < <(
      ssh "${sshOptions[@]}" "$leaderIp" \
        "snap info solana | grep \"^installed:\""
    )
    networkVersion=${networkVersion/0+git./}
    $metricsWriteDatapoint "testnet-deploy version=\"$networkVersion\""
  fi

  echo
  echo "+++ Deployment Successful"
  echo "Leader deployment took $leaderDeployTime seconds"
  echo "Validator deployment (${#validatorIpList[@]} instances) took $validatorDeployTime seconds"
  echo "Client deployment (${#clientIpList[@]} instances) took $clientDeployTime seconds"
  echo "Network start logs in $netLogDir:"
  ls -l "$netLogDir"
}

# Stop everything this script may have started on one host: remove the solana
# Snap if installed, kill the tmux session if one exists, and kill leftover
# processes by name pattern.
#
# Arguments: $1 - IP address of the host to clean up
#
# Best effort by design: a failure on the remote side is ignored so that the
# remaining nodes are still stopped.
stopNode() {
  local ipAddress=$1

  echo "--- Stopping node: $ipAddress"
  (
    set -x
    ssh "${sshOptions[@]}" "$ipAddress" "
      set -x
      if snap list solana; then
        sudo snap set solana mode=
        sudo snap remove solana
      fi
      ! tmux list-sessions || tmux kill-session
      for pattern in solana- remote- oom-monitor net-stats; do
        pkill -9 \$pattern
      done
    "
  ) || true
}

# Stop the leader, then every validator and client, one host at a time, and
# report how long it took.
stop() {
  SECONDS=0
  $metricsWriteDatapoint "testnet-deploy net-stop-begin=1"

  stopNode "$leaderIp"

  for ipAddress in "${validatorIpList[@]}" "${clientIpList[@]}"; do
    stopNode "$ipAddress"
  done

  $metricsWriteDatapoint "testnet-deploy net-stop-complete=1"
  echo "Stopping nodes took $SECONDS seconds"
}

# Dispatch on the requested command.
case $command in
restart)
  stop
  start
  ;;
start)
  start
  ;;
sanity)
  sanity
  ;;
stop)
  stop
  ;;
*)
  echo "Internal error: Unknown command: $command"
  exit 1
  ;;
esac