solana/net/net.sh

361 lines
8.4 KiB
Bash
Raw Normal View History

#!/bin/bash -e
#
# Start, stop, restart or sanity check a configured testnet.
#

# The -e on the shebang line is lost when the script is run as
# `bash net.sh`, so enable errexit explicitly as well.
set -e

here=$(dirname "$0")

# Use && so that a failing cd aborts the script instead of silently recording
# the current directory as the repository root.
SOLANA_ROOT="$(cd "$here"/.. && pwd)"

# shellcheck source=net/common.sh
source "$here"/common.sh
# Print the command-line help and exit.
#
# Arguments: $* - optional error message.  When $1 is non-empty the message
#                 is printed as "Error: <message>" ahead of the help text.
# Exits:     1 when an error message was supplied, 0 otherwise.
usage() {
  declare exitcode=0
  if [[ -n ${1:-} ]]; then
    exitcode=1
    echo "Error: $*"
  fi
  cat <<EOF
usage: $0 [start|stop|restart|sanity] [command-specific options]

Operate a configured testnet

 start    - Start the network
 sanity   - Sanity check the network
 stop     - Stop the network
 restart  - Shortcut for stop then start

 start-specific options:
   -S [snapFilename]           - Deploy the specified Snap file
   -s edge|beta|stable         - Deploy the latest Snap on the specified Snap release channel
   -f [cargoFeatures]          - List of |cargo --features=| to activate
                                 (ignored if -s or -S is specified)

   Note: if RUST_LOG is set in the environment it will be propagated into the
         network nodes.

 sanity/start-specific options:
   -o noLedgerVerify    - Skip ledger verification
   -o noValidatorSanity - Skip validator sanity
   -o rejectExtraNodes  - Require the exact number of nodes

 stop-specific options:
   none

EOF
  exit $exitcode
}
# Defaults for everything the command-line options can override.
deployMethod=local
snapChannel=
snapFilename=
cargoFeatures=
sanityExtraArgs=

# The first positional argument selects the command; show the help text when
# it is missing.  The remaining arguments are left for option parsing.
command=$1
if [[ -z $command ]]; then
  usage
fi
shift
# Parse the command-specific options that follow the command name.
while getopts "h?S:s:o:f:" opt; do
  case $opt in
  h | \?)
    usage
    ;;
  S)
    # Deploy an explicit Snap file; it must exist locally.
    snapFilename=$OPTARG
    [[ -f $snapFilename ]] || usage "Snap not readable: $snapFilename"
    deployMethod=snap
    ;;
  s)
    # Deploy the latest Snap from a release channel.
    case $OPTARG in
    edge | beta | stable)
      snapChannel=$OPTARG
      deployMethod=snap
      ;;
    *)
      usage "Invalid snap channel: $OPTARG"
      ;;
    esac
    ;;
  f)
    cargoFeatures=$OPTARG
    ;;
  o)
    # Recognised -o values are accumulated and later passed to the remote
    # sanity script.
    case $OPTARG in
    noLedgerVerify | noValidatorSanity | rejectExtraNodes)
      sanityExtraArgs="$sanityExtraArgs -o $OPTARG"
      ;;
    *)
      echo "Error: unknown option: $OPTARG"
      exit 1
      ;;
    esac
    ;;
  *)
    # usage() already prefixes its message with "Error: ".
    usage "unhandled option: $opt"
    ;;
  esac
done
# loadConfigFile is not defined in this file (presumably it comes from the
# sourced common.sh and populates the node lists used below -- TODO confirm).
loadConfigFile

# One more node than there are validators is expected on the network.
expectedNodeCount=$((1 + ${#validatorIpList[@]}))
# Build the local source tree with cargo and install it into
# $SOLANA_ROOT/farf, then report how long the build took.
#
# Globals: SOLANA_ROOT (read), cargoFeatures (read), SECONDS (reset)
build() {
  # On anything other than Linux the cargo command is prefixed with the
  # docker runner script.
  declare -a cargoPrefix=()
  if [[ $(uname) != Linux ]]; then
    cargoPrefix=(ci/docker-run.sh solanalabs/rust)
  fi

  SECONDS=0
  (
    cd "$SOLANA_ROOT"
    echo "--- Build started at $(date)"

    set -x
    # Always start from an empty install root.
    rm -rf farf
    "${cargoPrefix[@]}" cargo install --features="$cargoFeatures" --root farf
    ./scripts/install-native-programs.sh farf/
  )
  echo "Build took $SECONDS seconds"
}
# Prepare a remote node for deployment: create ~/solana and ~/.cargo/bin on
# it over ssh, then rsync fetch-perf-libs.sh, scripts, net and multinode-demo
# from $SOLANA_ROOT into ~/solana/ on the node.
#
# Arguments: $1 - address of the node
# Globals:   SOLANA_ROOT (read), sshOptions (read)
# Returns:   0 on success, non-zero as soon as any step fails
#
# Every step is checked explicitly.  Callers invoke this function as
# `startCommon ... || exit 1` or from a subshell that is itself the left-hand
# side of `||`; errexit is ignored in both contexts, so relying on `set -e`
# would let a failed step fall through to the next one.
startCommon() {
  declare ipAddress=$1

  if [[ ! -d $SOLANA_ROOT ]]; then
    echo "Error: SOLANA_ROOT is not a directory: $SOLANA_ROOT" >&2
    return 1
  fi

  ssh "${sshOptions[@]}" "$ipAddress" "mkdir -p ~/solana ~/.cargo/bin" || return 1

  rsync -vPrc -e "ssh ${sshOptions[*]}" \
    "$SOLANA_ROOT"/{fetch-perf-libs.sh,scripts,net,multinode-demo} \
    "$ipAddress":~/solana/ || return 1
}
# Deploy to the leader node and start it.
#
# Arguments: $1 - address of the leader
#            $2 - local file that receives the start log
# Globals:   deployMethod, snapFilename, SOLANA_ROOT, sshOptions,
#            publicNetwork, entrypointIp, expectedNodeCount, RUST_LOG (read)
# Exits:     1 after dumping the log to stdout when any step fails
startLeader() {
  declare ipAddress=$1
  declare logFile="$2"
  echo "--- Starting leader: $ipAddress"
  echo "start log: $logFile"

  # Deploy local binaries to leader.  Validators and clients later fetch the
  # binaries from the leader.
  #
  # The subshell is the left-hand side of `||`, so errexit is ignored inside
  # it; each deployment step therefore needs its own `|| exit 1`.
  (
    set -x
    startCommon "$ipAddress" || exit 1
    case $deployMethod in
    snap)
      rsync -vPrc -e "ssh ${sshOptions[*]}" "$snapFilename" "$ipAddress:~/solana/solana.snap" || exit 1
      ;;
    local)
      rsync -vPrc -e "ssh ${sshOptions[*]}" "$SOLANA_ROOT"/farf/bin/* "$ipAddress:~/.cargo/bin/" || exit 1
      ;;
    *)
      usage "Internal error: invalid deployMethod: $deployMethod"
      ;;
    esac

    # The remote command line is expanded locally on purpose.
    ssh "${sshOptions[@]}" -n "$ipAddress" \
      "./solana/net/remote/remote-node.sh $deployMethod leader $publicNetwork $entrypointIp $expectedNodeCount \"$RUST_LOG\""
  ) >> "$logFile" 2>&1 || {
    cat "$logFile"
    echo "^^^ +++"
    exit 1
  }
}
# Start a validator node in the background.
#
# Arguments: $1 - address of the validator
# Globals:   netLogDir, sshOptions, deployMethod, publicNetwork, entrypointIp,
#            expectedNodeCount, RUST_LOG (read); pids (appended to)
#
# The start log is written to $netLogDir/validator-<address>.log.  A symlink
# named validator-<pid>.log points at it so the log can be found again from
# the background job's pid, which is also appended to the pids array.
startValidator() {
  declare ipAddress=$1
  declare logName="validator-$ipAddress.log"
  declare logFile="$netLogDir/$logName"

  printf '%s\n' "--- Starting validator: $ipAddress"
  printf '%s\n' "start log: $logFile"

  (
    set -x
    startCommon "$ipAddress"
    ssh "${sshOptions[@]}" -n "$ipAddress" \
      "./solana/net/remote/remote-node.sh $deployMethod validator $publicNetwork $entrypointIp $expectedNodeCount \"$RUST_LOG\""
  ) >> "$logFile" 2>&1 &
  declare pid=$!

  ln -sfT "$logName" "$netLogDir/validator-$pid.log"
  pids+=("$pid")
}
# Deploy to a client node and start the client on it.
#
# Arguments: $1 - address of the client
#            $2 - local file that receives the start log
# Globals:   sshOptions, deployMethod, entrypointIp, expectedNodeCount,
#            RUST_LOG (read)
# Exits:     1 after dumping the log to stdout when any step fails
startClient() {
  declare ipAddress=$1
  declare logFile="$2"
  echo "--- Starting client: $ipAddress"
  echo "start log: $logFile"

  # The subshell is the left-hand side of `||`, so errexit is ignored inside
  # it; without the explicit check a failed startCommon would go unnoticed
  # and the client would be launched on an unprepared node.
  (
    set -x
    startCommon "$ipAddress" || exit 1

    # The remote command line is expanded locally on purpose.
    ssh "${sshOptions[@]}" -f "$ipAddress" \
      "./solana/net/remote/remote-client.sh $deployMethod $entrypointIp $expectedNodeCount \"$RUST_LOG\""
  ) >> "$logFile" 2>&1 || {
    cat "$logFile"
    echo "^^^ +++"
    exit 1
  }
}
# Run the remote sanity script on the leader, bracketed by begin/complete
# metrics datapoints.
#
# Globals: sshOptions, leaderIp, sanityExtraArgs, metricsWriteDatapoint,
#          validatorIpList (read)
# Exits:   1 when the remote sanity script fails; the "complete" datapoint is
#          written either way.
sanity() {
  declare passed=true
  # NOTE(review): this local is not referenced anywhere in this function.
  declare expectedNodeCount=$((${#validatorIpList[@]} + 1))

  echo "--- Sanity"
  $metricsWriteDatapoint "testnet-deploy net-sanity-begin=1"

  if ! (
    set -x
    # shellcheck disable=SC2029 # remote-sanity.sh args are expanded on client side intentionally
    ssh "${sshOptions[@]}" "$leaderIp" \
      "./solana/net/remote/remote-sanity.sh $sanityExtraArgs"
  ); then
    passed=false
  fi

  $metricsWriteDatapoint "testnet-deploy net-sanity-complete=1"
  if [[ $passed != true ]]; then
    exit 1
  fi
}
# Deploy and start the whole network: obtain the software (download a Snap or
# build locally), start the leader, start every validator in the background,
# run the sanity check, start the clients, then print a timing summary.
#
# Globals: deployMethod, snapChannel, SOLANA_ROOT, leaderIp, validatorIpList,
#          clientIpList, netLogDir, sshOptions, metricsWriteDatapoint (read);
#          snapFilename (written when a Snap channel is downloaded);
#          pids (reset, filled by startValidator); SECONDS (reset)
# Exits:   1 when the downloaded Snap is unreadable or a validator fails to
#          start; the helpers it calls may also exit.
start() {
  declare leaderDeployTime=
  declare validatorDeployTime=
  declare clientDeployTime=
  declare loopCount=0

  case $deployMethod in
  snap)
    if [[ -n $snapChannel ]]; then
      rm -f "$SOLANA_ROOT"/solana_*.snap
      if [[ $(uname) != Linux ]]; then
        # Off Linux, run the Snap download inside an Ubuntu container.
        (
          set -x
          SOLANA_DOCKER_RUN_NOSETUID=1 "$SOLANA_ROOT"/ci/docker-run.sh ubuntu:18.04 bash -c "
            set -ex;
            apt-get -qq update;
            apt-get -qq -y install snapd;
            snap download --channel=$snapChannel solana;
          "
        )
      else
        (
          cd "$SOLANA_ROOT"
          snap download --channel="$snapChannel" solana
        )
      fi
      snapFilename="$(echo "$SOLANA_ROOT"/solana_*.snap)"
      [[ -r $snapFilename ]] || {
        echo "Error: Snap not readable: $snapFilename"
        exit 1
      }
    fi
    ;;
  local)
    build
    ;;
  *)
    usage "Internal error: invalid deployMethod: $deployMethod"
    ;;
  esac

  echo "Deployment started at $(date)"
  $metricsWriteDatapoint "testnet-deploy net-start-begin=1"

  SECONDS=0
  startLeader "$leaderIp" "$netLogDir/leader-$leaderIp.log"
  leaderDeployTime=$SECONDS
  $metricsWriteDatapoint "testnet-deploy net-leader-started=1"

  SECONDS=0
  pids=()
  for ipAddress in "${validatorIpList[@]}"; do
    startValidator "$ipAddress"

    # Staggering validator startup time. If too many validators
    # bootup simultaneously, leader node gets more rsync requests
    # from the validators than it can handle.
    if ((loopCount++ % 2 == 0)); then
      sleep 2
    fi
  done

  # Every validator was started in the background; the first one that failed
  # gets its log dumped and aborts the deployment.
  for pid in "${pids[@]}"; do
    if ! wait "$pid"; then
      cat "$netLogDir/validator-$pid.log"
      echo "^^^ +++"
      exit 1
    fi
  done

  $metricsWriteDatapoint "testnet-deploy net-validators-started=1"
  validatorDeployTime=$SECONDS

  sanity

  SECONDS=0
  for ipAddress in "${clientIpList[@]}"; do
    startClient "$ipAddress" "$netLogDir/client-$ipAddress.log"
  done
  clientDeployTime=$SECONDS
  $metricsWriteDatapoint "testnet-deploy net-start-complete=1"

  if [[ $deployMethod = "snap" ]]; then
    declare networkVersion=unknown

    # read returns non-zero when the remote command prints nothing; that must
    # not abort an otherwise successful deployment under errexit, so the
    # status is ignored and the "unknown" default restored afterwards.
    IFS=' ' read -r _ networkVersion _ < <(
      ssh "${sshOptions[@]}" "$leaderIp" \
        "snap info solana | grep \"^installed:\""
    ) || true
    networkVersion=${networkVersion/0+git./}
    [[ -n $networkVersion ]] || networkVersion=unknown

    $metricsWriteDatapoint "testnet-deploy version=\"$networkVersion\""
  fi

  echo
  echo "+++ Deployment Successful"
  echo "Leader deployment took $leaderDeployTime seconds"
  echo "Validator deployment (${#validatorIpList[@]} instances) took $validatorDeployTime seconds"
  echo "Client deployment (${#clientIpList[@]} instances) took $clientDeployTime seconds"
  echo "Network start logs in $netLogDir:"
  ls -l "$netLogDir"
}
# Stop everything running on one node.  Best effort: a failure of the ssh
# session is ignored.
#
# Arguments: $1 - address of the node
# Globals:   sshOptions (read)
stopNode() {
  local ipAddress=$1

  # Script executed on the node: remove the solana Snap if it is installed,
  # kill the tmux session if there is one, then pkill -9 the listed process
  # name patterns.  \$pattern is escaped so it expands on the remote side.
  local remoteScript="
    set -x
    if snap list solana; then
      sudo snap set solana mode=
      sudo snap remove solana
    fi
    ! tmux list-sessions || tmux kill-session
    for pattern in solana- remote- oom-monitor net-stats; do
      pkill -9 \$pattern
    done
  "

  echo "--- Stopping node: $ipAddress"
  (
    set -x
    ssh "${sshOptions[@]}" "$ipAddress" "$remoteScript"
  ) || :
}
# Stop every node of the network -- the leader first, then all validators and
# clients -- bracketed by begin/complete metrics datapoints, and report how
# long it took.
#
# Globals: leaderIp, validatorIpList, clientIpList, metricsWriteDatapoint
#          (read); SECONDS (reset)
stop() {
  declare -a otherNodes=("${validatorIpList[@]}" "${clientIpList[@]}")

  SECONDS=0
  $metricsWriteDatapoint "testnet-deploy net-stop-begin=1"

  stopNode "$leaderIp"
  for ipAddress in "${otherNodes[@]}"; do
    stopNode "$ipAddress"
  done

  $metricsWriteDatapoint "testnet-deploy net-stop-complete=1"
  printf '%s\n' "Stopping nodes took $SECONDS seconds"
}
# Run the requested command.
case $command in
start)
  start
  ;;
stop)
  stop
  ;;
restart)
  # Shortcut for stop then start.
  stop
  start
  ;;
sanity)
  sanity
  ;;
*)
  echo "Internal error: Unknown command: $command"
  exit 1
  ;;
esac