#!/usr/bin/env bash
#
# net/net.sh - operate a configured testnet: start, stop, restart, update,
# sanity-check and fetch logs from its nodes (see usage() for details).
#
set -e

here=$(dirname "$0")
# Resolve the repository root (the parent of this script's directory).  Using
# `&&` makes a failed `cd` fail the whole assignment instead of silently
# recording the caller's current directory.
SOLANA_ROOT="$(cd "$here"/.. && pwd)"

# NOTE(review): names used below but not defined in this file (sshOptions,
# loadConfigFile, netLogDir, ...) presumably come from common.sh -- confirm.
# shellcheck source=net/common.sh
source "$here"/common.sh
# Print the command-line help and terminate the script.
#
# Arguments: $* - optional error message.  When present it is printed with an
#                 "Error: " prefix before the help text.
# Exits:     1 when an error message was supplied, 0 otherwise.
usage() {
  local exitcode=0
  if [[ -n "$1" ]]; then
    exitcode=1
    echo "Error: $*"
  fi
  cat <<EOF
usage: $0 [start|stop|restart|sanity|update|logs|startnode|stopnode] [command-specific options]

Operate a configured testnet

 start     - Start the network
 sanity    - Sanity check the network
 stop      - Stop the network
 restart   - Shortcut for stop then start
 update    - Live update all network nodes
 logs      - Fetch remote logs from each network node
 startnode - Start an individual node (previously stopped with stopnode)
 stopnode  - Stop an individual node

 start/update-specific options:
   -T [tarFilename]                   - Deploy the specified release tarball
   -t edge|beta|stable|vX.Y.Z         - Deploy the latest tarball release for the
                                        specified release channel (edge|beta|stable) or release tag
                                        (vX.Y.Z)
   --deploy-update linux|osx|windows  - Deploy the tarball using 'solana-install deploy ...' for the
                                        given platform (multiple platforms may be specified)
                                        (-t option must be supplied as well)
   -f [cargoFeatures]                 - List of |cargo --features=| to activate
                                        (ignored if -s or -S is specified)
   -r                                 - Reuse existing node/ledger configuration from a
                                        previous |start| (ie, don't run ./multinode-demo/setup.sh).
   -d / --debug                       - Build/deploy the testnet with debug binaries
   -D /path/to/programs               - Deploy custom programs from this location
   -c clientType=numClients=extraArgs - Number of clientTypes to start.  This option can be specified
                                        more than once.  Defaults to bench-tps for all clients if not
                                        specified.
                                        Valid client types are:
                                            bench-tps
                                            bench-exchange
                                        User can optionally provide extraArgs that are transparently
                                        supplied to the client program as command line parameters.
                                        For example,
                                            -c bench-tps=2="--tx_count 25000"
                                        This will start 2 bench-tps clients, and supply "--tx_count 25000"
                                        to the bench-tps client.
   -n NUM_FULL_NODES                  - Number of fullnodes to apply command to.

   --hashes-per-tick NUM_HASHES|sleep|auto
                                      - Override the default --hashes-per-tick for the cluster
   --target-lamports-per-signature NUM_LAMPORTS
                                      - Override the default --target-lamports-per-signature for the cluster
   --no-airdrop
                                      - If set, disables airdrops.  Nodes must be funded in genesis block when airdrops are disabled.
   --lamports NUM_LAMPORTS_TO_MINT
                                      - Override the default 100000000000000 lamports minted in genesis
   --internal-nodes-stake-lamports NUM_LAMPORTS_PER_NODE
                                      - Amount to stake internal nodes.
   --internal-nodes-lamports NUM_LAMPORTS_PER_NODE
                                      - Amount to fund internal nodes in genesis block.
   --external-accounts-file FILE_PATH
                                      - A YML file with a list of account pubkeys and corresponding lamport balances in genesis block for external nodes
   --no-snapshot-fetch
                                      - If set, disables booting validators from a snapshot
   --skip-ledger-verify
                                      - If set, validators will skip verifying
                                        the ledger they already have saved to disk at
                                        boot (results in a much faster boot)
   --no-deploy
                                      - Don't deploy new software, use the
                                        existing deployment

 sanity/start/update-specific options:
   -F                   - Discard validator nodes that didn't bootup successfully
   -o noLedgerVerify    - Skip ledger verification
   -o noValidatorSanity - Skip fullnode sanity
   -o noInstallCheck    - Skip solana-install sanity
   -o rejectExtraNodes  - Require the exact number of nodes

 stop-specific options:
   none

 logs-specific options:
   none

 startnode/stopnode-specific options:
   -i [ip address]      - IP Address of the node to start or stop

Note: if RUST_LOG is set in the environment it will be propagated into the
      network nodes.
EOF
  exit $exitcode
}
# Defaults for everything the command line can override.  The option parsers
# further down assign to these globals.
releaseChannel=                       # -t
deployMethod=local                    # local | tar (-T/-t) | skip (--no-deploy)
tarballFilename=                      # -T, or the downloaded release for -t
sanityExtraArgs=                      # accumulated "-o <flag>" arguments
cargoFeatures=                        # -f
skipSetup=false                       # -r
updateNodes=false                     # set by the "update" command
customPrograms=                       # -D
updatePlatforms=                      # accumulated --deploy-update values
nodeAddress=                          # -i
numBenchTpsClients=0                  # -c bench-tps=N
numBenchExchangeClients=0             # -c bench-exchange=N
benchTpsExtraArgs=
benchExchangeExtraArgs=
failOnValidatorBootupFailure=true     # cleared by -F
genesisOptions=                       # accumulated genesis pass-through options
numFullnodesRequested=                # -n
externalPrimordialAccountsFile=       # --external-accounts-file
remoteExternalPrimordialAccountsFile=
internalNodesStakeLamports=           # --internal-nodes-stake-lamports
internalNodesLamports=                # --internal-nodes-lamports
maybeNoSnapshot=""                    # --no-snapshot-fetch
maybeSkipLedgerVerify=""              # --skip-ledger-verify
maybeDisableAirdrops=""               # --no-airdrop
buildProfile="--release"              # emptied when a debug build is requested
debugBuild=false                      # -d / --debug
# Abort via usage() unless a long option is followed by a value.
# Arguments: $1 - number of remaining arguments (option included)
#            $2 - the option name, for the error message
_requireLongOptionValue() {
  (($1 >= 2)) || usage "Option $2 requires a value"
}

# Consume every "--long" option from the argument list, recording its effect
# in the corresponding global, and collect all remaining arguments, in order,
# into the global array shortArgs for the getopts pass.
#
# The loop is driven by the argument count rather than by "$1" being
# non-empty, so an empty argument (e.g. -i "") no longer ends parsing early.
# A value-taking option given without its value is reported through usage()
# instead of failing inside `shift 2`.
parseLongOptions() {
  shortArgs=()
  while (($# > 0)); do
    case $1 in
    --hashes-per-tick | --target-lamports-per-signature | --lamports)
      # Forwarded verbatim inside genesisOptions.
      _requireLongOptionValue $# "$1"
      genesisOptions="$genesisOptions $1 $2"
      shift 2
      ;;
    --no-snapshot-fetch)
      maybeNoSnapshot="$1"
      shift 1
      ;;
    --no-deploy)
      deployMethod=skip
      shift 1
      ;;
    --skip-ledger-verify)
      maybeSkipLedgerVerify="$1"
      shift 1
      ;;
    --deploy-update)
      _requireLongOptionValue $# "$1"
      updatePlatforms="$updatePlatforms $2"
      shift 2
      ;;
    --internal-nodes-stake-lamports)
      _requireLongOptionValue $# "$1"
      internalNodesStakeLamports="$2"
      shift 2
      ;;
    --internal-nodes-lamports)
      _requireLongOptionValue $# "$1"
      internalNodesLamports="$2"
      shift 2
      ;;
    --external-accounts-file)
      _requireLongOptionValue $# "$1"
      externalPrimordialAccountsFile="$2"
      remoteExternalPrimordialAccountsFile=/tmp/external-primordial-accounts.yml
      shift 2
      ;;
    --no-airdrop)
      maybeDisableAirdrops="$1"
      shift 1
      ;;
    --debug)
      debugBuild=true
      shift 1
      ;;
    --*)
      usage "Unknown long option: $1"
      ;;
    *)
      shortArgs+=("$1")
      shift
      ;;
    esac
  done
}

# The first argument is the command; everything after it is an option.
command=$1
[[ -n $command ]] || usage
shift
parseLongOptions "$@"
# Parse the value of a -c option, read from $OPTARG, in the form
#   clientType=numClients[=extraArgs]
# and store the result in the globals for that client type.
#
# Only the first two '=' act as separators, so extraArgs may itself contain
# '=' (e.g. "--tx_count=25000") without being truncated.
# Exits with status 1 on a malformed tuple, a non-numeric count or an unknown
# client type.
getClientTypeAndNum() {
  if ! [[ $OPTARG == *'='* ]]; then
    echo "Error: Expecting tuple \"clientType=numClientType=extraArgs\" but got \"$OPTARG\""
    exit 1
  fi

  local clientType=${OPTARG%%=*}
  local remainder=${OPTARG#*=}
  local numClients=${remainder%%=*}
  local extraArgs=
  if [[ $remainder == *'='* ]]; then
    extraArgs=${remainder#*=}
  fi

  local numberRe='^[0-9]+$'
  if ! [[ $numClients =~ $numberRe ]]; then
    echo "error: numClientType must be a number but got \"$numClients\""
    exit 1
  fi

  case $clientType in
  bench-tps)
    numBenchTpsClients=$numClients
    benchTpsExtraArgs=$extraArgs
    ;;
  bench-exchange)
    numBenchExchangeClients=$numClients
    benchExchangeExtraArgs=$extraArgs
    ;;
  *)
    echo "Unknown client type: $clientType"
    exit 1
    ;;
  esac
}

# Second parsing pass: short options from the arguments collected in shortArgs.
while getopts "h?T:t:o:f:rD:c:Fn:i:d" opt "${shortArgs[@]}"; do
  case $opt in
  h | \?)
    usage
    ;;
  T)
    tarballFilename=$OPTARG
    [[ -r $tarballFilename ]] || usage "File not readable: $tarballFilename"
    deployMethod=tar
    ;;
  t)
    case $OPTARG in
    edge | beta | stable | v*)
      releaseChannel=$OPTARG
      deployMethod=tar
      ;;
    *)
      usage "Invalid release channel: $OPTARG"
      ;;
    esac
    ;;
  f)
    cargoFeatures=$OPTARG
    ;;
  n)
    numFullnodesRequested=$OPTARG
    ;;
  r)
    skipSetup=true
    ;;
  D)
    customPrograms=$OPTARG
    ;;
  o)
    case $OPTARG in
    noLedgerVerify | noValidatorSanity | rejectExtraNodes | noInstallCheck)
      sanityExtraArgs="$sanityExtraArgs -o $OPTARG"
      ;;
    *)
      usage "Unknown option: $OPTARG"
      ;;
    esac
    ;;
  c)
    getClientTypeAndNum
    ;;
  F)
    failOnValidatorBootupFailure=false
    ;;
  i)
    nodeAddress=$OPTARG
    ;;
  d)
    debugBuild=true
    ;;
  *)
    # usage() adds the "Error: " prefix itself.
    usage "unhandled option: $opt"
    ;;
  esac
done
# A debug build is requested by passing no profile flag to the build script.
if $debugBuild; then
  buildProfile=""
fi

loadConfigFile

# -n: restrict the command to the first N fullnodes of the configured list.
if [[ -n $numFullnodesRequested ]]; then
  # A non-numeric value would be evaluated as an arithmetic expression by the
  # slice below (typically 0), silently emptying the node list.
  if ! [[ $numFullnodesRequested =~ ^[0-9]+$ ]]; then
    echo "Error: -n expects a non-negative integer but got \"$numFullnodesRequested\""
    exit 1
  fi
  truncatedNodeList=( "${fullnodeIpList[@]:0:$numFullnodesRequested}" )
  unset fullnodeIpList
  fullnodeIpList=( "${truncatedNodeList[@]}" )
fi

# With no -c option every available client machine runs bench-tps; otherwise
# the requested total must fit the number of client machines.
numClients=${#clientIpList[@]}
numClientsRequested=$((numBenchTpsClients+numBenchExchangeClients))
if [[ "$numClientsRequested" -eq 0 ]]; then
  numBenchTpsClients=$numClients
  numClientsRequested=$((numBenchTpsClients+numBenchExchangeClients))
else
  if [[ "$numClientsRequested" -gt "$numClients" ]]; then
    echo "Error: More clients requested ($numClientsRequested) than available ($numClients)"
    exit 1
  fi
fi
# Forward all arguments to `buildkite-agent annotate`, but only when the
# BUILDKITE variable is non-empty; otherwise do nothing and succeed.
annotate() {
  if [[ -n $BUILDKITE ]]; then
    buildkite-agent annotate "$@"
  fi
}
# Annotate the build with the block explorer URL of the first blockstreamer
# node.  Does nothing (and succeeds) when no blockstreamer is configured.
annotateBlockexplorerUrl() {
  local explorerHost=${blockstreamerIpList[0]}
  [[ -n $explorerHost ]] || return 0
  annotate --style info --context blockexplorer-url "Block explorer: http://$explorerHost/"
}
# Build the binaries into $SOLANA_ROOT/farf.
#
# The build runs natively only on Linux hosts whose `lsb_release -sr` output
# is exactly one of the supported releases; on every other host, including
# those where the release cannot be determined, it runs through
# ci/docker-run.sh with the image named by ci/rust-version.sh.
#
# Globals read: SOLANA_ROOT, cargoFeatures, buildProfile, customPrograms
build() {
  local -a supported=("18.04")
  local -a maybeDocker=()
  local nativeBuild=false
  local hostRelease=
  local release

  if [[ $(uname) == Linux ]]; then
    # Exact string comparison: a regex match would accept empty output
    # (lsb_release missing) and substrings such as "8.04".
    hostRelease=$(lsb_release -sr 2>/dev/null) || hostRelease=
    for release in "${supported[@]}"; do
      if [[ $hostRelease == "$release" ]]; then
        nativeBuild=true
        break
      fi
    done
  fi

  if ! $nativeBuild; then
    # shellcheck source=ci/rust-version.sh
    source "$SOLANA_ROOT"/ci/rust-version.sh
    maybeDocker=(ci/docker-run.sh "$rust_stable_docker_image")
  fi

  SECONDS=0
  (
    cd "$SOLANA_ROOT" || exit 1
    echo "--- Build started at $(date)"

    set -x
    rm -rf farf

    if [[ -r target/perf-libs/env.sh ]]; then
      # shellcheck source=/dev/null
      source target/perf-libs/env.sh
    fi

    # The option values are expanded locally into the script text that the
    # (possibly containerised) bash executes.
    "${maybeDocker[@]}" bash -c "
      set -ex
      scripts/cargo-install-all.sh farf \"$cargoFeatures\" \"$buildProfile\"
      if [[ -n \"$customPrograms\" ]]; then
        scripts/cargo-install-custom-programs.sh farf $customPrograms
      fi
    "
  )
  echo "Build took $SECONDS seconds"
}
# Prepare a remote machine for a deployment: reset its ~/solana directory
# (keeping config and config-local when $skipSetup is true), optionally
# install an extra ssh key, then rsync the helper scripts to ~/solana/.
#
# Arguments: $1 - address of the remote machine
# Returns:   non-zero as soon as any step fails.  Every step is checked
#            explicitly because callers invoke this function as
#            `startCommon ... || exit 1`, a context in which `set -e` is
#            ignored inside the function body.
startCommon() {
  local ipAddress=$1

  test -d "$SOLANA_ROOT" || return

  if $skipSetup; then
    # Park the config directories in ~ while ~/solana is wiped, then move
    # them back into the fresh directory.
    ssh "${sshOptions[@]}" "$ipAddress" "
      set -x;
      mkdir -p ~/solana/config{,-local}
      rm -rf ~/config{,-local};
      mv ~/solana/config{,-local} ~;
      rm -rf ~/solana;
      mkdir -p ~/solana ~/.cargo/bin;
      mv ~/config{,-local} ~/solana/
    " || return
  else
    ssh "${sshOptions[@]}" "$ipAddress" "
      set -x;
      rm -rf ~/solana;
      mkdir -p ~/.cargo/bin
    " || return
  fi

  if [[ -n "$externalNodeSshKey" ]]; then
    ssh-copy-id -f -i "$externalNodeSshKey" "${sshOptions[@]}" "solana@$ipAddress" || return
  fi

  rsync -vPrc -e "ssh ${sshOptions[*]}" \
    "$SOLANA_ROOT"/{fetch-perf-libs.sh,scripts,net,multinode-demo} \
    "$ipAddress":~/solana/
}
# Deploy to and start the bootstrap leader, blocking until the remote start
# script returns.  All output goes to the log file; on failure the log is
# printed and the script exits with status 1.
#
# Arguments: $1 - address of the bootstrap leader
#            $2 - node index passed to the remote script
#            $3 - local log file to append to
startBootstrapLeader() {
  local ipAddress=$1
  local nodeIndex="$2"
  local logFile="$3"
  echo "--- Starting bootstrap leader: $ipAddress"
  echo "start log: $logFile"

  # Deploy local binaries to bootstrap fullnode.  Other fullnodes and clients later fetch the
  # binaries from it
  (
    set -x
    # This subshell is the left-hand side of `||`, so `set -e` is ignored
    # inside it; every step that must succeed is checked explicitly.
    startCommon "$ipAddress" || exit 1
    if [[ -n "$externalPrimordialAccountsFile" ]]; then
      rsync -vPrc -e "ssh ${sshOptions[*]}" "$externalPrimordialAccountsFile" \
        "$ipAddress:$remoteExternalPrimordialAccountsFile" || exit 1
    fi
    case $deployMethod in
    tar)
      rsync -vPrc -e "ssh ${sshOptions[*]}" "$SOLANA_ROOT"/solana-release/bin/* "$ipAddress:~/.cargo/bin/" || exit 1
      ;;
    local)
      rsync -vPrc -e "ssh ${sshOptions[*]}" "$SOLANA_ROOT"/farf/bin/* "$ipAddress:~/.cargo/bin/" || exit 1
      ;;
    skip)
      ;;
    *)
      usage "Internal error: invalid deployMethod: $deployMethod"
      ;;
    esac

    # The last argument is quoted for the remote shell, as in startNode, so
    # both flags always arrive as one positional parameter.
    # shellcheck disable=SC2029 # remote-node.sh args are expanded on client side intentionally
    ssh "${sshOptions[@]}" -n "$ipAddress" \
      "./solana/net/remote/remote-node.sh \
         $deployMethod \
         bootstrap-leader \
         $entrypointIp \
         $((${#fullnodeIpList[@]} + ${#blockstreamerIpList[@]} + ${#replicatorIpList[@]})) \
         \"$RUST_LOG\" \
         $skipSetup \
         $failOnValidatorBootupFailure \
         \"$remoteExternalPrimordialAccountsFile\" \
         \"$maybeDisableAirdrops\" \
         \"$internalNodesStakeLamports\" \
         \"$internalNodesLamports\" \
         $nodeIndex \
         $numBenchTpsClients \"$benchTpsExtraArgs\" \
         $numBenchExchangeClients \"$benchExchangeExtraArgs\" \
         \"$genesisOptions\" \
         \"$maybeNoSnapshot $maybeSkipLedgerVerify\" \
      "
  ) >> "$logFile" 2>&1 || {
    cat "$logFile"
    echo "^^^ +++"
    exit 1
  }
}
# Deploy to and start one additional node in the background.  Output goes to
# $netLogDir/fullnode-<ip>.log; the background job's pid is appended to the
# global pids array and a fullnode-<pid>.log symlink to the log is created so
# the caller can locate the log of a failed job by pid.
#
# Arguments: $1 - address of the node
#            $2 - node type passed to the remote script (required)
#            $3 - node index passed to the remote script (required)
startNode() {
  local ipAddress=$1
  local nodeType=$2
  local nodeIndex="$3"
  local logFile="$netLogDir/fullnode-$ipAddress.log"

  if [[ -z $nodeType ]]; then
    echo nodeType not specified
    exit 1
  fi

  if [[ -z $nodeIndex ]]; then
    echo nodeIndex not specified
    exit 1
  fi

  echo "--- Starting $nodeType: $ipAddress"
  echo "start log: $logFile"
  (
    set -x
    startCommon "$ipAddress"

    if [[ $nodeType = blockstreamer ]] && [[ -n $letsEncryptDomainName ]]; then
      #
      # Create/renew TLS certificate
      #
      declare localArchive=~/letsencrypt-"$letsEncryptDomainName".tgz
      if [[ -r "$localArchive" ]]; then
        timeout 30s scp "${sshOptions[@]}" "$localArchive" "$ipAddress:letsencrypt.tgz"
      fi
      ssh "${sshOptions[@]}" -n "$ipAddress" \
        "sudo -H /certbot-restore.sh $letsEncryptDomainName maintainers@solana.com"
      rm -f letsencrypt.tgz
      timeout 30s scp "${sshOptions[@]}" "$ipAddress:/letsencrypt.tgz" letsencrypt.tgz
      test -s letsencrypt.tgz # Ensure non-empty before overwriting $localArchive
      cp letsencrypt.tgz "$localArchive"
    fi

    # shellcheck disable=SC2029 # remote-node.sh args are expanded on client side intentionally
    ssh "${sshOptions[@]}" -n "$ipAddress" \
      "./solana/net/remote/remote-node.sh \
         $deployMethod \
         $nodeType \
         $entrypointIp \
         $((${#fullnodeIpList[@]} + ${#blockstreamerIpList[@]} + ${#replicatorIpList[@]})) \
         \"$RUST_LOG\" \
         $skipSetup \
         $failOnValidatorBootupFailure \
         \"$remoteExternalPrimordialAccountsFile\" \
         \"$maybeDisableAirdrops\" \
         \"$internalNodesStakeLamports\" \
         \"$internalNodesLamports\" \
         $nodeIndex \
         $numBenchTpsClients \"$benchTpsExtraArgs\" \
         $numBenchExchangeClients \"$benchExchangeExtraArgs\" \
         \"$genesisOptions\" \
         \"$maybeNoSnapshot $maybeSkipLedgerVerify\" \
      "
  ) >> "$logFile" 2>&1 &
  local pid=$!
  ln -sf "fullnode-$ipAddress.log" "$netLogDir/fullnode-$pid.log"
  pids+=("$pid")
}
# Deploy to a client machine and launch one client program on it.  Output
# goes to $netLogDir/client-<program>-<ip>.log; on failure the log is printed
# and the script exits with status 1.
#
# Arguments: $1 - address of the client machine
#            $2 - client program to run
#            $3 - client index passed to the remote script
startClient() {
  local ipAddress=$1
  local clientToRun="$2"
  local clientIndex="$3"
  local logFile="$netLogDir/client-$clientToRun-$ipAddress.log"
  echo "--- Starting client: $ipAddress - $clientToRun"
  echo "start log: $logFile"
  (
    set -x
    # `set -e` is ignored inside this subshell because it is the left-hand
    # side of `||`, so a failed preparation step has to be checked here.
    startCommon "$ipAddress" || exit 1
    # shellcheck disable=SC2029 # remote-client.sh args are expanded on client side intentionally
    ssh "${sshOptions[@]}" -f "$ipAddress" \
      "./solana/net/remote/remote-client.sh $deployMethod $entrypointIp \
      $clientToRun \"$RUST_LOG\" \"$benchTpsExtraArgs\" \"$benchExchangeExtraArgs\" $clientIndex"
  ) >> "$logFile" 2>&1 || {
    cat "$logFile"
    echo "^^^ +++"
    exit 1
  }
}
# Run the remote sanity script on the bootstrap leader (the first fullnode)
# and, unless told otherwise, a reduced check on the first blockstreamer.
# Exits with status 1 as soon as one of the checks fails.
#
# Arguments: $1 - any non-empty value skips the blockstreamer check
sanity() {
  local skipBlockstreamerSanity=$1
  local bootstrapLeader=${fullnodeIpList[0]}
  local blockstreamer=${blockstreamerIpList[0]}

  $metricsWriteDatapoint "testnet-deploy net-sanity-begin=1"

  annotateBlockexplorerUrl

  echo "--- Sanity: $bootstrapLeader"
  (
    set -x
    # shellcheck disable=SC2029 # remote-client.sh args are expanded on client side intentionally
    ssh "${sshOptions[@]}" "$bootstrapLeader" \
      "./solana/net/remote/remote-sanity.sh $bootstrapLeader $sanityExtraArgs \"$RUST_LOG\""
  ) || exit 1

  if [[ -z $skipBlockstreamerSanity && -n $blockstreamer ]]; then
    # If there's a blockstreamer node run a reduced sanity check on it as well
    echo "--- Sanity: $blockstreamer"
    (
      set -x
      # shellcheck disable=SC2029 # remote-client.sh args are expanded on client side intentionally
      ssh "${sshOptions[@]}" "$blockstreamer" \
        "./solana/net/remote/remote-sanity.sh $blockstreamer $sanityExtraArgs -o noLedgerVerify -o noValidatorSanity \"$RUST_LOG\""
    ) || exit 1
  fi

  $metricsWriteDatapoint "testnet-deploy net-sanity-complete=1"
}
# For each platform named in $updatePlatforms: generate the update manifest
# keypair, copy it to the bootstrap leader (the first fullnode) and run the
# remote deploy-update script there for $releaseChannel.
#
# Returns immediately when no platform was requested.  Exits with status 1
# when the deploy method is not "tar" or when a platform's steps fail.
deployUpdate() {
  [[ -n $updatePlatforms ]] || return 0
  if [[ $deployMethod != tar ]]; then
    exit 1
  fi

  local leaderIp=${fullnodeIpList[0]}

  # $updatePlatforms is a whitespace-separated list; splitting is intended.
  for updatePlatform in $updatePlatforms; do
    echo "--- Deploying solana-install update: $updatePlatform"
    if ! (
      set -x

      scripts/solana-install-update-manifest-keypair.sh "$updatePlatform"

      timeout 30s scp "${sshOptions[@]}" \
        update_manifest_keypair.json "$leaderIp:solana/update_manifest_keypair.json"

      # shellcheck disable=SC2029 # remote-deploy-update.sh args are expanded on client side intentionally
      ssh "${sshOptions[@]}" "$leaderIp" \
        "./solana/net/remote/remote-deploy-update.sh $releaseChannel $updatePlatform"
    ); then
      exit 1
    fi
  done
}
# Look up $nodeAddress in the fullnode, blockstreamer and replicator lists
# (searched in that order) and publish the result in two globals:
#   nodeType  - validator, blockstreamer or replicator
#   nodeIndex - zero-based position counted across all three lists
# The loop variable ipAddress is deliberately left global as well.
# Exits with status 1 when nodeAddress is empty or not in any list.
getNodeType() {
  echo "getNodeType: $nodeAddress"
  if [[ -z $nodeAddress ]]; then
    echo "Error: nodeAddress not set"
    exit 1
  fi

  nodeIndex=0 # <-- global
  nodeType=validator # <-- global

  # "b" and "r" are sentinels marking where the next list begins.
  for ipAddress in "${fullnodeIpList[@]}" b "${blockstreamerIpList[@]}" r "${replicatorIpList[@]}"; do
    case $ipAddress in
    b)
      nodeType=blockstreamer
      continue
      ;;
    r)
      nodeType=replicator
      continue
      ;;
    esac

    if [[ $ipAddress = "$nodeAddress" ]]; then
      echo "getNodeType: $nodeType ($nodeIndex)"
      return
    fi
    nodeIndex=$((nodeIndex + 1))
  done

  echo "Error: Unknown node: $nodeAddress"
  exit 1
}
# Make the software to deploy available locally, according to $deployMethod:
#   tar   - download the release tarball for $releaseChannel (when set) and
#           unpack $tarballFilename into $SOLANA_ROOT/solana-release
#   local - build from source via build()
#   skip  - nothing to do
# Exits with status 1 when the download or extraction fails, or when
# --deploy-update was given without -t.
prepare_deploy() {
  case $deployMethod in
  tar)
    if [[ -n $releaseChannel ]]; then
      rm -f "$SOLANA_ROOT"/solana-release.tar.bz2
      declare updateDownloadUrl=http://release.solana.com/"$releaseChannel"/solana-release-x86_64-unknown-linux-gnu.tar.bz2
      (
        set -x
        # --fail: without it an HTTP error response (e.g. 404 for an unknown
        # release tag) is saved as the "tarball" and curl reports success.
        curl --fail --retry 5 --retry-delay 2 --retry-connrefused \
          -o "$SOLANA_ROOT"/solana-release.tar.bz2 "$updateDownloadUrl"
      ) || exit 1
      tarballFilename="$SOLANA_ROOT"/solana-release.tar.bz2
    else
      if [[ -n $updatePlatforms ]]; then
        echo "Error: --deploy-update argument was provided but -t was not"
        exit 1
      fi
    fi
    (
      set -x
      rm -rf "$SOLANA_ROOT"/solana-release || exit 1
      # `&&` keeps tar from unpacking into the wrong directory if cd fails.
      (cd "$SOLANA_ROOT" && tar jxv) < "$tarballFilename" || exit 1
      cat "$SOLANA_ROOT"/solana-release/version.yml
    ) || exit 1
    ;;
  local)
    build
    ;;
  skip)
    ;;
  *)
    usage "Internal error: invalid deployMethod: $deployMethod"
    ;;
  esac
}
# Start (or, when $updateNodes is true, update) the whole network:
#   1. start the bootstrap leader (the first node) and wait for it,
#   2. start all remaining nodes in the background and wait for them,
#   3. run the sanity check, skipping the blockstreamer,
#   4. start the clients, report the deployed version, deploy updates.
# Exits with status 1 when a node fails to start, unless
# $failOnValidatorBootupFailure is false.
deploy() {
  local pid i

  echo "Deployment started at $(date)"
  if $updateNodes; then
    $metricsWriteDatapoint "testnet-deploy net-update-begin=1"
  else
    $metricsWriteDatapoint "testnet-deploy net-start-begin=1"
  fi

  local bootstrapLeader=true
  local bootstrapNodeDeployTime=
  for nodeAddress in "${fullnodeIpList[@]}" "${blockstreamerIpList[@]}" "${replicatorIpList[@]}"; do
    # getNodeType reads $nodeAddress and sets the globals nodeType/nodeIndex.
    nodeType=
    nodeIndex=
    getNodeType
    if $bootstrapLeader; then
      SECONDS=0
      # $nodeAddress is used directly instead of depending on the ipAddress
      # variable that getNodeType leaves behind as a side effect.
      startBootstrapLeader "$nodeAddress" "$nodeIndex" "$netLogDir/bootstrap-leader-$nodeAddress.log"
      bootstrapNodeDeployTime=$SECONDS
      $metricsWriteDatapoint "testnet-deploy net-bootnode-leader-started=1"

      bootstrapLeader=false
      SECONDS=0
      pids=()
    else
      startNode "$nodeAddress" "$nodeType" "$nodeIndex"

      # Stagger additional node start time. If too many nodes start simultaneously
      # the bootstrap node gets more rsync requests from the additional nodes than
      # it can handle.
      if ((nodeIndex % 2 == 0)); then
        sleep 2
      fi
    fi
  done

  for pid in "${pids[@]}"; do
    if ! wait "$pid"; then
      echo "+++ fullnode failed to start"
      cat "$netLogDir/fullnode-$pid.log"
      if $failOnValidatorBootupFailure; then
        exit 1
      else
        echo "Failure is non-fatal"
      fi
    fi
  done

  $metricsWriteDatapoint "testnet-deploy net-fullnodes-started=1"
  additionalNodeDeployTime=$SECONDS

  annotateBlockexplorerUrl

  if $updateNodes; then
    # Stop the clients before they are started again below.
    for ipAddress in "${clientIpList[@]}"; do
      stopNode "$ipAddress" true
    done
  fi
  sanity skipBlockstreamerSanity # skip sanity on blockstreamer node, it may not
                                 # have caught up to the bootstrap leader yet

  SECONDS=0
  # The first $numBenchTpsClients machines run bench-tps, the rest
  # bench-exchange; each client type is indexed from 0.
  for ((i = 0; i < numClients && i < numClientsRequested; i++)); do
    if ((i < numBenchTpsClients)); then
      startClient "${clientIpList[$i]}" "solana-bench-tps" "$i"
    else
      startClient "${clientIpList[$i]}" "solana-bench-exchange" $((i - numBenchTpsClients))
    fi
  done
  clientDeployTime=$SECONDS

  if $updateNodes; then
    $metricsWriteDatapoint "testnet-deploy net-update-complete=1"
  else
    $metricsWriteDatapoint "testnet-deploy net-start-complete=1"
  fi

  local networkVersion=unknown
  case $deployMethod in
  tar)
    networkVersion="$(
      (
        set -o pipefail
        grep "^commit: " "$SOLANA_ROOT"/solana-release/version.yml | head -n1 | cut -d\  -f2
      ) || echo "tar-unknown"
    )"
    ;;
  local)
    networkVersion="$(git rev-parse HEAD || echo local-unknown)"
    ;;
  skip)
    ;;
  *)
    usage "Internal error: invalid deployMethod: $deployMethod"
    ;;
  esac
  # Only the first nine characters of the version are reported.
  $metricsWriteDatapoint "testnet-deploy version=\"${networkVersion:0:9}\""

  deployUpdate

  echo
  echo "+++ Deployment Successful"
  echo "Bootstrap leader deployment took $bootstrapNodeDeployTime seconds"
  echo "Additional fullnode deployment (${#fullnodeIpList[@]} full nodes, ${#blockstreamerIpList[@]} blockstreamer nodes, ${#replicatorIpList[@]} replicators) took $additionalNodeDeployTime seconds"
  echo "Client deployment (${#clientIpList[@]} instances) took $clientDeployTime seconds"
  echo "Network start logs in $netLogDir"
}
# Stop everything started on one machine: the tmux session, the process
# groups recorded in the monitor pid files, and any process matched by
# `pkill` for the patterns node, solana- and remote-.
#
# The remote work runs in the background, logging to
# $netLogDir/stop-fullnode-<ip>.log (with a stop-fullnode-<pid>.log symlink).
#
# Arguments: $1 - address of the machine
#            $2 - "true" to wait for completion here, "false" to append the
#                 background pid to the global pids array for the caller
stopNode() {
  local ipAddress=$1
  local block=$2
  local logFile="$netLogDir/stop-fullnode-$ipAddress.log"

  echo "--- Stopping node: $ipAddress"
  echo "stop log: $logFile"
  (
    set -x
    # Skip pid files that are missing or empty: with no pid argument
    # `ps opgid=` would report unrelated processes and yield a bogus pgid.
    # shellcheck disable=SC2029 # It's desired that PS4 be expanded on the client side
    ssh "${sshOptions[@]}" "$ipAddress" "
      PS4=\"$PS4\"
      set -x
      ! tmux list-sessions || tmux kill-session
      for pid in solana/{net-stats,fd-monitor,oom-monitor}.pid; do
        [[ -s \$pid ]] || continue
        pgid=\$(ps opgid= \$(cat \$pid) | tr -d '[:space:]')
        if [[ -n \$pgid ]]; then
          sudo kill -- -\$pgid
        fi
      done
      for pattern in node solana- remote-; do
        pkill -9 \$pattern
      done
    "
  ) >> "$logFile" 2>&1 &

  local pid=$!
  ln -sf "stop-fullnode-$ipAddress.log" "$netLogDir/stop-fullnode-$pid.log"
  if $block; then
    wait "$pid"
  else
    pids+=("$pid")
  fi
}
# Stop every node and client of the network.  All stop jobs run in the
# background and are awaited together; a failing job is ignored.
stop() {
  SECONDS=0
  $metricsWriteDatapoint "testnet-deploy net-stop-begin=1"

  local loopCount=0
  pids=()
  for ipAddress in "${fullnodeIpList[@]}" "${blockstreamerIpList[@]}" "${replicatorIpList[@]}" "${clientIpList[@]}"; do
    stopNode "$ipAddress" false

    # Stagger additional node stop time to avoid too many concurrent ssh
    # sessions
    if ((loopCount++ % 4 == 0)); then
      sleep 2
    fi
  done

  echo --- Waiting for nodes to finish stopping
  for pid in "${pids[@]}"; do
    echo -n "$pid "
    wait "$pid" || true
  done
  echo

  $metricsWriteDatapoint "testnet-deploy net-stop-complete=1"
  echo "Stopping nodes took $SECONDS seconds"
}
# Copy one remote log (solana/<log>.log) from a machine into $netLogDir as
# remote-<log>-<ip>.log.  A failed copy is reported but is not fatal.
#
# Arguments: $1 - address of the machine
#            $2 - log name without the .log suffix
fetchRemoteLog() {
  declare ipAddress=$1
  declare log=$2
  echo "--- fetching $log from $ipAddress"
  (
    set -x
    timeout 30s scp "${sshOptions[@]}" \
      "$ipAddress":solana/"$log".log "$netLogDir"/remote-"$log"-"$ipAddress".log
  ) || echo "failed to fetch log"
}

# Run the action selected by the global $command.
dispatchCommand() {
  case $command in
  restart)
    prepare_deploy
    stop
    deploy
    ;;
  start)
    prepare_deploy
    deploy
    ;;
  update)
    # Same sequence as "start", reusing the existing node configuration.
    # (There is no start function in this script; the sequence is spelled out.)
    skipSetup=true
    updateNodes=true
    prepare_deploy
    deploy
    ;;
  sanity)
    sanity
    ;;
  stop)
    stop
    ;;
  stopnode)
    if [[ -z $nodeAddress ]]; then
      usage "node address (-i) not specified"
      exit 1
    fi
    stopNode "$nodeAddress" true
    ;;
  startnode)
    if [[ -z $nodeAddress ]]; then
      usage "node address (-i) not specified"
      exit 1
    fi
    nodeType=
    nodeIndex=
    getNodeType
    startNode "$nodeAddress" $nodeType $nodeIndex
    ;;
  logs)
    fetchRemoteLog "${fullnodeIpList[0]}" drone
    for ipAddress in "${fullnodeIpList[@]}"; do
      fetchRemoteLog "$ipAddress" fullnode
    done
    for ipAddress in "${clientIpList[@]}"; do
      fetchRemoteLog "$ipAddress" client
    done
    for ipAddress in "${blockstreamerIpList[@]}"; do
      fetchRemoteLog "$ipAddress" fullnode
    done
    for ipAddress in "${replicatorIpList[@]}"; do
      fetchRemoteLog "$ipAddress" fullnode
    done
    ;;
  *)
    # Passing the message to usage() makes it exit with status 1; a bare
    # `usage` call exits with status 0.
    usage "Unknown command: $command"
    ;;
  esac
}

dispatchCommand