net/: startnode/stopnode now works for blockstreamer/replicator nodes (#5146)

* startnode/stopnode now works for blockstreamer/replicator nodes

* Plumb --skip-ledger-verify through net/
This commit is contained in:
Michael Vines 2019-07-17 19:26:23 -07:00 committed by GitHub
parent 6ad9dc18d8
commit b58558ea4e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 91 additions and 25 deletions

View File

@ -220,6 +220,9 @@ while [[ -n $1 ]]; do
elif [[ $1 = --no-voting ]]; then
args+=("$1")
shift
elif [[ $1 = --skip-ledger-verify ]]; then
args+=("$1")
shift
elif [[ $1 = --no-sigverify ]]; then
args+=("$1")
shift

View File

@ -24,6 +24,8 @@ Operate a configured testnet
restart - Shortcut for stop then start
update - Live update all network nodes
logs - Fetch remote logs from each network node
startnode- Start an individual node (previously stopped with stopNode)
stopnode - Stop an individual node
start/update-specific options:
-T [tarFilename] - Deploy the specified release tarball
@ -62,6 +64,10 @@ Operate a configured testnet
- A YML file with a list of account pubkeys and corresponding stakes for external nodes
--no-snapshot
- If set, disables booting validators from a snapshot
--skip-ledger-verify
- If set, validators will skip verifying
the ledger they already have saved to disk at
boot (results in a much faster boot)
sanity/start/update-specific options:
-F - Discard validator nodes that didn't bootup successfully
-o noLedgerVerify - Skip ledger verification
@ -75,6 +81,9 @@ Operate a configured testnet
logs-specific options:
none
startnode/stopnode-specific options:
-i [ip address] - IP Address of the node to start or stop
Note: if RUST_LOG is set in the environment it will be propogated into the
network nodes.
EOF
@ -89,6 +98,7 @@ skipSetup=false
updateNodes=false
customPrograms=
updatePlatforms=
nodeAddress=
numBenchTpsClients=0
numBenchExchangeClients=0
benchTpsExtraArgs=
@ -100,6 +110,7 @@ externalPrimordialAccountsFile=
remoteExternalPrimordialAccountsFile=
stakeNodesInGenesisBlock=
maybeNoSnapshot=""
maybeSkipLedgerVerify=""
command=$1
[[ -n $command ]] || usage
@ -120,6 +131,9 @@ while [[ -n $1 ]]; do
elif [[ $1 = --no-snapshot ]]; then
maybeNoSnapshot="$1"
shift 1
elif [[ $1 = --skip-ledger-verify ]]; then
maybeSkipLedgerVerify="$1"
shift 1
elif [[ $1 = --deploy-update ]]; then
updatePlatforms="$updatePlatforms $2"
shift 2
@ -320,8 +334,8 @@ startCommon() {
startBootstrapLeader() {
declare ipAddress=$1
declare logFile="$2"
declare nodeIndex="$3"
declare nodeIndex="$2"
declare logFile="$3"
echo "--- Starting bootstrap leader: $ipAddress"
echo "start log: $logFile"
@ -344,6 +358,7 @@ startBootstrapLeader() {
;;
esac
# shellcheck disable=SC2086 # Don't want to double quote "$maybeNoSnapshot $maybeSkipLedgerVerify"
ssh "${sshOptions[@]}" -n "$ipAddress" \
"./solana/net/remote/remote-node.sh \
$deployMethod \
@ -359,7 +374,7 @@ startBootstrapLeader() {
$numBenchTpsClients \"$benchTpsExtraArgs\" \
$numBenchExchangeClients \"$benchExchangeExtraArgs\" \
\"$genesisOptions\" \
$maybeNoSnapshot \
"$maybeNoSnapshot $maybeSkipLedgerVerify" \
"
) >> "$logFile" 2>&1 || {
cat "$logFile"
@ -374,6 +389,16 @@ startNode() {
declare nodeIndex="$3"
declare logFile="$netLogDir/fullnode-$ipAddress.log"
if [[ -z $nodeType ]]; then
echo nodeType not specified
exit 1
fi
if [[ -z $nodeIndex ]]; then
echo nodeIndex not specified
exit 1
fi
echo "--- Starting $nodeType: $ipAddress"
echo "start log: $logFile"
(
@ -408,8 +433,10 @@ startNode() {
\"$remoteExternalPrimordialAccountsFile\" \
\"$stakeNodesInGenesisBlock\" \
$nodeIndex \
$numBenchTpsClients \"$benchTpsExtraArgs\" \
$numBenchExchangeClients \"$benchExchangeExtraArgs\" \
\"$genesisOptions\" \
$maybeNoSnapshot \
\"$maybeNoSnapshot $maybeSkipLedgerVerify\" \
"
) >> "$logFile" 2>&1 &
declare pid=$!
@ -495,7 +522,35 @@ deployUpdate() {
) || ok=false
$ok || exit 1
done
}
getNodeType() {
echo "getNodeType: $nodeAddress"
[[ -n $nodeAddress ]] || {
echo "Error: nodeAddress not set"
exit 1
}
nodeIndex=0 # <-- global
nodeType=validator # <-- global
for ipAddress in "${fullnodeIpList[@]}" b "${blockstreamerIpList[@]}" r "${replicatorIpList[@]}"; do
if [[ $ipAddress = b ]]; then
nodeType=blockstreamer
continue
elif [[ $ipAddress = r ]]; then
nodeType=replicator
continue
fi
if [[ $ipAddress = "$nodeAddress" ]]; then
echo "getNodeType: $nodeType ($nodeIndex)"
return
fi
((nodeIndex = nodeIndex + 1))
done
echo "Error: Unknown node: $nodeAddress"
exit 1
}
start() {
@ -539,23 +594,14 @@ start() {
fi
declare bootstrapLeader=true
declare nodeType=validator
declare loopCount=0
for ipAddress in "${fullnodeIpList[@]}" b "${blockstreamerIpList[@]}" r "${replicatorIpList[@]}"; do
if [[ $ipAddress = b ]]; then
nodeType=blockstreamer
continue
elif [[ $ipAddress = r ]]; then
nodeType=replicator
continue
fi
if $updateNodes; then
stopNode "$ipAddress" true
fi
for nodeAddress in "${fullnodeIpList[@]}" "${blockstreamerIpList[@]}" "${replicatorIpList[@]}"; do
nodeType=
nodeIndex=
getNodeType
if $bootstrapLeader; then
SECONDS=0
declare bootstrapNodeDeployTime=
startBootstrapLeader "$ipAddress" "$netLogDir/bootstrap-leader-$ipAddress.log" $loopCount
startBootstrapLeader "$nodeAddress" $nodeIndex "$netLogDir/bootstrap-leader-$ipAddress.log"
bootstrapNodeDeployTime=$SECONDS
$metricsWriteDatapoint "testnet-deploy net-bootnode-leader-started=1"
@ -563,15 +609,18 @@ start() {
SECONDS=0
pids=()
else
startNode "$ipAddress" $nodeType $loopCount
startNode "$ipAddress" $nodeType $nodeIndex
# Stagger additional node start time. If too many nodes start simultaneously
# the bootstrap node gets more rsync requests from the additional nodes than
# it can handle.
((loopCount++ % 2 == 0)) && sleep 2
if ((nodeIndex % 2 == 0)); then
sleep 2
fi
fi
done
for pid in "${pids[@]}"; do
declare ok=true
wait "$pid" || ok=false
@ -723,10 +772,21 @@ stop)
stop
;;
stopnode)
if [[ -z $nodeAddress ]]; then
usage "node address (-i) not specified"
exit 1
fi
stopNode "$nodeAddress" true
;;
startnode)
startNode "$nodeAddress" validator
if [[ -z $nodeAddress ]]; then
usage "node address (-i) not specified"
exit 1
fi
nodeType=
nodeIndex=
getNodeType
startNode "$nodeAddress" $nodeType $nodeIndex
;;
logs)
fetchRemoteLog() {

View File

@ -19,7 +19,7 @@ benchTpsExtraArgs="${12}"
numBenchExchangeClients="${13}"
benchExchangeExtraArgs="${14}"
genesisOptions="${15}"
noSnapshot="${16}"
extraNodeArgs="${16}"
set +x
export RUST_LOG
@ -171,7 +171,8 @@ local|tar)
args+=(--no-airdrop)
fi
args+=(--init-complete-file "$initCompleteFile")
args+=("$noSnapshot")
# shellcheck disable=SC2206 # Don't want to double quote $extraNodeArgs
args+=($extraNodeArgs)
nohup ./multinode-demo/validator.sh --bootstrap-leader "${args[@]}" > fullnode.log 2>&1 &
waitForNodeToInit
;;
@ -249,7 +250,8 @@ local|tar)
fi
args+=(--init-complete-file "$initCompleteFile")
args+=("$noSnapshot")
# shellcheck disable=SC2206 # Don't want to double quote $extraNodeArgs
args+=($extraNodeArgs)
nohup ./multinode-demo/validator.sh "${args[@]}" > fullnode.log 2>&1 &
waitForNodeToInit
;;
@ -267,7 +269,8 @@ local|tar)
if [[ $skipSetup != true ]]; then
./multinode-demo/clear-config.sh
fi
args+=("$noSnapshot")
# shellcheck disable=SC2206 # Don't want to double quote $extraNodeArgs
args+=($extraNodeArgs)
nohup ./multinode-demo/replicator.sh "${args[@]}" > fullnode.log 2>&1 &
sleep 1
;;