Generalize net/ from leader/validator to bootstrap-fullnode/fullnode
This commit is contained in:
parent
b880dafe28
commit
04a0652614
|
@ -4,10 +4,10 @@ set -e
|
|||
cd "$(dirname "$0")"/..
|
||||
|
||||
zone=
|
||||
leaderAddress=
|
||||
leaderMachineType=
|
||||
bootstrapFullNodeAddress=
|
||||
bootstrapFullNodeMachineType=
|
||||
clientNodeCount=0
|
||||
validatorNodeCount=10
|
||||
additionalFullNodeCount=10
|
||||
publicNetwork=false
|
||||
snapChannel=edge
|
||||
tarChannelOrTag=edge
|
||||
|
@ -37,12 +37,12 @@ Deploys a CD testnet
|
|||
specified release channel (edge|beta|stable) or release tag
|
||||
(vX.Y.Z)
|
||||
(default: $tarChannelOrTag)
|
||||
-n [number] - Number of validator nodes (default: $validatorNodeCount)
|
||||
-c [number] - Number of client nodes (default: $clientNodeCount)
|
||||
-n [number] - Number of additional full nodes (default: $additionalFullNodeCount)
|
||||
-c [number] - Number of client bencher nodes (default: $clientNodeCount)
|
||||
-P - Use public network IP addresses (default: $publicNetwork)
|
||||
-G - Enable GPU, and set count/type of GPUs to use (e.g n1-standard-16 --accelerator count=4,type=nvidia-tesla-k80)
|
||||
-g - Enable GPU (default: $enableGpu)
|
||||
-a [address] - Set the leader node's external IP address to this GCE address
|
||||
-a [address] - Set the bootstrap fullnode's external IP address to this GCE address
|
||||
-d - Delete the network
|
||||
|
||||
Note: the SOLANA_METRICS_CONFIG environment variable is used to configure
|
||||
|
@ -68,7 +68,7 @@ while getopts "h?p:Pn:c:s:t:gG:a:d" opt; do
|
|||
publicNetwork=true
|
||||
;;
|
||||
n)
|
||||
validatorNodeCount=$OPTARG
|
||||
additionalFullNodeCount=$OPTARG
|
||||
;;
|
||||
c)
|
||||
clientNodeCount=$OPTARG
|
||||
|
@ -99,10 +99,10 @@ while getopts "h?p:Pn:c:s:t:gG:a:d" opt; do
|
|||
;;
|
||||
G)
|
||||
enableGpu=true
|
||||
leaderMachineType=$OPTARG
|
||||
bootstrapFullNodeMachineType=$OPTARG
|
||||
;;
|
||||
a)
|
||||
leaderAddress=$OPTARG
|
||||
bootstrapFullNodeAddress=$OPTARG
|
||||
;;
|
||||
d)
|
||||
delete=true
|
||||
|
@ -115,18 +115,18 @@ done
|
|||
|
||||
|
||||
create_args=(
|
||||
-a "$leaderAddress"
|
||||
-a "$bootstrapFullNodeAddress"
|
||||
-c "$clientNodeCount"
|
||||
-n "$validatorNodeCount"
|
||||
-n "$additionalFullNodeCount"
|
||||
-p "$netName"
|
||||
-z "$zone"
|
||||
)
|
||||
|
||||
if $enableGpu; then
|
||||
if [[ -z $leaderMachineType ]]; then
|
||||
if [[ -z $bootstrapFullNodeMachineType ]]; then
|
||||
create_args+=(-g)
|
||||
else
|
||||
create_args+=(-G "$leaderMachineType")
|
||||
create_args+=(-G "$bootstrapFullNodeMachineType")
|
||||
fi
|
||||
fi
|
||||
|
||||
|
|
|
@ -26,12 +26,12 @@ access to create a new InfluxDB database. Ask mvines@ for help if needed.
|
|||
|
||||
## Quick Start
|
||||
|
||||
NOTE: This example uses GCP. If you are using AWS, replace `./gce.sh` with
|
||||
NOTE: This example uses GCE. If you are using AWS EC2, replace `./gce.sh` with
|
||||
`./ec2.sh` in the commands.
|
||||
|
||||
```bash
|
||||
$ cd net/
|
||||
$ ./gce.sh create -n 5 -c 1 #<-- Create a GCE testnet with 5 validators, 1 client (billing starts here)
|
||||
$ ./gce.sh create -n 5 -c 1 #<-- Create a GCE testnet with 5 additional nodes (beyond the bootstrap node), 1 client bencher (billing starts here)
|
||||
$ ./init-metrics.sh $(whoami) #<-- Configure a metrics database for the testnet
|
||||
$ ./net.sh start #<-- Deploy the network from the local workspace
|
||||
$ ./ssh.sh #<-- Details on how to ssh into any testnet node to access logs/etc
|
||||
|
|
|
@ -22,12 +22,12 @@ configFile="$netConfigDir/config"
|
|||
|
||||
entrypointIp=
|
||||
publicNetwork=
|
||||
leaderIp=
|
||||
bootstrapFullNodeIp=
|
||||
netBasename=
|
||||
sshPrivateKey=
|
||||
clientIpList=()
|
||||
sshOptions=()
|
||||
validatorIpList=()
|
||||
additionalFullNodeIps=()
|
||||
|
||||
buildSshOptions() {
|
||||
sshOptions=(
|
||||
|
@ -48,10 +48,10 @@ loadConfigFile() {
|
|||
source "$configFile"
|
||||
[[ -n "$entrypointIp" ]] || usage "Config file invalid, entrypointIp unspecified: $configFile"
|
||||
[[ -n "$publicNetwork" ]] || usage "Config file invalid, publicNetwork unspecified: $configFile"
|
||||
[[ -n "$leaderIp" ]] || usage "Config file invalid, leaderIp unspecified: $configFile"
|
||||
[[ -n "$bootstrapFullNodeIp" ]] || usage "Config file invalid, bootstrapFullNodeIp unspecified: $configFile"
|
||||
[[ -n "$netBasename" ]] || usage "Config file invalid, netBasename unspecified: $configFile"
|
||||
[[ -n $sshPrivateKey ]] || usage "Config file invalid, sshPrivateKey unspecified: $configFile"
|
||||
[[ ${#validatorIpList[@]} -gt 0 ]] || usage "Config file invalid, validatorIpList unspecified: $configFile"
|
||||
[[ ${#additionalFullNodeIps[@]} -gt 0 ]] || usage "Config file invalid, additionalFullNodeIps unspecified: $configFile"
|
||||
|
||||
buildSshOptions
|
||||
configureMetrics
|
||||
|
|
99
net/gce.sh
99
net/gce.sh
|
@ -12,20 +12,20 @@ gce)
|
|||
# shellcheck source=net/scripts/gce-provider.sh
|
||||
source "$here"/scripts/gce-provider.sh
|
||||
|
||||
cpuLeaderMachineType=n1-standard-16
|
||||
gpuLeaderMachineType="$cpuLeaderMachineType --accelerator count=4,type=nvidia-tesla-k80"
|
||||
leaderMachineType=$cpuLeaderMachineType
|
||||
validatorMachineType=n1-standard-16
|
||||
cpuBootstrapFullNodeMachineType=n1-standard-16
|
||||
gpuBootstrapFullNodeMachineType="$cpuBootstrapFullNodeMachineType --accelerator count=4,type=nvidia-tesla-k80"
|
||||
bootstrapFullNodeMachineType=$cpuBootstrapFullNodeMachineType
|
||||
fullNodeMachineType=n1-standard-16
|
||||
clientMachineType=n1-standard-16
|
||||
;;
|
||||
ec2)
|
||||
# shellcheck source=net/scripts/ec2-provider.sh
|
||||
source "$here"/scripts/ec2-provider.sh
|
||||
|
||||
cpuLeaderMachineType=m4.4xlarge
|
||||
gpuLeaderMachineType=p2.xlarge
|
||||
leaderMachineType=$cpuLeaderMachineType
|
||||
validatorMachineType=m4.2xlarge
|
||||
cpuBootstrapFullNodeMachineType=m4.4xlarge
|
||||
gpuBootstrapFullNodeMachineType=p2.xlarge
|
||||
bootstrapFullNodeMachineType=$cpuBootstrapFullNodeMachineType
|
||||
fullNodeMachineType=m4.2xlarge
|
||||
clientMachineType=m4.2xlarge
|
||||
;;
|
||||
*)
|
||||
|
@ -35,15 +35,14 @@ esac
|
|||
|
||||
|
||||
prefix=testnet-dev-${USER//[^A-Za-z0-9]/}
|
||||
validatorNodeCount=5
|
||||
additionalFullNodeCount=5
|
||||
clientNodeCount=1
|
||||
leaderBootDiskSizeInGb=1000
|
||||
validatorBootDiskSizeInGb=$leaderBootDiskSizeInGb
|
||||
fullNodeBootDiskSizeInGb=1000
|
||||
clientBootDiskSizeInGb=75
|
||||
|
||||
publicNetwork=false
|
||||
enableGpu=false
|
||||
leaderAddress=
|
||||
bootstrapFullNodeAddress=
|
||||
|
||||
usage() {
|
||||
exitcode=0
|
||||
|
@ -65,13 +64,13 @@ Manage testnet instances
|
|||
collisions (default: $prefix)
|
||||
|
||||
create-specific options:
|
||||
-n [number] - Number of validator nodes (default: $validatorNodeCount)
|
||||
-n [number] - Number of additional full nodes (default: $additionalFullNodeCount)
|
||||
-c [number] - Number of client nodes (default: $clientNodeCount)
|
||||
-P - Use public network IP addresses (default: $publicNetwork)
|
||||
-z [zone] - Zone for the nodes (default: $zone)
|
||||
-g - Enable GPU (default: $enableGpu)
|
||||
-G - Enable GPU, and set count/type of GPUs to use (e.g $cpuLeaderMachineType --accelerator count=4,type=nvidia-tesla-k80)
|
||||
-a [address] - Set the leader node's external IP address to this value.
|
||||
-G - Enable GPU, and set count/type of GPUs to use (e.g $cpuBootstrapFullNodeMachineType --accelerator count=4,type=nvidia-tesla-k80)
|
||||
-a [address] - Set the bootstrap full node's external IP address to this value.
|
||||
For GCE, [address] is the "name" of the desired External
|
||||
IP Address.
|
||||
For EC2, [address] is the "allocation ID" of the desired
|
||||
|
@ -107,7 +106,7 @@ while getopts "h?p:Pn:c:z:gG:a:d:" opt; do
|
|||
publicNetwork=true
|
||||
;;
|
||||
n)
|
||||
validatorNodeCount=$OPTARG
|
||||
additionalFullNodeCount=$OPTARG
|
||||
;;
|
||||
c)
|
||||
clientNodeCount=$OPTARG
|
||||
|
@ -117,14 +116,14 @@ while getopts "h?p:Pn:c:z:gG:a:d:" opt; do
|
|||
;;
|
||||
g)
|
||||
enableGpu=true
|
||||
leaderMachineType=$gpuLeaderMachineType
|
||||
bootstrapFullNodeMachineType=$gpuBootstrapFullNodeMachineType
|
||||
;;
|
||||
G)
|
||||
enableGpu=true
|
||||
leaderMachineType="$OPTARG"
|
||||
bootstrapFullNodeMachineType="$OPTARG"
|
||||
;;
|
||||
a)
|
||||
leaderAddress=$OPTARG
|
||||
bootstrapFullNodeAddress=$OPTARG
|
||||
;;
|
||||
d)
|
||||
bootDiskType=$OPTARG
|
||||
|
@ -231,7 +230,7 @@ EOF
|
|||
declare arrayName="$5"
|
||||
|
||||
echo "$arrayName+=($publicIp) # $name" >> "$configFile"
|
||||
if [[ $arrayName = "leaderIp" ]]; then
|
||||
if [[ $arrayName = "bootstrapFullNodeIp" ]]; then
|
||||
if $publicNetwork; then
|
||||
echo "entrypointIp=$publicIp" >> "$configFile"
|
||||
else
|
||||
|
@ -262,29 +261,29 @@ EOF
|
|||
echo "$name has booted."
|
||||
}
|
||||
|
||||
echo "Looking for leader instance..."
|
||||
cloud_FindInstance "$prefix-leader"
|
||||
echo "Looking for bootstrap fullnode instance..."
|
||||
cloud_FindInstance "$prefix-bootstrap-fullnode"
|
||||
[[ ${#instances[@]} -eq 1 ]] || {
|
||||
echo "Unable to find leader"
|
||||
echo "Unable to find bootstrap fullnode"
|
||||
exit 1
|
||||
}
|
||||
|
||||
(
|
||||
declare leaderName
|
||||
declare leaderIp
|
||||
IFS=: read -r leaderName leaderIp _ < <(echo "${instances[0]}")
|
||||
declare nodeName
|
||||
declare nodeIp
|
||||
IFS=: read -r nodeName nodeIp _ < <(echo "${instances[0]}")
|
||||
|
||||
# Try to ping the machine first.
|
||||
timeout 90s bash -c "set -o pipefail; until ping -c 3 $leaderIp | tr - _; do echo .; done"
|
||||
timeout 90s bash -c "set -o pipefail; until ping -c 3 $nodeIp | tr - _; do echo .; done"
|
||||
|
||||
if [[ ! -r $sshPrivateKey ]]; then
|
||||
echo "Fetching $sshPrivateKey from $leaderName"
|
||||
echo "Fetching $sshPrivateKey from $nodeName"
|
||||
|
||||
# Try to scp in a couple times, sshd may not yet be up even though the
|
||||
# machine can be pinged...
|
||||
set -x -o pipefail
|
||||
for i in $(seq 1 30); do
|
||||
if cloud_FetchFile "$leaderName" "$leaderIp" /solana-id_ecdsa "$sshPrivateKey"; then
|
||||
if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa "$sshPrivateKey"; then
|
||||
break
|
||||
fi
|
||||
|
||||
|
@ -297,22 +296,22 @@ EOF
|
|||
fi
|
||||
)
|
||||
|
||||
echo "leaderIp=()" >> "$configFile"
|
||||
cloud_ForEachInstance recordInstanceIp leaderIp
|
||||
echo "bootstrapFullNodeIp=()" >> "$configFile"
|
||||
cloud_ForEachInstance recordInstanceIp bootstrapFullNodeIp
|
||||
cloud_ForEachInstance waitForStartupComplete
|
||||
|
||||
echo "Looking for validator instances..."
|
||||
cloud_FindInstances "$prefix-validator"
|
||||
echo "Looking for additional fullnode instances..."
|
||||
cloud_FindInstances "$prefix-fullnode"
|
||||
[[ ${#instances[@]} -gt 0 ]] || {
|
||||
echo "Unable to find validators"
|
||||
echo "Unable to find additional fullnodes"
|
||||
exit 1
|
||||
}
|
||||
echo "validatorIpList=()" >> "$configFile"
|
||||
cloud_ForEachInstance recordInstanceIp validatorIpList
|
||||
echo "additionalFullNodeIps=()" >> "$configFile"
|
||||
cloud_ForEachInstance recordInstanceIp additionalFullNodeIps
|
||||
cloud_ForEachInstance waitForStartupComplete
|
||||
|
||||
echo "clientIpList=()" >> "$configFile"
|
||||
echo "Looking for client instances..."
|
||||
echo "Looking for client bencher instances..."
|
||||
cloud_FindInstances "$prefix-client"
|
||||
[[ ${#instances[@]} -eq 0 ]] || {
|
||||
cloud_ForEachInstance recordInstanceIp clientIpList
|
||||
|
@ -326,11 +325,11 @@ EOF
|
|||
delete() {
|
||||
$metricsWriteDatapoint "testnet-deploy net-delete-begin=1"
|
||||
|
||||
# Delete the leader node first to prevent unusual metrics on the dashboard
|
||||
# during shutdown.
|
||||
# Delete the bootstrap fullnode first to prevent unusual metrics on the dashboard
|
||||
# during shutdown (only applicable when leader rotation is disabled).
|
||||
# TODO: It would be better to fully cut-off metrics reporting before any
|
||||
# instances are deleted.
|
||||
for filter in "$prefix-leader" "$prefix-"; do
|
||||
for filter in "$prefix-bootstrap-fullnode" "$prefix-"; do
|
||||
echo "Searching for instances: $filter"
|
||||
cloud_FindInstances "$filter"
|
||||
|
||||
|
@ -352,9 +351,9 @@ delete)
|
|||
;;
|
||||
|
||||
create)
|
||||
[[ -n $validatorNodeCount ]] || usage "Need number of nodes"
|
||||
if [[ $validatorNodeCount -le 0 ]]; then
|
||||
usage "One or more validator nodes is required"
|
||||
[[ -n $additionalFullNodeCount ]] || usage "Need number of nodes"
|
||||
if [[ $additionalFullNodeCount -le 0 ]]; then
|
||||
usage "One or more additional fullnodes are required"
|
||||
fi
|
||||
|
||||
delete
|
||||
|
@ -371,8 +370,8 @@ create)
|
|||
========================================================================================
|
||||
|
||||
Network composition:
|
||||
Leader = $leaderMachineType (GPU=$enableGpu)
|
||||
Validators = $validatorNodeCount x $validatorMachineType
|
||||
Bootstrap full node = $bootstrapFullNodeMachineType (GPU=$enableGpu)
|
||||
Additional full nodes = $additionalFullNodeCount x $fullNodeMachineType
|
||||
Client(s) = $clientNodeCount x $clientMachineType
|
||||
|
||||
========================================================================================
|
||||
|
@ -435,12 +434,12 @@ touch /.instance-startup-complete
|
|||
|
||||
EOF
|
||||
|
||||
cloud_CreateInstances "$prefix" "$prefix-leader" 1 \
|
||||
"$imageName" "$leaderMachineType" "$leaderBootDiskSizeInGb" \
|
||||
"$startupScript" "$leaderAddress" "$bootDiskType"
|
||||
cloud_CreateInstances "$prefix" "$prefix-bootstrap-fullnode" 1 \
|
||||
"$imageName" "$bootstrapFullNodeMachineType" "$fullNodeBootDiskSizeInGb" \
|
||||
"$startupScript" "$bootstrapFullNodeAddress" "$bootDiskType"
|
||||
|
||||
cloud_CreateInstances "$prefix" "$prefix-validator" "$validatorNodeCount" \
|
||||
"$imageName" "$validatorMachineType" "$validatorBootDiskSizeInGb" \
|
||||
cloud_CreateInstances "$prefix" "$prefix-fullnode" "$additionalFullNodeCount" \
|
||||
"$imageName" "$fullNodeMachineType" "$fullNodeBootDiskSizeInGb" \
|
||||
"$startupScript" "" "$bootDiskType"
|
||||
|
||||
if [[ $clientNodeCount -gt 0 ]]; then
|
||||
|
|
75
net/net.sh
75
net/net.sh
|
@ -38,7 +38,7 @@ Operate a configured testnet
|
|||
|
||||
sanity/start-specific options:
|
||||
-o noLedgerVerify - Skip ledger verification
|
||||
-o noValidatorSanity - Skip validator sanity
|
||||
-o noValidatorSanity - Skip fullnode sanity
|
||||
-o rejectExtraNodes - Require the exact number of nodes
|
||||
|
||||
stop-specific options:
|
||||
|
@ -117,7 +117,7 @@ while getopts "h?S:s:T:t:o:f:" opt; do
|
|||
done
|
||||
|
||||
loadConfigFile
|
||||
expectedNodeCount=$((${#validatorIpList[@]} + 1))
|
||||
expectedNodeCount=$((${#additionalFullNodeIps[@]} + 1))
|
||||
|
||||
build() {
|
||||
declare MAYBE_DOCKER=
|
||||
|
@ -156,14 +156,14 @@ startCommon() {
|
|||
"$ipAddress":~/solana/
|
||||
}
|
||||
|
||||
startLeader() {
|
||||
startBootstrapNode() {
|
||||
declare ipAddress=$1
|
||||
declare logFile="$2"
|
||||
echo "--- Starting leader: $leaderIp"
|
||||
echo "--- Starting bootstrap full node: $bootstrapFullNodeIp"
|
||||
echo "start log: $logFile"
|
||||
|
||||
# Deploy local binaries to leader. Validators and clients later fetch the
|
||||
# binaries from the leader.
|
||||
# Deploy local binaries to bootstrap full node. Other full nodes and clients later fetch the
|
||||
# binaries from it.
|
||||
(
|
||||
set -x
|
||||
startCommon "$ipAddress" || exit 1
|
||||
|
@ -183,7 +183,7 @@ startLeader() {
|
|||
esac
|
||||
|
||||
ssh "${sshOptions[@]}" -n "$ipAddress" \
|
||||
"./solana/net/remote/remote-node.sh $deployMethod leader $publicNetwork $entrypointIp $expectedNodeCount \"$RUST_LOG\""
|
||||
"./solana/net/remote/remote-node.sh $deployMethod bootstrap_fullnode $publicNetwork $entrypointIp $expectedNodeCount \"$RUST_LOG\""
|
||||
) >> "$logFile" 2>&1 || {
|
||||
cat "$logFile"
|
||||
echo "^^^ +++"
|
||||
|
@ -191,20 +191,20 @@ startLeader() {
|
|||
}
|
||||
}
|
||||
|
||||
startValidator() {
|
||||
startNode() {
|
||||
declare ipAddress=$1
|
||||
declare logFile="$netLogDir/validator-$ipAddress.log"
|
||||
declare logFile="$netLogDir/fullnode-$ipAddress.log"
|
||||
|
||||
echo "--- Starting validator: $ipAddress"
|
||||
echo "--- Starting full node: $ipAddress"
|
||||
echo "start log: $logFile"
|
||||
(
|
||||
set -x
|
||||
startCommon "$ipAddress"
|
||||
ssh "${sshOptions[@]}" -n "$ipAddress" \
|
||||
"./solana/net/remote/remote-node.sh $deployMethod validator $publicNetwork $entrypointIp $expectedNodeCount \"$RUST_LOG\""
|
||||
"./solana/net/remote/remote-node.sh $deployMethod fullnode $publicNetwork $entrypointIp $expectedNodeCount \"$RUST_LOG\""
|
||||
) >> "$logFile" 2>&1 &
|
||||
declare pid=$!
|
||||
ln -sfT "validator-$ipAddress.log" "$netLogDir/validator-$pid.log"
|
||||
ln -sfT "fullnode-$ipAddress.log" "$netLogDir/fullnode-$pid.log"
|
||||
pids+=("$pid")
|
||||
}
|
||||
|
||||
|
@ -226,13 +226,13 @@ startClient() {
|
|||
}
|
||||
|
||||
sanity() {
|
||||
declare expectedNodeCount=$((${#validatorIpList[@]} + 1))
|
||||
declare expectedNodeCount=$((${#additionalFullNodeIps[@]} + 1))
|
||||
declare ok=true
|
||||
|
||||
echo "--- Sanity"
|
||||
$metricsWriteDatapoint "testnet-deploy net-sanity-begin=1"
|
||||
|
||||
declare host=$leaderIp # TODO: maybe use ${validatorIpList[0]} ?
|
||||
declare host=$bootstrapFullNodeIp # TODO: maybe use ${additionalFullNodeIps[0]} ?
|
||||
(
|
||||
set -x
|
||||
# shellcheck disable=SC2029 # remote-client.sh args are expanded on client side intentionally
|
||||
|
@ -279,13 +279,18 @@ start() {
|
|||
tar)
|
||||
if [[ -n $releaseChannel ]]; then
|
||||
rm -f "$SOLANA_ROOT"/solana-release.tar.bz2
|
||||
cd "$SOLANA_ROOT"
|
||||
|
||||
set -x
|
||||
curl -o solana-release.tar.bz2 http://solana-release.s3.amazonaws.com/"$releaseChannel"/solana-release.tar.bz2
|
||||
tarballFilename=solana-release.tar.bz2
|
||||
(
|
||||
set -x
|
||||
curl -o "$SOLANA_ROOT"/solana-release.tar.bz2 http://solana-release.s3.amazonaws.com/"$releaseChannel"/solana-release.tar.bz2
|
||||
)
|
||||
tarballFilename="$SOLANA_ROOT"/solana-release.tar.bz2
|
||||
fi
|
||||
tar jxvf $tarballFilename
|
||||
(
|
||||
set -x
|
||||
rm -rf "$SOLANA_ROOT"/solana-release
|
||||
(cd "$SOLANA_ROOT"; tar jxv) < "$tarballFilename"
|
||||
cat "$SOLANA_ROOT"/solana-release/version.txt
|
||||
)
|
||||
;;
|
||||
local)
|
||||
build
|
||||
|
@ -299,20 +304,20 @@ start() {
|
|||
$metricsWriteDatapoint "testnet-deploy net-start-begin=1"
|
||||
|
||||
SECONDS=0
|
||||
declare leaderDeployTime=
|
||||
startLeader "$leaderIp" "$netLogDir/leader-$leaderIp.log"
|
||||
leaderDeployTime=$SECONDS
|
||||
declare bootstrapNodeDeployTime=
|
||||
startBootstrapNode "$bootstrapFullNodeIp" "$netLogDir/bootstrap-fullnode-$bootstrapFullNodeIp.log"
|
||||
bootstrapNodeDeployTime=$SECONDS
|
||||
$metricsWriteDatapoint "testnet-deploy net-leader-started=1"
|
||||
|
||||
SECONDS=0
|
||||
pids=()
|
||||
loopCount=0
|
||||
for ipAddress in "${validatorIpList[@]}"; do
|
||||
startValidator "$ipAddress"
|
||||
for ipAddress in "${additionalFullNodeIps[@]}"; do
|
||||
startNode "$ipAddress"
|
||||
|
||||
# Staggering validator startup time. If too many validators
|
||||
# bootup simultaneously, leader node gets more rsync requests
|
||||
# from the validators than it can handle.
|
||||
# Stagger additional node start time. If too many nodes start simultaneously
|
||||
# the bootstrap node gets more rsync requests from the additional nodes than
|
||||
# it can handle.
|
||||
((loopCount++ % 2 == 0)) && sleep 2
|
||||
done
|
||||
|
||||
|
@ -320,14 +325,14 @@ start() {
|
|||
declare ok=true
|
||||
wait "$pid" || ok=false
|
||||
if ! $ok; then
|
||||
cat "$netLogDir/validator-$pid.log"
|
||||
cat "$netLogDir/fullnode-$pid.log"
|
||||
echo ^^^ +++
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
$metricsWriteDatapoint "testnet-deploy net-validators-started=1"
|
||||
validatorDeployTime=$SECONDS
|
||||
additionalNodeDeployTime=$SECONDS
|
||||
|
||||
sanity
|
||||
|
||||
|
@ -342,7 +347,7 @@ start() {
|
|||
case $deployMethod in
|
||||
snap)
|
||||
IFS=\ read -r _ networkVersion _ < <(
|
||||
ssh "${sshOptions[@]}" "$leaderIp" \
|
||||
ssh "${sshOptions[@]}" "$bootstrapFullNodeIp" \
|
||||
"snap info solana | grep \"^installed:\""
|
||||
)
|
||||
networkVersion=${networkVersion/0+git./}
|
||||
|
@ -363,8 +368,8 @@ start() {
|
|||
|
||||
echo
|
||||
echo "+++ Deployment Successful"
|
||||
echo "Leader deployment took $leaderDeployTime seconds"
|
||||
echo "Validator deployment (${#validatorIpList[@]} instances) took $validatorDeployTime seconds"
|
||||
echo "Bootstrap full node deployment took $bootstrapNodeDeployTime seconds"
|
||||
echo "Additional full node deployment (${#additionalFullNodeIps[@]} instances) took $additionalNodeDeployTime seconds"
|
||||
echo "Client deployment (${#clientIpList[@]} instances) took $clientDeployTime seconds"
|
||||
echo "Network start logs in $netLogDir:"
|
||||
ls -l "$netLogDir"
|
||||
|
@ -394,9 +399,9 @@ stop() {
|
|||
SECONDS=0
|
||||
$metricsWriteDatapoint "testnet-deploy net-stop-begin=1"
|
||||
|
||||
stopNode "$leaderIp"
|
||||
stopNode "$bootstrapFullNodeIp"
|
||||
|
||||
for ipAddress in "${validatorIpList[@]}" "${clientIpList[@]}"; do
|
||||
for ipAddress in "${additionalFullNodeIps[@]}" "${clientIpList[@]}"; do
|
||||
stopNode "$ipAddress"
|
||||
done
|
||||
|
||||
|
|
|
@ -3,12 +3,14 @@ set -e
|
|||
|
||||
cd "$(dirname "$0")"/../..
|
||||
|
||||
set -x
|
||||
deployMethod="$1"
|
||||
nodeType="$2"
|
||||
publicNetwork="$3"
|
||||
entrypointIp="$4"
|
||||
numNodes="$5"
|
||||
RUST_LOG="$6"
|
||||
set +x
|
||||
export RUST_LOG=${RUST_LOG:-solana=warn} # if RUST_LOG is unset, default to warn
|
||||
|
||||
missing() {
|
||||
|
@ -40,7 +42,7 @@ fi
|
|||
case $deployMethod in
|
||||
snap)
|
||||
SECONDS=0
|
||||
[[ $nodeType = leader ]] ||
|
||||
[[ $nodeType = bootstrap_fullnode ]] ||
|
||||
net/scripts/rsync-retry.sh -vPrc "$entrypointIp:~/solana/solana.snap" .
|
||||
sudo snap install solana.snap --devmode --dangerous
|
||||
|
||||
|
@ -61,13 +63,13 @@ snap)
|
|||
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
|
||||
fi
|
||||
|
||||
if [[ $nodeType = leader ]]; then
|
||||
if [[ $nodeType = bootstrap_fullnode ]]; then
|
||||
nodeConfig="mode=leader+drone $commonNodeConfig"
|
||||
ln -sf -T /var/snap/solana/current/leader/current leader.log
|
||||
ln -sf -T /var/snap/solana/current/leader/current fullnode.log
|
||||
ln -sf -T /var/snap/solana/current/drone/current drone.log
|
||||
else
|
||||
nodeConfig="mode=validator $commonNodeConfig"
|
||||
ln -sf -T /var/snap/solana/current/validator/current validator.log
|
||||
ln -sf -T /var/snap/solana/current/validator/current fullnode.log
|
||||
fi
|
||||
|
||||
logmarker="solana deploy $(date)/$RANDOM"
|
||||
|
@ -96,7 +98,7 @@ local|tar)
|
|||
scripts/net-stats.sh > net-stats.log 2>&1 &
|
||||
|
||||
case $nodeType in
|
||||
leader)
|
||||
bootstrap_fullnode)
|
||||
if [[ -e /dev/nvidia0 && -x ~/.cargo/bin/solana-fullnode-cuda ]]; then
|
||||
echo Selecting solana-fullnode-cuda
|
||||
export SOLANA_CUDA=1
|
||||
|
@ -104,9 +106,10 @@ local|tar)
|
|||
./multinode-demo/setup.sh -t leader $setupArgs
|
||||
./multinode-demo/drone.sh > drone.log 2>&1 &
|
||||
./multinode-demo/leader.sh > leader.log 2>&1 &
|
||||
ln -sTf leader.log fullnode.log
|
||||
;;
|
||||
validator)
|
||||
net/scripts/rsync-retry.sh -vPrc "$entrypointIp:~/.cargo/bin/solana*" ~/.cargo/bin/
|
||||
fullnode)
|
||||
net/scripts/rsync-retry.sh -vPrc "$entrypointIp":~/.cargo/bin/ ~/.cargo/bin/
|
||||
|
||||
if [[ -e /dev/nvidia0 && -x ~/.cargo/bin/solana-fullnode-cuda ]]; then
|
||||
echo Selecting solana-fullnode-cuda
|
||||
|
@ -114,7 +117,8 @@ local|tar)
|
|||
fi
|
||||
|
||||
./multinode-demo/setup.sh -t validator $setupArgs
|
||||
./multinode-demo/validator.sh "$entrypointIp":~/solana "$entrypointIp:8001" >validator.log 2>&1 &
|
||||
./multinode-demo/validator.sh "$entrypointIp":~/solana "$entrypointIp:8001" > validator.log 2>&1 &
|
||||
ln -sTf validator.log fullnode.log
|
||||
;;
|
||||
*)
|
||||
echo "Error: unknown node type: $nodeType"
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#!/usr/bin/env bash
|
||||
set -e
|
||||
#
|
||||
# This script is to be run on the leader node
|
||||
# This script is to be run on the bootstrap full node
|
||||
#
|
||||
|
||||
cd "$(dirname "$0")"/../..
|
||||
|
@ -139,18 +139,18 @@ if $validatorSanity; then
|
|||
(
|
||||
set -x -o pipefail
|
||||
./multinode-demo/setup.sh -t validator || exit $?
|
||||
timeout 10s ./multinode-demo/validator.sh "$entrypointRsyncUrl" "$entrypointIp:8001" 2>&1 | tee validator.log
|
||||
timeout 10s ./multinode-demo/validator.sh "$entrypointRsyncUrl" "$entrypointIp:8001" 2>&1 | tee validator-sanity.log
|
||||
) || {
|
||||
exitcode=$?
|
||||
[[ $exitcode -eq 124 ]] || exit $exitcode
|
||||
}
|
||||
wc -l validator.log
|
||||
if grep -C100 panic validator.log; then
|
||||
wc -l validator-sanity.log
|
||||
if grep -C100 panic validator-sanity.log; then
|
||||
echo "^^^ +++"
|
||||
echo "Panic observed"
|
||||
exit 1
|
||||
else
|
||||
echo "Validator log looks ok"
|
||||
echo "Validator sanity log looks ok"
|
||||
fi
|
||||
else
|
||||
echo "^^^ +++"
|
||||
|
|
10
net/ssh.sh
10
net/ssh.sh
|
@ -49,12 +49,12 @@ printNode() {
|
|||
printf " %-25s | For logs run: $0 $ip tail -f solana/$nodeType.log\n" "$0 $ip"
|
||||
}
|
||||
|
||||
echo Leader:
|
||||
printNode leader "$leaderIp"
|
||||
echo Bootstrap full node:
|
||||
printNode fullnode "$bootstrapFullNodeIp"
|
||||
echo
|
||||
echo Validators:
|
||||
for ipAddress in "${validatorIpList[@]}"; do
|
||||
printNode validator "$ipAddress"
|
||||
echo Additional full nodes:
|
||||
for ipAddress in "${additionalFullNodeIps[@]}"; do
|
||||
printNode fullnode "$ipAddress"
|
||||
done
|
||||
echo
|
||||
echo Clients:
|
||||
|
|
Loading…
Reference in New Issue