Generalize net/ from leader/validator to bootstrap-fullnode/fullnode
This commit is contained in:
parent
b880dafe28
commit
04a0652614
|
@ -4,10 +4,10 @@ set -e
|
||||||
cd "$(dirname "$0")"/..
|
cd "$(dirname "$0")"/..
|
||||||
|
|
||||||
zone=
|
zone=
|
||||||
leaderAddress=
|
bootstrapFullNodeAddress=
|
||||||
leaderMachineType=
|
bootstrapFullNodeMachineType=
|
||||||
clientNodeCount=0
|
clientNodeCount=0
|
||||||
validatorNodeCount=10
|
additionalFullNodeCount=10
|
||||||
publicNetwork=false
|
publicNetwork=false
|
||||||
snapChannel=edge
|
snapChannel=edge
|
||||||
tarChannelOrTag=edge
|
tarChannelOrTag=edge
|
||||||
|
@ -37,12 +37,12 @@ Deploys a CD testnet
|
||||||
specified release channel (edge|beta|stable) or release tag
|
specified release channel (edge|beta|stable) or release tag
|
||||||
(vX.Y.Z)
|
(vX.Y.Z)
|
||||||
(default: $tarChannelOrTag)
|
(default: $tarChannelOrTag)
|
||||||
-n [number] - Number of validator nodes (default: $validatorNodeCount)
|
-n [number] - Number of additional full nodes (default: $additionalFullNodeCount)
|
||||||
-c [number] - Number of client nodes (default: $clientNodeCount)
|
-c [number] - Number of client bencher nodes (default: $clientNodeCount)
|
||||||
-P - Use public network IP addresses (default: $publicNetwork)
|
-P - Use public network IP addresses (default: $publicNetwork)
|
||||||
-G - Enable GPU, and set count/type of GPUs to use (e.g n1-standard-16 --accelerator count=4,type=nvidia-tesla-k80)
|
-G - Enable GPU, and set count/type of GPUs to use (e.g n1-standard-16 --accelerator count=4,type=nvidia-tesla-k80)
|
||||||
-g - Enable GPU (default: $enableGpu)
|
-g - Enable GPU (default: $enableGpu)
|
||||||
-a [address] - Set the leader node's external IP address to this GCE address
|
-a [address] - Set the bootstrap fullnode's external IP address to this GCE address
|
||||||
-d - Delete the network
|
-d - Delete the network
|
||||||
|
|
||||||
Note: the SOLANA_METRICS_CONFIG environment variable is used to configure
|
Note: the SOLANA_METRICS_CONFIG environment variable is used to configure
|
||||||
|
@ -68,7 +68,7 @@ while getopts "h?p:Pn:c:s:t:gG:a:d" opt; do
|
||||||
publicNetwork=true
|
publicNetwork=true
|
||||||
;;
|
;;
|
||||||
n)
|
n)
|
||||||
validatorNodeCount=$OPTARG
|
additionalFullNodeCount=$OPTARG
|
||||||
;;
|
;;
|
||||||
c)
|
c)
|
||||||
clientNodeCount=$OPTARG
|
clientNodeCount=$OPTARG
|
||||||
|
@ -99,10 +99,10 @@ while getopts "h?p:Pn:c:s:t:gG:a:d" opt; do
|
||||||
;;
|
;;
|
||||||
G)
|
G)
|
||||||
enableGpu=true
|
enableGpu=true
|
||||||
leaderMachineType=$OPTARG
|
bootstrapFullNodeMachineType=$OPTARG
|
||||||
;;
|
;;
|
||||||
a)
|
a)
|
||||||
leaderAddress=$OPTARG
|
bootstrapFullNodeAddress=$OPTARG
|
||||||
;;
|
;;
|
||||||
d)
|
d)
|
||||||
delete=true
|
delete=true
|
||||||
|
@ -115,18 +115,18 @@ done
|
||||||
|
|
||||||
|
|
||||||
create_args=(
|
create_args=(
|
||||||
-a "$leaderAddress"
|
-a "$bootstrapFullNodeAddress"
|
||||||
-c "$clientNodeCount"
|
-c "$clientNodeCount"
|
||||||
-n "$validatorNodeCount"
|
-n "$additionalFullNodeCount"
|
||||||
-p "$netName"
|
-p "$netName"
|
||||||
-z "$zone"
|
-z "$zone"
|
||||||
)
|
)
|
||||||
|
|
||||||
if $enableGpu; then
|
if $enableGpu; then
|
||||||
if [[ -z $leaderMachineType ]]; then
|
if [[ -z $bootstrapFullNodeMachineType ]]; then
|
||||||
create_args+=(-g)
|
create_args+=(-g)
|
||||||
else
|
else
|
||||||
create_args+=(-G "$leaderMachineType")
|
create_args+=(-G "$bootstrapFullNodeMachineType")
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
|
@ -26,12 +26,12 @@ access to create a new InfluxDB database. Ask mvines@ for help if needed.
|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
NOTE: This example uses GCP. If you are using AWS, replace `./gce.sh` with
|
NOTE: This example uses GCE. If you are using AWS EC2, replace `./gce.sh` with
|
||||||
`./ec2.sh` in the commands.
|
`./ec2.sh` in the commands.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
$ cd net/
|
$ cd net/
|
||||||
$ ./gce.sh create -n 5 -c 1 #<-- Create a GCE testnet with 5 validators, 1 client (billing starts here)
|
$ ./gce.sh create -n 5 -c 1 #<-- Create a GCE testnet with 5 additional nodes (beyond the bootstrap node), 1 client bencher (billing starts here)
|
||||||
$ ./init-metrics.sh $(whoami) #<-- Configure a metrics database for the testnet
|
$ ./init-metrics.sh $(whoami) #<-- Configure a metrics database for the testnet
|
||||||
$ ./net.sh start #<-- Deploy the network from the local workspace
|
$ ./net.sh start #<-- Deploy the network from the local workspace
|
||||||
$ ./ssh.sh #<-- Details on how to ssh into any testnet node to access logs/etc
|
$ ./ssh.sh #<-- Details on how to ssh into any testnet node to access logs/etc
|
||||||
|
|
|
@ -22,12 +22,12 @@ configFile="$netConfigDir/config"
|
||||||
|
|
||||||
entrypointIp=
|
entrypointIp=
|
||||||
publicNetwork=
|
publicNetwork=
|
||||||
leaderIp=
|
bootstrapFullNodeIp=
|
||||||
netBasename=
|
netBasename=
|
||||||
sshPrivateKey=
|
sshPrivateKey=
|
||||||
clientIpList=()
|
clientIpList=()
|
||||||
sshOptions=()
|
sshOptions=()
|
||||||
validatorIpList=()
|
additionalFullNodeIps=()
|
||||||
|
|
||||||
buildSshOptions() {
|
buildSshOptions() {
|
||||||
sshOptions=(
|
sshOptions=(
|
||||||
|
@ -48,10 +48,10 @@ loadConfigFile() {
|
||||||
source "$configFile"
|
source "$configFile"
|
||||||
[[ -n "$entrypointIp" ]] || usage "Config file invalid, entrypointIp unspecified: $configFile"
|
[[ -n "$entrypointIp" ]] || usage "Config file invalid, entrypointIp unspecified: $configFile"
|
||||||
[[ -n "$publicNetwork" ]] || usage "Config file invalid, publicNetwork unspecified: $configFile"
|
[[ -n "$publicNetwork" ]] || usage "Config file invalid, publicNetwork unspecified: $configFile"
|
||||||
[[ -n "$leaderIp" ]] || usage "Config file invalid, leaderIp unspecified: $configFile"
|
[[ -n "$bootstrapFullNodeIp" ]] || usage "Config file invalid, bootstrapFullNodeIp unspecified: $configFile"
|
||||||
[[ -n "$netBasename" ]] || usage "Config file invalid, netBasename unspecified: $configFile"
|
[[ -n "$netBasename" ]] || usage "Config file invalid, netBasename unspecified: $configFile"
|
||||||
[[ -n $sshPrivateKey ]] || usage "Config file invalid, sshPrivateKey unspecified: $configFile"
|
[[ -n $sshPrivateKey ]] || usage "Config file invalid, sshPrivateKey unspecified: $configFile"
|
||||||
[[ ${#validatorIpList[@]} -gt 0 ]] || usage "Config file invalid, validatorIpList unspecified: $configFile"
|
[[ ${#additionalFullNodeIps[@]} -gt 0 ]] || usage "Config file invalid, additionalFullNodeIps unspecified: $configFile"
|
||||||
|
|
||||||
buildSshOptions
|
buildSshOptions
|
||||||
configureMetrics
|
configureMetrics
|
||||||
|
|
99
net/gce.sh
99
net/gce.sh
|
@ -12,20 +12,20 @@ gce)
|
||||||
# shellcheck source=net/scripts/gce-provider.sh
|
# shellcheck source=net/scripts/gce-provider.sh
|
||||||
source "$here"/scripts/gce-provider.sh
|
source "$here"/scripts/gce-provider.sh
|
||||||
|
|
||||||
cpuLeaderMachineType=n1-standard-16
|
cpuBootstrapFullNodeMachineType=n1-standard-16
|
||||||
gpuLeaderMachineType="$cpuLeaderMachineType --accelerator count=4,type=nvidia-tesla-k80"
|
gpuBootstrapFullNodeMachineType="$cpuBootstrapFullNodeMachineType --accelerator count=4,type=nvidia-tesla-k80"
|
||||||
leaderMachineType=$cpuLeaderMachineType
|
bootstrapFullNodeMachineType=$cpuBootstrapFullNodeMachineType
|
||||||
validatorMachineType=n1-standard-16
|
fullNodeMachineType=n1-standard-16
|
||||||
clientMachineType=n1-standard-16
|
clientMachineType=n1-standard-16
|
||||||
;;
|
;;
|
||||||
ec2)
|
ec2)
|
||||||
# shellcheck source=net/scripts/ec2-provider.sh
|
# shellcheck source=net/scripts/ec2-provider.sh
|
||||||
source "$here"/scripts/ec2-provider.sh
|
source "$here"/scripts/ec2-provider.sh
|
||||||
|
|
||||||
cpuLeaderMachineType=m4.4xlarge
|
cpuBootstrapFullNodeMachineType=m4.4xlarge
|
||||||
gpuLeaderMachineType=p2.xlarge
|
gpuBootstrapFullNodeMachineType=p2.xlarge
|
||||||
leaderMachineType=$cpuLeaderMachineType
|
bootstrapFullNodeMachineType=$cpuBootstrapFullNodeMachineType
|
||||||
validatorMachineType=m4.2xlarge
|
fullNodeMachineType=m4.2xlarge
|
||||||
clientMachineType=m4.2xlarge
|
clientMachineType=m4.2xlarge
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
|
@ -35,15 +35,14 @@ esac
|
||||||
|
|
||||||
|
|
||||||
prefix=testnet-dev-${USER//[^A-Za-z0-9]/}
|
prefix=testnet-dev-${USER//[^A-Za-z0-9]/}
|
||||||
validatorNodeCount=5
|
additionalFullNodeCount=5
|
||||||
clientNodeCount=1
|
clientNodeCount=1
|
||||||
leaderBootDiskSizeInGb=1000
|
fullNodeBootDiskSizeInGb=1000
|
||||||
validatorBootDiskSizeInGb=$leaderBootDiskSizeInGb
|
|
||||||
clientBootDiskSizeInGb=75
|
clientBootDiskSizeInGb=75
|
||||||
|
|
||||||
publicNetwork=false
|
publicNetwork=false
|
||||||
enableGpu=false
|
enableGpu=false
|
||||||
leaderAddress=
|
bootstrapFullNodeAddress=
|
||||||
|
|
||||||
usage() {
|
usage() {
|
||||||
exitcode=0
|
exitcode=0
|
||||||
|
@ -65,13 +64,13 @@ Manage testnet instances
|
||||||
collisions (default: $prefix)
|
collisions (default: $prefix)
|
||||||
|
|
||||||
create-specific options:
|
create-specific options:
|
||||||
-n [number] - Number of validator nodes (default: $validatorNodeCount)
|
-n [number] - Number of additional full nodes (default: $additionalFullNodeCount)
|
||||||
-c [number] - Number of client nodes (default: $clientNodeCount)
|
-c [number] - Number of client nodes (default: $clientNodeCount)
|
||||||
-P - Use public network IP addresses (default: $publicNetwork)
|
-P - Use public network IP addresses (default: $publicNetwork)
|
||||||
-z [zone] - Zone for the nodes (default: $zone)
|
-z [zone] - Zone for the nodes (default: $zone)
|
||||||
-g - Enable GPU (default: $enableGpu)
|
-g - Enable GPU (default: $enableGpu)
|
||||||
-G - Enable GPU, and set count/type of GPUs to use (e.g $cpuLeaderMachineType --accelerator count=4,type=nvidia-tesla-k80)
|
-G - Enable GPU, and set count/type of GPUs to use (e.g $cpuBootstrapFullNodeMachineType --accelerator count=4,type=nvidia-tesla-k80)
|
||||||
-a [address] - Set the leader node's external IP address to this value.
|
-a [address] - Set the bootstrap full node's external IP address to this value.
|
||||||
For GCE, [address] is the "name" of the desired External
|
For GCE, [address] is the "name" of the desired External
|
||||||
IP Address.
|
IP Address.
|
||||||
For EC2, [address] is the "allocation ID" of the desired
|
For EC2, [address] is the "allocation ID" of the desired
|
||||||
|
@ -107,7 +106,7 @@ while getopts "h?p:Pn:c:z:gG:a:d:" opt; do
|
||||||
publicNetwork=true
|
publicNetwork=true
|
||||||
;;
|
;;
|
||||||
n)
|
n)
|
||||||
validatorNodeCount=$OPTARG
|
additionalFullNodeCount=$OPTARG
|
||||||
;;
|
;;
|
||||||
c)
|
c)
|
||||||
clientNodeCount=$OPTARG
|
clientNodeCount=$OPTARG
|
||||||
|
@ -117,14 +116,14 @@ while getopts "h?p:Pn:c:z:gG:a:d:" opt; do
|
||||||
;;
|
;;
|
||||||
g)
|
g)
|
||||||
enableGpu=true
|
enableGpu=true
|
||||||
leaderMachineType=$gpuLeaderMachineType
|
bootstrapFullNodeMachineType=$gpuBootstrapFullNodeMachineType
|
||||||
;;
|
;;
|
||||||
G)
|
G)
|
||||||
enableGpu=true
|
enableGpu=true
|
||||||
leaderMachineType="$OPTARG"
|
bootstrapFullNodeMachineType="$OPTARG"
|
||||||
;;
|
;;
|
||||||
a)
|
a)
|
||||||
leaderAddress=$OPTARG
|
bootstrapFullNodeAddress=$OPTARG
|
||||||
;;
|
;;
|
||||||
d)
|
d)
|
||||||
bootDiskType=$OPTARG
|
bootDiskType=$OPTARG
|
||||||
|
@ -231,7 +230,7 @@ EOF
|
||||||
declare arrayName="$5"
|
declare arrayName="$5"
|
||||||
|
|
||||||
echo "$arrayName+=($publicIp) # $name" >> "$configFile"
|
echo "$arrayName+=($publicIp) # $name" >> "$configFile"
|
||||||
if [[ $arrayName = "leaderIp" ]]; then
|
if [[ $arrayName = "bootstrapFullNodeIp" ]]; then
|
||||||
if $publicNetwork; then
|
if $publicNetwork; then
|
||||||
echo "entrypointIp=$publicIp" >> "$configFile"
|
echo "entrypointIp=$publicIp" >> "$configFile"
|
||||||
else
|
else
|
||||||
|
@ -262,29 +261,29 @@ EOF
|
||||||
echo "$name has booted."
|
echo "$name has booted."
|
||||||
}
|
}
|
||||||
|
|
||||||
echo "Looking for leader instance..."
|
echo "Looking for bootstrap fullnode instance..."
|
||||||
cloud_FindInstance "$prefix-leader"
|
cloud_FindInstance "$prefix-bootstrap-fullnode"
|
||||||
[[ ${#instances[@]} -eq 1 ]] || {
|
[[ ${#instances[@]} -eq 1 ]] || {
|
||||||
echo "Unable to find leader"
|
echo "Unable to find bootstrap fullnode"
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
(
|
(
|
||||||
declare leaderName
|
declare nodeName
|
||||||
declare leaderIp
|
declare nodeIp
|
||||||
IFS=: read -r leaderName leaderIp _ < <(echo "${instances[0]}")
|
IFS=: read -r nodeName nodeIp _ < <(echo "${instances[0]}")
|
||||||
|
|
||||||
# Try to ping the machine first.
|
# Try to ping the machine first.
|
||||||
timeout 90s bash -c "set -o pipefail; until ping -c 3 $leaderIp | tr - _; do echo .; done"
|
timeout 90s bash -c "set -o pipefail; until ping -c 3 $nodeIp | tr - _; do echo .; done"
|
||||||
|
|
||||||
if [[ ! -r $sshPrivateKey ]]; then
|
if [[ ! -r $sshPrivateKey ]]; then
|
||||||
echo "Fetching $sshPrivateKey from $leaderName"
|
echo "Fetching $sshPrivateKey from $nodeName"
|
||||||
|
|
||||||
# Try to scp in a couple times, sshd may not yet be up even though the
|
# Try to scp in a couple times, sshd may not yet be up even though the
|
||||||
# machine can be pinged...
|
# machine can be pinged...
|
||||||
set -x -o pipefail
|
set -x -o pipefail
|
||||||
for i in $(seq 1 30); do
|
for i in $(seq 1 30); do
|
||||||
if cloud_FetchFile "$leaderName" "$leaderIp" /solana-id_ecdsa "$sshPrivateKey"; then
|
if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa "$sshPrivateKey"; then
|
||||||
break
|
break
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
@ -297,22 +296,22 @@ EOF
|
||||||
fi
|
fi
|
||||||
)
|
)
|
||||||
|
|
||||||
echo "leaderIp=()" >> "$configFile"
|
echo "bootstrapFullNodeIp=()" >> "$configFile"
|
||||||
cloud_ForEachInstance recordInstanceIp leaderIp
|
cloud_ForEachInstance recordInstanceIp bootstrapFullNodeIp
|
||||||
cloud_ForEachInstance waitForStartupComplete
|
cloud_ForEachInstance waitForStartupComplete
|
||||||
|
|
||||||
echo "Looking for validator instances..."
|
echo "Looking for additional fullnode instances..."
|
||||||
cloud_FindInstances "$prefix-validator"
|
cloud_FindInstances "$prefix-fullnode"
|
||||||
[[ ${#instances[@]} -gt 0 ]] || {
|
[[ ${#instances[@]} -gt 0 ]] || {
|
||||||
echo "Unable to find validators"
|
echo "Unable to find additional fullnodes"
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
echo "validatorIpList=()" >> "$configFile"
|
echo "additionalFullNodeIps=()" >> "$configFile"
|
||||||
cloud_ForEachInstance recordInstanceIp validatorIpList
|
cloud_ForEachInstance recordInstanceIp additionalFullNodeIps
|
||||||
cloud_ForEachInstance waitForStartupComplete
|
cloud_ForEachInstance waitForStartupComplete
|
||||||
|
|
||||||
echo "clientIpList=()" >> "$configFile"
|
echo "clientIpList=()" >> "$configFile"
|
||||||
echo "Looking for client instances..."
|
echo "Looking for client bencher instances..."
|
||||||
cloud_FindInstances "$prefix-client"
|
cloud_FindInstances "$prefix-client"
|
||||||
[[ ${#instances[@]} -eq 0 ]] || {
|
[[ ${#instances[@]} -eq 0 ]] || {
|
||||||
cloud_ForEachInstance recordInstanceIp clientIpList
|
cloud_ForEachInstance recordInstanceIp clientIpList
|
||||||
|
@ -326,11 +325,11 @@ EOF
|
||||||
delete() {
|
delete() {
|
||||||
$metricsWriteDatapoint "testnet-deploy net-delete-begin=1"
|
$metricsWriteDatapoint "testnet-deploy net-delete-begin=1"
|
||||||
|
|
||||||
# Delete the leader node first to prevent unusual metrics on the dashboard
|
# Delete the bootstrap fullnode first to prevent unusual metrics on the dashboard
|
||||||
# during shutdown.
|
# during shutdown (only applicable when leader rotation is disabled).
|
||||||
# TODO: It would be better to fully cut-off metrics reporting before any
|
# TODO: It would be better to fully cut-off metrics reporting before any
|
||||||
# instances are deleted.
|
# instances are deleted.
|
||||||
for filter in "$prefix-leader" "$prefix-"; do
|
for filter in "$prefix-bootstrap-fullnode" "$prefix-"; do
|
||||||
echo "Searching for instances: $filter"
|
echo "Searching for instances: $filter"
|
||||||
cloud_FindInstances "$filter"
|
cloud_FindInstances "$filter"
|
||||||
|
|
||||||
|
@ -352,9 +351,9 @@ delete)
|
||||||
;;
|
;;
|
||||||
|
|
||||||
create)
|
create)
|
||||||
[[ -n $validatorNodeCount ]] || usage "Need number of nodes"
|
[[ -n $additionalFullNodeCount ]] || usage "Need number of nodes"
|
||||||
if [[ $validatorNodeCount -le 0 ]]; then
|
if [[ $additionalFullNodeCount -le 0 ]]; then
|
||||||
usage "One or more validator nodes is required"
|
usage "One or more additional fullnodes are required"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
delete
|
delete
|
||||||
|
@ -371,8 +370,8 @@ create)
|
||||||
========================================================================================
|
========================================================================================
|
||||||
|
|
||||||
Network composition:
|
Network composition:
|
||||||
Leader = $leaderMachineType (GPU=$enableGpu)
|
Bootstrap full node = $bootstrapFullNodeMachineType (GPU=$enableGpu)
|
||||||
Validators = $validatorNodeCount x $validatorMachineType
|
Additional full nodes = $additionalFullNodeCount x $fullNodeMachineType
|
||||||
Client(s) = $clientNodeCount x $clientMachineType
|
Client(s) = $clientNodeCount x $clientMachineType
|
||||||
|
|
||||||
========================================================================================
|
========================================================================================
|
||||||
|
@ -435,12 +434,12 @@ touch /.instance-startup-complete
|
||||||
|
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
cloud_CreateInstances "$prefix" "$prefix-leader" 1 \
|
cloud_CreateInstances "$prefix" "$prefix-bootstrap-fullnode" 1 \
|
||||||
"$imageName" "$leaderMachineType" "$leaderBootDiskSizeInGb" \
|
"$imageName" "$bootstrapFullNodeMachineType" "$fullNodeBootDiskSizeInGb" \
|
||||||
"$startupScript" "$leaderAddress" "$bootDiskType"
|
"$startupScript" "$bootstrapFullNodeAddress" "$bootDiskType"
|
||||||
|
|
||||||
cloud_CreateInstances "$prefix" "$prefix-validator" "$validatorNodeCount" \
|
cloud_CreateInstances "$prefix" "$prefix-fullnode" "$additionalFullNodeCount" \
|
||||||
"$imageName" "$validatorMachineType" "$validatorBootDiskSizeInGb" \
|
"$imageName" "$fullNodeMachineType" "$fullNodeBootDiskSizeInGb" \
|
||||||
"$startupScript" "" "$bootDiskType"
|
"$startupScript" "" "$bootDiskType"
|
||||||
|
|
||||||
if [[ $clientNodeCount -gt 0 ]]; then
|
if [[ $clientNodeCount -gt 0 ]]; then
|
||||||
|
|
75
net/net.sh
75
net/net.sh
|
@ -38,7 +38,7 @@ Operate a configured testnet
|
||||||
|
|
||||||
sanity/start-specific options:
|
sanity/start-specific options:
|
||||||
-o noLedgerVerify - Skip ledger verification
|
-o noLedgerVerify - Skip ledger verification
|
||||||
-o noValidatorSanity - Skip validator sanity
|
-o noValidatorSanity - Skip fullnode sanity
|
||||||
-o rejectExtraNodes - Require the exact number of nodes
|
-o rejectExtraNodes - Require the exact number of nodes
|
||||||
|
|
||||||
stop-specific options:
|
stop-specific options:
|
||||||
|
@ -117,7 +117,7 @@ while getopts "h?S:s:T:t:o:f:" opt; do
|
||||||
done
|
done
|
||||||
|
|
||||||
loadConfigFile
|
loadConfigFile
|
||||||
expectedNodeCount=$((${#validatorIpList[@]} + 1))
|
expectedNodeCount=$((${#additionalFullNodeIps[@]} + 1))
|
||||||
|
|
||||||
build() {
|
build() {
|
||||||
declare MAYBE_DOCKER=
|
declare MAYBE_DOCKER=
|
||||||
|
@ -156,14 +156,14 @@ startCommon() {
|
||||||
"$ipAddress":~/solana/
|
"$ipAddress":~/solana/
|
||||||
}
|
}
|
||||||
|
|
||||||
startLeader() {
|
startBootstrapNode() {
|
||||||
declare ipAddress=$1
|
declare ipAddress=$1
|
||||||
declare logFile="$2"
|
declare logFile="$2"
|
||||||
echo "--- Starting leader: $leaderIp"
|
echo "--- Starting bootstrap full node: $bootstrapFullNodeIp"
|
||||||
echo "start log: $logFile"
|
echo "start log: $logFile"
|
||||||
|
|
||||||
# Deploy local binaries to leader. Validators and clients later fetch the
|
# Deploy local binaries to bootstrap full node. Other full nodes and clients later fetch the
|
||||||
# binaries from the leader.
|
# binaries from it
|
||||||
(
|
(
|
||||||
set -x
|
set -x
|
||||||
startCommon "$ipAddress" || exit 1
|
startCommon "$ipAddress" || exit 1
|
||||||
|
@ -183,7 +183,7 @@ startLeader() {
|
||||||
esac
|
esac
|
||||||
|
|
||||||
ssh "${sshOptions[@]}" -n "$ipAddress" \
|
ssh "${sshOptions[@]}" -n "$ipAddress" \
|
||||||
"./solana/net/remote/remote-node.sh $deployMethod leader $publicNetwork $entrypointIp $expectedNodeCount \"$RUST_LOG\""
|
"./solana/net/remote/remote-node.sh $deployMethod bootstrap_fullnode $publicNetwork $entrypointIp $expectedNodeCount \"$RUST_LOG\""
|
||||||
) >> "$logFile" 2>&1 || {
|
) >> "$logFile" 2>&1 || {
|
||||||
cat "$logFile"
|
cat "$logFile"
|
||||||
echo "^^^ +++"
|
echo "^^^ +++"
|
||||||
|
@ -191,20 +191,20 @@ startLeader() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
startValidator() {
|
startNode() {
|
||||||
declare ipAddress=$1
|
declare ipAddress=$1
|
||||||
declare logFile="$netLogDir/validator-$ipAddress.log"
|
declare logFile="$netLogDir/fullnode-$ipAddress.log"
|
||||||
|
|
||||||
echo "--- Starting validator: $ipAddress"
|
echo "--- Starting full node: $ipAddress"
|
||||||
echo "start log: $logFile"
|
echo "start log: $logFile"
|
||||||
(
|
(
|
||||||
set -x
|
set -x
|
||||||
startCommon "$ipAddress"
|
startCommon "$ipAddress"
|
||||||
ssh "${sshOptions[@]}" -n "$ipAddress" \
|
ssh "${sshOptions[@]}" -n "$ipAddress" \
|
||||||
"./solana/net/remote/remote-node.sh $deployMethod validator $publicNetwork $entrypointIp $expectedNodeCount \"$RUST_LOG\""
|
"./solana/net/remote/remote-node.sh $deployMethod fullnode $publicNetwork $entrypointIp $expectedNodeCount \"$RUST_LOG\""
|
||||||
) >> "$logFile" 2>&1 &
|
) >> "$logFile" 2>&1 &
|
||||||
declare pid=$!
|
declare pid=$!
|
||||||
ln -sfT "validator-$ipAddress.log" "$netLogDir/validator-$pid.log"
|
ln -sfT "fullnode-$ipAddress.log" "$netLogDir/fullnode-$pid.log"
|
||||||
pids+=("$pid")
|
pids+=("$pid")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -226,13 +226,13 @@ startClient() {
|
||||||
}
|
}
|
||||||
|
|
||||||
sanity() {
|
sanity() {
|
||||||
declare expectedNodeCount=$((${#validatorIpList[@]} + 1))
|
declare expectedNodeCount=$((${#additionalFullNodeIps[@]} + 1))
|
||||||
declare ok=true
|
declare ok=true
|
||||||
|
|
||||||
echo "--- Sanity"
|
echo "--- Sanity"
|
||||||
$metricsWriteDatapoint "testnet-deploy net-sanity-begin=1"
|
$metricsWriteDatapoint "testnet-deploy net-sanity-begin=1"
|
||||||
|
|
||||||
declare host=$leaderIp # TODO: maybe use ${validatorIpList[0]} ?
|
declare host=$bootstrapFullNodeIp # TODO: maybe use ${additionalFullNodeIps[0]} ?
|
||||||
(
|
(
|
||||||
set -x
|
set -x
|
||||||
# shellcheck disable=SC2029 # remote-client.sh args are expanded on client side intentionally
|
# shellcheck disable=SC2029 # remote-client.sh args are expanded on client side intentionally
|
||||||
|
@ -279,13 +279,18 @@ start() {
|
||||||
tar)
|
tar)
|
||||||
if [[ -n $releaseChannel ]]; then
|
if [[ -n $releaseChannel ]]; then
|
||||||
rm -f "$SOLANA_ROOT"/solana-release.tar.bz2
|
rm -f "$SOLANA_ROOT"/solana-release.tar.bz2
|
||||||
cd "$SOLANA_ROOT"
|
(
|
||||||
|
set -x
|
||||||
set -x
|
curl -o "$SOLANA_ROOT"/solana-release.tar.bz2 http://solana-release.s3.amazonaws.com/"$releaseChannel"/solana-release.tar.bz2
|
||||||
curl -o solana-release.tar.bz2 http://solana-release.s3.amazonaws.com/"$releaseChannel"/solana-release.tar.bz2
|
)
|
||||||
tarballFilename=solana-release.tar.bz2
|
tarballFilename="$SOLANA_ROOT"/solana-release.tar.bz2
|
||||||
fi
|
fi
|
||||||
tar jxvf $tarballFilename
|
(
|
||||||
|
set -x
|
||||||
|
rm -rf "$SOLANA_ROOT"/solana-release
|
||||||
|
(cd "$SOLANA_ROOT"; tar jxv) < "$tarballFilename"
|
||||||
|
cat "$SOLANA_ROOT"/solana-release/version.txt
|
||||||
|
)
|
||||||
;;
|
;;
|
||||||
local)
|
local)
|
||||||
build
|
build
|
||||||
|
@ -299,20 +304,20 @@ start() {
|
||||||
$metricsWriteDatapoint "testnet-deploy net-start-begin=1"
|
$metricsWriteDatapoint "testnet-deploy net-start-begin=1"
|
||||||
|
|
||||||
SECONDS=0
|
SECONDS=0
|
||||||
declare leaderDeployTime=
|
declare bootstrapNodeDeployTime=
|
||||||
startLeader "$leaderIp" "$netLogDir/leader-$leaderIp.log"
|
startBootstrapNode "$bootstrapFullNodeIp" "$netLogDir/bootstrap-fullnode-$bootstrapFullNodeIp.log"
|
||||||
leaderDeployTime=$SECONDS
|
bootstrapNodeDeployTime=$SECONDS
|
||||||
$metricsWriteDatapoint "testnet-deploy net-leader-started=1"
|
$metricsWriteDatapoint "testnet-deploy net-leader-started=1"
|
||||||
|
|
||||||
SECONDS=0
|
SECONDS=0
|
||||||
pids=()
|
pids=()
|
||||||
loopCount=0
|
loopCount=0
|
||||||
for ipAddress in "${validatorIpList[@]}"; do
|
for ipAddress in "${additionalFullNodeIps[@]}"; do
|
||||||
startValidator "$ipAddress"
|
startNode "$ipAddress"
|
||||||
|
|
||||||
# Staggering validator startup time. If too many validators
|
# Stagger additional node start time. If too many nodes start simultaneously
|
||||||
# bootup simultaneously, leader node gets more rsync requests
|
# the bootstrap node gets more rsync requests from the additional nodes than
|
||||||
# from the validators than it can handle.
|
# it can handle.
|
||||||
((loopCount++ % 2 == 0)) && sleep 2
|
((loopCount++ % 2 == 0)) && sleep 2
|
||||||
done
|
done
|
||||||
|
|
||||||
|
@ -320,14 +325,14 @@ start() {
|
||||||
declare ok=true
|
declare ok=true
|
||||||
wait "$pid" || ok=false
|
wait "$pid" || ok=false
|
||||||
if ! $ok; then
|
if ! $ok; then
|
||||||
cat "$netLogDir/validator-$pid.log"
|
cat "$netLogDir/fullnode-$pid.log"
|
||||||
echo ^^^ +++
|
echo ^^^ +++
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
$metricsWriteDatapoint "testnet-deploy net-validators-started=1"
|
$metricsWriteDatapoint "testnet-deploy net-validators-started=1"
|
||||||
validatorDeployTime=$SECONDS
|
additionalNodeDeployTime=$SECONDS
|
||||||
|
|
||||||
sanity
|
sanity
|
||||||
|
|
||||||
|
@ -342,7 +347,7 @@ start() {
|
||||||
case $deployMethod in
|
case $deployMethod in
|
||||||
snap)
|
snap)
|
||||||
IFS=\ read -r _ networkVersion _ < <(
|
IFS=\ read -r _ networkVersion _ < <(
|
||||||
ssh "${sshOptions[@]}" "$leaderIp" \
|
ssh "${sshOptions[@]}" "$bootstrapFullNodeIp" \
|
||||||
"snap info solana | grep \"^installed:\""
|
"snap info solana | grep \"^installed:\""
|
||||||
)
|
)
|
||||||
networkVersion=${networkVersion/0+git./}
|
networkVersion=${networkVersion/0+git./}
|
||||||
|
@ -363,8 +368,8 @@ start() {
|
||||||
|
|
||||||
echo
|
echo
|
||||||
echo "+++ Deployment Successful"
|
echo "+++ Deployment Successful"
|
||||||
echo "Leader deployment took $leaderDeployTime seconds"
|
echo "Bootstrap full node deployment took $bootstrapNodeDeployTime seconds"
|
||||||
echo "Validator deployment (${#validatorIpList[@]} instances) took $validatorDeployTime seconds"
|
echo "Additional full node deployment (${#additionalFullNodeIps[@]} instances) took $additionalNodeDeployTime seconds"
|
||||||
echo "Client deployment (${#clientIpList[@]} instances) took $clientDeployTime seconds"
|
echo "Client deployment (${#clientIpList[@]} instances) took $clientDeployTime seconds"
|
||||||
echo "Network start logs in $netLogDir:"
|
echo "Network start logs in $netLogDir:"
|
||||||
ls -l "$netLogDir"
|
ls -l "$netLogDir"
|
||||||
|
@ -394,9 +399,9 @@ stop() {
|
||||||
SECONDS=0
|
SECONDS=0
|
||||||
$metricsWriteDatapoint "testnet-deploy net-stop-begin=1"
|
$metricsWriteDatapoint "testnet-deploy net-stop-begin=1"
|
||||||
|
|
||||||
stopNode "$leaderIp"
|
stopNode "$bootstrapFullNodeIp"
|
||||||
|
|
||||||
for ipAddress in "${validatorIpList[@]}" "${clientIpList[@]}"; do
|
for ipAddress in "${additionalFullNodeIps[@]}" "${clientIpList[@]}"; do
|
||||||
stopNode "$ipAddress"
|
stopNode "$ipAddress"
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|
|
@ -3,12 +3,14 @@ set -e
|
||||||
|
|
||||||
cd "$(dirname "$0")"/../..
|
cd "$(dirname "$0")"/../..
|
||||||
|
|
||||||
|
set -x
|
||||||
deployMethod="$1"
|
deployMethod="$1"
|
||||||
nodeType="$2"
|
nodeType="$2"
|
||||||
publicNetwork="$3"
|
publicNetwork="$3"
|
||||||
entrypointIp="$4"
|
entrypointIp="$4"
|
||||||
numNodes="$5"
|
numNodes="$5"
|
||||||
RUST_LOG="$6"
|
RUST_LOG="$6"
|
||||||
|
set +x
|
||||||
export RUST_LOG=${RUST_LOG:-solana=warn} # if RUST_LOG is unset, default to warn
|
export RUST_LOG=${RUST_LOG:-solana=warn} # if RUST_LOG is unset, default to warn
|
||||||
|
|
||||||
missing() {
|
missing() {
|
||||||
|
@ -40,7 +42,7 @@ fi
|
||||||
case $deployMethod in
|
case $deployMethod in
|
||||||
snap)
|
snap)
|
||||||
SECONDS=0
|
SECONDS=0
|
||||||
[[ $nodeType = leader ]] ||
|
[[ $nodeType = bootstrap_fullnode ]] ||
|
||||||
net/scripts/rsync-retry.sh -vPrc "$entrypointIp:~/solana/solana.snap" .
|
net/scripts/rsync-retry.sh -vPrc "$entrypointIp:~/solana/solana.snap" .
|
||||||
sudo snap install solana.snap --devmode --dangerous
|
sudo snap install solana.snap --devmode --dangerous
|
||||||
|
|
||||||
|
@ -61,13 +63,13 @@ snap)
|
||||||
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
|
echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ $nodeType = leader ]]; then
|
if [[ $nodeType = bootstrap_fullnode ]]; then
|
||||||
nodeConfig="mode=leader+drone $commonNodeConfig"
|
nodeConfig="mode=leader+drone $commonNodeConfig"
|
||||||
ln -sf -T /var/snap/solana/current/leader/current leader.log
|
ln -sf -T /var/snap/solana/current/leader/current fullnode.log
|
||||||
ln -sf -T /var/snap/solana/current/drone/current drone.log
|
ln -sf -T /var/snap/solana/current/drone/current drone.log
|
||||||
else
|
else
|
||||||
nodeConfig="mode=validator $commonNodeConfig"
|
nodeConfig="mode=validator $commonNodeConfig"
|
||||||
ln -sf -T /var/snap/solana/current/validator/current validator.log
|
ln -sf -T /var/snap/solana/current/validator/current fullnode.log
|
||||||
fi
|
fi
|
||||||
|
|
||||||
logmarker="solana deploy $(date)/$RANDOM"
|
logmarker="solana deploy $(date)/$RANDOM"
|
||||||
|
@ -96,7 +98,7 @@ local|tar)
|
||||||
scripts/net-stats.sh > net-stats.log 2>&1 &
|
scripts/net-stats.sh > net-stats.log 2>&1 &
|
||||||
|
|
||||||
case $nodeType in
|
case $nodeType in
|
||||||
leader)
|
bootstrap_fullnode)
|
||||||
if [[ -e /dev/nvidia0 && -x ~/.cargo/bin/solana-fullnode-cuda ]]; then
|
if [[ -e /dev/nvidia0 && -x ~/.cargo/bin/solana-fullnode-cuda ]]; then
|
||||||
echo Selecting solana-fullnode-cuda
|
echo Selecting solana-fullnode-cuda
|
||||||
export SOLANA_CUDA=1
|
export SOLANA_CUDA=1
|
||||||
|
@ -104,9 +106,10 @@ local|tar)
|
||||||
./multinode-demo/setup.sh -t leader $setupArgs
|
./multinode-demo/setup.sh -t leader $setupArgs
|
||||||
./multinode-demo/drone.sh > drone.log 2>&1 &
|
./multinode-demo/drone.sh > drone.log 2>&1 &
|
||||||
./multinode-demo/leader.sh > leader.log 2>&1 &
|
./multinode-demo/leader.sh > leader.log 2>&1 &
|
||||||
|
ln -sTf leader.log fullnode.log
|
||||||
;;
|
;;
|
||||||
validator)
|
fullnode)
|
||||||
net/scripts/rsync-retry.sh -vPrc "$entrypointIp:~/.cargo/bin/solana*" ~/.cargo/bin/
|
net/scripts/rsync-retry.sh -vPrc "$entrypointIp":~/.cargo/bin/ ~/.cargo/bin/
|
||||||
|
|
||||||
if [[ -e /dev/nvidia0 && -x ~/.cargo/bin/solana-fullnode-cuda ]]; then
|
if [[ -e /dev/nvidia0 && -x ~/.cargo/bin/solana-fullnode-cuda ]]; then
|
||||||
echo Selecting solana-fullnode-cuda
|
echo Selecting solana-fullnode-cuda
|
||||||
|
@ -114,7 +117,8 @@ local|tar)
|
||||||
fi
|
fi
|
||||||
|
|
||||||
./multinode-demo/setup.sh -t validator $setupArgs
|
./multinode-demo/setup.sh -t validator $setupArgs
|
||||||
./multinode-demo/validator.sh "$entrypointIp":~/solana "$entrypointIp:8001" >validator.log 2>&1 &
|
./multinode-demo/validator.sh "$entrypointIp":~/solana "$entrypointIp:8001" > validator.log 2>&1 &
|
||||||
|
ln -sTf validator.log fullnode.log
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
echo "Error: unknown node type: $nodeType"
|
echo "Error: unknown node type: $nodeType"
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
set -e
|
set -e
|
||||||
#
|
#
|
||||||
# This script is to be run on the leader node
|
# This script is to be run on the bootstrap full node
|
||||||
#
|
#
|
||||||
|
|
||||||
cd "$(dirname "$0")"/../..
|
cd "$(dirname "$0")"/../..
|
||||||
|
@ -139,18 +139,18 @@ if $validatorSanity; then
|
||||||
(
|
(
|
||||||
set -x -o pipefail
|
set -x -o pipefail
|
||||||
./multinode-demo/setup.sh -t validator || exit $?
|
./multinode-demo/setup.sh -t validator || exit $?
|
||||||
timeout 10s ./multinode-demo/validator.sh "$entrypointRsyncUrl" "$entrypointIp:8001" 2>&1 | tee validator.log
|
timeout 10s ./multinode-demo/validator.sh "$entrypointRsyncUrl" "$entrypointIp:8001" 2>&1 | tee validator-sanity.log
|
||||||
) || {
|
) || {
|
||||||
exitcode=$?
|
exitcode=$?
|
||||||
[[ $exitcode -eq 124 ]] || exit $exitcode
|
[[ $exitcode -eq 124 ]] || exit $exitcode
|
||||||
}
|
}
|
||||||
wc -l validator.log
|
wc -l validator-sanity.log
|
||||||
if grep -C100 panic validator.log; then
|
if grep -C100 panic validator-sanity.log; then
|
||||||
echo "^^^ +++"
|
echo "^^^ +++"
|
||||||
echo "Panic observed"
|
echo "Panic observed"
|
||||||
exit 1
|
exit 1
|
||||||
else
|
else
|
||||||
echo "Validator log looks ok"
|
echo "Validator sanity log looks ok"
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
echo "^^^ +++"
|
echo "^^^ +++"
|
||||||
|
|
10
net/ssh.sh
10
net/ssh.sh
|
@ -49,12 +49,12 @@ printNode() {
|
||||||
printf " %-25s | For logs run: $0 $ip tail -f solana/$nodeType.log\n" "$0 $ip"
|
printf " %-25s | For logs run: $0 $ip tail -f solana/$nodeType.log\n" "$0 $ip"
|
||||||
}
|
}
|
||||||
|
|
||||||
echo Leader:
|
echo Bootstrap full node:
|
||||||
printNode leader "$leaderIp"
|
printNode fullnode "$bootstrapFullNodeIp"
|
||||||
echo
|
echo
|
||||||
echo Validators:
|
echo Additional full nodes:
|
||||||
for ipAddress in "${validatorIpList[@]}"; do
|
for ipAddress in "${additionalFullNodeIps[@]}"; do
|
||||||
printNode validator "$ipAddress"
|
printNode fullnode "$ipAddress"
|
||||||
done
|
done
|
||||||
echo
|
echo
|
||||||
echo Clients:
|
echo Clients:
|
||||||
|
|
Loading…
Reference in New Issue