solana/net/gce.sh

1019 lines
29 KiB
Bash
Raw Normal View History

#!/usr/bin/env bash
set -e
here=$(dirname "$0")
# shellcheck source=net/common.sh
source "$here"/common.sh
# The cloud provider is derived from the name this script was invoked as
# (e.g. gce.sh, ec2.sh, azure.sh, colo.sh).  The matching provider
# implementation is sourced and the provider's default machine types and
# self-destruct lifetime are set.
cloudProvider=$(basename "$0" .sh)
bootDiskType=""
case $cloudProvider in
gce)
  # shellcheck source=net/scripts/gce-provider.sh
  source "$here"/scripts/gce-provider.sh

  # use n1 instead of n2 so we don't need to spin up >= 4 local SSD's
  cpuBootstrapLeaderMachineType="--custom-cpu 24 --min-cpu-platform Intel%20Skylake --custom-vm-type n1"
  gpuBootstrapLeaderMachineType="$cpuBootstrapLeaderMachineType --accelerator count=1,type=nvidia-tesla-p100"
  clientMachineType="--custom-cpu 16 --custom-memory 20GB"
  blockstreamerMachineType="--machine-type n1-standard-8"
  selfDestructHours=8
  ;;
ec2)
  # shellcheck source=net/scripts/ec2-provider.sh
  source "$here"/scripts/ec2-provider.sh

  cpuBootstrapLeaderMachineType=m5.4xlarge

  # NOTE: At this time only the p3dn.24xlarge EC2 instance type has GPU and
  #       AVX-512 support.  The default, p2.xlarge, does not support
  #       AVX-512
  gpuBootstrapLeaderMachineType=p2.xlarge
  clientMachineType=c5.2xlarge
  blockstreamerMachineType=m5.4xlarge
  selfDestructHours=0
  ;;
azure)
  # shellcheck source=net/scripts/azure-provider.sh
  source "$here"/scripts/azure-provider.sh

  cpuBootstrapLeaderMachineType=Standard_D16s_v3
  gpuBootstrapLeaderMachineType=Standard_NC12
  clientMachineType=Standard_D16s_v3
  blockstreamerMachineType=Standard_D16s_v3
  selfDestructHours=0
  ;;
colo)
  # shellcheck source=net/scripts/colo-provider.sh
  source "$here"/scripts/colo-provider.sh

  cpuBootstrapLeaderMachineType=0
  gpuBootstrapLeaderMachineType=1
  clientMachineType=0
  blockstreamerMachineType=0
  selfDestructHours=0
  ;;
*)
  # No provider implementation was sourced, so none of the cloud_* functions
  # used below exist; stop here with a clear message.
  echo "Error: Unknown cloud provider: $cloudProvider" >&2
  exit 1
  ;;
esac
# Default settings.  Most of these can be overridden by the command-line
# options parsed further down.
prefix=testnet-dev-${USER//[^A-Za-z0-9]/}
additionalValidatorCount=2
clientNodeCount=0
blockstreamer=false
validatorBootDiskSizeInGb=500
clientBootDiskSizeInGb=75
validatorAdditionalDiskSizeInGb=
externalNodes=false
failOnValidatorBootupFailure=true
preemptible=true
evalInfo=false
tmpfsAccounts=false
defaultCustomMemoryGB="$(cloud_DefaultCustomMemoryGB)"
customMemoryGB="$defaultCustomMemoryGB"

publicNetwork=false
letsEncryptDomainName=
enableGpu=false
customMachineType=
customAddress=
zones=()

# These are only ever assigned by command-line flags; start them out empty so
# that a value inherited from the caller's environment cannot switch on the
# reservation-reclaiming / forced-delete behaviour.
reclaimOnlyPreemptibleReservations=
reclaimAllReservations=
forceDelete=
# containsZone [zone] [list of zones...]
#
# Succeeds (returns 0) when the first argument is exactly equal to one of the
# remaining arguments, otherwise returns 1.
containsZone() {
  local wanted="$1"
  local candidate
  shift
  for candidate in "$@"; do
    if [[ "$candidate" == "$wanted" ]]; then
      return 0
    fi
  done
  return 1
}
# usage [error message...]
#
# Print the command-line help and exit.  When an error message is supplied it
# is printed first (prefixed with "Error: ") and the exit status is 1,
# otherwise the exit status is 0.
#
# The help text interpolates the current default values, and calls
# cloud_DefaultZone to display the default zone.
usage() {
  local exitcode=0
  if [[ -n "$1" ]]; then
    exitcode=1
    echo "Error: $*"
  fi
  cat <<EOF
usage: $0 [create|config|delete|info|status] [common options] [command-specific options]

Manage testnet instances

 create - create a new testnet (implies 'config')
 config - configure the testnet and write a config file describing it
 delete - delete the testnet
 info   - display information about the currently configured testnet
 status - display status information of all resources

 common options:
   -p [prefix]      - Optional common prefix for instance names to avoid
                      collisions (default: $prefix)
   -z [zone]        - Zone(s) for the nodes (default: $(cloud_DefaultZone))
                      If specified multiple times, the validators will be evenly
                      distributed over all specified zones and
                      client/blockstreamer nodes will be created in the first
                      zone
   -x               - append to the existing configuration instead of creating a
                      new configuration
   --allow-boot-failures
                    - Discard from config validator nodes that didn't bootup
                      successfully

 create-specific options:
   -n [number]      - Number of additional validators (default: $additionalValidatorCount)
   -c [number]      - Number of client nodes (default: $clientNodeCount)
   -u               - Include a Blockstreamer (default: $blockstreamer)
   -P               - Use public network IP addresses (default: $publicNetwork)
   -g               - Enable GPU and automatically set validator machine types to $gpuBootstrapLeaderMachineType
                      (default: $enableGpu)
   -G               - Enable GPU, and set custom GPU machine type to use
                      (e.g $gpuBootstrapLeaderMachineType)
   -a [address]     - Address to be assigned to the Blockstreamer if present,
                      otherwise the bootstrap validator.
                      * For GCE, [address] is the "name" of the desired External
                        IP Address.
                      * For EC2, [address] is the "allocation ID" of the desired
                        Elastic IP.
   -d [disk-type]   - Specify a boot disk type (default None) Use pd-ssd to get ssd on GCE.
   --letsencrypt [dns name]
                    - Attempt to generate a TLS certificate using this
                      DNS name (useful only when the -a and -P options
                      are also provided)
   --custom-machine-type [type]
                    - Set a custom machine type without assuming whether or not
                      GPU is enabled.  Set this explicitly with --enable-gpu/-g to call out the presence of GPUs.
$(
  if [[ -n "$defaultCustomMemoryGB" ]]; then
    echo "   --custom-memory-gb"
    echo "                    - Set memory size for custom machine type in GB (default: $defaultCustomMemoryGB)"
  fi
)
   --enable-gpu     - Use with --custom-machine-type to specify whether or not GPUs should be used/enabled
   --validator-additional-disk-size-gb [number]
                    - Add an additional [number] GB SSD to all validators to store the config directory.
                      If not set, config will be written to the boot disk by default.
                      Only supported on GCE.
   --dedicated      - Use dedicated instances for additional validators
                      (by default preemptible instances are used to reduce
                      cost).  Note that the bootstrap validator,
                      blockstreamer and client nodes are always dedicated.
                      Set this flag on colo to prevent your testnet from being pre-empted by nightly test automation.
   --self-destruct-hours [number]
                    - Specify lifetime of the allocated instances in hours. 0 to
                      disable. Only supported on GCE. (default: $selfDestructHours)
   --validator-boot-disk-size-gb [number]
                    - Specify validator boot disk size in gb.
   --client-machine-type [type]
                    - custom client machine type
   --tmpfs-accounts - Put accounts directory on a swap-backed tmpfs volume

 config-specific options:
   -P               - Use public network IP addresses (default: $publicNetwork)

 delete-specific options:
   --reclaim-preemptible-reservations
                    - If set, reclaims all reservations on colo nodes that were not created with --dedicated.
                      This behavior does not filter by testnet name or owner.  Only implemented on colo.
   --reclaim-all-reservations
                    - If set, reclaims all reservations on all colo nodes, regardless of owner, pre-emptibility, or creator.

 info-specific options:
   --eval           - Output in a form that can be eval-ed by a shell: eval \$(gce.sh info --eval)

 status-specific options:
   none

EOF
  exit $exitcode
}
# The first argument is the command; everything after it is an option.
command=$1
[[ -n $command ]] || usage
shift
[[ $command = create || $command = config || $command = info || $command = delete || $command = status ]] ||
  usage "Invalid command: $command"

# Long (--xyz) options are consumed here.  Everything else is collected, in
# order, into shortArgs for the getopts pass that follows.
shortArgs=()
while [[ -n $1 ]]; do
  if [[ ${1:0:2} = -- ]]; then
    # Options that take a value: fail with a usage message when the value is
    # missing, rather than letting `shift 2` abort the script silently.
    case $1 in
    --letsencrypt | --validator-additional-disk-size-gb | --custom-machine-type | \
      --client-machine-type | --validator-boot-disk-size-gb | --self-destruct-hours | \
      --custom-memory-gb)
      [[ $# -ge 2 ]] || usage "Missing value for option: $1"
      ;;
    esac

    case $1 in
    --letsencrypt)
      letsEncryptDomainName="$2"
      shift 2
      ;;
    --validator-additional-disk-size-gb)
      validatorAdditionalDiskSizeInGb="$2"
      shift 2
      ;;
    --machine-type* | --custom-cpu*) # Bypass quoted long args for GPUs
      shortArgs+=("$1")
      shift
      ;;
    --allow-boot-failures)
      failOnValidatorBootupFailure=false
      shift
      ;;
    --dedicated)
      preemptible=false
      shift
      ;;
    --eval)
      evalInfo=true
      shift
      ;;
    --enable-gpu)
      enableGpu=true
      shift
      ;;
    --custom-machine-type)
      customMachineType="$2"
      shift 2
      ;;
    --client-machine-type)
      clientMachineType="$2"
      shift 2
      ;;
    --validator-boot-disk-size-gb)
      validatorBootDiskSizeInGb="$2"
      shift 2
      ;;
    --self-destruct-hours)
      maybeTimeout=$2
      if [[ $maybeTimeout =~ ^[0-9]+$ ]]; then
        selfDestructHours=$maybeTimeout
      else
        usage "Invalid parameter ($maybeTimeout) to $1"
      fi
      shift 2
      ;;
    --reclaim-preemptible-reservations)
      reclaimOnlyPreemptibleReservations=true
      shift
      ;;
    --reclaim-all-reservations)
      reclaimAllReservations=true
      shift
      ;;
    --tmpfs-accounts)
      tmpfsAccounts=true
      shift
      ;;
    --custom-memory-gb)
      customMemoryGB=$2
      shift 2
      ;;
    *)
      usage "Unknown long option: $1"
      ;;
    esac
  else
    shortArgs+=("$1")
    shift
  fi
done
# Short options, parsed from the arguments the long-option pass set aside in
# shortArgs.
while getopts "h?p:Pn:c:r:z:gG:a:d:uxf" opt "${shortArgs[@]}"; do
  case $opt in
  h | \?)
    usage
    ;;
  p)
    [[ ${OPTARG//[^A-Za-z0-9-]/} == "$OPTARG" ]] || usage "Invalid prefix: \"$OPTARG\", alphanumeric only"
    prefix=$OPTARG
    ;;
  P)
    publicNetwork=true
    ;;
  n)
    additionalValidatorCount=$OPTARG
    ;;
  c)
    clientNodeCount=$OPTARG
    ;;
  z)
    # Each zone is recorded once, in the order first given
    containsZone "$OPTARG" "${zones[@]}" || zones+=("$OPTARG")
    ;;
  g)
    enableGpu=true
    ;;
  G)
    enableGpu=true
    customMachineType="$OPTARG"
    ;;
  a)
    customAddress=$OPTARG
    ;;
  d)
    bootDiskType=$OPTARG
    ;;
  u)
    blockstreamer=true
    ;;
  x)
    externalNodes=true
    ;;
  *)
    # Reached for letters accepted by the option string that have no arm above
    usage "unhandled option: $opt"
    ;;
  esac
done

# getopts stops at the first word that is not an option; anything left over in
# shortArgs from that point on was not understood.
if ((OPTIND <= ${#shortArgs[@]})); then
  usage "Unexpected argument: ${shortArgs[OPTIND - 1]}"
fi

[[ ${#zones[@]} -gt 0 ]] || zones+=("$(cloud_DefaultZone)")

# The long-option loop stops early at an empty-string argument; reject
# whatever positional parameters it left unconsumed.
[[ $# -eq 0 ]] || usage "Unexpected argument: $1"
if [[ $cloudProvider = ec2 ]]; then
  # EC2 keys can't be retrieved from running instances like GCE keys can so save
  # EC2 keys in the user's home directory so |./ec2.sh config| can at least be
  # used on the same host that ran |./ec2.sh create| .
  sshPrivateKey="$HOME/.ssh/solana-net-id_$prefix"
else
  sshPrivateKey="$netConfigDir/id_$prefix"
fi

# Provider-specific option handling: GCE gets its machine-type strings
# extended, the other providers reject the GCE-only options.
case $cloudProvider in
gce)
  if [[ "$tmpfsAccounts" = "true" ]]; then
    cpuBootstrapLeaderMachineType+=" --local-ssd interface=nvme"
    gpuBootstrapLeaderMachineType+=" --local-ssd interface=nvme"
    if [[ $customMemoryGB -lt 100 ]]; then
      # shellcheck disable=SC2016 # We don't want expression expansion on these backticks
      echo -e '\nWarning: At least 100GB of system RAM is recommended with `--tmpfs-accounts` (see `--custom-memory-gb`)\n'
    fi
  fi
  cpuBootstrapLeaderMachineType+=" --custom-memory ${customMemoryGB}GB"
  gpuBootstrapLeaderMachineType+=" --custom-memory ${customMemoryGB}GB"
  ;;
ec2 | azure | colo)
  if [[ -n $validatorAdditionalDiskSizeInGb ]]; then
    usage "--validator-additional-disk-size-gb currently only supported with cloud provider: gce"
  fi
  if [[ "$tmpfsAccounts" = "true" ]]; then
    usage "--tmpfs-accounts only supported on cloud provider: gce"
  fi
  if [[ "$customMemoryGB" != "$defaultCustomMemoryGB" ]]; then
    usage "--custom-memory-gb only supported on cloud provider: gce"
  fi
  ;;
*)
  echo "Error: Unknown cloud provider: $cloudProvider"
  ;;
esac

# Value passed as the preemptible argument when creating the bootstrap
# validator, client and blockstreamer instances.  Only colo forwards the
# user's --dedicated choice for those nodes.
case $cloudProvider in
gce | ec2 | azure)
  maybePreemptible="never preemptible"
  ;;
colo)
  maybePreemptible=$preemptible
  ;;
*)
  echo "Error: Unknown cloud provider: $cloudProvider"
  ;;
esac

if [[ $reclaimOnlyPreemptibleReservations == "true" && $reclaimAllReservations == "true" ]]; then
  usage "Cannot set both --reclaim-preemptible-reservations and --reclaim-all-reservations.  Set one or none"
fi

if [[ -n $reclaimAllReservations || -n $reclaimOnlyPreemptibleReservations ]]; then
  forceDelete="true"
fi

# Machine type precedence: an explicit custom type, then the GPU default when
# GPUs are enabled, then the CPU default.
if [[ -n "$customMachineType" ]]; then
  bootstrapLeaderMachineType="$customMachineType"
elif [[ "$enableGpu" = "true" ]]; then
  bootstrapLeaderMachineType="$gpuBootstrapLeaderMachineType"
else
  bootstrapLeaderMachineType="$cpuBootstrapLeaderMachineType"
fi
# Validators and the blockstreamer use the same machine type as the bootstrap
# validator
validatorMachineType=$bootstrapLeaderMachineType
blockstreamerMachineType=$bootstrapLeaderMachineType
# cloud_ForEachInstance [cmd] [extra args to cmd]
#
# Execute a command for each element in the `instances` array.  Each element
# is a colon-separated record: name:publicIp:privateIp:zone
#
#   cmd   - The command to execute on each instance
#           The command will receive arguments followed by any
#           additional arguments supplied to cloud_ForEachInstance:
#               name      - name of the instance
#               publicIp  - The public IP address of this instance
#               privateIp - The private IP address of this instance
#               zone      - Zone of this instance
#               count     - Monotonically increasing count for each
#                           invocation of cmd, starting at 1
#               ...       - Extra args to cmd..
#
# Exits the script with status 1 when no cmd is given.
#
cloud_ForEachInstance() {
  declare cmd="$1"
  shift
  [[ -n $cmd ]] || { echo cloud_ForEachInstance: cmd not specified >&2; exit 1; }

  declare count=1
  # Keep the per-record fields local so the caller's variables of the same
  # name (notably `zone`) are not overwritten.
  declare info name publicIp privateIp zone
  for info in "${instances[@]}"; do
    IFS=: read -r name publicIp privateIp zone <<<"$info"

    # NOTE: eval re-parses its arguments, so cmd may carry leading words of its
    # own but arguments containing whitespace are split again.
    eval "$cmd" "$name" "$publicIp" "$privateIp" "$zone" "$count" "$@"
    count=$((count + 1))
  done
}
# zoneLocation [zone]
#
# Print an approximate "[lat, long]" for the data center of the given cloud
# provider zone, or nothing when the zone is not recognised.  A fixed table is
# used because normal geoip lookups for cloud provider IP addresses are
# sometimes widely inaccurate.
zoneLocation() {
  local requestedZone="$1"

  # Each entry is "<zone prefix>=<location>"; the first prefix that the
  # requested zone starts with wins.
  local -a knownLocations=(
    "us-west1=[45.5946, -121.1787]"
    "us-central1=[41.2619, -95.8608]"
    "us-east1=[33.1960, -80.0131]"
    "asia-east2=[22.3193, 114.1694]"
    "asia-northeast1=[35.6762, 139.6503]"
    "asia-northeast2=[34.6937, 135.5023]"
    "asia-south1=[19.0760, 72.8777]"
    "asia-southeast1=[1.3404, 103.7090]"
    "australia-southeast1=[-33.8688, 151.2093]"
    "europe-north1=[60.5693, 27.1878]"
    "europe-west2=[51.5074, -0.1278]"
    "europe-west3=[50.1109, 8.6821]"
    "europe-west4=[53.4386, 6.8355]"
    "europe-west6=[47.3769, 8.5417]"
    "northamerica-northeast1=[45.5017, -73.5673]"
    "southamerica-east1=[-23.5505, -46.6333]"
  )

  local entry
  for entry in "${knownLocations[@]}"; do
    if [[ $requestedZone == "${entry%%=*}"* ]]; then
      echo "${entry#*=}"
      return 0
    fi
  done
  return 0
}
# prepareInstancesAndWriteConfigFile
#
# Find the testnet's instances (bootstrap validator, additional validators,
# clients, blockstreamer), wait for each to finish booting and append their
# addresses to $configFile.  Public IPs of instances in a known zone are also
# recorded with a location in $geoipConfigFile.
#
# With externalNodes=true the existing config file is appended to and the
# bootstrap validator lookup is skipped.
#
# Reads (among others): prefix, zones, additionalValidatorCount,
# failOnValidatorBootupFailure, sshPrivateKey, sshOptions.
# Exits the script with status 1 when a required instance is missing or fails
# to boot and failures are not tolerated.
prepareInstancesAndWriteConfigFile() {
  $metricsWriteDatapoint "testnet-deploy net-config-begin=1"

  if $externalNodes; then
    echo "Appending to existing config file"
    echo "externalNodeSshKey=$sshPrivateKey" >> "$configFile"
  else
    rm -f "$geoipConfigFile"

    cat >> "$configFile" <<EOF
# autogenerated at $(date)
netBasename=$prefix
publicNetwork=$publicNetwork
sshPrivateKey=$sshPrivateKey
letsEncryptDomainName=$letsEncryptDomainName
export TMPFS_ACCOUNTS=$tmpfsAccounts
EOF
  fi
  touch "$geoipConfigFile"

  buildSshOptions

  cloud_RestartPreemptedInstances "$prefix"

  # Wait for the first entry of `instances` to be ready and, unless the
  # private key is already readable locally, copy the key pair from it.
  fetchPrivateKey() {
    declare nodeName
    declare nodeIp
    declare nodeZone
    IFS=: read -r nodeName nodeIp _ nodeZone < <(echo "${instances[0]}")

    # Make sure the machine is alive or pingable
    declare timeout_sec=90
    cloud_WaitForInstanceReady "$nodeName" "$nodeIp" "$nodeZone" "$timeout_sec"

    if [[ ! -r $sshPrivateKey ]]; then
      echo "Fetching $sshPrivateKey from $nodeName"

      # Try to scp in a couple times, sshd may not yet be up even though the
      # machine can be pinged...
      (
        set -o pipefail
        for i in $(seq 1 60); do
          set -x
          cloud_FetchFile "$nodeName" "$nodeIp" /solana-scratch/id_ecdsa "$sshPrivateKey" "$nodeZone" &&
            cloud_FetchFile "$nodeName" "$nodeIp" /solana-scratch/id_ecdsa.pub "$sshPrivateKey.pub" "$nodeZone" &&
            break
          set +x

          sleep 1
          echo "Retry $i..."
        done
      )

      # These fail (and so does this function) when the key never arrived
      chmod 400 "$sshPrivateKey"
      ls -l "$sshPrivateKey"
    fi
  }

  # Callback for cloud_ForEachInstance: wait for one instance to boot and
  # append its addresses and zone to the config file arrays named by
  # arrayName.  Arguments 1-5 are supplied by cloud_ForEachInstance; 6 and 7
  # are the extra arguments given to it.
  recordInstanceIp() {
    declare name="$1"
    declare publicIp="$2"
    declare privateIp="$3"
    declare zone="$4"
    #declare index="$5"
    declare failOnFailure="$6"
    declare arrayName="$7"

    if [ "$publicIp" = "TERMINATED" ] || [ "$privateIp" = "TERMINATED" ]; then
      if $failOnFailure; then
        exit 1
      else
        return 0
      fi
    fi

    declare ok=true
    echo "Waiting for $name to finish booting..."
    (
      set +e
      fetchPrivateKey || exit 1
      for i in $(seq 1 60); do
        (
          set -x
          timeout --preserve-status --foreground 20s ssh "${sshOptions[@]}" "$publicIp" "ls -l /solana-scratch/.instance-startup-complete"
        )
        ret=$?
        if [[ $ret -eq 0 ]]; then
          echo "$name has booted."
          exit 0
        fi
        sleep 5
        echo "Retry $i..."
      done
      echo "$name failed to boot."
      exit 1
    ) || ok=false

    if ! $ok; then
      if $failOnFailure; then
        exit 1
      fi
    else
      {
        echo "$arrayName+=($publicIp)  # $name"
        echo "${arrayName}Private+=($privateIp)  # $name"
        echo "${arrayName}Zone+=($zone)  # $name"
      } >> "$configFile"

      declare latlng=
      latlng=$(zoneLocation "$zone")
      if [[ -n $latlng ]]; then
        echo "$publicIp: $latlng" >> "$geoipConfigFile"
      fi
    fi
  }

  if $externalNodes; then
    echo "Bootstrap validator is already configured"
  else
    echo "Looking for bootstrap validator instance..."
    cloud_FindInstance "$prefix-bootstrap-validator"
    [[ ${#instances[@]} -eq 1 ]] || {
      echo "Unable to find bootstrap validator"
      exit 1
    }

    echo "validatorIpList=()" >> "$configFile"
    echo "validatorIpListPrivate=()" >> "$configFile"
    # recordInstanceIp appends to the Zone array as well, so start it empty too
    echo "validatorIpListZone=()" >> "$configFile"
    cloud_ForEachInstance recordInstanceIp true validatorIpList
  fi

  if [[ $additionalValidatorCount -gt 0 ]]; then
    declare i zone numZones numNodesPerZone numLeftOverNodes
    numZones=${#zones[@]}
    if [[ $additionalValidatorCount -gt $numZones ]]; then
      numNodesPerZone=$((additionalValidatorCount / numZones))
      numLeftOverNodes=$((additionalValidatorCount % numZones))
    else
      numNodesPerZone=1
      numLeftOverNodes=0
    fi

    for ((i = numZones - 1; i >= 0; i--)); do
      zone=${zones[i]}
      # The first zone (visited last) also takes the remainder
      if [[ $i -eq 0 ]]; then
        numNodesPerZone=$((numNodesPerZone + numLeftOverNodes))
      fi
      echo "Looking for additional validator instances in $zone ..."
      cloud_FindInstances "$prefix-$zone-validator"
      declare numInstances=${#instances[@]}
      # Record the zone when it is fully populated, or when boot failures are
      # tolerated and at least one instance exists.  (The flag is compared as a
      # string: inside [[ ]] a bare `! $flag` only tests for emptiness.)
      if [[ $numInstances -ge $numNodesPerZone ||
        ($failOnValidatorBootupFailure != true && $numInstances -gt 0) ]]; then
        cloud_ForEachInstance recordInstanceIp "$failOnValidatorBootupFailure" validatorIpList
      else
        echo "Unable to find additional validators"
        if $failOnValidatorBootupFailure; then
          exit 1
        fi
      fi
    done
  fi

  if ! $externalNodes; then
    echo "clientIpList=()" >> "$configFile"
    echo "clientIpListPrivate=()" >> "$configFile"
    echo "clientIpListZone=()" >> "$configFile"
  fi
  echo "Looking for client bencher instances..."
  cloud_FindInstances "$prefix-client"
  [[ ${#instances[@]} -eq 0 ]] || {
    cloud_ForEachInstance recordInstanceIp true clientIpList
  }

  if ! $externalNodes; then
    echo "blockstreamerIpList=()" >> "$configFile"
    echo "blockstreamerIpListPrivate=()" >> "$configFile"
    echo "blockstreamerIpListZone=()" >> "$configFile"
  fi
  echo "Looking for blockstreamer instances..."
  cloud_FindInstances "$prefix-blockstreamer"
  [[ ${#instances[@]} -eq 0 ]] || {
    cloud_ForEachInstance recordInstanceIp true blockstreamerIpList
  }

  echo "Wrote $configFile"
  $metricsWriteDatapoint "testnet-deploy net-config-complete=1"
}
# delete
#
# Delete the instances belonging to this testnet and, unless externalNodes is
# true, remove $configFile.
#
# Instances are selected by the name filter "$prefix-".  On colo, when
# forceDelete is set, the filter is ".*-" instead.  The value of
# reclaimOnlyPreemptibleReservations is passed to cloud_FindInstances as its
# second argument.
# Exits the script with status 1 for an unknown cloud provider.
delete() {
  $metricsWriteDatapoint "testnet-deploy net-delete-begin=1"

  declare filter
  case $cloudProvider in
  gce | ec2 | azure)
    # Filter for all nodes
    filter="$prefix-"
    ;;
  colo)
    if [[ -n $forceDelete ]]; then
      filter=".*-"
    else
      filter="$prefix-"
    fi
    ;;
  *)
    # Never continue with an unset filter: the search below feeds a deletion
    echo "Error: Unknown cloud provider: $cloudProvider" >&2
    exit 1
    ;;
  esac

  echo "Searching for instances: $filter"
  cloud_FindInstances "$filter" "$reclaimOnlyPreemptibleReservations"

  if [[ ${#instances[@]} -eq 0 ]]; then
    echo "No instances found matching '$filter'"
  else
    # Pass forceDelete only when it is set, so no empty argument is sent
    cloud_DeleteInstances ${forceDelete:+"$forceDelete"}
  fi
  wait

  if $externalNodes; then
    echo "Let's not delete the current configuration file"
  else
    rm -f "$configFile"
  fi

  $metricsWriteDatapoint "testnet-deploy net-delete-complete=1"
}
# create_error_cleanup
#
# Exit handler: when the status of the last command before this function was
# entered is non-zero, run `delete`; then exit with that same status.
create_error_cleanup() {
  local status=$?
  if ((status != 0)); then
    delete
  fi
  exit "$status"
}
# Command dispatch
case $command in
delete)
  delete
  ;;

create)
  [[ -n $additionalValidatorCount ]] || usage "Need number of nodes"

  # Start from a clean slate: remove any instances left over under this prefix
  delete

  $metricsWriteDatapoint "testnet-deploy net-create-begin=1"

  # Unless boot failures are tolerated, tear everything down again when the
  # script exits with an error
  if $failOnValidatorBootupFailure; then
    trap create_error_cleanup EXIT
  fi

  rm -rf "$sshPrivateKey"{,.pub}

  # Note: using rsa because |aws ec2 import-key-pair| seems to fail for ecdsa
  ssh-keygen -t rsa -N '' -f "$sshPrivateKey"

  # Summary of the requested network; shown now and embedded in the
  # instances' final /etc/motd
  printNetworkInfo() {
    cat <<EOF
==[ Network composition ]===============================================================
Bootstrap validator = $bootstrapLeaderMachineType (GPU=$enableGpu)
Additional validators = $additionalValidatorCount x $validatorMachineType
Client(s) = $clientNodeCount x $clientMachineType
Blockstreamer = $blockstreamer
========================================================================================
EOF
  }
  printNetworkInfo

  creationDate=$(date)
  creationInfo() {
    cat <<EOF
Instance running since: $creationDate
========================================================================================
EOF
  }

  # Generate the startup script handed to every instance.  The outer heredoc
  # is expanded here, at generation time; a backslash-escaped \$ is left for
  # the instance to expand.  Bodies and terminators of the nested heredocs
  # must stay at column 0.
  declare startupScript="$netConfigDir"/instance-startup-script.sh
  cat > "$startupScript" <<EOF
#!/usr/bin/env bash
# autogenerated at $(date)
set -ex

if [[ -f /solana-scratch/.instance-startup-complete ]]; then
  echo reboot
  $(
    cd "$here"/scripts/
    if "$enableGpu"; then
      cat enable-nvidia-persistence-mode.sh
    fi

    if [[ -n $validatorAdditionalDiskSizeInGb ]]; then
      cat mount-additional-disk.sh
    fi

    cat ../../scripts/ulimit-n.sh
  )
  if [[ -x ~solana/solana/on-reboot ]]; then
    sudo -u solana ~solana/solana/on-reboot
  fi

  # Skip most setup on instance reboot
  exit 0
fi

cat > /etc/motd <<EOM
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
This instance has not been fully configured.
See startup script log messages in /var/log/syslog for status:
$ sudo cat /var/log/syslog | egrep \\(startup-script\\|cloud-init\)
To block until setup is complete, run:
$ until [[ -f /solana-scratch/.instance-startup-complete ]]; do sleep 1; done
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
$(creationInfo)
EOM

# Place the generated private key at /solana-scratch/id_ecdsa so it's retrievable by anybody
# who is able to log into this machine
mkdir -m 0777 /solana-scratch
cat > /solana-scratch/id_ecdsa <<EOK
$(cat "$sshPrivateKey")
EOK
cat > /solana-scratch/id_ecdsa.pub <<EOK
$(cat "$sshPrivateKey.pub")
EOK
chmod 444 /solana-scratch/id_ecdsa

USER=\$(id -un)

export DEBIAN_FRONTEND=noninteractive
$(
  cd "$here"/scripts/
  cat \
    disable-background-upgrades.sh \
    create-solana-user.sh \
    solana-user-authorized_keys.sh \
    add-testnet-solana-user-authorized_keys.sh \
    install-ag.sh \
    install-certbot.sh \
    install-earlyoom.sh \
    install-iftop.sh \
    install-libssl-compatability.sh \
    install-redis.sh \
    install-rsync.sh \
    localtime.sh \
    network-config.sh \
    remove-docker-interface.sh

  if "$enableGpu"; then
    cat enable-nvidia-persistence-mode.sh
  fi

  if [[ -n $validatorAdditionalDiskSizeInGb ]]; then
    cat mount-additional-disk.sh
  fi

  if [[ $selfDestructHours -gt 0 ]]; then
    cat <<EOSD
# Setup GCE self-destruct
cat >/solana-scratch/gce-self-destruct.sh <<'EOS'
$(cat gce-self-destruct.sh)
EOS
EOSD
    cat <<'EOSD'
# Populate terminal prompt update script
cat >/solana-scratch/gce-self-destruct-ps1.sh <<'EOS'
#!/usr/bin/env bash
source "$(dirname "$0")/gce-self-destruct.sh"
gce_self_destruct_ps1
EOS
chmod +x /solana-scratch/gce-self-destruct-ps1.sh
# Append MOTD and PS1 replacement to .profile
cat >>~solana/.profile <<'EOS'
# Print self-destruct countdown on login
source "/solana-scratch/gce-self-destruct.sh"
gce_self_destruct_motd
# Add self-destruct countdown to terminal prompt
export PS1='\[\e]0;\u@\h: \w\a\]${debian_chroot:+($debian_chroot)}\[\033[01;32m\]\u@\h\[\033[00m\]$(/solana-scratch/gce-self-destruct-ps1.sh):\[\033[01;34m\]\w\[\033[00m\]\$ '
EOS
EOSD
    cat <<EOSD
source /solana-scratch/gce-self-destruct.sh
gce_self_destruct_setup $selfDestructHours
EOSD
  fi
)

cat > /etc/motd <<EOM
See startup script log messages in /var/log/syslog for status:
$ sudo cat /var/log/syslog | egrep \\(startup-script\\|cloud-init\)
$(printNetworkInfo)
$(creationInfo)
EOM

$(
  if [[ "$tmpfsAccounts" = "true" ]]; then
    cat <<'EOSWAP'
# Setup swap/tmpfs for accounts
tmpfsMountPoint=/mnt/solana-accounts
swapDevice="/dev/nvme0n1"
swapUUID="43076c54-7840-4e59-a368-2d164f8984fb"
mkswap --uuid "$swapUUID" "$swapDevice"
echo "UUID=$swapUUID swap swap defaults 0 0" >> /etc/fstab
swapon "UUID=$swapUUID"
mkdir -p -m 0777 "$tmpfsMountPoint"
echo "tmpfs $tmpfsMountPoint tmpfs defaults,size=300G 0 0" >> /etc/fstab
mount "$tmpfsMountPoint"
EOSWAP
  fi
)

touch /solana-scratch/.instance-startup-complete
EOF

  # The custom address goes to the blockstreamer if there is one, otherwise
  # to the bootstrap validator
  if $blockstreamer; then
    blockstreamerAddress=$customAddress
  else
    bootstrapLeaderAddress=$customAddress
  fi

  for zone in "${zones[@]}"; do
    cloud_Initialize "$prefix" "$zone"
  done

  if $externalNodes; then
    echo "Bootstrap validator is already configured"
  else
    cloud_CreateInstances "$prefix" "$prefix-bootstrap-validator" 1 \
      "$enableGpu" "$bootstrapLeaderMachineType" "${zones[0]}" "$validatorBootDiskSizeInGb" \
      "$startupScript" "$bootstrapLeaderAddress" "$bootDiskType" "$validatorAdditionalDiskSizeInGb" \
      "$maybePreemptible" "$sshPrivateKey"
  fi

  if [[ $additionalValidatorCount -gt 0 ]]; then
    num_zones=${#zones[@]}
    if [[ $additionalValidatorCount -gt $num_zones ]]; then
      numNodesPerZone=$((additionalValidatorCount / num_zones))
      numLeftOverNodes=$((additionalValidatorCount % num_zones))
    else
      numNodesPerZone=1
      numLeftOverNodes=0
    fi

    # One background creation job per zone; the first zone (visited last)
    # also takes the remainder
    for ((i = num_zones - 1; i >= 0; i--)); do
      zone=${zones[i]}
      if [[ $i -eq 0 ]]; then
        numNodesPerZone=$((numNodesPerZone + numLeftOverNodes))
      fi
      cloud_CreateInstances "$prefix" "$prefix-$zone-validator" "$numNodesPerZone" \
        "$enableGpu" "$validatorMachineType" "$zone" "$validatorBootDiskSizeInGb" \
        "$startupScript" "" "$bootDiskType" "$validatorAdditionalDiskSizeInGb" \
        "$preemptible" "$sshPrivateKey" &
    done

    wait
  fi

  if [[ $clientNodeCount -gt 0 ]]; then
    cloud_CreateInstances "$prefix" "$prefix-client" "$clientNodeCount" \
      "$enableGpu" "$clientMachineType" "${zones[0]}" "$clientBootDiskSizeInGb" \
      "$startupScript" "" "$bootDiskType" "" "$maybePreemptible" "$sshPrivateKey"
  fi

  if $blockstreamer; then
    cloud_CreateInstances "$prefix" "$prefix-blockstreamer" "1" \
      "$enableGpu" "$blockstreamerMachineType" "${zones[0]}" "$validatorBootDiskSizeInGb" \
      "$startupScript" "$blockstreamerAddress" "$bootDiskType" "" "$maybePreemptible" "$sshPrivateKey"
  fi

  $metricsWriteDatapoint "testnet-deploy net-create-complete=1"

  prepareInstancesAndWriteConfigFile
  ;;

config)
  failOnValidatorBootupFailure=false
  prepareInstancesAndWriteConfigFile
  ;;

info)
  loadConfigFile
  # Print one row of the human-readable node table
  printNode() {
    declare nodeType=$1
    declare ip=$2
    declare ipPrivate=$3
    declare zone=$4
    printf " %-16s | %-15s | %-15s | %s\n" "$nodeType" "$ip" "$ipPrivate" "$zone"
  }

  if $evalInfo; then
    echo "NET_NUM_VALIDATORS=${#validatorIpList[@]}"
    echo "NET_NUM_CLIENTS=${#clientIpList[@]}"
    echo "NET_NUM_BLOCKSTREAMERS=${#blockstreamerIpList[@]}"
  else
    printNode "Node Type" "Public IP" "Private IP" "Zone"
    echo "-------------------+-----------------+-----------------+--------------"
  fi

  # The first validator listed is the bootstrap validator
  nodeType=bootstrap-validator
  for ((i = 0; i < ${#validatorIpList[@]}; i++)); do
    ipAddress=${validatorIpList[$i]}
    ipAddressPrivate=${validatorIpListPrivate[$i]}
    zone=${validatorIpListZone[$i]}
    if $evalInfo; then
      echo "NET_VALIDATOR${i}_IP=$ipAddress"
    else
      printNode "$nodeType" "$ipAddress" "$ipAddressPrivate" "$zone"
    fi
    nodeType=validator
  done

  for ((i = 0; i < ${#clientIpList[@]}; i++)); do
    ipAddress=${clientIpList[$i]}
    ipAddressPrivate=${clientIpListPrivate[$i]}
    zone=${clientIpListZone[$i]}
    if $evalInfo; then
      echo "NET_CLIENT${i}_IP=$ipAddress"
    else
      printNode client "$ipAddress" "$ipAddressPrivate" "$zone"
    fi
  done

  for ((i = 0; i < ${#blockstreamerIpList[@]}; i++)); do
    ipAddress=${blockstreamerIpList[$i]}
    ipAddressPrivate=${blockstreamerIpListPrivate[$i]}
    zone=${blockstreamerIpListZone[$i]}
    if $evalInfo; then
      echo "NET_BLOCKSTREAMER${i}_IP=$ipAddress"
    else
      printNode blockstreamer "$ipAddress" "$ipAddressPrivate" "$zone"
    fi
  done
  ;;

status)
  cloud_StatusAll
  ;;

*)
  usage "Unknown command: $command"
  ;;
esac