#!/usr/bin/env bash
set -e

here=$(dirname "$0")
# shellcheck source=net/common.sh
source "$here"/common.sh

# The cloud provider is derived from the name this script was invoked under
# (its basename with the .sh suffix removed).
cloudProvider=$(basename "$0" .sh)
bootDiskType=""

# Load the provider implementation and pick per-provider machine types.
case $cloudProvider in
gce)
  # shellcheck source=net/scripts/gce-provider.sh
  source "$here"/scripts/gce-provider.sh

  cpuBootstrapLeaderMachineType="--machine-type n1-standard-16"
  gpuBootstrapLeaderMachineType="$cpuBootstrapLeaderMachineType --accelerator count=4,type=nvidia-tesla-k80"
  bootstrapLeaderMachineType=$cpuBootstrapLeaderMachineType
  fullNodeMachineType=$cpuBootstrapLeaderMachineType
  clientMachineType="--custom-cpu 16 --custom-memory 20GB"
  blockstreamerMachineType="--machine-type n1-standard-8"
  ;;
ec2)
  # shellcheck source=net/scripts/ec2-provider.sh
  source "$here"/scripts/ec2-provider.sh

  cpuBootstrapLeaderMachineType=m4.2xlarge
  gpuBootstrapLeaderMachineType=p2.xlarge
  bootstrapLeaderMachineType=$cpuBootstrapLeaderMachineType
  fullNodeMachineType=$cpuBootstrapLeaderMachineType
  clientMachineType=m4.2xlarge
  blockstreamerMachineType=m4.2xlarge
  ;;
azure)
  # shellcheck source=net/scripts/azure-provider.sh
  source "$here"/scripts/azure-provider.sh

  # TODO: Dial in machine types for Azure
  cpuBootstrapLeaderMachineType=Standard_D16s_v3
  gpuBootstrapLeaderMachineType=Standard_NC12
  bootstrapLeaderMachineType=$cpuBootstrapLeaderMachineType
  fullNodeMachineType=$cpuBootstrapLeaderMachineType
  clientMachineType=Standard_D16s_v3
  blockstreamerMachineType=Standard_D16s_v3
  ;;
*)
  # Without a provider script none of the cloud_* helpers or machine types are
  # defined, so stop here instead of failing later with a confusing error.
  echo "Error: Unknown cloud provider: $cloudProvider" >&2
  exit 1
  ;;
esac

# Defaults; these are plain globals read by the rest of the script.
prefix=testnet-dev-${USER//[^A-Za-z0-9]/}
additionalFullNodeCount=5
clientNodeCount=1
blockstreamer=false
fullNodeBootDiskSizeInGb=1000
clientBootDiskSizeInGb=75
externalNodes=false
failOnValidatorBootupFailure=true

publicNetwork=false
enableGpu=false
customAddress=
leaderRotation=true
zones=()

# containsZone MATCH [CANDIDATE...]
# Returns 0 when MATCH is string-equal to one of the CANDIDATE arguments,
# 1 otherwise (including when no candidates are given).
containsZone() {
  local e match="$1"
  shift
  for e; do
    [[ "$e" == "$match" ]] && return 0
  done
  return 1
}
# fetchPrivateKey
# Copies the ssh key pair from the first entry of the global `instances` array
# to "$sshPrivateKey" and "$sshPrivateKey.pub", unless "$sshPrivateKey" is
# already readable.
#
# Globals read: instances (element 0, colon-separated: name:ip:<ignored>:zone),
#               sshPrivateKey
# Calls:        cloud_WaitForInstanceReady, cloud_FetchFile
# Returns:      non-zero if both key files could not be fetched after 30 tries.
#
# The whole body runs in a subshell, so `set -x -o pipefail` and the working
# variables do not leak into the caller.
fetchPrivateKey() {
  (
    declare nodeName
    declare nodeIp
    declare nodeZone
    IFS=: read -r nodeName nodeIp _ nodeZone <<<"${instances[0]}"

    # Make sure the machine is alive or pingable
    timeout_sec=90
    cloud_WaitForInstanceReady "$nodeName" "$nodeIp" "$nodeZone" "$timeout_sec"

    if [[ ! -r $sshPrivateKey ]]; then
      echo "Fetching $sshPrivateKey from $nodeName"

      # Try to scp in a couple times, sshd may not yet be up even though the
      # machine can be pinged...
      set -x -o pipefail
      declare fetched=false
      declare i
      for i in $(seq 1 30); do
        if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa "$sshPrivateKey" "$nodeZone" &&
          cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa.pub "$sshPrivateKey.pub" "$nodeZone"; then
          fetched=true
          break
        fi

        sleep 1
        echo "Retry $i..."
      done

      if ! $fetched; then
        # Do not report success with half a key pair. Remove whatever was
        # downloaded so the readability check above does not skip the fetch on
        # the next attempt.
        rm -f "$sshPrivateKey" "$sshPrivateKey.pub"
        echo "Error: unable to fetch $sshPrivateKey from $nodeName" >&2
        exit 1
      fi

      chmod 400 "$sshPrivateKey"
      ls -l "$sshPrivateKey"
    fi
  )
}
# delete
# Finds and deletes the instances whose names match this deployment's prefix,
# then removes "$configFile" unless $externalNodes is true.
#
# Globals read: prefix, zones, externalNodes, configFile, metricsWriteDatapoint,
#               instances (populated by cloud_FindInstances)
# Calls:        cloud_FindInstances, cloud_DeleteInstances
#
# Deletions are launched as background jobs, one per filter with matches. A job
# that exits non-zero is reported on stderr with the filter it belonged to; the
# function still continues, as it did before failures were reported.
delete() {
  $metricsWriteDatapoint "testnet-deploy net-delete-begin=1"

  local filter zone i
  local -a filters pids pidFilters

  # Delete the bootstrap leader first to prevent unusual metrics on the dashboard
  # during shutdown (only applicable when leader rotation is disabled).
  # TODO: It would be better to fully cut-off metrics reporting before any
  # instances are deleted.
  filters=("$prefix-bootstrap-leader")
  for zone in "${zones[@]}"; do
    filters+=("$prefix-$zone")
  done
  # Filter for all other nodes (client, blockstreamer)
  filters+=("$prefix-")

  pids=()
  pidFilters=()
  for filter in "${filters[@]}"; do
    echo "Searching for instances: $filter"
    cloud_FindInstances "$filter"

    if [[ ${#instances[@]} -eq 0 ]]; then
      echo "No instances found matching '$filter'"
    else
      cloud_DeleteInstances true &
      pids+=("$!")
      pidFilters+=("$filter")
    fi
  done

  # Wait on each job individually: a bare `wait` always returns 0 and would
  # hide a failed deletion.
  for i in "${!pids[@]}"; do
    if ! wait "${pids[$i]}"; then
      echo "Warning: failed to delete instances matching '${pidFilters[$i]}'" >&2
    fi
  done
  # Also reap any other outstanding background jobs, as the bare `wait` did.
  wait

  if $externalNodes; then
    echo "Let's not delete the current configuration file"
  else
    rm -f "$configFile"
  fi

  $metricsWriteDatapoint "testnet-deploy net-delete-complete=1"
}