#!/usr/bin/env bash set -e here=$(dirname "$0") # shellcheck source=net/common.sh source "$here"/common.sh cloudProvider=$(basename "$0" .sh) bootDiskType="" case $cloudProvider in gce) # shellcheck source=net/scripts/gce-provider.sh source "$here"/scripts/gce-provider.sh cpuBootstrapLeaderMachineType=n1-standard-16 gpuBootstrapLeaderMachineType="$cpuBootstrapLeaderMachineType --accelerator count=4,type=nvidia-tesla-k80" bootstrapLeaderMachineType=$cpuBootstrapLeaderMachineType fullNodeMachineType=n1-standard-16 clientMachineType=n1-standard-16 ;; ec2) # shellcheck source=net/scripts/ec2-provider.sh source "$here"/scripts/ec2-provider.sh cpuBootstrapLeaderMachineType=m4.4xlarge gpuBootstrapLeaderMachineType=p2.xlarge bootstrapLeaderMachineType=$cpuBootstrapLeaderMachineType fullNodeMachineType=m4.2xlarge clientMachineType=m4.2xlarge ;; *) echo "Error: Unknown cloud provider: $cloudProvider" ;; esac prefix=testnet-dev-${USER//[^A-Za-z0-9]/} additionalFullNodeCount=5 clientNodeCount=1 fullNodeBootDiskSizeInGb=1000 clientBootDiskSizeInGb=75 publicNetwork=false enableGpu=false bootstrapLeaderAddress= leaderRotation=true usage() { exitcode=0 if [[ -n "$1" ]]; then exitcode=1 echo "Error: $*" fi cat <> "$configFile" <> "$configFile" echo "${arrayName}Private+=($privateIp) # $name" >> "$configFile" } waitForStartupComplete() { declare name="$1" declare publicIp="$2" echo "Waiting for $name to finish booting..." ( set -x +e for i in $(seq 1 60); do timeout 20s ssh "${sshOptions[@]}" "$publicIp" "ls -l /.instance-startup-complete" ret=$? if [[ $ret -eq 0 ]]; then exit 0 fi sleep 2 echo "Retry $i..." done echo "$name failed to boot." exit 1 ) echo "$name has booted." } echo "Looking for bootstrap leader instance..." cloud_FindInstance "$prefix-bootstrap-leader" [[ ${#instances[@]} -eq 1 ]] || { echo "Unable to find bootstrap leader" exit 1 } ( declare nodeName declare nodeIp IFS=: read -r nodeName nodeIp _ < <(echo "${instances[0]}") # Try to ping the machine first. 
# NOTE(review): this copy of the script appears to have lost its line breaks and
# its here-document bodies. The usage() text above collapses into the fragment
# `cat <> "$configFile" <> "$configFile"`, and no assignment is visible here for
# $command, $configFile, $sshPrivateKey, sshOptions or $metricsWriteDatapoint
# (they may come from common.sh or from the lost text). Confirm against version
# control before changing any code in this file.
# NOTE(review): the `*)` arm of the provider `case` above prints an error but
# does not exit, so execution continues without the machine-type variables that
# the gce/ec2 arms assign.
# The commands below continue the subshell opened above: wait up to 90s for
# $nodeIp to answer ping; then, if $sshPrivateKey is not readable, call
# cloud_FetchFile up to 30 times (1s apart) to copy /solana-id_ecdsa from the
# node and set the fetched key to mode 400.
timeout 90s bash -c "set -o pipefail; until ping -c 3 $nodeIp | tr - _; do echo .; done" if [[ ! -r $sshPrivateKey ]]; then echo "Fetching $sshPrivateKey from $nodeName" # Try to scp in a couple times, sshd may not yet be up even though the # machine can be pinged... set -x -o pipefail for i in $(seq 1 30); do if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa "$sshPrivateKey"; then break fi sleep 1 echo "Retry $i..." done chmod 400 "$sshPrivateKey" ls -l "$sshPrivateKey" fi ) echo "fullnodeIpList=()" >> "$configFile" echo "fullnodeIpListPrivate=()" >> "$configFile" cloud_ForEachInstance recordInstanceIp fullnodeIpList cloud_ForEachInstance waitForStartupComplete echo "Looking for additional fullnode instances..." cloud_FindInstances "$prefix-fullnode" [[ ${#instances[@]} -gt 0 ]] || { echo "Unable to find additional fullnodes" exit 1 } cloud_ForEachInstance recordInstanceIp fullnodeIpList cloud_ForEachInstance waitForStartupComplete echo "clientIpList=()" >> "$configFile" echo "clientIpListPrivate=()" >> "$configFile" echo "Looking for client bencher instances..." cloud_FindInstances "$prefix-client" [[ ${#instances[@]} -eq 0 ]] || { cloud_ForEachInstance recordInstanceIp clientIpList cloud_ForEachInstance waitForStartupComplete } echo "Wrote $configFile" $metricsWriteDatapoint "testnet-deploy net-config-complete=1" } delete() { $metricsWriteDatapoint "testnet-deploy net-delete-begin=1" # Delete the bootstrap leader first to prevent unusual metrics on the dashboard # during shutdown (only applicable when leader rotation is disabled). # TODO: It would be better to fully cut-off metrics reporting before any # instances are deleted. 
# Two passes: instances matching "$prefix-bootstrap-leader" first, then anything
# matching "$prefix-". cloud_DeleteInstances runs only when a pass finds at
# least one instance; after both passes $configFile is removed.
# The `create)` arm of the command dispatch further down calls delete first and
# then generates a fresh RSA key pair at $sshPrivateKey with ssh-keygen.
for filter in "$prefix-bootstrap-leader" "$prefix-"; do echo "Searching for instances: $filter" cloud_FindInstances "$filter" if [[ ${#instances[@]} -eq 0 ]]; then echo "No instances found matching '$filter'" else cloud_DeleteInstances true fi done rm -f "$configFile" $metricsWriteDatapoint "testnet-deploy net-delete-complete=1" } case $command in delete) delete ;; create) [[ -n $additionalFullNodeCount ]] || usage "Need number of nodes" if [[ $additionalFullNodeCount -le 0 ]]; then usage "One or more additional fullnodes are required" fi delete $metricsWriteDatapoint "testnet-deploy net-create-begin=1" rm -rf "$sshPrivateKey"{,.pub} # Note: using rsa because |aws ec2 import-key-pair| seems to fail for ecdsa ssh-keygen -t rsa -N '' -f "$sshPrivateKey" printNetworkInfo() { cat < "$startupScript" < /etc/motd < /solana-id_ecdsa < /solana-id_ecdsa.pub < /etc/motd <