#!/bin/bash -e here=$(dirname "$0") # shellcheck source=net/scripts/gcloud.sh source "$here"/scripts/gcloud.sh # shellcheck source=net/common.sh source "$here"/common.sh prefix=testnet-dev-${USER//[^A-Za-z0-9]/} validatorNodeCount=5 clientNodeCount=1 leaderBootDiskSize=1TB leaderMachineType=n1-standard-16 leaderAccelerator= validatorMachineType=n1-standard-4 validatorBootDiskSize=$leaderBootDiskSize validatorAccelerator= clientMachineType=n1-standard-16 clientBootDiskSize=40GB clientAccelerator= imageName="ubuntu-16-04-cuda-9-2-new" publicNetwork=false zone="us-west1-b" leaderAddress= usage() { exitcode=0 if [[ -n "$1" ]]; then exitcode=1 echo "Error: $*" fi cat <> "$configFile" <> "$configFile" if [[ $arrayName = "leaderIp" ]]; then if $publicNetwork; then echo "entrypointIp=$publicIp" >> "$configFile" else echo "entrypointIp=$privateIp" >> "$configFile" fi fi } waitForStartupComplete() { declare name="$1" declare publicIp="$3" echo "Waiting for $name to finish booting..." ( for i in $(seq 1 30); do if (set -x; ssh "${sshOptions[@]}" "$publicIp" "test -f /.gce-startup-complete"); then break fi sleep 2 echo "Retry $i..." done ) } echo "Looking for leader instance..." gcloud_FindInstances "name=$prefix-leader" show [[ ${#instances[@]} -eq 1 ]] || { echo "Unable to find leader" exit 1 } echo "Fetching $sshPrivateKey from $leaderName" ( rm -rf "$sshPrivateKey"{,pub} declare leaderName declare leaderZone declare leaderIp IFS=: read -r leaderName leaderZone leaderIp _ < <(echo "${instances[0]}") set -x # Try to ping the machine first. There can be a delay between when the # instance is reported as RUNNING and when it's reachable over the network timeout 30s bash -c "set -o pipefail; until ping -c 3 $leaderIp | tr - _; do echo .; done" # Try to scp in a couple times, sshd may not yet be up even though the # machine can be pinged... set -o pipefail for i in $(seq 1 10); do if gcloud compute scp --zone "$leaderZone" \ "$leaderName:/solana-id_ecdsa" "$sshPrivateKey"; then break fi sleep 1 echo "Retry $i..." done chmod 400 "$sshPrivateKey" ) echo "leaderIp=()" >> "$configFile" gcloud_ForEachInstance recordInstanceIp leaderIp gcloud_ForEachInstance waitForStartupComplete echo "Looking for validator instances..." gcloud_FindInstances "name~^$prefix-validator" show [[ ${#instances[@]} -gt 0 ]] || { echo "Unable to find validators" exit 1 } echo "validatorIpList=()" >> "$configFile" gcloud_ForEachInstance recordInstanceIp validatorIpList gcloud_ForEachInstance waitForStartupComplete echo "clientIpList=()" >> "$configFile" echo "Looking for client instances..." gcloud_FindInstances "name~^$prefix-client" show [[ ${#instances[@]} -eq 0 ]] || { gcloud_ForEachInstance recordInstanceIp clientIpList gcloud_ForEachInstance waitForStartupComplete } echo "Wrote $configFile" $metricsWriteDatapoint "testnet-deploy net-config-complete=1" } case $command in delete) $metricsWriteDatapoint "testnet-deploy net-delete-begin=1" # Delete the leader node first to prevent unusual metrics on the dashboard # during shutdown. # TODO: It would be better to fully cut-off metrics reporting before any # instances are deleted. for filter in "^$prefix-leader" "^$prefix-"; do gcloud_FindInstances "name~$filter" if [[ ${#instances[@]} -eq 0 ]]; then echo "No instances found matching '$filter'" else gcloud_DeleteInstances true fi done rm -f "$configFile" $metricsWriteDatapoint "testnet-deploy net-delete-complete=1" ;; create) [[ -n $validatorNodeCount ]] || usage "Need number of nodes" $metricsWriteDatapoint "testnet-deploy net-create-begin=1" rm -rf "$sshPrivateKey"{,.pub} ssh-keygen -t ecdsa -N '' -f "$sshPrivateKey" printNetworkInfo() { cat < "$startupScript" < /etc/motd < /solana-id_ecdsa < /solana-id_ecdsa.pub < /etc/motd <