parent
05ada97d00
commit
2636a9c9f1
|
@ -0,0 +1 @@
|
||||||
|
gce.sh
|
37
net/gce.sh
37
net/gce.sh
|
@ -49,6 +49,18 @@ azure)
|
||||||
blockstreamerMachineType=Standard_D16s_v3
|
blockstreamerMachineType=Standard_D16s_v3
|
||||||
replicatorMachineType=Standard_D4s_v3
|
replicatorMachineType=Standard_D4s_v3
|
||||||
;;
|
;;
|
||||||
|
colo)
|
||||||
|
# shellcheck source=net/scripts/colo-provider.sh
|
||||||
|
source "$here"/scripts/colo-provider.sh
|
||||||
|
|
||||||
|
cpuBootstrapLeaderMachineType=0
|
||||||
|
gpuBootstrapLeaderMachineType=1
|
||||||
|
bootstrapLeaderMachineType=$cpuBootstrapLeaderMachineType
|
||||||
|
fullNodeMachineType=$cpuBootstrapLeaderMachineType
|
||||||
|
clientMachineType=0
|
||||||
|
blockstreamerMachineType=0
|
||||||
|
replicatorMachineType=0
|
||||||
|
;;
|
||||||
*)
|
*)
|
||||||
echo "Error: Unknown cloud provider: $cloudProvider"
|
echo "Error: Unknown cloud provider: $cloudProvider"
|
||||||
;;
|
;;
|
||||||
|
@ -95,6 +107,7 @@ Manage testnet instances
|
||||||
config - configure the testnet and write a config file describing it
|
config - configure the testnet and write a config file describing it
|
||||||
delete - delete the testnet
|
delete - delete the testnet
|
||||||
info - display information about the currently configured testnet
|
info - display information about the currently configured testnet
|
||||||
|
status - display status information of all resources
|
||||||
|
|
||||||
common options:
|
common options:
|
||||||
-p [prefix] - Optional common prefix for instance names to avoid
|
-p [prefix] - Optional common prefix for instance names to avoid
|
||||||
|
@ -147,7 +160,7 @@ EOF
|
||||||
command=$1
|
command=$1
|
||||||
[[ -n $command ]] || usage
|
[[ -n $command ]] || usage
|
||||||
shift
|
shift
|
||||||
[[ $command = create || $command = config || $command = info || $command = delete ]] ||
|
[[ $command = create || $command = config || $command = info || $command = delete || $command = status ]] ||
|
||||||
usage "Invalid command: $command"
|
usage "Invalid command: $command"
|
||||||
|
|
||||||
shortArgs=()
|
shortArgs=()
|
||||||
|
@ -243,12 +256,7 @@ fi
|
||||||
case $cloudProvider in
|
case $cloudProvider in
|
||||||
gce)
|
gce)
|
||||||
;;
|
;;
|
||||||
ec2)
|
ec2|azure|colo)
|
||||||
if [[ -n $fullNodeAdditionalDiskSizeInGb ]] ; then
|
|
||||||
usage "Error: --fullnode-additional-disk-size-gb currently only supported with cloud provider: gce"
|
|
||||||
fi
|
|
||||||
;;
|
|
||||||
azure)
|
|
||||||
if [[ -n $fullNodeAdditionalDiskSizeInGb ]] ; then
|
if [[ -n $fullNodeAdditionalDiskSizeInGb ]] ; then
|
||||||
usage "Error: --fullnode-additional-disk-size-gb currently only supported with cloud provider: gce"
|
usage "Error: --fullnode-additional-disk-size-gb currently only supported with cloud provider: gce"
|
||||||
fi
|
fi
|
||||||
|
@ -682,7 +690,8 @@ EOF
|
||||||
else
|
else
|
||||||
cloud_CreateInstances "$prefix" "$prefix-bootstrap-leader" 1 \
|
cloud_CreateInstances "$prefix" "$prefix-bootstrap-leader" 1 \
|
||||||
"$enableGpu" "$bootstrapLeaderMachineType" "${zones[0]}" "$fullNodeBootDiskSizeInGb" \
|
"$enableGpu" "$bootstrapLeaderMachineType" "${zones[0]}" "$fullNodeBootDiskSizeInGb" \
|
||||||
"$startupScript" "$bootstrapLeaderAddress" "$bootDiskType" "$fullNodeAdditionalDiskSizeInGb"
|
"$startupScript" "$bootstrapLeaderAddress" "$bootDiskType" "$fullNodeAdditionalDiskSizeInGb" \
|
||||||
|
"$sshPrivateKey"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ $additionalFullNodeCount -gt 0 ]]; then
|
if [[ $additionalFullNodeCount -gt 0 ]]; then
|
||||||
|
@ -702,7 +711,8 @@ EOF
|
||||||
fi
|
fi
|
||||||
cloud_CreateInstances "$prefix" "$prefix-$zone-fullnode" "$numNodesPerZone" \
|
cloud_CreateInstances "$prefix" "$prefix-$zone-fullnode" "$numNodesPerZone" \
|
||||||
"$enableGpu" "$fullNodeMachineType" "$zone" "$fullNodeBootDiskSizeInGb" \
|
"$enableGpu" "$fullNodeMachineType" "$zone" "$fullNodeBootDiskSizeInGb" \
|
||||||
"$startupScript" "" "$bootDiskType" "$fullNodeAdditionalDiskSizeInGb" &
|
"$startupScript" "" "$bootDiskType" "$fullNodeAdditionalDiskSizeInGb" \
|
||||||
|
"$sshPrivateKey" &
|
||||||
done
|
done
|
||||||
|
|
||||||
wait
|
wait
|
||||||
|
@ -711,19 +721,19 @@ EOF
|
||||||
if [[ $clientNodeCount -gt 0 ]]; then
|
if [[ $clientNodeCount -gt 0 ]]; then
|
||||||
cloud_CreateInstances "$prefix" "$prefix-client" "$clientNodeCount" \
|
cloud_CreateInstances "$prefix" "$prefix-client" "$clientNodeCount" \
|
||||||
"$enableGpu" "$clientMachineType" "${zones[0]}" "$clientBootDiskSizeInGb" \
|
"$enableGpu" "$clientMachineType" "${zones[0]}" "$clientBootDiskSizeInGb" \
|
||||||
"$startupScript" "" "$bootDiskType" ""
|
"$startupScript" "" "$bootDiskType" "" "$sshPrivateKey"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if $blockstreamer; then
|
if $blockstreamer; then
|
||||||
cloud_CreateInstances "$prefix" "$prefix-blockstreamer" "1" \
|
cloud_CreateInstances "$prefix" "$prefix-blockstreamer" "1" \
|
||||||
"$enableGpu" "$blockstreamerMachineType" "${zones[0]}" "$fullNodeBootDiskSizeInGb" \
|
"$enableGpu" "$blockstreamerMachineType" "${zones[0]}" "$fullNodeBootDiskSizeInGb" \
|
||||||
"$startupScript" "$blockstreamerAddress" "$bootDiskType" ""
|
"$startupScript" "$blockstreamerAddress" "$bootDiskType" "" "$sshPrivateKey"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ $replicatorNodeCount -gt 0 ]]; then
|
if [[ $replicatorNodeCount -gt 0 ]]; then
|
||||||
cloud_CreateInstances "$prefix" "$prefix-replicator" "$replicatorNodeCount" \
|
cloud_CreateInstances "$prefix" "$prefix-replicator" "$replicatorNodeCount" \
|
||||||
false "$replicatorMachineType" "${zones[0]}" "$replicatorBootDiskSizeInGb" \
|
false "$replicatorMachineType" "${zones[0]}" "$replicatorBootDiskSizeInGb" \
|
||||||
"$startupScript" "" "" ""
|
"$startupScript" "" "" "" "$sshPrivateKey"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
$metricsWriteDatapoint "testnet-deploy net-create-complete=1"
|
$metricsWriteDatapoint "testnet-deploy net-create-complete=1"
|
||||||
|
@ -776,6 +786,9 @@ info)
|
||||||
printNode replicator "$ipAddress" "$ipAddressPrivate" "$zone"
|
printNode replicator "$ipAddress" "$ipAddressPrivate" "$zone"
|
||||||
done
|
done
|
||||||
;;
|
;;
|
||||||
|
status)
|
||||||
|
cloud_StatusAll
|
||||||
|
;;
|
||||||
*)
|
*)
|
||||||
usage "Unknown command: $command"
|
usage "Unknown command: $command"
|
||||||
esac
|
esac
|
||||||
|
|
|
@ -1,11 +1,6 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
set -ex
|
set -ex
|
||||||
|
|
||||||
[[ $(uname) = Linux ]] || exit 1
|
|
||||||
[[ $USER = root ]] || exit 1
|
|
||||||
|
|
||||||
[[ -d /home/solana/.ssh ]] || mkdir -p /home/solana/.ssh
|
|
||||||
|
|
||||||
cd "$(dirname "$0")"
|
cd "$(dirname "$0")"
|
||||||
|
|
||||||
# shellcheck source=net/scripts/solana-user-authorized_keys.sh
|
# shellcheck source=net/scripts/solana-user-authorized_keys.sh
|
||||||
|
@ -14,7 +9,6 @@ source solana-user-authorized_keys.sh
|
||||||
# solana-user-authorized_keys.sh defines the public keys for users that should
|
# solana-user-authorized_keys.sh defines the public keys for users that should
|
||||||
# automatically be granted access to ALL datacenter nodes.
|
# automatically be granted access to ALL datacenter nodes.
|
||||||
for i in "${!SOLANA_USERS[@]}"; do
|
for i in "${!SOLANA_USERS[@]}"; do
|
||||||
echo "environment=\"SOLANA_USER=${SOLANA_USERS[i]}\" ${SOLANA_PUBKEYS[i]}" >> /solana-authorized_keys
|
echo "environment=\"SOLANA_USER=${SOLANA_USERS[i]}\" ${SOLANA_PUBKEYS[i]}"
|
||||||
done
|
done
|
||||||
|
|
||||||
sudo -u solana mv /solana-authorized_keys /home/solana/.ssh/authorized_keys
|
|
||||||
|
|
|
@ -319,4 +319,12 @@ cloud_FetchFile() {
|
||||||
cloud_CreateAndAttachPersistentDisk() {
|
cloud_CreateAndAttachPersistentDisk() {
|
||||||
echo "ERROR: cloud_CreateAndAttachPersistentDisk is not yet implemented for azure"
|
echo "ERROR: cloud_CreateAndAttachPersistentDisk is not yet implemented for azure"
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#
# cloud_StatusAll
#
# Not yet implemented for this cloud provider.
#
# Prints an error message and returns non-zero so that callers (and scripts
# running under `set -e`) can tell that no status information was produced.
#
cloud_StatusAll() {
  echo "ERROR: cloud_StatusAll is not yet implemented for azure"
  return 1
}
|
||||||
|
|
|
@ -0,0 +1,276 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
# |source| this file
|
||||||
|
#
|
||||||
|
# Utilities for working with Colo instances
|
||||||
|
#
|
||||||
|
|
||||||
|
declare COLO_TODO_PARALLELIZE=false
|
||||||
|
|
||||||
|
__cloud_colo_here="$(dirname "${BASH_SOURCE[0]}")"
|
||||||
|
# shellcheck source=net/scripts/colo-utils.sh
|
||||||
|
source "${__cloud_colo_here}/colo-utils.sh"
|
||||||
|
|
||||||
|
#
# cloud_DefaultZone
#
# Prints the name of the zone to use when none is given explicitly.
#
cloud_DefaultZone() {
  printf '%s\n' "Denver"
}
|
||||||
|
|
||||||
|
#
# __cloud_FindInstances [filter]
#
# Find instances whose instance name matches the given regular expression.
#
# For each matching instance, an entry in the `instances` array will be added
# with the following information about the instance:
#   "name:public IP:private IP:zone"
# (the private IP is reported in both IP positions, see the note in the body).
# A human readable summary line per match is also written to stderr.
#
# filter - regular expression (as understood by `[[ =~ ]]`) that is matched
#          against the instance name of every entry in COLO_RES_AVAILABILITY.
#          Nodes that currently carry no instance name never match.
#
# examples:
#   $ __cloud_FindInstances "^exact-machine-name$"
#   $ __cloud_FindInstances "^all-machines-with-a-common-machine-prefix"
#
__cloud_FindInstances() {
  declare HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME INSTANCES_TEXT
  declare AVAIL LINE
  declare filter=$1
  instances=()

  # Without the parallel mode nothing has been cached yet, so refresh the
  # resource table and force a fresh availability scan.
  if ! $COLO_TODO_PARALLELIZE; then
    colo_load_resources
    colo_load_availability false
  fi

  INSTANCES_TEXT="$(
    for AVAIL in "${COLO_RES_AVAILABILITY[@]}"; do
      IFS=$'\v' read -r HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME <<<"$AVAIL"
      # A node without an instance name is not an instance, whatever the filter
      if [[ -n $INSTNAME && $INSTNAME =~ $filter ]]; then
        IP=$PRIV_IP # Colo public IPs are firewalled to only allow UDP(8000-10000). Reuse private IP as public and require VPN
        printf "%-40s | publicIp=%-16s privateIp=%s zone=%s\n" "$INSTNAME" "$IP" "$PRIV_IP" "$ZONE" 1>&2
        printf '%s\n' "${INSTNAME}:${IP}:${PRIV_IP}:$ZONE"
      fi
    done | sort -t $'\v' -k1
  )"

  # A here-string of an empty value would still yield one (empty) line
  if [[ -n "$INSTANCES_TEXT" ]]; then
    while read -r LINE; do
      instances+=( "$LINE" )
    done <<<"$INSTANCES_TEXT"
  fi
}
|
||||||
|
|
||||||
|
#
# cloud_FindInstances [namePrefix]
#
# Find instances with names that start with the specified prefix.
#
# The prefix is anchored at the start of the name and handed to
# __cloud_FindInstances as a regular expression; matches are stored in the
# `instances` array by that function.
#
# namePrefix - The instance name prefix to look for
#
# examples:
#   $ cloud_FindInstances all-machines-with-a-common-machine-prefix
#
cloud_FindInstances() {
  declare namePrefix=$1
  __cloud_FindInstances "^${namePrefix}.*"
}
|
||||||
|
|
||||||
|
#
# cloud_FindInstance [name]
#
# Find the instance whose name is exactly the given name.
#
# The name is anchored at both ends and handed to __cloud_FindInstances as a
# regular expression; matches are stored in the `instances` array by that
# function.
#
# name - The instance name to look for
#
# examples:
#   $ cloud_FindInstance exact-machine-name
#
cloud_FindInstance() {
  declare instanceName=$1
  __cloud_FindInstances "^${instanceName}$"
}
|
||||||
|
|
||||||
|
#
# cloud_Initialize [networkName] [zone]
#
# Perform one-time initialization that may be required for the given testnet.
# Both arguments are accepted but not used by this provider.
#
# The colo resource table is always loaded; node availability is additionally
# loaded up front only when COLO_TODO_PARALLELIZE is enabled.
#
# This function will be called before |cloud_CreateInstances|
cloud_Initialize() {
  colo_load_resources
  $COLO_TODO_PARALLELIZE || return 0
  colo_load_availability
}
|
||||||
|
|
||||||
|
#
# cloud_CreateInstances [networkName] [namePrefix] [numNodes] [enableGpu]
#                       [machineType] [zone] [bootDiskSize] [startupScript]
#                       [address] [bootDiskType] [additionalDiskSize]
#                       [sshPrivateKey]
#
# Requisitions one or more colo machines as instances.
#
# networkName        - unused by this provider
# namePrefix         - instance name. A single node is named exactly
#                      |namePrefix|; several nodes get |namePrefix| followed
#                      by a zero-padded, 1-based index
# numNodes           - number of instances to create
# enableGpu          - unused by this provider
# machineType        - requested machine type; a machine qualifies when
#                      colo_machine_types_compatible accepts it
# zone               - unused by this provider
# bootDiskSize       - unused by this provider
# startupScript      - unused by this provider
# address            - unused by this provider
# bootDiskType       - unused by this provider
# additionalDiskSize - unused by this provider
# sshPrivateKey      - path of the ssh private key handed to
#                      colo_node_requisition
#
# Returns non-zero (sequential mode) or exits (COLO_TODO_PARALLELIZE mode)
# when fewer than |numNodes| machines could be requisitioned.
#
# Tip: use cloud_FindInstances to locate the instances once this function
#      returns
cloud_CreateInstances() {
  #declare networkName="$1" # unused
  declare namePrefix="$2"
  declare numNodes="$3"
  #declare enableGpu="$4" # unused
  declare machineType="$5"
  # declare zone="$6" # unused
  #declare optionalBootDiskSize="$7" # unused
  #declare optionalStartupScript="$8" # unused
  #declare optionalAddress="$9" # unused
  #declare optionalBootDiskType="${10}" # unused
  #declare optionalAdditionalDiskSize="${11}" # unused
  declare sshPrivateKey="${12}"

  # Declared up front so that neither branch leaks them into the caller
  declare node IP RES_MACH
  declare -a nodes=()
  if [[ $numNodes = 1 ]]; then
    nodes=("$namePrefix")
  else
    for node in $(seq -f "${namePrefix}%0${#numNodes}g" 1 "$numNodes"); do
      nodes+=("$node")
    done
  fi

  if $COLO_TODO_PARALLELIZE; then
    declare HOST_NAME PRIV_IP STATUS ZONE LOCK_USER INSTNAME INDEX RES LINE
    declare -a AVAILABLE=()
    declare AVAILABLE_TEXT
    AVAILABLE_TEXT="$(
      for RES in "${COLO_RES_AVAILABILITY[@]}"; do
        IFS=$'\v' read -r HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME <<<"$RES"
        if [[ "FREE" = "$STATUS" ]]; then
          # The resource table is keyed by private IP, so the lookup and the
          # address used for the requisition must both be the private one.
          INDEX=$(colo_res_index_from_ip "$PRIV_IP") || continue
          RES_MACH="${COLO_RES_MACHINE[$INDEX]}"
          if colo_machine_types_compatible "$RES_MACH" "$machineType"; then
            if ! colo_node_is_requisitioned "$INDEX" "${COLO_RES_REQUISITIONED[*]}"; then
              printf '%s\v%s\n' "$RES_MACH" "$PRIV_IP"
            fi
          fi
        fi
      done | sort -nt $'\v' -k1,1
    )"

    if [[ -n "$AVAILABLE_TEXT" ]]; then
      while read -r LINE; do
        AVAILABLE+=("$LINE")
      done <<<"$AVAILABLE_TEXT"
    fi

    if [[ ${#AVAILABLE[@]} -lt $numNodes ]]; then
      echo "Insufficient resources available to allocate $numNodes $namePrefix" 1>&2
      exit 1
    fi

    declare AI=0
    for node in "${nodes[@]}"; do
      IFS=$'\v' read -r _ IP <<<"${AVAILABLE[$AI]}"
      colo_node_requisition "$IP" "$node" "$sshPrivateKey" >/dev/null
      AI=$((AI+1))
    done
  else
    # Walk the resource table in order and take every compatible machine that
    # can be requisitioned until enough nodes have been collected.
    declare RI=0
    declare NI=0
    while [[ $NI -lt $numNodes && $RI -lt $COLO_RES_N ]]; do
      node="${nodes[$NI]}"
      RES_MACH="${COLO_RES_MACHINE[$RI]}"
      IP="${COLO_RES_IP_PRIV[$RI]}"
      if colo_machine_types_compatible "$RES_MACH" "$machineType"; then
        if colo_node_requisition "$IP" "$node" "$sshPrivateKey" >/dev/null; then
          NI=$((NI+1))
        fi
      fi
      RI=$((RI+1))
    done

    # Running out of machines must not look like success
    if [[ $NI -lt $numNodes ]]; then
      echo "Insufficient resources available to allocate $numNodes $namePrefix" 1>&2
      return 1
    fi
  fi
}
|
||||||
|
|
||||||
|
#
# cloud_DeleteInstances
#
# Deletes all the instances listed in the `instances` array by freeing the
# colo node behind each entry. Entries are ':' separated and the second field
# is used as the address of the node to free.
#
cloud_DeleteInstances() {
  # Keep the loop variables out of the caller's scope
  declare instance IP
  for instance in "${instances[@]}"; do
    IFS=':' read -r _ IP _ _ <<< "$instance"
    colo_node_free "$IP" >/dev/null
  done
}
|
||||||
|
|
||||||
|
#
# cloud_WaitForInstanceReady [instanceName] [instanceIp] [instanceZone] [timeout]
#
# Return once the newly created VM instance is responding. This function is cloud-provider specific.
#
# instanceName - unused by this provider
# instanceIp   - address that is pinged until it answers
# instanceZone - unused by this provider
# timeout      - maximum number of seconds to keep trying
#
cloud_WaitForInstanceReady() {
  #declare instanceName="$1" # unused
  declare instanceIp="$2"
  #declare instanceZone="$3" # unused
  declare timeout="$4"

  # The address is passed as a positional argument rather than pasted into
  # the program text, so it can never be parsed as shell syntax.
  # shellcheck disable=SC2016 # $1 is expanded by the inner bash on purpose
  timeout "${timeout}"s bash -c \
    'set -o pipefail; until ping -c 3 "$1" | tr - _; do echo .; done' \
    bash "$instanceIp"
}
|
||||||
|
|
||||||
|
#
# cloud_FetchFile [instanceName] [publicIp] [remoteFile] [localFile] [zone]
#
# Copy |remoteFile| from the instance at |publicIp| to |localFile| with scp,
# logging in as the `solana` user. The instance name and zone are accepted
# but not used by this provider.
#
cloud_FetchFile() {
  declare publicIp="$2" remoteFile="$3" localFile="$4"

  # Ignore any user ssh configuration and never prompt about host keys
  declare -a scpOptions=(
    -o "StrictHostKeyChecking=no"
    -o "UserKnownHostsFile=/dev/null"
    -o "User=solana"
    -o "LogLevel=ERROR"
    -F /dev/null
  )

  scp "${scpOptions[@]}" "solana@$publicIp:$remoteFile" "$localFile"
}
|
||||||
|
|
||||||
|
#
# cloud_StatusAll
#
# Print one status line per entry of COLO_RES_AVAILABILITY: host name, public
# and private IP, status, lock holder, zone and instance name.
#
cloud_StatusAll() {
  declare HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME
  declare rowFormat="%-30s | publicIp=%-16s privateIp=%s status=%s who=%s zone=%s inst=%s\n"

  # Nothing is cached outside of the parallel mode, so scan the nodes now
  if ! $COLO_TODO_PARALLELIZE; then
    colo_load_resources
    colo_load_availability false
  fi

  for AVAIL in "${COLO_RES_AVAILABILITY[@]}"; do
    IFS=$'\v' read -r HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME <<<"$AVAIL"
    # shellcheck disable=SC2059 # the format string is a fixed local constant
    printf "$rowFormat" \
      "$HOST_NAME" "$IP" "$PRIV_IP" "$STATUS" "$LOCK_USER" "$ZONE" "$INSTNAME"
  done
}
|
|
@ -0,0 +1,277 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
# Lock file on a colo node; it is created when the node is requisitioned and
# read back when the node status is queried.
declare -r SOLANA_LOCK_FILE="/home/solana/.solana.lock"

# Directory containing this script (and the colo_nodes resource list)
__colo_here="$(dirname "${BASH_SOURCE[0]}")"

# Load colo resource specs
# One array per column of the resource list; entries with the same index
# describe the same machine. COLO_RES_N is the number of machines loaded.
export COLO_RES_N=0
export COLO_RES_HOSTNAME=()
export COLO_RES_IP=()
export COLO_RES_IP_PRIV=()
export COLO_RES_CPU_CORES=()
export COLO_RES_RAM_GB=()
export COLO_RES_STORAGE_TYPE=()
export COLO_RES_STORAGE_CAP_GB=()
export COLO_RES_ADD_STORAGE_TYPE=()
export COLO_RES_ADD_STORAGE_CAP_GB=()
export COLO_RES_MACHINE=()
# Filled by colo_load_resources like its siblings, so initialise it with them
export COLO_RES_ZONE=()

# Set to true by colo_load_resources once the arrays above are populated
export COLO_RESOURCES_LOADED=false
|
||||||
|
#
# colo_load_resources
#
# Populate the COLO_RES_* arrays from the `colo_nodes` file that lives next to
# this script. The file holds one machine per line with '|' separated fields:
#   hostname|ip|private ip|cpu cores|ram|storage type|storage capacity|
#   additional storage types|additional storage capacities|machine|zone
# The two "additional storage" fields are comma separated lists; they are
# stored with the commas replaced by vertical tabs.
#
# Machines are loaded in ascending numeric order of the machine field. The
# file is only read once; later calls return immediately.
#
colo_load_resources() {
  # Keep every scratch variable local; several of these short names (notably
  # I) are used by other functions in this file.
  declare LINE H I PI C M ST SC AST ASC G Z
  declare VT=$'\v'
  if ! ${COLO_RESOURCES_LOADED}; then
    while read -r LINE; do
      # A blank line would otherwise be recorded as a machine without address
      [[ -n $LINE ]] || continue
      IFS='|' read -r H I PI C M ST SC AST ASC G Z <<<"$LINE"
      COLO_RES_HOSTNAME+=( "$H" )
      COLO_RES_IP+=( "$I" )
      COLO_RES_IP_PRIV+=( "$PI" )
      COLO_RES_CPU_CORES+=( "$C" )
      COLO_RES_RAM_GB+=( "$M" )
      COLO_RES_STORAGE_TYPE+=( "$ST" )
      COLO_RES_STORAGE_CAP_GB+=( "$SC" )
      COLO_RES_ADD_STORAGE_TYPE+=( "${AST//,/$VT}" )
      COLO_RES_ADD_STORAGE_CAP_GB+=( "${ASC//,/$VT}" )
      COLO_RES_MACHINE+=( "$G" )
      COLO_RES_ZONE+=( "$Z" )
      COLO_RES_N=$((COLO_RES_N+1))
    done < <(sort -nt'|' -k10,10 "$__colo_here"/colo_nodes)
    COLO_RESOURCES_LOADED=true
  fi
}
|
||||||
|
|
||||||
|
# true once COLO_RES_AVAILABILITY has been filled by colo_load_availability
declare COLO_RES_AVAILABILITY_CACHED=false
declare -ax COLO_RES_AVAILABILITY

#
# colo_load_availability [useCache]
#
# Query the status of every colo node and rebuild COLO_RES_AVAILABILITY. Each
# entry holds the vertical-tab separated fields
#   hostname, ip, private ip, status, zone, lock user, instance name
# Rebuilding also resets COLO_RES_REQUISITIONED.
#
# useCache - "true" keeps the current contents, "false" forces a rescan.
#            Defaults to whether a scan has already been done.
#
colo_load_availability() {
  declare USE_CACHE=${1:-${COLO_RES_AVAILABILITY_CACHED}}
  declare LINE PRIV_IP STATUS LOCK_USER I IP HOST_NAME ZONE INSTNAME
  declare VT=$'\v'

  if $USE_CACHE; then
    return 0
  fi

  COLO_RES_AVAILABILITY=()
  COLO_RES_REQUISITIONED=()
  while read -r LINE; do
    IFS=$'\v' read -r PRIV_IP STATUS LOCK_USER INSTNAME <<< "$LINE"
    # Enrich the status report with the static data of the same machine
    I=$(colo_res_index_from_ip "$PRIV_IP")
    IP="${COLO_RES_IP[$I]}"
    HOST_NAME="${COLO_RES_HOSTNAME[$I]}"
    ZONE="${COLO_RES_ZONE[$I]}"
    COLO_RES_AVAILABILITY+=( "${HOST_NAME}${VT}${IP}${VT}${PRIV_IP}${VT}${STATUS}${VT}${ZONE}${VT}${LOCK_USER}${VT}${INSTNAME}" )
  done < <(colo_node_status_all | sort -t $'\v' -k1)
  COLO_RES_AVAILABILITY_CACHED=true
}
|
||||||
|
|
||||||
|
#
# colo_res_index_from_ip [privateIp]
#
# Print the index within the COLO_RES_* arrays of the machine whose private
# IP equals |privateIp|. Returns 1 without output when there is no such
# machine.
#
colo_res_index_from_ip() {
  declare IP="$1"
  declare i # keep the loop index out of the caller's scope
  for i in "${!COLO_RES_IP_PRIV[@]}"; do
    if [[ "$IP" = "${COLO_RES_IP_PRIV[$i]}" ]]; then
      echo "$i"
      return 0
    fi
  done
  return 1
}
|
||||||
|
|
||||||
|
#
# colo_instance_run [ip] [command]
#
# Run |command| on the node at |ip| over ssh as the `solana` user (3 second
# connect timeout, no stdin, stderr merged into stdout).
#
# Every line of the remote output is printed as the vertical-tab separated
# record "ip, exit status, line"; an empty output still yields one record
# with an empty line field. Returns the exit status of ssh.
#
colo_instance_run() {
  declare IP=$1
  declare CMD="$2"
  declare OUT LINE
  declare RC=0

  # Capturing the status with `||` keeps a failing ssh from tripping errexit
  # without touching the caller's `set -e` / `set +e` choice.
  OUT=$(ssh -l solana -o "ConnectTimeout=3" -n "$IP" "$CMD" 2>&1) || RC=$?

  while read -r LINE; do
    # %s keeps backslashes in the remote output literal
    printf '%s\v%s\v%s\n' "$IP" "$RC" "$LINE"
  done <<< "$OUT"
  return $RC
}
|
||||||
|
|
||||||
|
#
# colo_instance_run_foreach [ip...] [command]
#
# Run |command| on several nodes concurrently through colo_instance_run and
# wait until all of them have finished.
#
# The last argument is always the command. The arguments before it are the
# addresses to run on; when only the command is given it is run on the first
# COLO_RES_N entries of COLO_RES_IP_PRIV.
#
colo_instance_run_foreach() {
  # All scratch variables are local so that nothing in the caller (such as a
  # variable named I) is overwritten.
  declare CMD IP IDX
  declare -a TARGETS=()

  if [[ $# -eq 1 ]]; then
    CMD="$1"
    for ((IDX = 0; IDX < COLO_RES_N; IDX++)); do
      TARGETS+=( "${COLO_RES_IP_PRIV[$IDX]}" )
    done
  elif [[ $# -gt 1 ]]; then
    CMD="${*: -1}"
    TARGETS=( "${@:1:$#-1}" )
  fi

  for IP in "${TARGETS[@]}"; do
    colo_instance_run "$IP" "$CMD" &
  done

  wait
}
|
||||||
|
|
||||||
|
#
# colo_whoami
#
# Ask every colo node for the value of SOLANA_USER and print the first value
# reported by a node that answered successfully. A successfully answering
# node that reports a different value is reported on stderr and otherwise
# ignored.
#
colo_whoami() {
  declare ME LINE SOL_USER
  while read -r LINE; do
    declare IP RC
    IFS=$'\v' read -r IP RC SOL_USER <<< "$LINE"

    # Only nodes whose command succeeded carry a user name
    [ "$RC" -eq 0 ] || continue

    if [ -n "$ME" ] && [ "$ME" != "$SOL_USER" ]; then
      echo "Found conflicting username \"$SOL_USER\" on $IP, expected \"$ME\"" 1>&2
    else
      ME="$SOL_USER"
    fi
  done < <(colo_instance_run_foreach "[ -n \"\$SOLANA_USER\" ] && echo \"\$SOLANA_USER\"")
  echo "$ME"
}
|
||||||
|
|
||||||
|
# Result of the last colo_whoami lookup; empty until the first lookup
COLO_SOLANA_USER=""

#
# colo_get_solana_user
#
# Print the solana user name, asking the colo nodes through colo_whoami only
# when COLO_SOLANA_USER is still empty.
#
colo_get_solana_user() {
  [ -n "$COLO_SOLANA_USER" ] || COLO_SOLANA_USER=$(colo_whoami)
  echo "$COLO_SOLANA_USER"
}
|
||||||
|
|
||||||
|
#
# __colo_node_status_script
#
# Print the shell script that is run on a node to query its lock state. The
# script sources SOLANA_LOCK_FILE under a shared lock and prints the lock user
# and instance name separated by a vertical tab. Only SOLANA_LOCK_FILE is
# expanded locally; everything else is evaluated on the node.
#
__colo_node_status_script() {
  # shellcheck disable=SC2016 # the single-quoted parts must reach the node unexpanded
  printf '%s\n' \
    'exec 3>&2' \
    'exec 2>/dev/null # Suppress stderr as the next call to exec fails most of' \
    "# the time due to $SOLANA_LOCK_FILE not existing and is running from a" \
    "# subshell where normal redirection doesn't work" \
    "exec 9<\"$SOLANA_LOCK_FILE\" && flock -s 9 && . \"$SOLANA_LOCK_FILE\" && exec 9>&-" \
    'echo -e "$SOLANA_LOCK_USER\v$SOLANA_LOCK_INSTANCENAME"' \
    'exec 2>&3 # Restore stderr'
}
|
||||||
|
|
||||||
|
#
# __colo_node_status_result_normalize [record]
#
# Turn one output record of the node status script (vertical-tab separated
# "ip, exit status, lock user, instance name") into the record
# "ip, state, lock user, instance name", where state is
#   DOWN - the status command did not succeed
#   HELD - it succeeded and reported a lock user
#   FREE - it succeeded and reported no lock user
# The lock user field is only filled in for HELD nodes.
#
__colo_node_status_result_normalize() {
  declare IP RC US BY INSTNAME
  declare ST="DOWN"
  IFS=$'\v' read -r IP RC US INSTNAME <<< "$1"
  if [[ $RC == 0 ]]; then
    if [[ -n $US ]]; then
      BY="$US"
      ST="HELD"
    else
      ST="FREE"
    fi
  fi
  # Plain printf: no locale translation and no escape processing of the data
  printf '%s\v%s\v%s\v%s\n' "$IP" "$ST" "$BY" "$INSTNAME"
}
|
||||||
|
|
||||||
|
#
# colo_node_status [ip]
#
# Run the node status script on the node at |ip| and print the normalized
# status record for it.
#
colo_node_status() {
  declare IP="$1"
  declare STATUS_SCRIPT RAW_RESULT

  # A node that cannot be reached is a valid result, not an error, so the
  # exit status of the two captures is deliberately ignored.
  STATUS_SCRIPT="$(__colo_node_status_script)" || true
  RAW_RESULT="$(colo_instance_run "$IP" "$STATUS_SCRIPT")" || true

  __colo_node_status_result_normalize "$RAW_RESULT"
}
|
||||||
|
|
||||||
|
#
# colo_node_status_all
#
# Run the node status script on all colo nodes and print one normalized
# status record per line of output received.
#
colo_node_status_all() {
  declare RECORD STATUS_SCRIPT

  # The status of the capture is deliberately ignored
  STATUS_SCRIPT="$(__colo_node_status_script)" || true

  while read -r RECORD; do
    __colo_node_status_result_normalize "$RECORD"
  done < <(colo_instance_run_foreach "$STATUS_SCRIPT")
}
|
||||||
|
|
||||||
|
# TODO: As part of COLO_TODO_PARALLELIZE this list will need to be maintained
|
||||||
|
# in a lockfile to work around `cloud_CreateInstance` being called in the
|
||||||
|
# background for fullnodes
|
||||||
|
export COLO_RES_REQUISITIONED=()
|
||||||
|
#
# colo_node_requisition [ip] [instanceName] [sshPrivateKey]
#
# Claim the node at |ip| for the current user under the name |instanceName|.
#
# A script is generated locally and run on the node. If the node has no lock
# file yet, the script creates it (recording the remote SOLANA_USER and the
# instance name), installs the key pair |sshPrivateKey| / |sshPrivateKey|.pub
# and the authorized keys, writes the motd and finally creates the startup
# stamp. A node that already has a lock file, that is locked by somebody else,
# or on which SOLANA_USER is empty is not claimed.
#
# On success the index of the node is appended to COLO_RES_REQUISITIONED and
# 0 is returned; otherwise 1 is returned.
#
# NOTE(review): the contents of the private key become part of the remote
#   command line. Confirm that this exposure is acceptable.
# NOTE(review): the bare `9>&-` in the script does not close the descriptor
#   in the remote shell, so the lock stays held until that shell exits. It is
#   left unchanged because closing it earlier would shorten the lock window.
#
colo_node_requisition() {
  declare IP=$1
  declare INSTANCE_NAME=$2
  declare SSH_PRIVATE_KEY="$3"

  declare INDEX
  INDEX=$(colo_res_index_from_ip "$IP")

  # Testing the call directly (instead of inspecting $? afterwards) keeps a
  # failed requisition from aborting callers that run under errexit.
  if colo_instance_run "$IP" "$(
    cat <<EOF
if [ ! -f "$SOLANA_LOCK_FILE" ]; then
exec 9>>"$SOLANA_LOCK_FILE"
flock -x -n 9 || exit 1
[ -n "\$SOLANA_USER" ] && {
echo "export SOLANA_LOCK_USER=\$SOLANA_USER"
echo "export SOLANA_LOCK_INSTANCENAME=$INSTANCE_NAME"
echo "[ -v SSH_TTY -a -f \"\${HOME}/.solana-motd\" ] && cat \"\${HOME}/.solana-motd\" 1>&2"
} >&9 || { rm -f "$SOLANA_LOCK_FILE"; exit 1; }
9>&-
cat > /solana-scratch/id_ecdsa <<EOK
$(cat "$SSH_PRIVATE_KEY")
EOK
cat > /solana-scratch/id_ecdsa.pub <<EOK
$(cat "${SSH_PRIVATE_KEY}.pub")
EOK
chmod 0600 /solana-scratch/id_ecdsa
cat > /solana-scratch/authorized_keys <<EOAK
$("$__colo_here"/add-datacenter-solana-user-authorized_keys.sh 2> /dev/null)
$(cat "${SSH_PRIVATE_KEY}.pub")
EOAK
cp /solana-scratch/id_ecdsa "\${HOME}/.ssh/id_ecdsa"
cp /solana-scratch/id_ecdsa.pub "\${HOME}/.ssh/id_ecdsa.pub"
cp /solana-scratch/authorized_keys "\${HOME}/.ssh/authorized_keys"
cat > "\${HOME}/.solana-motd" <<EOM


$(printNetworkInfo)
$(creationInfo)
EOM

# XXX: Stamp creation MUST be last!
touch /solana-scratch/.instance-startup-complete
else
false
fi
EOF
  )"; then
    COLO_RES_REQUISITIONED+=("$INDEX")
    return 0
  fi
  return 1
}
|
||||||
|
|
||||||
|
#
# colo_node_is_requisitioned [index]
#
# Succeeds when |index| is listed in COLO_RES_REQUISITIONED, fails otherwise.
#
colo_node_is_requisitioned() {
  declare INDEX="$1"
  declare REQ
  for REQ in "${COLO_RES_REQUISITIONED[@]}"; do
    [[ $REQ -eq $INDEX ]] && return 0
  done
  return 1
}
|
||||||
|
|
||||||
|
#
# colo_machine_types_compatible [candidate] [wanted]
#
# Succeeds when a machine of type |candidate| can serve a request for a
# machine of type |wanted|, i.e. when |candidate| is numerically greater than
# or equal to |wanted|.
#
colo_machine_types_compatible() {
  declare MAYBE_MACH="$1"
  declare WANT_MACH="$2"
  # XXX: Colo machine types are just GPU count ATM...
  [[ "$MAYBE_MACH" -ge "$WANT_MACH" ]] && return 0
  return 1
}
|
||||||
|
|
||||||
|
#
# colo_node_free [ip]
#
# Release the node at |ip|.
#
# A script is generated locally and run on the node. When the lock file
# exists, can be locked and names the remote SOLANA_USER as holder, the script
# runs `git clean`, empties /solana-scratch and resets the authorized keys to
# the datacenter defaults. The remote script, and therefore this function,
# only succeeds in that case.
#
# NOTE(review): the bare `9>&-` in the script does not close the descriptor
#   in the remote shell; the lock is released when that shell exits.
#
colo_node_free() {
  declare IP=$1
  declare FREE_SCRIPT

  # The status of building the script is deliberately ignored
  FREE_SCRIPT="$(
    cat <<EOF
RC=false
if [ -f "$SOLANA_LOCK_FILE" ]; then
exec 9<>"$SOLANA_LOCK_FILE"
flock -x -n 9 || exit 1
. "$SOLANA_LOCK_FILE"
if [ "\$SOLANA_LOCK_USER" = "\$SOLANA_USER" ]; then
git clean -qxdff
rm -f /solana-scratch/* /solana-scratch/.[^.]*
cat > "\${HOME}/.ssh/authorized_keys" <<EOAK
$("$__colo_here"/add-datacenter-solana-user-authorized_keys.sh 2> /dev/null)
EOAK
RC=true
fi
9>&-
fi
\$RC
EOF
  )" || true

  colo_instance_run "$IP" "$FREE_SCRIPT"
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,7 @@
|
||||||
|
astroids|216.24.140.155|10.1.1.26|16|64|NVME|2000|||0|Denver
|
||||||
|
pacman|216.24.140.154|10.1.1.25|16|64|NVME|2000|||0|Denver
|
||||||
|
dumoulin|216.24.140.149|10.1.1.20|16|64|SATA|2000|NVME,NVME,NVME|1000,1000,1000|4|Denver
|
||||||
|
foosball|216.24.140.150|10.1.1.21|16|64|SATA|2000|NVME|1000|2|Denver
|
||||||
|
pingpong|216.24.140.151|10.1.1.22|16|64|SATA|2000|NVME|1000|2|Denver
|
||||||
|
airhockey|216.24.140.152|10.1.1.23|16|64|SATA|2000|NVME|1000|2|Denver
|
||||||
|
billiards|216.24.140.153|10.1.1.24|16|64|SATA|2000|NVME|1000|2|Denver
|
|
@ -391,4 +391,12 @@ cloud_FetchFile() {
|
||||||
cloud_CreateAndAttachPersistentDisk() {
|
cloud_CreateAndAttachPersistentDisk() {
|
||||||
echo "ERROR: cloud_CreateAndAttachPersistentDisk is not yet implemented for ec2"
|
echo "ERROR: cloud_CreateAndAttachPersistentDisk is not yet implemented for ec2"
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#
# cloud_StatusAll
#
# Not yet implemented for this cloud provider.
#
# Prints an error message and returns non-zero so that callers (and scripts
# running under `set -e`) can tell that no status information was produced.
#
cloud_StatusAll() {
  echo "ERROR: cloud_StatusAll is not yet implemented for ec2"
  return 1
}
|
||||||
|
|
|
@ -321,3 +321,11 @@ cloud_CreateAndAttachPersistentDisk() {
|
||||||
--zone "$zone" \
|
--zone "$zone" \
|
||||||
--auto-delete
|
--auto-delete
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#
# cloud_StatusAll
#
# Not yet implemented for this cloud provider.
#
# Prints an error message and returns non-zero so that callers (and scripts
# running under `set -e`) can tell that no status information was produced.
#
cloud_StatusAll() {
  echo "ERROR: cloud_StatusAll is not yet implemented for GCE"
  return 1
}
|
||||||
|
|
Loading…
Reference in New Issue