Colo: Refactor remote command dispatch for create and delete (#7092)

* Colo: Dump escaping mess in remote script templates

* Colo: Rename script templates so shellcheck can get 'em

* shellcheck and nits

* Brace all of the things

* Consistent heredoc tags

* Use bash built-in square bracketing consistently

* simplify logic
This commit is contained in:
Trent Nelson 2019-11-25 10:32:17 -07:00 committed by GitHub
parent 094c391cd7
commit d8bc828839
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 314 additions and 303 deletions

View File

@ -1,47 +0,0 @@
#!/usr/bin/env bash
# XXX: This file isn't *quite* a script. It is intended to be passed via stdin
# to a node to run the requisition logic upon node creation. Currently this is
# done in colo_node_requisition using the eval-cat trick. While this gets us
# what we want, care must be taken to ensure variable expansion happens at the
# right time. Any unescaped variable references ($X) in this file will be
# expanded by eval in colo_node_requisition. Escaped variable references (\$X)
# will be expanded upon execution on the remote node. Comments go through the
# same expansion, so keep expansion characters out of them.
if [ ! -f "$SOLANA_LOCK_FILE" ]; then
  # No lock file yet: the node is free, so claim it
  exec 9>>"$SOLANA_LOCK_FILE"
  # Braces rather than parentheses: exit has to end this script, not a subshell
  flock -x -n 9 || { echo "Failed to acquire lock!" 1>&2; exit 1; }
  if [ -n "\$SOLANA_USER" ]; then
    # Record the owner and instance name; the lock file is sourced later by
    # the status and release logic
    {
      echo "export SOLANA_LOCK_USER=\$SOLANA_USER"
      echo "export SOLANA_LOCK_INSTANCENAME=$INSTANCE_NAME"
      echo "[ -v SSH_TTY -a -f \"\${HOME}/.solana-motd\" ] && cat \"\${HOME}/.solana-motd\" 1>&2"
    } >&9
  else
    # Without an owner the claim is meaningless: remove the lock file that was
    # just created and stop instead of provisioning the node anyway
    rm "$SOLANA_LOCK_FILE"
    echo "SOLANA_USER undefined" 1>&2
    exit 1
  fi
  # Closing the descriptor releases the lock; the redirection only takes
  # effect on the current shell when applied through exec
  exec 9>&-

  # Install the instance key pair; the key material is inlined locally by eval
  cat > /solana-scratch/id_ecdsa <<EOK
$(cat "$SSH_PRIVATE_KEY")
EOK
  cat > /solana-scratch/id_ecdsa.pub <<EOK
$(cat "${SSH_PRIVATE_KEY}.pub")
EOK
  chmod 0600 /solana-scratch/id_ecdsa
  # Datacenter keys plus the instance public key may log in
  cat > /solana-scratch/authorized_keys <<EOAK
$("$__colo_here"/add-datacenter-solana-user-authorized_keys.sh 2> /dev/null)
$(cat "${SSH_PRIVATE_KEY}.pub")
EOAK
  cp /solana-scratch/id_ecdsa "\${HOME}/.ssh/id_ecdsa"
  cp /solana-scratch/id_ecdsa.pub "\${HOME}/.ssh/id_ecdsa.pub"
  cp /solana-scratch/authorized_keys "\${HOME}/.ssh/authorized_keys"

  # Message shown on interactive login by the line written to the lock file
  cat > "\${HOME}/.solana-motd" <<EOM
$(printNetworkInfo)
$(creationInfo)
EOM

  # XXX: Stamp creation MUST be last!
  touch /solana-scratch/.instance-startup-complete
else
  # Lock file exists: report who holds the node and fail
  exec 9<"$SOLANA_LOCK_FILE" && flock -s 9 && . "$SOLANA_LOCK_FILE" && exec 9>&-
  echo "${INSTANCE_NAME} candidate is already \${SOLANA_LOCK_INSTANCENAME}" 1>&2
  false
fi

View File

@ -0,0 +1,50 @@
#!/usr/bin/env bash
#
# Node-side requisition logic: claims the node by creating the lock file,
# installs the SSH credentials and MOTD, then stamps the instance as started.
# Exits non-zero when the node is already held or the claim cannot be made.
#
# These variables must be set before the main body is called
SOLANA_LOCK_FILE="${SOLANA_LOCK_FILE:?}"
INSTANCE_NAME="${INSTANCE_NAME:?}"
SSH_AUTHORIZED_KEYS="${SSH_AUTHORIZED_KEYS:?}"
SSH_PRIVATE_KEY_TEXT="${SSH_PRIVATE_KEY_TEXT:?}"
SSH_PUBLIC_KEY_TEXT="${SSH_PUBLIC_KEY_TEXT:?}"
NETWORK_INFO="${NETWORK_INFO:-"Network info unavailable"}"
CREATION_INFO="${CREATION_INFO:-"Creation info unavailable"}"

if [[ ! -f "${SOLANA_LOCK_FILE}" ]]; then
  # Validate the owner BEFORE the lock file is created; bailing out afterwards
  # would leave an empty lock file behind that blocks every later requisition
  SOLANA_USER="${SOLANA_USER:?"SOLANA_USER undefined"}"

  exec 9>>"${SOLANA_LOCK_FILE}"
  # `exit` must run in this shell; inside ( ... ) it would only leave the
  # subshell and the script would carry on without holding the lock
  if ! flock -x -n 9; then
    echo "Failed to acquire lock!" 1>&2
    exit 1
  fi

  # Record owner and instance name. The lock file is sourced by other colo
  # scripts, so every line written here must be valid bash: inside [[ ]] the
  # conjunction is `&&` (`-a` is a syntax error there).
  {
    echo "export SOLANA_LOCK_USER=${SOLANA_USER}"
    echo "export SOLANA_LOCK_INSTANCENAME=${INSTANCE_NAME}"
    echo "[[ -v SSH_TTY && -f \"${HOME}/.solana-motd\" ]] && cat \"${HOME}/.solana-motd\" 1>&2"
  } >&9
  # Closing the descriptor releases the lock
  exec 9>&-

  # Install the instance key pair
  cat > /solana-scratch/id_ecdsa <<EOF
${SSH_PRIVATE_KEY_TEXT}
EOF
  cat > /solana-scratch/id_ecdsa.pub <<EOF
${SSH_PUBLIC_KEY_TEXT}
EOF
  chmod 0600 /solana-scratch/id_ecdsa
  # Caller-supplied authorized keys plus the instance public key may log in
  cat > /solana-scratch/authorized_keys <<EOF
${SSH_AUTHORIZED_KEYS}
${SSH_PUBLIC_KEY_TEXT}
EOF
  cp /solana-scratch/id_ecdsa "${HOME}/.ssh/id_ecdsa"
  cp /solana-scratch/id_ecdsa.pub "${HOME}/.ssh/id_ecdsa.pub"
  cp /solana-scratch/authorized_keys "${HOME}/.ssh/authorized_keys"

  # Message printed on interactive login by the line written to the lock file
  cat > "${HOME}/.solana-motd" <<EOF
${NETWORK_INFO}
${CREATION_INFO}
EOF

  # Stamp creation MUST be last!
  touch /solana-scratch/.instance-startup-complete
else
  # Lock file exists: report which instance holds the node and fail
  # shellcheck disable=SC1090
  exec 9<"${SOLANA_LOCK_FILE}" && flock -s 9 && . "${SOLANA_LOCK_FILE}" && exec 9>&-
  echo "${INSTANCE_NAME} candidate is already ${SOLANA_LOCK_INSTANCENAME}" 1>&2
  false
fi

View File

@ -1,114 +0,0 @@
#!/usr/bin/env bash
# XXX: This file isn't *quite* a script. It is intended to be passed via stdin
# to a node to execute cleanup logic upon deletion. Currently this is done in
# colo_node_free using the eval-cat trick. While this gets us what we want,
# care must be taken to ensure variable expansion happens at the right time.
# Any unescaped variable references ($X) in this file will be expanded by eval
# in colo_node_free. Escaped variable references (\$X) will be expanded upon
# execution on the remote node. Comments go through the same expansion, so
# keep expansion characters out of them.

# Final exit status; only becomes true after a complete cleanup
RC=false
if [ -f "$SOLANA_LOCK_FILE" ]; then
  exec 9<>"$SOLANA_LOCK_FILE"
  # Braces rather than parentheses: exit has to end this script, not a
  # subshell, otherwise the cleanup below would run without the lock
  flock -x -n 9 || { echo "Failed to acquire lock!" 1>&2; exit 1; }
  . "$SOLANA_LOCK_FILE"
  if [ "\$SOLANA_LOCK_USER" = "\$SOLANA_USER" ]; then
    # Begin running process cleanup
    CLEANUP_PID=\$$
    CLEANUP_PIDS=()
    CLEANUP_PPIDS=()
    # Refresh the parallel pid and parent pid arrays from the ps listing,
    # ordered by pid descending
    get_pids() {
      CLEANUP_PIDS=()
      CLEANUP_PPIDS=()
      declare line maybe_ppid maybe_pid
      while read line; do
        read maybe_ppid maybe_pid _ _ _ _ _ _ _ _ <<<"\$line"
        CLEANUP_PIDS+=( \$maybe_pid )
        CLEANUP_PPIDS+=( \$maybe_ppid )
      done < <(ps jxh | sort -rn -k2,2)
    }
    CLEANUP_PROC_CHAINS=()
    # Group the listed processes into chains, each a space separated string
    # ordered child first with the topmost known ancestor last
    resolve_chains() {
      CLEANUP_PROC_CHAINS=()
      declare i j pid ppid handled n
      for i in "\${!CLEANUP_PIDS[@]}"; do
        pid=\${CLEANUP_PIDS[\$i]}
        ppid=\${CLEANUP_PPIDS[\$i]}
        handled=false
        for j in "\${!CLEANUP_PROC_CHAINS[@]}"; do
          if grep -q "^\${ppid}\\\\b" <<<"\${CLEANUP_PROC_CHAINS[\$j]}"; then
            # Our parent heads this chain: we go in front of it
            CLEANUP_PROC_CHAINS[\$j]="\$pid \${CLEANUP_PROC_CHAINS[\$j]}"
            handled=true
            break
          elif grep -q "\\\\b\${pid}\\$" <<<"\${CLEANUP_PROC_CHAINS[\$j]}"; then
            # This chain ends with us: extend it with our own parent
            CLEANUP_PROC_CHAINS[\$j]+=" \$ppid"
            handled=true
            # Don't break, we may be the parent of many proc chains
          fi
        done
        if ! \$handled; then
          n=\${#CLEANUP_PROC_CHAINS[@]}
          CLEANUP_PROC_CHAINS[\$n]="\$pid \$ppid"
        fi
      done
    }
    # Kill screen sessions
    while read SID; do
      screen -S "\$SID" -X quit
    done < <(screen -wipe 2>&1 | sed -e 's/^\s\+\([^[:space:]]\+\)\s.*/\1/;t;d')
    # Kill tmux sessions
    tmux kill-server &> /dev/null
    # Kill other processes, escalating the signal while any remain
    for SIG in INT TERM KILL; do
      get_pids
      if [[ \${#CLEANUP_PIDS[@]} -eq 0 ]]; then
        break
      else
        resolve_chains
        for p in "\${CLEANUP_PROC_CHAINS[@]}"; do
          # Leave the chain containing this very script alone
          if ! grep -q "\b\$CLEANUP_PID\b" <<<"\$p"; then
            read -a TO_KILL <<<"\$p"
            N=\${#TO_KILL[@]}
            # The last element is the chain root parent; it is never signalled
            ROOT_PPID="\${TO_KILL[\$((N-1))]}"
            if [[ 1 -ne \$ROOT_PPID ]]; then
              LAST_PID_IDX=\$((N-2))
              for I in \$(seq 0 \$LAST_PID_IDX); do
                pid="\${TO_KILL[\$I]}"
                kill -\$SIG \$pid &>/dev/null
              done
            fi
          fi
        done
        get_pids
        if [[ \${#CLEANUP_PIDS[@]} -gt 0 ]]; then
          sleep 5
        fi
      fi
    done
    # End running process cleanup
    # Begin filesystem cleanup
    git clean -qxdff
    rm -f /solana-scratch/* /solana-scratch/.[^.]*
    # Reset authorized_keys to the datacenter keys only
    cat > "\${HOME}/.ssh/authorized_keys" <<EOAK
$("$__colo_here"/add-datacenter-solana-user-authorized_keys.sh 2> /dev/null)
EOAK
    EXTERNAL_CONFIG_DIR="${SECONDARY_DISK_MOUNT_POINT}/config/"
    if [[ -d "\$EXTERNAL_CONFIG_DIR" ]]; then
      rm -rf "\$EXTERNAL_CONFIG_DIR"
    fi
    # End filesystem cleanup
    RC=true
  else
    # A single backslash before each quote is what survives eval intact
    echo "Invalid user: expected \"\$SOLANA_LOCK_USER\" got \"\$SOLANA_USER\"" 1>&2
  fi
  # Closing the descriptor releases the lock; the redirection only takes
  # effect on the current shell when applied through exec
  exec 9>&-
fi
\$RC

View File

@ -0,0 +1,112 @@
#!/usr/bin/env bash
#
# Node-side release logic: when the caller owns the lock, terminates the
# user's screen/tmux sessions and remaining processes, wipes scratch state,
# and resets authorized_keys. The exit status is that of ${RC}: true only
# after a complete cleanup.
#
# These variables must be set before the main body is called
SOLANA_LOCK_FILE="${SOLANA_LOCK_FILE:?}"
SECONDARY_DISK_MOUNT_POINT="${SECONDARY_DISK_MOUNT_POINT:?}"
SSH_AUTHORIZED_KEYS="${SSH_AUTHORIZED_KEYS:?}"

RC=false
if [[ -f "${SOLANA_LOCK_FILE}" ]]; then
  exec 9<>"${SOLANA_LOCK_FILE}"
  # `exit` must run in this shell; inside ( ... ) it would only leave the
  # subshell and the destructive cleanup below would run without the lock
  if ! flock -x -n 9; then
    echo "Failed to acquire lock!" 1>&2
    exit 1
  fi
  # Sourcing the lock file provides SOLANA_LOCK_USER
  # shellcheck disable=SC1090
  . "${SOLANA_LOCK_FILE}"
  if [[ "${SOLANA_LOCK_USER}" = "${SOLANA_USER}" ]]; then
    # Begin running process cleanup
    CLEANUP_PID=$$
    CLEANUP_PIDS=()
    CLEANUP_PPIDS=()

    # Refresh the parallel CLEANUP_PIDS / CLEANUP_PPIDS arrays from the
    # `ps jxh` listing, ordered by PID descending. The first two fields of
    # each line are read as parent PID and PID.
    get_pids() {
      CLEANUP_PIDS=()
      CLEANUP_PPIDS=()
      declare line maybe_ppid maybe_pid
      while read -r line; do
        read -r maybe_ppid maybe_pid _ _ _ _ _ _ _ _ <<<"${line}"
        CLEANUP_PIDS+=( "${maybe_pid}" )
        CLEANUP_PPIDS+=( "${maybe_ppid}" )
      done < <(ps jxh | sort -rn -k2,2)
    }

    CLEANUP_PROC_CHAINS=()

    # Group the listed processes into CLEANUP_PROC_CHAINS. Each chain is a
    # space-separated string ordered child first, with the topmost known
    # ancestor as its last element.
    resolve_chains() {
      CLEANUP_PROC_CHAINS=()
      declare i j pid ppid handled n
      for i in "${!CLEANUP_PIDS[@]}"; do
        pid=${CLEANUP_PIDS[${i}]}
        ppid=${CLEANUP_PPIDS[${i}]}
        handled=false
        for j in "${!CLEANUP_PROC_CHAINS[@]}"; do
          if grep -q "^${ppid}\b" <<<"${CLEANUP_PROC_CHAINS[${j}]}"; then
            # Our parent heads this chain: we go in front of it
            CLEANUP_PROC_CHAINS[${j}]="${pid} ${CLEANUP_PROC_CHAINS[${j}]}"
            handled=true
            break
          elif grep -q "\b${pid}\$" <<<"${CLEANUP_PROC_CHAINS[${j}]}"; then
            # This chain ends with us: extend it with our own parent
            CLEANUP_PROC_CHAINS[${j}]+=" ${ppid}"
            handled=true
            # Don't break, we may be the parent of many proc chains
          fi
        done
        if ! ${handled}; then
          n=${#CLEANUP_PROC_CHAINS[@]}
          CLEANUP_PROC_CHAINS[${n}]="${pid} ${ppid}"
        fi
      done
    }

    # Kill screen sessions
    while read -r SID; do
      screen -S "${SID}" -X quit
    done < <(screen -wipe 2>&1 | sed -e 's/^\s\+\([^[:space:]]\+\)\s.*/\1/;t;d')
    # Kill tmux sessions
    tmux kill-server &> /dev/null
    # Kill other processes, escalating the signal while any remain
    for SIG in INT TERM KILL; do
      get_pids
      if [[ ${#CLEANUP_PIDS[@]} -eq 0 ]]; then
        break
      else
        resolve_chains
        for p in "${CLEANUP_PROC_CHAINS[@]}"; do
          # Leave the chain containing this very script alone
          if ! grep -q "\b${CLEANUP_PID}\b" <<<"${p}"; then
            read -ra TO_KILL <<<"${p}"
            N=${#TO_KILL[@]}
            # The last element is the chain's root parent; it is never
            # signalled, and chains rooted at PID 1 are skipped entirely
            ROOT_PPID="${TO_KILL[$((N-1))]}"
            if [[ 1 -ne ${ROOT_PPID} ]]; then
              LAST_PID_IDX=$((N-2))
              for I in $(seq 0 "${LAST_PID_IDX}"); do
                pid="${TO_KILL[${I}]}"
                kill "-${SIG}" "${pid}" &>/dev/null
              done
            fi
          fi
        done
        # Give survivors a moment before escalating to the next signal
        get_pids
        if [[ ${#CLEANUP_PIDS[@]} -gt 0 ]]; then
          sleep 5
        fi
      fi
    done
    # End running process cleanup

    # Begin filesystem cleanup
    git clean -qxdff
    rm -f /solana-scratch/* /solana-scratch/.[^.]*
    # Reset authorized_keys to the caller-supplied set
    cat > "${HOME}/.ssh/authorized_keys" <<EOAK
${SSH_AUTHORIZED_KEYS}
EOAK
    EXTERNAL_CONFIG_DIR="${SECONDARY_DISK_MOUNT_POINT}/config/"
    if [[ -d "${EXTERNAL_CONFIG_DIR}" ]]; then
      rm -rf "${EXTERNAL_CONFIG_DIR}"
    fi
    # End filesystem cleanup
    RC=true
  else
    echo "Invalid user: expected \"${SOLANA_LOCK_USER}\" got \"${SOLANA_USER}\"" 1>&2
  fi
  # Closing the descriptor releases the lock
  exec 9>&-
fi
${RC}

View File

@ -38,26 +38,26 @@ cloud_RestartPreemptedInstances() {
# #
__cloud_FindInstances() { __cloud_FindInstances() {
declare HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME INSTANCES_TEXT declare HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME INSTANCES_TEXT
declare filter=$1 declare filter=${1}
instances=() instances=()
if ! $COLO_PARALLELIZE; then if ! ${COLO_PARALLELIZE}; then
colo_load_resources colo_load_resources
colo_load_availability false colo_load_availability false
fi fi
INSTANCES_TEXT="$( INSTANCES_TEXT="$(
for AVAIL in "${COLO_RES_AVAILABILITY[@]}"; do for AVAIL in "${COLO_RES_AVAILABILITY[@]}"; do
IFS=$'\v' read -r HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME <<<"$AVAIL" IFS=$'\v' read -r HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME <<<"${AVAIL}"
if [[ $INSTNAME =~ $filter ]]; then if [[ ${INSTNAME} =~ ${filter} ]]; then
printf "%-40s | publicIp=%-16s privateIp=%s zone=%s\n" "$INSTNAME" "$IP" "$PRIV_IP" "$ZONE" 1>&2 printf "%-40s | publicIp=%-16s privateIp=%s zone=%s\n" "${INSTNAME}" "${IP}" "${PRIV_IP}" "${ZONE}" 1>&2
echo -e "${INSTNAME}:${IP}:${PRIV_IP}:$ZONE" echo -e "${INSTNAME}:${IP}:${PRIV_IP}:${ZONE}"
fi fi
done | sort -t $'\v' -k1 done | sort -t $'\v' -k1
)" )"
if [[ -n "$INSTANCES_TEXT" ]]; then if [[ -n "${INSTANCES_TEXT}" ]]; then
while read -r LINE; do while read -r LINE; do
instances+=( "$LINE" ) instances+=( "${LINE}" )
done <<<"$INSTANCES_TEXT" done <<<"${INSTANCES_TEXT}"
fi fi
} }
@ -77,7 +77,7 @@ __cloud_FindInstances() {
# #
cloud_FindInstances() { cloud_FindInstances() {
declare filter="^${1}.*" declare filter="^${1}.*"
__cloud_FindInstances "$filter" __cloud_FindInstances "${filter}"
} }
# #
@ -96,7 +96,7 @@ cloud_FindInstances() {
# #
cloud_FindInstance() { cloud_FindInstance() {
declare name="^${1}$" declare name="^${1}$"
__cloud_FindInstances "$name" __cloud_FindInstances "${name}"
} }
# #
@ -108,10 +108,10 @@ cloud_FindInstance() {
# #
# This function will be called before |cloud_CreateInstances| # This function will be called before |cloud_CreateInstances|
cloud_Initialize() { cloud_Initialize() {
# networkName=$1 # unused # networkName=${1} # unused
# zone=$2 #unused # zone=${2} #unused
colo_load_resources colo_load_resources
if $COLO_PARALLELIZE; then if ${COLO_PARALLELIZE}; then
colo_load_availability colo_load_availability
fi fi
} }
@ -136,7 +136,7 @@ cloud_Initialize() {
# startupScript - Optional startup script to execute when the instance boots # startupScript - Optional startup script to execute when the instance boots
# address - Optional name of the GCE static IP address to attach to the # address - Optional name of the GCE static IP address to attach to the
# instance. Requires that |numNodes| = 1 and that addressName # instance. Requires that |numNodes| = 1 and that addressName
# has been provisioned in the GCE region that is hosting `$zone` # has been provisioned in the GCE region that is hosting `${zone}`
# bootDiskType - Optional specify SSD or HDD boot disk # bootDiskType - Optional specify SSD or HDD boot disk
# additionalDiskSize - Optional specify size of additional storage volume # additionalDiskSize - Optional specify size of additional storage volume
# preemptible - Optionally request a preemptible instance ("true") # preemptible - Optionally request a preemptible instance ("true")
@ -144,76 +144,76 @@ cloud_Initialize() {
# Tip: use cloud_FindInstances to locate the instances once this function # Tip: use cloud_FindInstances to locate the instances once this function
# returns # returns
cloud_CreateInstances() { cloud_CreateInstances() {
#declare networkName="$1" # unused #declare networkName="${1}" # unused
declare namePrefix="$2" declare namePrefix="${2}"
declare numNodes="$3" declare numNodes="${3}"
#declare enableGpu="$4" # unused #declare enableGpu="${4}" # unused
declare machineType="$5" declare machineType="${5}"
# declare zone="$6" # unused # declare zone="${6}" # unused
#declare optionalBootDiskSize="$7" # unused #declare optionalBootDiskSize="${7}" # unused
#declare optionalStartupScript="$8" # unused #declare optionalStartupScript="${8}" # unused
#declare optionalAddress="$9" # unused #declare optionalAddress="${9}" # unused
#declare optionalBootDiskType="${10}" # unused #declare optionalBootDiskType="${10}" # unused
#declare optionalAdditionalDiskSize="${11}" # unused #declare optionalAdditionalDiskSize="${11}" # unused
#declare optionalPreemptible="${12}" # unused #declare optionalPreemptible="${12}" # unused
declare sshPrivateKey="${13}" declare sshPrivateKey="${13}"
declare -a nodes declare -a nodes
if [[ $numNodes = 1 ]]; then if [[ ${numNodes} = 1 ]]; then
nodes=("$namePrefix") nodes=("${namePrefix}")
else else
for node in $(seq -f "${namePrefix}%0${#numNodes}g" 1 "$numNodes"); do for node in $(seq -f "${namePrefix}%0${#numNodes}g" 1 "${numNodes}"); do
nodes+=("$node") nodes+=("${node}")
done done
fi fi
if $COLO_PARALLELIZE; then if ${COLO_PARALLELIZE}; then
declare HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME INDEX RES LINE declare HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME INDEX RES LINE
declare -a AVAILABLE declare -a AVAILABLE
declare AVAILABLE_TEXT declare AVAILABLE_TEXT
AVAILABLE_TEXT="$( AVAILABLE_TEXT="$(
for RES in "${COLO_RES_AVAILABILITY[@]}"; do for RES in "${COLO_RES_AVAILABILITY[@]}"; do
IFS=$'\v' read -r HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME <<<"$RES" IFS=$'\v' read -r HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME <<<"${RES}"
if [[ "FREE" = "$STATUS" ]]; then if [[ "FREE" = "${STATUS}" ]]; then
INDEX=$(colo_res_index_from_ip "$IP") INDEX=$(colo_res_index_from_ip "${IP}")
RES_MACH="${COLO_RES_MACHINE[$INDEX]}" RES_MACH="${COLO_RES_MACHINE[${INDEX}]}"
if colo_machine_types_compatible "$RES_MACH" "$machineType"; then if colo_machine_types_compatible "${RES_MACH}" "${machineType}"; then
if ! colo_node_is_requisitioned "$INDEX" "${COLO_RES_REQUISITIONED[*]}"; then if ! colo_node_is_requisitioned "${INDEX}" "${COLO_RES_REQUISITIONED[*]}"; then
echo -e "$RES_MACH\v$IP" echo -e "${RES_MACH}\v${IP}"
fi fi
fi fi
fi fi
done | sort -nt $'\v' -k1,1 done | sort -nt $'\v' -k1,1
)" )"
if [[ -n "$AVAILABLE_TEXT" ]]; then if [[ -n "${AVAILABLE_TEXT}" ]]; then
while read -r LINE; do while read -r LINE; do
AVAILABLE+=("$LINE") AVAILABLE+=("${LINE}")
done <<<"$AVAILABLE_TEXT" done <<<"${AVAILABLE_TEXT}"
fi fi
if [[ ${#AVAILABLE[@]} -lt $numNodes ]]; then if [[ ${#AVAILABLE[@]} -lt ${numNodes} ]]; then
echo "Insufficient resources available to allocate $numNodes $namePrefix" 1>&2 echo "Insufficient resources available to allocate ${numNodes} ${namePrefix}" 1>&2
exit 1 exit 1
fi fi
declare node declare node
declare AI=0 declare AI=0
for node in "${nodes[@]}"; do for node in "${nodes[@]}"; do
IFS=$'\v' read -r _ IP <<<"${AVAILABLE[$AI]}" IFS=$'\v' read -r _ IP <<<"${AVAILABLE[${AI}]}"
colo_node_requisition "$IP" "$node" >/dev/null colo_node_requisition "${IP}" "${node}" >/dev/null
AI=$((AI+1)) AI=$((AI+1))
done done
else else
declare RES_MACH node declare RES_MACH node
declare RI=0 declare RI=0
declare NI=0 declare NI=0
while [[ $NI -lt $numNodes && $RI -lt $COLO_RES_N ]]; do while [[ ${NI} -lt ${numNodes} && ${RI} -lt ${COLO_RES_N} ]]; do
node="${nodes[$NI]}" node="${nodes[${NI}]}"
RES_MACH="${COLO_RES_MACHINE[$RI]}" RES_MACH="${COLO_RES_MACHINE[${RI}]}"
IP="${COLO_RES_IP[$RI]}" IP="${COLO_RES_IP[${RI}]}"
if colo_machine_types_compatible "$RES_MACH" "$machineType"; then if colo_machine_types_compatible "${RES_MACH}" "${machineType}"; then
if colo_node_requisition "$IP" "$node" "$sshPrivateKey" >/dev/null; then if colo_node_requisition "${IP}" "${node}" "${sshPrivateKey}" >/dev/null; then
NI=$((NI+1)) NI=$((NI+1))
fi fi
fi fi
@ -230,8 +230,8 @@ cloud_CreateInstances() {
cloud_DeleteInstances() { cloud_DeleteInstances() {
declare _ IP _ _ declare _ IP _ _
for instance in "${instances[@]}"; do for instance in "${instances[@]}"; do
IFS=':' read -r _ IP _ _ <<< "$instance" IFS=':' read -r _ IP _ _ <<< "${instance}"
colo_node_free "$IP" >/dev/null colo_node_free "${IP}" >/dev/null
done done
} }
@ -241,9 +241,9 @@ cloud_DeleteInstances() {
# Return once the newly created VM instance is responding. This function is cloud-provider specific. # Return once the newly created VM instance is responding. This function is cloud-provider specific.
# #
cloud_WaitForInstanceReady() { cloud_WaitForInstanceReady() {
#declare instanceName="$1" # unused #declare instanceName="${1}" # unused
#declare instanceIp="$2" # unused #declare instanceIp="${2}" # unused
#declare timeout="$4" # unused #declare timeout="${4}" # unused
true true
} }
@ -255,28 +255,28 @@ cloud_WaitForInstanceReady() {
# mechanism to fetch the file # mechanism to fetch the file
# #
cloud_FetchFile() { cloud_FetchFile() {
#declare instanceName="$1" # unused #declare instanceName="${1}" # unused
declare publicIp="$2" declare publicIp="${2}"
declare remoteFile="$3" declare remoteFile="${3}"
declare localFile="$4" declare localFile="${4}"
#declare zone="$5" # unused #declare zone="${5}" # unused
scp \ scp \
-o "StrictHostKeyChecking=no" \ -o "StrictHostKeyChecking=no" \
-o "UserKnownHostsFile=/dev/null" \ -o "UserKnownHostsFile=/dev/null" \
-o "User=solana" \ -o "User=solana" \
-o "LogLevel=ERROR" \ -o "LogLevel=ERROR" \
-F /dev/null \ -F /dev/null \
"solana@$publicIp:$remoteFile" "$localFile" "solana@${publicIp}:${remoteFile}" "${localFile}"
} }
cloud_StatusAll() { cloud_StatusAll() {
declare HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME declare HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME
if ! $COLO_PARALLELIZE; then if ! ${COLO_PARALLELIZE}; then
colo_load_resources colo_load_resources
colo_load_availability false colo_load_availability false
fi fi
for AVAIL in "${COLO_RES_AVAILABILITY[@]}"; do for AVAIL in "${COLO_RES_AVAILABILITY[@]}"; do
IFS=$'\v' read -r HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME <<<"$AVAIL" IFS=$'\v' read -r HOST_NAME IP PRIV_IP STATUS ZONE LOCK_USER INSTNAME <<<"${AVAIL}"
printf "%-30s | publicIp=%-16s privateIp=%s status=%s who=%s zone=%s inst=%s\n" "$HOST_NAME" "$IP" "$PRIV_IP" "$STATUS" "$LOCK_USER" "$ZONE" "$INSTNAME" printf "%-30s | publicIp=%-16s privateIp=%s status=%s who=%s zone=%s inst=%s\n" "${HOST_NAME}" "${IP}" "${PRIV_IP}" "${STATUS}" "${LOCK_USER}" "${ZONE}" "${INSTNAME}"
done done
} }

View File

@ -5,7 +5,7 @@ declare -r SOLANA_LOCK_FILE="/home/solana/.solana.lock"
__colo_here="$(dirname "${BASH_SOURCE[0]}")" __colo_here="$(dirname "${BASH_SOURCE[0]}")"
# shellcheck source=net/common.sh # shellcheck source=net/common.sh
source "$__colo_here"/../common.sh source "${__colo_here}"/../common.sh
# Load colo resource specs # Load colo resource specs
export COLO_RES_N=0 export COLO_RES_N=0
@ -24,20 +24,20 @@ export COLO_RESOURCES_LOADED=false
colo_load_resources() { colo_load_resources() {
if ! ${COLO_RESOURCES_LOADED}; then if ! ${COLO_RESOURCES_LOADED}; then
while read -r LINE; do while read -r LINE; do
IFS='|' read -r H I PI C M ST SC AST ASC G Z <<<"$LINE" IFS='|' read -r H I PI C M ST SC AST ASC G Z <<<"${LINE}"
COLO_RES_HOSTNAME+=( "$H" ) COLO_RES_HOSTNAME+=( "${H}" )
COLO_RES_IP+=( "$I" ) COLO_RES_IP+=( "${I}" )
COLO_RES_IP_PRIV+=( "$PI" ) COLO_RES_IP_PRIV+=( "${PI}" )
COLO_RES_CPU_CORES+=( "$C" ) COLO_RES_CPU_CORES+=( "${C}" )
COLO_RES_RAM_GB+=( "$M" ) COLO_RES_RAM_GB+=( "${M}" )
COLO_RES_STORAGE_TYPE+=( "$ST" ) COLO_RES_STORAGE_TYPE+=( "${ST}" )
COLO_RES_STORAGE_CAP_GB+=( "$SC" ) COLO_RES_STORAGE_CAP_GB+=( "${SC}" )
COLO_RES_ADD_STORAGE_TYPE+=( "$(tr ',' $'\v' <<<"$AST")" ) COLO_RES_ADD_STORAGE_TYPE+=( "$(tr ',' $'\v' <<<"${AST}")" )
COLO_RES_ADD_STORAGE_CAP_GB+=( "$(tr ',' $'\v' <<<"$ASC")" ) COLO_RES_ADD_STORAGE_CAP_GB+=( "$(tr ',' $'\v' <<<"${ASC}")" )
COLO_RES_MACHINE+=( "$G" ) COLO_RES_MACHINE+=( "${G}" )
COLO_RES_ZONE+=( "$Z" ) COLO_RES_ZONE+=( "${Z}" )
COLO_RES_N=$((COLO_RES_N+1)) COLO_RES_N=$((COLO_RES_N+1))
done < <(sort -nt'|' -k10,10 "$__colo_here"/colo_nodes) done < <(sort -nt'|' -k10,10 "${__colo_here}"/colo_nodes)
COLO_RESOURCES_LOADED=true COLO_RESOURCES_LOADED=true
fi fi
} }
@ -47,26 +47,26 @@ declare -ax COLO_RES_AVAILABILITY
colo_load_availability() { colo_load_availability() {
declare USE_CACHE=${1:-${COLO_RES_AVAILABILITY_CACHED}} declare USE_CACHE=${1:-${COLO_RES_AVAILABILITY_CACHED}}
declare LINE PRIV_IP STATUS LOCK_USER I IP HOST_NAME ZONE INSTNAME declare LINE PRIV_IP STATUS LOCK_USER I IP HOST_NAME ZONE INSTNAME
if ! $USE_CACHE; then if ! ${USE_CACHE}; then
COLO_RES_AVAILABILITY=() COLO_RES_AVAILABILITY=()
COLO_RES_REQUISITIONED=() COLO_RES_REQUISITIONED=()
while read -r LINE; do while read -r LINE; do
IFS=$'\v' read -r IP STATUS LOCK_USER INSTNAME <<< "$LINE" IFS=$'\v' read -r IP STATUS LOCK_USER INSTNAME <<< "${LINE}"
I=$(colo_res_index_from_ip "$IP") I=$(colo_res_index_from_ip "${IP}")
PRIV_IP="${COLO_RES_IP_PRIV[$I]}" PRIV_IP="${COLO_RES_IP_PRIV[${I}]}"
HOST_NAME="${COLO_RES_HOSTNAME[$I]}" HOST_NAME="${COLO_RES_HOSTNAME[${I}]}"
ZONE="${COLO_RES_ZONE[$I]}" ZONE="${COLO_RES_ZONE[${I}]}"
COLO_RES_AVAILABILITY+=( "$(echo -e "$HOST_NAME\v$IP\v$PRIV_IP\v$STATUS\v$ZONE\v$LOCK_USER\v$INSTNAME")" ) COLO_RES_AVAILABILITY+=( "$(echo -e "${HOST_NAME}\v${IP}\v${PRIV_IP}\v${STATUS}\v${ZONE}\v${LOCK_USER}\v${INSTNAME}")" )
done < <(colo_node_status_all | sort -t $'\v' -k1) done < <(colo_node_status_all | sort -t $'\v' -k1)
COLO_RES_AVAILABILITY_CACHED=true COLO_RES_AVAILABILITY_CACHED=true
fi fi
} }
colo_res_index_from_ip() { colo_res_index_from_ip() {
declare IP="$1" declare IP="${1}"
for i in "${!COLO_RES_IP_PRIV[@]}"; do for i in "${!COLO_RES_IP_PRIV[@]}"; do
if [[ "$IP" = "${COLO_RES_IP[$i]}" || "$IP" = "${COLO_RES_IP_PRIV[$i]}" ]]; then if [[ "${IP}" = "${COLO_RES_IP[${i}]}" || "${IP}" = "${COLO_RES_IP_PRIV[${i}]}" ]]; then
echo "$i" echo "${i}"
return 0 return 0
fi fi
done done
@ -74,36 +74,36 @@ colo_res_index_from_ip() {
} }
colo_instance_run() { colo_instance_run() {
declare IP=$1 declare IP=${1}
declare CMD="$2" declare CMD="${2}"
declare OUT declare OUT
set +e set +e
OUT=$(ssh -l solana -o "StrictHostKeyChecking=no" -o "ConnectTimeout=3" -n "$IP" "$CMD" 2>&1) OUT=$(ssh -l solana -o "StrictHostKeyChecking=no" -o "ConnectTimeout=3" -n "${IP}" "${CMD}" 2>&1)
declare RC=$? declare RC=$?
set -e set -e
while read -r LINE; do while read -r LINE; do
echo -e "$IP\v$RC\v$LINE" echo -e "${IP}\v${RC}\v${LINE}"
if [[ "$RC" -ne 0 ]]; then if [[ "${RC}" -ne 0 ]]; then
echo "IP(${IP}) Err(${RC}) LINE(${LINE})" 1>&2 echo "IP(${IP}) Err(${RC}) LINE(${LINE})" 1>&2
fi fi
done < <(tr -d $'\r' <<<"$OUT") done < <(tr -d $'\r' <<<"${OUT}")
return $RC return ${RC}
} }
colo_instance_run_foreach() { colo_instance_run_foreach() {
declare CMD declare CMD
if test 1 -eq $#; then if test 1 -eq $#; then
CMD="$1" CMD="${1}"
declare IPS=() declare IPS=()
for I in $(seq 0 $((COLO_RES_N-1))); do for I in $(seq 0 $((COLO_RES_N-1))); do
IPS+=( "${COLO_RES_IP[$I]}" ) IPS+=( "${COLO_RES_IP[${I}]}" )
done done
set "${IPS[@]}" "$CMD" set "${IPS[@]}" "${CMD}"
fi fi
CMD="${*: -1}" CMD="${*: -1}"
for I in $(seq 0 $(($#-2))); do for I in $(seq 0 $(($#-2))); do
declare IP="$1" declare IP="${1}"
colo_instance_run "$IP" "$CMD" & colo_instance_run "${IP}" "${CMD}" &
shift shift
done done
@ -114,35 +114,35 @@ colo_whoami() {
declare ME LINE SOL_USER EOL declare ME LINE SOL_USER EOL
while read -r LINE; do while read -r LINE; do
declare IP RC declare IP RC
IFS=$'\v' read -r IP RC SOL_USER EOL <<< "$LINE" IFS=$'\v' read -r IP RC SOL_USER EOL <<< "${LINE}"
if [ "$RC" -eq 0 ]; then if [ "${RC}" -eq 0 ]; then
[[ "$EOL" = "EOL" ]] || echo "${FUNCNAME[0]}: Unexpected input \"$LINE\"" 1>&2 [[ "${EOL}" = "EOL" ]] || echo "${FUNCNAME[0]}: Unexpected input \"${LINE}\"" 1>&2
if [ -z "$ME" ] || [ "$ME" = "$SOL_USER" ]; then if [ -z "${ME}" ] || [ "${ME}" = "${SOL_USER}" ]; then
ME="$SOL_USER" ME="${SOL_USER}"
else else
echo "Found conflicting username \"$SOL_USER\" on $IP, expected \"$ME\"" 1>&2 echo "Found conflicting username \"${SOL_USER}\" on ${IP}, expected \"${ME}\"" 1>&2
fi fi
fi fi
done < <(colo_instance_run_foreach "[ -n \"\$SOLANA_USER\" ] && echo -e \"\$SOLANA_USER\\vEOL\"") done < <(colo_instance_run_foreach "[ -n \"\${SOLANA_USER}\" ] && echo -e \"\${SOLANA_USER}\\vEOL\"")
echo "$ME" echo "${ME}"
} }
COLO_SOLANA_USER="" COLO_SOLANA_USER=""
colo_get_solana_user() { colo_get_solana_user() {
if [ -z "$COLO_SOLANA_USER" ]; then if [ -z "${COLO_SOLANA_USER}" ]; then
COLO_SOLANA_USER=$(colo_whoami) COLO_SOLANA_USER=$(colo_whoami)
fi fi
echo "$COLO_SOLANA_USER" echo "${COLO_SOLANA_USER}"
} }
__colo_node_status_script() { __colo_node_status_script() {
cat <<EOF cat <<EOF
exec 3>&2 exec 3>&2
exec 2>/dev/null # Suppress stderr as the next call to exec fails most of exec 2>/dev/null # Suppress stderr as the next call to exec fails most of
# the time due to $SOLANA_LOCK_FILE not existing and is running from a # the time due to ${SOLANA_LOCK_FILE} not existing and is running from a
# subshell where normal redirection doesn't work # subshell where normal redirection doesn't work
exec 9<"$SOLANA_LOCK_FILE" && flock -s 9 && . "$SOLANA_LOCK_FILE" && exec 9>&- exec 9<"${SOLANA_LOCK_FILE}" && flock -s 9 && . "${SOLANA_LOCK_FILE}" && exec 9>&-
echo -e "\$SOLANA_LOCK_USER\\v\$SOLANA_LOCK_INSTANCENAME\\vEOL" echo -e "\${SOLANA_LOCK_USER}\\v\${SOLANA_LOCK_INSTANCENAME}\\vEOL"
exec 2>&3 # Restore stderr exec 2>&3 # Restore stderr
EOF EOF
} }
@ -150,31 +150,31 @@ EOF
__colo_node_status_result_normalize() { __colo_node_status_result_normalize() {
declare IP RC US BY INSTNAME EOL declare IP RC US BY INSTNAME EOL
declare ST="DOWN" declare ST="DOWN"
IFS=$'\v' read -r IP RC US INSTNAME EOL <<< "$1" IFS=$'\v' read -r IP RC US INSTNAME EOL <<< "${1}"
if [ "$RC" -eq 0 ]; then if [ "${RC}" -eq 0 ]; then
[[ "$EOL" = "EOL" ]] || echo "${FUNCNAME[0]}: Unexpected input \"$1\"" 1>&2 [[ "${EOL}" = "EOL" ]] || echo "${FUNCNAME[0]}: Unexpected input \"${1}\"" 1>&2
if [ -n "$US" ]; then if [ -n "${US}" ]; then
BY="$US" BY="${US}"
ST="HELD" ST="HELD"
if [[ -z "$INSTNAME" ]]; then if [[ -z "${INSTNAME}" ]]; then
return return
fi fi
else else
ST="FREE" ST="FREE"
fi fi
fi fi
echo -e $"$IP\v$ST\v$BY\v$INSTNAME" echo -e $"${IP}\v${ST}\v${BY}\v${INSTNAME}"
} }
colo_node_status() { colo_node_status() {
declare IP="$1" declare IP="${1}"
__colo_node_status_result_normalize "$(colo_instance_run "$IP" "$(__colo_node_status_script)")" __colo_node_status_result_normalize "$(colo_instance_run "${IP}" "$(__colo_node_status_script)")"
} }
colo_node_status_all() { colo_node_status_all() {
declare LINE declare LINE
while read -r LINE; do while read -r LINE; do
__colo_node_status_result_normalize "$LINE" __colo_node_status_result_normalize "${LINE}"
done < <(colo_instance_run_foreach "$(__colo_node_status_script)") done < <(colo_instance_run_foreach "$(__colo_node_status_script)")
} }
@ -183,58 +183,68 @@ colo_node_status_all() {
# for validators # for validators
export COLO_RES_REQUISITIONED=() export COLO_RES_REQUISITIONED=()
colo_node_requisition() { colo_node_requisition() {
declare IP=$1 declare IP=${1}
# shellcheck disable=SC2034 # shellcheck disable=SC2034
declare INSTANCE_NAME=$2 declare INSTANCE_NAME=${2}
# shellcheck disable=SC2034 # shellcheck disable=SC2034
declare SSH_PRIVATE_KEY="$3" declare SSH_PRIVATE_KEY="${3}"
declare INDEX declare INDEX
INDEX=$(colo_res_index_from_ip "$IP") INDEX=$(colo_res_index_from_ip "${IP}")
declare RC=false declare RC=false
colo_instance_run "$IP" "$(eval "cat <<EOF colo_instance_run "${IP}" "$(cat <<EOF
$(<"$__colo_here"/colo-node-onacquire-sh) SOLANA_LOCK_FILE="${SOLANA_LOCK_FILE}"
INSTANCE_NAME="${INSTANCE_NAME}"
SSH_AUTHORIZED_KEYS='$("${__colo_here}"/add-datacenter-solana-user-authorized_keys.sh 2> /dev/null)'
SSH_PRIVATE_KEY_TEXT="$(<"${SSH_PRIVATE_KEY}")"
SSH_PUBLIC_KEY_TEXT="$(<"${SSH_PRIVATE_KEY}.pub")"
NETWORK_INFO="$(printNetworkInfo 2>/dev/null)"
CREATION_INFO="$(creationInfo 2>/dev/null)"
$(<"${__colo_here}"/colo-node-onacquire.sh)
EOF EOF
")" )"
# shellcheck disable=SC2181 # shellcheck disable=SC2181
if [[ 0 -eq $? ]]; then if [[ 0 -eq $? ]]; then
COLO_RES_REQUISITIONED+=("$INDEX") COLO_RES_REQUISITIONED+=("${INDEX}")
RC=true RC=true
fi fi
$RC ${RC}
} }
colo_node_is_requisitioned() { colo_node_is_requisitioned() {
declare INDEX="$1" declare INDEX="${1}"
declare REQ declare REQ
declare RC=false declare RC=false
for REQ in "${COLO_RES_REQUISITIONED[@]}"; do for REQ in "${COLO_RES_REQUISITIONED[@]}"; do
if [[ $REQ -eq $INDEX ]]; then if [[ ${REQ} -eq ${INDEX} ]]; then
RC=true RC=true
break break
fi fi
done done
$RC ${RC}
} }
colo_machine_types_compatible() { colo_machine_types_compatible() {
declare MAYBE_MACH="$1" declare MAYBE_MACH="${1}"
declare WANT_MACH="$2" declare WANT_MACH="${2}"
declare COMPATIBLE=false declare COMPATIBLE=false
# Colo machine types are just GPU count ATM... # Colo machine types are just GPU count ATM...
if [[ "$MAYBE_MACH" -ge "$WANT_MACH" ]]; then if [[ "${MAYBE_MACH}" -ge "${WANT_MACH}" ]]; then
COMPATIBLE=true COMPATIBLE=true
fi fi
$COMPATIBLE ${COMPATIBLE}
} }
colo_node_free() { colo_node_free() {
declare IP=$1 declare IP=${1}
colo_instance_run "$IP" "$(eval "cat <<EOF colo_instance_run "${IP}" "$(cat <<EOF
$(<"$__colo_here"/colo-node-onfree-sh) SOLANA_LOCK_FILE="${SOLANA_LOCK_FILE}"
SECONDARY_DISK_MOUNT_POINT="${SECONDARY_DISK_MOUNT_POINT}"
SSH_AUTHORIZED_KEYS='$("${__colo_here}"/add-datacenter-solana-user-authorized_keys.sh 2> /dev/null)'
$(<"${__colo_here}"/colo-node-onfree.sh)
EOF EOF
")" )"
} }