# solana/net/scripts/azure-provider.sh
#
# 370 lines
# 10 KiB
# Bash
# Executable File

# |source| this file
#
# Utilities for working with Azure instances
#
#
# cloud_DefaultZone
#
# Print the default zone (Azure location) for this provider.
#
cloud_DefaultZone() {
  printf '%s\n' "westus"
}
#
# cloud_DefaultCustomMemoryGB
#
# Not implemented for this provider: prints nothing and succeeds.
#
cloud_DefaultCustomMemoryGB() {
  return 0 # Not implemented
}
#
# cloud_RestartPreemptedInstances
#
# Not implemented for this provider: prints nothing and succeeds.
#
cloud_RestartPreemptedInstances() {
  return 0 # Not implemented
}
#
# cloud_GetConfigValueFromInstanceName [instanceName] [key]
#
# Look up a piece of configuration information about an instance.
# Provide the exact name of an instance and the configuration key; the
# corresponding value is stored in the global `config_value` variable.
#
# instanceName - exact name of the instance to look up
# key          - key to select from the |az vm list -d| output
#
# The exit status is that of the underlying |az vm list| invocation.
#
# example:
#   This stores the resource group of the instance named some-instance-name
#   in `config_value`:
#   $ cloud_GetConfigValueFromInstanceName some-instance-name resourceGroup
#
cloud_GetConfigValueFromInstanceName() {
  # Scratch variables are function-local so that sourcing scripts which use
  # their own `query` or `key` variables are not clobbered.
  declare query="[?name=='$1']"
  declare key="[$2]"

  # Assigned on its own line (not via `declare`) so the status of `az` is
  # what the function returns.
  config_value=$(az vm list -d -o tsv --query "$query.$key")
}
#
# cloud_GetResourceGroupFromInstanceName [instanceName]
#
# Query |az vm list| for the resource group of the instance whose name is
# exactly instanceName and store the result in the global `resourceGroup`.
#
cloud_GetResourceGroupFromInstanceName() {
  declare -a listArgs=(vm list -o tsv --query "[?name=='$1'].[resourceGroup]")
  resourceGroup=$(az "${listArgs[@]}")
}
#
# cloud_GetIdFromInstanceName [instanceName]
#
# Query |az vm list| for the `id` of the instance whose name is exactly
# instanceName and store the result in the global `id`.
#
cloud_GetIdFromInstanceName() {
  declare idFilter="[?name=='$1'].[id]"
  id=$(az vm list -o tsv --query "$idFilter")
}
#
# __cloud_FindInstances [filter] [pattern]
#
# Find instances matching the specified pattern.
#
# For each matching instance, an entry in the global `instances` array will be
# added with the following information about the instance:
#   "name:public IP:private IP:location"
# A field that Azure reports as empty stays empty (e.g. "name::10.0.0.4:westus").
# The array is also printed to stdout as one space-separated line.
#
# filter  - How to match: "prefix" (name starts with pattern) or "name"
#           (name equals pattern)
# pattern - The prefix or exact name to match
#
# Returns non-zero, with `instances` left empty and without querying Azure,
# when the filter is not recognized.
#
# examples:
#   $ __cloud_FindInstances prefix some-machine-prefix
#   $ __cloud_FindInstances name exact-machine-name
#
# Examples of plain-text filter command
#
# This will return an exact match for a machine named pgnode
#   az vm list -d --query "[?name=='pgnode'].[name,publicIps,privateIps,location]"
#
# This will return a match for any machine with prefix pgnode, ex: pgnode and pgnode2
#   az vm list -d --query "[?starts_with(name,'pgnode')].[name,publicIps,privateIps,location]"
__cloud_FindInstances() {
  declare filter="$1"
  declare pattern="$2"
  declare query
  declare keys="[name,publicIps,privateIps,location]"
  declare tab=$'\t'
  declare line

  # Reset up front so a rejected filter never leaves stale results behind.
  instances=()

  case $filter in
  prefix)
    query="[?starts_with(name,'$pattern')]"
    ;;
  name)
    query="[?name=='$pattern']"
    ;;
  *)
    echo "Unknown filter command: $filter" >&2
    return 1
    ;;
  esac

  # Each TSV row is "name<TAB>publicIps<TAB>privateIps<TAB>location". Swap the
  # tabs for colons instead of splitting with `read a b c d`: tab is IFS
  # whitespace, so `read` would collapse the two adjacent tabs around an empty
  # field and shift the remaining values into the wrong slots.
  while IFS= read -r line || [[ -n $line ]]; do
    [[ -n $line ]] || continue
    instances+=("${line//$tab/:}")
  done < <(az vm list -d -o tsv --query "$query.$keys")

  echo "${instances[*]}"
}
#
# cloud_FindInstances [namePrefix]
#
# Find every instance whose name starts with the given prefix by delegating
# to |__cloud_FindInstances prefix|.
#
# For each matching instance, an entry in the `instances` array will be added
# with the following information about the instance:
#   "name:public IP:private IP:location"
#
# namePrefix - The instance name prefix to look for
#
# examples:
#   $ cloud_FindInstances all-machines-with-a-common-machine-prefix
#
cloud_FindInstances() {
  declare wantedPrefix="$1"
  __cloud_FindInstances prefix "$wantedPrefix"
}
#
# cloud_FindInstance [name]
#
# Find the instance whose name is exactly the given name by delegating to
# |__cloud_FindInstances name|.
#
# For each matching instance, an entry in the `instances` array will be added
# with the following information about the instance:
#   "name:public IP:private IP:location"
#
# name - The instance name to look for
#
# examples:
#   $ cloud_FindInstance exact-machine-name
#
cloud_FindInstance() {
  declare wantedName="$1"
  __cloud_FindInstances name "$wantedName"
}
#
# cloud_Initialize [networkName] [location]
#
# Perform one-time initialization that may be required for the given testnet:
# make sure the resource group exists, create its network security group
# ("<networkName>-nsg") and add the inbound rules the testnet needs.
#
# networkName - unique name of this testnet; used as the resource group name
# location    - Azure location in which to create the resource group if it
#               does not exist yet
#
# This function will be called before |cloud_CreateInstances|
cloud_Initialize() {
  declare resourceGroup="$1"
  declare location="$2"
  declare nsgName=${resourceGroup}-nsg

  # Check if resource group exists. If not, create it.
  (
    set -x
    numGroup=$(az group list --query "length([?name=='$resourceGroup'])")
    if [[ $numGroup -eq 0 ]]; then
      echo Resource Group "$resourceGroup" does not exist. Creating it now.
      az group create --name "$resourceGroup" --location "$location"
    else
      echo Resource group "$resourceGroup" already exists.
      az group show --name "$resourceGroup"
    fi
    az network nsg create --name "$nsgName" --resource-group "$resourceGroup"
  )

  # create_nsg_rule [ruleName] [ports] [access] [protocol] [priority]
  #
  # Add one inbound rule, open to any source, to the testnet's security group.
  # Reads `resourceGroup` and `nsgName` from the enclosing cloud_Initialize
  # call. Its own parameters are declared local so that they do not overwrite
  # same-named variables in the sourcing script.
  create_nsg_rule() {
    declare ruleName="$1"
    declare ports="$2"
    declare access="$3"
    declare protocol="$4"
    declare priority="$5"
    (
      set -x
      az network nsg rule create -g "${resourceGroup}" --nsg-name "${nsgName}" -n "${ruleName}" \
        --priority "${priority}" --source-address-prefixes "*" --source-port-ranges "*" \
        --destination-address-prefixes "*" --destination-port-ranges "${ports}" --access "${access}" \
        --protocol "${protocol}"
    )
  }

  create_nsg_rule "InboundTCP" "8000-10000" "Allow" "Tcp" 1000
  create_nsg_rule "InboundUDP" "8000-10000" "Allow" "Udp" 1001
  create_nsg_rule "InboundHTTP" "80" "Allow" "Tcp" 1002
  create_nsg_rule "InboundNetworkExplorerAPI" "3001" "Allow" "Tcp" 1003
  create_nsg_rule "InboundDrone" "9900" "Allow" "Tcp" 1004
  create_nsg_rule "InboundJsonRpc" "8899-8900" "Allow" "Tcp" 1005
  create_nsg_rule "InboundRsync" "873" "Allow" "Tcp" 1006
  create_nsg_rule "InboundStun" "3478" "Allow" "Udp" 1007
  create_nsg_rule "InboundSSH" "22" "Allow" "Tcp" 1008
}
#
# cloud_CreateInstances [networkName] [namePrefix] [numNodes] [enableGpu]
#                       [machineType] [zone] [bootDiskSize]
#                       [startupScript] [address] [bootDiskType]
#
# Creates one or more identical instances.
#
# networkName   - unique name of this testnet; used as the resource group, and
#                 "<networkName>-nsg" is used as the network security group
# namePrefix    - unique string to prefix all the instance names with. A
#                 single node is named exactly namePrefix; multiple nodes get
#                 a zero-padded index appended
# numNodes      - number of instances to create
# enableGpu     - use the value "true" to install the NVIDIA GPU driver
#                 extension on every instance; any other value skips it
# machineType   - VM size, passed to |az vm create --size|
# zone          - location, passed to |az vm create --location|
# bootDiskSize  - Optional size of the OS disk in GB
# startupScript - Optional value passed to |az vm create --custom-data|
# address       - Optional value passed to |az vm create --public-ip-address|.
#                 Requires that |numNodes| = 1
# bootDiskType  - Optional storage SKU for the disk; StandardSSD_LRS when
#                 omitted
#
# The image is fixed to UbuntuLTS. The SSH public key is read from
# "${sshPrivateKey}.pub"; `sshPrivateKey` must be set by the caller.
#
# Tip: use cloud_FindInstances to locate the instances once this function
#      returns
cloud_CreateInstances() {
  declare networkName="$1"
  declare namePrefix="$2"
  declare numNodes="$3"
  declare enableGpu="$4"
  declare machineType="$5"
  declare zone="$6"
  declare optionalBootDiskSize="$7"
  declare optionalStartupScript="$8"
  declare optionalAddress="$9"
  declare optionalBootDiskType="${10}"

  # Scratch variables are local so they do not leak into the sourcing script.
  declare nsgName=${networkName}-nsg
  declare node
  declare -a nodes=()

  if [[ $numNodes = 1 ]]; then
    nodes=("$namePrefix")
  else
    # Zero-pad the index to the width of numNodes (e.g. 01..10)
    for node in $(seq -f "${namePrefix}%0${#numNodes}g" 1 "$numNodes"); do
      nodes+=("$node")
    done
  fi

  declare -a args
  args=(
    --resource-group "$networkName"
    --tags testnet
    --image UbuntuLTS
    --size "$machineType"
    --ssh-key-values "$(cat "${sshPrivateKey}".pub)"
    --location "$zone"
    --nsg "$nsgName"
  )

  if [[ -n $optionalBootDiskSize ]]; then
    args+=(
      --os-disk-size-gb "$optionalBootDiskSize"
    )
  fi
  if [[ -n $optionalStartupScript ]]; then
    args+=(
      --custom-data "$optionalStartupScript"
    )
  fi

  if [[ -n $optionalBootDiskType ]]; then
    args+=(
      --storage-sku "$optionalBootDiskType"
    )
  else
    args+=(
      --storage-sku StandardSSD_LRS
    )
  fi

  if [[ -n $optionalAddress ]]; then
    [[ $numNodes = 1 ]] || {
      echo "Error: address may not be supplied when provisioning multiple nodes: $optionalAddress" >&2
      exit 1
    }
    args+=(
      --public-ip-address "$optionalAddress"
    )
  fi

  (
    set -x

    # Start creation of every VM in the background with --no-wait, then block
    # until each one reports as created.
    for nodeName in "${nodes[@]}"; do
      az vm create --name "$nodeName" "${args[@]}" --no-wait
    done
    for nodeName in "${nodes[@]}"; do
      az vm wait --created --name "$nodeName" --resource-group "$networkName" --verbose --timeout 600
    done

    # If GPU is to be enabled, install the appropriate extension.
    # Compare against the literal "true" rather than executing the argument
    # as a command: an empty value would otherwise count as success and
    # install the extension unasked.
    if [[ $enableGpu == true ]]; then
      for nodeName in "${nodes[@]}"; do
        az vm extension set \
          --resource-group "$networkName" \
          --vm-name "$nodeName" \
          --name NvidiaGpuDriverLinux \
          --publisher Microsoft.HpcCompute \
          --version 1.2 \
          --no-wait
      done

      # Wait until all nodes have GPU extension installed
      for nodeName in "${nodes[@]}"; do
        az vm wait --updated --name "$nodeName" --resource-group "$networkName" --verbose --timeout 600
      done
    fi
  )
}
#
# cloud_DeleteInstances
#
# Deletes all the instances listed in the `instances` array.
#
# Each entry is expected in the "name:public IP:private IP:location" form;
# only the name (everything before the first colon) is used. The resource ID
# of every name is looked up with |cloud_GetIdFromInstanceName| and all IDs are
# handed to a single |az vm delete --no-wait| call, so this function returns
# once the deletions have been requested, not once they have completed.
#
# Names for which no resource ID is found are skipped with a warning.
#
cloud_DeleteInstances() {
  # Count the array elements; `${#instances[0]}` would be the string length
  # of the first element only.
  if [[ ${#instances[@]} -eq 0 ]]; then
    echo No instances to delete
    return
  fi

  # Strip everything from the first colon onward, leaving just the names
  declare -a names=("${instances[@]/:*/}")

  (
    set -x
    id_list=()

    # Build the list of all resource IDs to delete
    for instance in "${names[@]}"; do
      id=
      cloud_GetIdFromInstanceName "$instance"
      if [[ -z $id ]]; then
        # An empty --ids argument would make the whole delete call invalid
        echo "Warning: no resource ID found for instance: $instance" >&2
        continue
      fi
      id_list+=("$id")
    done

    if [[ ${#id_list[@]} -eq 0 ]]; then
      echo No instances to delete
    else
      # Request deletion of every instance in id_list without waiting for completion
      az vm delete --ids "${id_list[@]}" --yes --verbose --no-wait
    fi
  )
}
#
# cloud_WaitForInstanceReady [instanceName] [instanceIp] [instanceZone] [timeout]
#
# Block until |az vm wait --created| succeeds for the named VM, polling at a
# 10 second interval with the given timeout. This function is cloud-provider
# specific.
#
# instanceName - name of the VM to wait for
# instanceIp   - accepted for interface compatibility; unused
# instanceZone - accepted for interface compatibility; unused
# timeout      - passed to |az vm wait --timeout|
#
cloud_WaitForInstanceReady() {
  declare vmName="$1"
  declare waitTimeout="$4" # $2 (IP) and $3 (zone) are not needed here

  # Populates the global `resourceGroup` for the VM
  cloud_GetResourceGroupFromInstanceName "$vmName"

  declare -a waitArgs=(
    -g "$resourceGroup"
    -n "$vmName"
    --created
    --interval 10
    --timeout "$waitTimeout"
  )
  az vm wait "${waitArgs[@]}"
}
#
# cloud_FetchFile [instanceName] [publicIp] [remoteFile] [localFile]
#
# Fetch a file from the given instance. This function uses a cloud-specific
# mechanism to fetch the file: the instance's osProfile.adminUsername is
# looked up and the file is copied from publicIp with scp as that user.
#
# instanceName - name of the instance, used to look up the login user
# publicIp     - address to copy from
# remoteFile   - path of the file on the instance
# localFile    - destination path on this machine
#
cloud_FetchFile() {
  declare vmName="$1" hostIp="$2" srcPath="$3" dstPath="$4"

  # Populates the global `config_value` with the login user name
  cloud_GetConfigValueFromInstanceName "$vmName" osProfile.adminUsername

  declare remoteSpec="${config_value}@${hostIp}:${srcPath}"
  scp "$remoteSpec" "$dstPath"
}
#
# cloud_CreateAndAttachPersistentDisk
#
# Not yet implemented for this cloud provider: prints an error message and
# exits the calling shell with status 1.
#
cloud_CreateAndAttachPersistentDisk() {
  printf '%s\n' "ERROR: cloud_CreateAndAttachPersistentDisk is not yet implemented for azure"
  exit 1
}
#
# cloud_StatusAll
#
# Not yet implemented for this cloud provider: prints an error message and
# returns successfully.
#
cloud_StatusAll() {
  printf '%s\n' "ERROR: cloud_StatusAll is not yet implemented for azure"
}