Add support for preemptible GCP instances
This commit is contained in:
parent
60141e0c2c
commit
9267931ef6
19
net/gce.sh
19
net/gce.sh
|
@ -78,6 +78,7 @@ replicatorBootDiskSizeInGb=500
|
||||||
fullNodeAdditionalDiskSizeInGb=
|
fullNodeAdditionalDiskSizeInGb=
|
||||||
externalNodes=false
|
externalNodes=false
|
||||||
failOnValidatorBootupFailure=true
|
failOnValidatorBootupFailure=true
|
||||||
|
preemptible=true
|
||||||
|
|
||||||
publicNetwork=false
|
publicNetwork=false
|
||||||
letsEncryptDomainName=
|
letsEncryptDomainName=
|
||||||
|
@ -146,6 +147,11 @@ Manage testnet instances
|
||||||
- Add an additional [number] GB SSD to all fullnodes to store the config directory.
|
- Add an additional [number] GB SSD to all fullnodes to store the config directory.
|
||||||
If not set, config will be written to the boot disk by default.
|
If not set, config will be written to the boot disk by default.
|
||||||
Only supported on GCE.
|
Only supported on GCE.
|
||||||
|
--dedicated - Use dedicated instances for additional full nodes
|
||||||
|
(by default preemptible instances are used to reduce
|
||||||
|
cost). Note that the bootstrap leader, replicator,
|
||||||
|
blockstreamer and client nodes are always dedicated.
|
||||||
|
|
||||||
config-specific options:
|
config-specific options:
|
||||||
-P - Use public network IP addresses (default: $publicNetwork)
|
-P - Use public network IP addresses (default: $publicNetwork)
|
||||||
|
|
||||||
|
@ -180,6 +186,9 @@ while [[ -n $1 ]]; do
|
||||||
elif [[ $1 == --allow-boot-failures ]]; then
|
elif [[ $1 == --allow-boot-failures ]]; then
|
||||||
failOnValidatorBootupFailure=false
|
failOnValidatorBootupFailure=false
|
||||||
shift
|
shift
|
||||||
|
elif [[ $1 == --dedicated ]]; then
|
||||||
|
preemptible=false
|
||||||
|
shift
|
||||||
else
|
else
|
||||||
usage "Unknown long option: $1"
|
usage "Unknown long option: $1"
|
||||||
fi
|
fi
|
||||||
|
@ -378,6 +387,8 @@ EOF
|
||||||
|
|
||||||
buildSshOptions
|
buildSshOptions
|
||||||
|
|
||||||
|
cloud_RestartPreemptedInstances "$prefix"
|
||||||
|
|
||||||
fetchPrivateKey() {
|
fetchPrivateKey() {
|
||||||
declare nodeName
|
declare nodeName
|
||||||
declare nodeIp
|
declare nodeIp
|
||||||
|
@ -725,7 +736,7 @@ EOF
|
||||||
cloud_CreateInstances "$prefix" "$prefix-bootstrap-leader" 1 \
|
cloud_CreateInstances "$prefix" "$prefix-bootstrap-leader" 1 \
|
||||||
"$enableGpu" "$bootstrapLeaderMachineType" "${zones[0]}" "$fullNodeBootDiskSizeInGb" \
|
"$enableGpu" "$bootstrapLeaderMachineType" "${zones[0]}" "$fullNodeBootDiskSizeInGb" \
|
||||||
"$startupScript" "$bootstrapLeaderAddress" "$bootDiskType" "$fullNodeAdditionalDiskSizeInGb" \
|
"$startupScript" "$bootstrapLeaderAddress" "$bootDiskType" "$fullNodeAdditionalDiskSizeInGb" \
|
||||||
"$sshPrivateKey"
|
"never preemptible" "$sshPrivateKey"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ $additionalFullNodeCount -gt 0 ]]; then
|
if [[ $additionalFullNodeCount -gt 0 ]]; then
|
||||||
|
@ -746,7 +757,7 @@ EOF
|
||||||
cloud_CreateInstances "$prefix" "$prefix-$zone-fullnode" "$numNodesPerZone" \
|
cloud_CreateInstances "$prefix" "$prefix-$zone-fullnode" "$numNodesPerZone" \
|
||||||
"$enableGpu" "$fullNodeMachineType" "$zone" "$fullNodeBootDiskSizeInGb" \
|
"$enableGpu" "$fullNodeMachineType" "$zone" "$fullNodeBootDiskSizeInGb" \
|
||||||
"$startupScript" "" "$bootDiskType" "$fullNodeAdditionalDiskSizeInGb" \
|
"$startupScript" "" "$bootDiskType" "$fullNodeAdditionalDiskSizeInGb" \
|
||||||
"$sshPrivateKey" &
|
"$preemptible" "$sshPrivateKey" &
|
||||||
done
|
done
|
||||||
|
|
||||||
wait
|
wait
|
||||||
|
@ -755,7 +766,7 @@ EOF
|
||||||
if [[ $clientNodeCount -gt 0 ]]; then
|
if [[ $clientNodeCount -gt 0 ]]; then
|
||||||
cloud_CreateInstances "$prefix" "$prefix-client" "$clientNodeCount" \
|
cloud_CreateInstances "$prefix" "$prefix-client" "$clientNodeCount" \
|
||||||
"$enableGpu" "$clientMachineType" "${zones[0]}" "$clientBootDiskSizeInGb" \
|
"$enableGpu" "$clientMachineType" "${zones[0]}" "$clientBootDiskSizeInGb" \
|
||||||
"$startupScript" "" "$bootDiskType" "" "$sshPrivateKey"
|
"$startupScript" "" "$bootDiskType" "" "never preemptible" "$sshPrivateKey"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if $blockstreamer; then
|
if $blockstreamer; then
|
||||||
|
@ -767,7 +778,7 @@ EOF
|
||||||
if [[ $replicatorNodeCount -gt 0 ]]; then
|
if [[ $replicatorNodeCount -gt 0 ]]; then
|
||||||
cloud_CreateInstances "$prefix" "$prefix-replicator" "$replicatorNodeCount" \
|
cloud_CreateInstances "$prefix" "$prefix-replicator" "$replicatorNodeCount" \
|
||||||
false "$replicatorMachineType" "${zones[0]}" "$replicatorBootDiskSizeInGb" \
|
false "$replicatorMachineType" "${zones[0]}" "$replicatorBootDiskSizeInGb" \
|
||||||
"$startupScript" "" "" "" "$sshPrivateKey"
|
"$startupScript" "" "" "" "never preemptible" "$sshPrivateKey"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
$metricsWriteDatapoint "testnet-deploy net-create-complete=1"
|
$metricsWriteDatapoint "testnet-deploy net-create-complete=1"
|
||||||
|
|
26
net/net.sh
26
net/net.sh
|
@ -840,6 +840,32 @@ stop() {
|
||||||
echo "Stopping nodes took $SECONDS seconds"
|
echo "Stopping nodes took $SECONDS seconds"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#
# checkPremptibleInstances
#
# Pings every address in the global fullnodeIpList array once, with a four
# second limit. On the first address that does not answer, prints a warning
# and exits the script with status 1.
#
# The fullnodeIpList nodes may be preemptible instances that can disappear at
# any time.  Try to detect when a fullnode has been preempted to help the user
# out.
#
# Of course this isn't airtight as an instance could always disappear
# immediately after it is successfully pinged.
#
checkPremptibleInstances() {
  declare ipAddress
  declare -a pingArgs

  # "One reply, give up after 4 seconds" is spelled differently by the two
  # common ping implementations: Linux iputils uses -c/-w (there -t means TTL
  # and -o is not accepted), while BSD/macOS ping uses -o/-t.
  case "$(uname)" in
  Linux)
    pingArgs=(-c 1 -w 4)
    ;;
  *)
    pingArgs=(-o -t 4)
    ;;
  esac

  for ipAddress in "${fullnodeIpList[@]}"; do
    (
      set -x
      ping "${pingArgs[@]}" "$ipAddress"
    ) || {
      cat <<EOF

Warning: $ipAddress may have been preempted.

Run |./gce.sh config| to restart it
EOF
      exit 1
    }
  done
}
|
||||||
|
|
||||||
|
checkPremptibleInstances
|
||||||
|
|
||||||
case $command in
|
case $command in
|
||||||
restart)
|
restart)
|
||||||
prepare_deploy
|
prepare_deploy
|
||||||
|
|
|
@ -8,6 +8,10 @@ cloud_DefaultZone() {
|
||||||
echo "westus"
|
echo "westus"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cloud_RestartPreemptedInstances() {
|
||||||
|
: # Not implemented: intentional no-op placeholder (presumably kept so provider-agnostic callers can invoke it -- TODO confirm)
|
||||||
|
}
|
||||||
|
|
||||||
#
|
#
|
||||||
# __cloud_GetConfigValueFromInstanceName
|
# __cloud_GetConfigValueFromInstanceName
|
||||||
# Return a piece of configuration information about an instance
|
# Return a piece of configuration information about an instance
|
||||||
|
|
|
@ -16,6 +16,10 @@ cloud_DefaultZone() {
|
||||||
echo "Denver"
|
echo "Denver"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cloud_RestartPreemptedInstances() {
|
||||||
|
: # Not implemented: intentional no-op placeholder (presumably kept so provider-agnostic callers can invoke it -- TODO confirm)
|
||||||
|
}
|
||||||
|
|
||||||
#
|
#
|
||||||
# __cloud_FindInstances
|
# __cloud_FindInstances
|
||||||
#
|
#
|
||||||
|
@ -134,6 +138,7 @@ cloud_Initialize() {
|
||||||
# has been provisioned in the GCE region that is hosting `$zone`
|
# has been provisioned in the GCE region that is hosting `$zone`
|
||||||
# bootDiskType - Optionally specify SSD or HDD boot disk
|
# bootDiskType - Optionally specify SSD or HDD boot disk
|
||||||
# additionalDiskSize - Optionally specify size of additional storage volume
|
# additionalDiskSize - Optionally specify size of additional storage volume
|
||||||
|
# preemptible - Optionally request a preemptible instance ("true")
|
||||||
#
|
#
|
||||||
# Tip: use cloud_FindInstances to locate the instances once this function
|
# Tip: use cloud_FindInstances to locate the instances once this function
|
||||||
# returns
|
# returns
|
||||||
|
@ -149,7 +154,8 @@ cloud_CreateInstances() {
|
||||||
#declare optionalAddress="$9" # unused
|
#declare optionalAddress="$9" # unused
|
||||||
#declare optionalBootDiskType="${10}" # unused
|
#declare optionalBootDiskType="${10}" # unused
|
||||||
#declare optionalAdditionalDiskSize="${11}" # unused
|
#declare optionalAdditionalDiskSize="${11}" # unused
|
||||||
declare sshPrivateKey="${12}"
|
#declare optionalPreemptible="${12}" # unused
|
||||||
|
declare sshPrivateKey="${13}"
|
||||||
|
|
||||||
declare -a nodes
|
declare -a nodes
|
||||||
if [[ $numNodes = 1 ]]; then
|
if [[ $numNodes = 1 ]]; then
|
||||||
|
|
|
@ -7,6 +7,10 @@ cloud_DefaultZone() {
|
||||||
echo "us-east-1b"
|
echo "us-east-1b"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cloud_RestartPreemptedInstances() {
|
||||||
|
: # Not implemented: intentional no-op placeholder (presumably kept so provider-agnostic callers can invoke it -- TODO confirm)
|
||||||
|
}
|
||||||
|
|
||||||
# AWS region is zone with the last character removed
|
# AWS region is zone with the last character removed
|
||||||
__cloud_GetRegion() {
|
__cloud_GetRegion() {
|
||||||
declare zone="$1"
|
declare zone="$1"
|
||||||
|
|
|
@ -8,6 +8,29 @@ cloud_DefaultZone() {
|
||||||
echo "us-west1-b"
|
echo "us-west1-b"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#
# cloud_RestartPreemptedInstances [namePrefix]
#
# Restart any preempted instances matching the specified prefix
#
# namePrefix - The instance name prefix of the preempted instances.  Passed
#              verbatim to `gcloud compute instances list --filter`.  Must not
#              be empty: an empty filter would select every instance in the
#              project.
#
# Every listed instance whose status field is exactly TERMINATED is started in
# its own zone, one at a time.  Returns 1 without contacting gcloud when
# namePrefix is empty.
#
cloud_RestartPreemptedInstances() {
  declare filter="$1"

  if [[ -z $filter ]]; then
    echo "Error: cloud_RestartPreemptedInstances requires a non-empty namePrefix" >&2
    return 1
  fi

  declare name status zone
  while read -r name status zone; do
    # Compare the status column itself rather than grepping the whole line
    [[ $status == TERMINATED ]] || continue

    echo "Starting $status instance: $name"
    (
      set -x
      # stdin is detached so the command cannot consume the remaining instance
      # list that the enclosing `while read` loop is still reading
      gcloud compute instances start --zone "$zone" "$name" </dev/null
    )
  done < <(gcloud compute instances list \
             --filter "$filter" \
             --format 'value(name,status,zone)')
}
|
||||||
|
|
||||||
#
|
#
|
||||||
# __cloud_FindInstances
|
# __cloud_FindInstances
|
||||||
#
|
#
|
||||||
|
@ -125,6 +148,7 @@ cloud_Initialize() {
|
||||||
# has been provisioned in the GCE region that is hosting `$zone`
|
# has been provisioned in the GCE region that is hosting `$zone`
|
||||||
# bootDiskType - Optionally specify SSD or HDD boot disk
|
# bootDiskType - Optionally specify SSD or HDD boot disk
|
||||||
# additionalDiskSize - Optionally specify size of additional storage volume
|
# additionalDiskSize - Optionally specify size of additional storage volume
|
||||||
|
# preemptible - Optionally request a preemptible instance ("true")
|
||||||
#
|
#
|
||||||
# Tip: use cloud_FindInstances to locate the instances once this function
|
# Tip: use cloud_FindInstances to locate the instances once this function
|
||||||
# returns
|
# returns
|
||||||
|
@ -140,6 +164,8 @@ cloud_CreateInstances() {
|
||||||
declare optionalAddress="$9"
|
declare optionalAddress="$9"
|
||||||
declare optionalBootDiskType="${10}"
|
declare optionalBootDiskType="${10}"
|
||||||
declare optionalAdditionalDiskSize="${11}"
|
declare optionalAdditionalDiskSize="${11}"
|
||||||
|
declare optionalPreemptible="${12}"
|
||||||
|
#declare sshPrivateKey="${13}" # unused
|
||||||
|
|
||||||
if $enableGpu; then
|
if $enableGpu; then
|
||||||
# Custom Ubuntu 18.04 LTS image with CUDA 9.2 and CUDA 10.0 installed
|
# Custom Ubuntu 18.04 LTS image with CUDA 9.2 and CUDA 10.0 installed
|
||||||
|
@ -176,6 +202,10 @@ cloud_CreateInstances() {
|
||||||
# shellcheck disable=SC2206 # Do not want to quote $imageName as it may contain extra args
|
# shellcheck disable=SC2206 # Do not want to quote $imageName as it may contain extra args
|
||||||
args+=(--image $imageName)
|
args+=(--image $imageName)
|
||||||
|
|
||||||
|
if [[ $optionalPreemptible = true ]]; then
|
||||||
|
args+=(--preemptible)
|
||||||
|
fi
|
||||||
|
|
||||||
# shellcheck disable=SC2206 # Do not want to quote $machineType as it may contain extra args
|
# shellcheck disable=SC2206 # Do not want to quote $machineType as it may contain extra args
|
||||||
for word in $machineType; do
|
for word in $machineType; do
|
||||||
# Special handling for the "--min-cpu-platform" argument which may contain a
|
# Special handling for the "--min-cpu-platform" argument which may contain a
|
||||||
|
|
Loading…
Reference in New Issue