Add support for preemptible GCP instances
This commit is contained in:
parent
60141e0c2c
commit
9267931ef6
19
net/gce.sh
19
net/gce.sh
|
@ -78,6 +78,7 @@ replicatorBootDiskSizeInGb=500
|
|||
fullNodeAdditionalDiskSizeInGb=
|
||||
externalNodes=false
|
||||
failOnValidatorBootupFailure=true
|
||||
preemptible=true
|
||||
|
||||
publicNetwork=false
|
||||
letsEncryptDomainName=
|
||||
|
@ -146,6 +147,11 @@ Manage testnet instances
|
|||
- Add an additional [number] GB SSD to all fullnodes to store the config directory.
|
||||
If not set, config will be written to the boot disk by default.
|
||||
Only supported on GCE.
|
||||
--dedicated - Use dedicated instances for additional full nodes
|
||||
(by default preemptible instances are used to reduce
|
||||
cost). Note that the bootstrap leader, replicator,
|
||||
blockstreamer and client nodes are always dedicated.
|
||||
|
||||
config-specific options:
|
||||
-P - Use public network IP addresses (default: $publicNetwork)
|
||||
|
||||
|
@ -180,6 +186,9 @@ while [[ -n $1 ]]; do
|
|||
elif [[ $1 == --allow-boot-failures ]]; then
|
||||
failOnValidatorBootupFailure=false
|
||||
shift
|
||||
elif [[ $1 == --dedicated ]]; then
|
||||
preemptible=false
|
||||
shift
|
||||
else
|
||||
usage "Unknown long option: $1"
|
||||
fi
|
||||
|
@ -378,6 +387,8 @@ EOF
|
|||
|
||||
buildSshOptions
|
||||
|
||||
cloud_RestartPreemptedInstances "$prefix"
|
||||
|
||||
fetchPrivateKey() {
|
||||
declare nodeName
|
||||
declare nodeIp
|
||||
|
@ -725,7 +736,7 @@ EOF
|
|||
cloud_CreateInstances "$prefix" "$prefix-bootstrap-leader" 1 \
|
||||
"$enableGpu" "$bootstrapLeaderMachineType" "${zones[0]}" "$fullNodeBootDiskSizeInGb" \
|
||||
"$startupScript" "$bootstrapLeaderAddress" "$bootDiskType" "$fullNodeAdditionalDiskSizeInGb" \
|
||||
"$sshPrivateKey"
|
||||
"never preemptible" "$sshPrivateKey"
|
||||
fi
|
||||
|
||||
if [[ $additionalFullNodeCount -gt 0 ]]; then
|
||||
|
@ -746,7 +757,7 @@ EOF
|
|||
cloud_CreateInstances "$prefix" "$prefix-$zone-fullnode" "$numNodesPerZone" \
|
||||
"$enableGpu" "$fullNodeMachineType" "$zone" "$fullNodeBootDiskSizeInGb" \
|
||||
"$startupScript" "" "$bootDiskType" "$fullNodeAdditionalDiskSizeInGb" \
|
||||
"$sshPrivateKey" &
|
||||
"$preemptible" "$sshPrivateKey" &
|
||||
done
|
||||
|
||||
wait
|
||||
|
@ -755,7 +766,7 @@ EOF
|
|||
if [[ $clientNodeCount -gt 0 ]]; then
|
||||
cloud_CreateInstances "$prefix" "$prefix-client" "$clientNodeCount" \
|
||||
"$enableGpu" "$clientMachineType" "${zones[0]}" "$clientBootDiskSizeInGb" \
|
||||
"$startupScript" "" "$bootDiskType" "" "$sshPrivateKey"
|
||||
"$startupScript" "" "$bootDiskType" "" "never preemptible" "$sshPrivateKey"
|
||||
fi
|
||||
|
||||
if $blockstreamer; then
|
||||
|
@ -767,7 +778,7 @@ EOF
|
|||
if [[ $replicatorNodeCount -gt 0 ]]; then
|
||||
cloud_CreateInstances "$prefix" "$prefix-replicator" "$replicatorNodeCount" \
|
||||
false "$replicatorMachineType" "${zones[0]}" "$replicatorBootDiskSizeInGb" \
|
||||
"$startupScript" "" "" "" "$sshPrivateKey"
|
||||
"$startupScript" "" "" "" "never preemptible" "$sshPrivateKey"
|
||||
fi
|
||||
|
||||
$metricsWriteDatapoint "testnet-deploy net-create-complete=1"
|
||||
|
|
26
net/net.sh
26
net/net.sh
|
@ -840,6 +840,32 @@ stop() {
|
|||
echo "Stopping nodes took $SECONDS seconds"
|
||||
}
|
||||
|
||||
|
||||
checkPremptibleInstances() {
  # The fullnodeIpList nodes may be preemptible instances that can disappear at
  # any time.  Try to detect when a fullnode has been preempted to help the user
  # out.
  #
  # Every address in the global fullnodeIpList array is pinged in turn.  On the
  # first address that does not answer, a warning is printed and the script
  # exits with status 1.
  #
  # Of course this isn't airtight as an instance could always disappear
  # immediately after it's successfully pinged.

  # The "stop after one reply, give up after 4 seconds" flags are not portable:
  # BSD-derived ping (macOS, FreeBSD) spells them `-o -t 4`, whereas on Linux
  # (iputils) `-o` is rejected and `-t` sets the TTL, so every node would be
  # reported as preempted.  Pick the right spelling for the local platform.
  declare -a pingArgs
  case "$(uname -s)" in
  Darwin | FreeBSD)
    pingArgs=(-o -t 4)
    ;;
  *)
    pingArgs=(-c 1 -W 4)
    ;;
  esac

  for ipAddress in "${fullnodeIpList[@]}"; do
    (
      # Trace the ping command so the user can see which node is being probed
      set -x
      ping "${pingArgs[@]}" "$ipAddress"
    ) || {
      cat <<EOF

Warning: $ipAddress may have been preempted.

Run |./gce.sh config| to restart it
EOF
      exit 1
    }
  done
}
|
||||
|
||||
checkPremptibleInstances
|
||||
|
||||
case $command in
|
||||
restart)
|
||||
prepare_deploy
|
||||
|
|
|
@ -8,6 +8,10 @@ cloud_DefaultZone() {
|
|||
echo "westus"
|
||||
}
|
||||
|
||||
#
# cloud_RestartPreemptedInstances
#
# Not implemented in this script: the body is the `:` no-op, so any arguments
# are ignored and the call always succeeds.
#
cloud_RestartPreemptedInstances() {
|
||||
: # Not implemented
|
||||
}
|
||||
|
||||
#
|
||||
# __cloud_GetConfigValueFromInstanceName
|
||||
# Return a piece of configuration information about an instance
|
||||
|
|
|
@ -16,6 +16,10 @@ cloud_DefaultZone() {
|
|||
echo "Denver"
|
||||
}
|
||||
|
||||
#
# cloud_RestartPreemptedInstances
#
# Not implemented in this script: the body is the `:` no-op, so any arguments
# are ignored and the call always succeeds.
#
cloud_RestartPreemptedInstances() {
|
||||
: # Not implemented
|
||||
}
|
||||
|
||||
#
|
||||
# __cloud_FindInstances
|
||||
#
|
||||
|
@ -134,6 +138,7 @@ cloud_Initialize() {
|
|||
# has been provisioned in the GCE region that is hosting `$zone`
|
||||
# bootDiskType - Optionally specify SSD or HDD boot disk
|
||||
# additionalDiskSize - Optionally specify size of additional storage volume
|
||||
# preemptible - Optionally request a preemptible instance ("true")
|
||||
#
|
||||
# Tip: use cloud_FindInstances to locate the instances once this function
|
||||
# returns
|
||||
|
@ -149,7 +154,8 @@ cloud_CreateInstances() {
|
|||
#declare optionalAddress="$9" # unused
|
||||
#declare optionalBootDiskType="${10}" # unused
|
||||
#declare optionalAdditionalDiskSize="${11}" # unused
|
||||
declare sshPrivateKey="${12}"
|
||||
#declare optionalPreemptible="${12}" # unused
|
||||
declare sshPrivateKey="${13}"
|
||||
|
||||
declare -a nodes
|
||||
if [[ $numNodes = 1 ]]; then
|
||||
|
|
|
@ -7,6 +7,10 @@ cloud_DefaultZone() {
|
|||
echo "us-east-1b"
|
||||
}
|
||||
|
||||
#
# cloud_RestartPreemptedInstances
#
# Not implemented in this script: the body is the `:` no-op, so any arguments
# are ignored and the call always succeeds.
#
cloud_RestartPreemptedInstances() {
|
||||
: # Not implemented
|
||||
}
|
||||
|
||||
# AWS region is zone with the last character removed
|
||||
__cloud_GetRegion() {
|
||||
declare zone="$1"
|
||||
|
|
|
@ -8,6 +8,29 @@ cloud_DefaultZone() {
|
|||
echo "us-west1-b"
|
||||
}
|
||||
|
||||
#
# cloud_RestartPreemptedInstances [namePrefix]
#
# Restart any preempted instances matching the specified prefix
#
# namePrefix - The instance name prefix of the preempted instances.  It is
#              passed verbatim to `gcloud compute instances list --filter`.
#
# Every listed instance whose status column is exactly TERMINATED is started
# with `gcloud compute instances start` in the zone reported for it.
#
cloud_RestartPreemptedInstances() {
  declare filter="$1"

  declare name status zone
  while read -r name status zone; do
    echo "Starting $status instance: $name"
    (
      set -x
      # stdin is redirected so gcloud cannot consume the remaining instance
      # list that this loop is still reading
      gcloud compute instances start --zone "$zone" "$name" </dev/null
    )
  done < <(gcloud compute instances list \
    --filter "$filter" \
    --format 'value(name,status,zone)' |
    # Match the status column exactly; a plain `grep TERMINATED` would also
    # select running instances whose name merely contains that text
    awk '$2 == "TERMINATED"')
}
|
||||
|
||||
#
|
||||
# __cloud_FindInstances
|
||||
#
|
||||
|
@ -125,6 +148,7 @@ cloud_Initialize() {
|
|||
# has been provisioned in the GCE region that is hosting `$zone`
|
||||
# bootDiskType - Optionally specify SSD or HDD boot disk
|
||||
# additionalDiskSize - Optionally specify size of additional storage volume
|
||||
# preemptible - Optionally request a preemptible instance ("true")
|
||||
#
|
||||
# Tip: use cloud_FindInstances to locate the instances once this function
|
||||
# returns
|
||||
|
@ -140,6 +164,8 @@ cloud_CreateInstances() {
|
|||
declare optionalAddress="$9"
|
||||
declare optionalBootDiskType="${10}"
|
||||
declare optionalAdditionalDiskSize="${11}"
|
||||
declare optionalPreemptible="${12}"
|
||||
#declare sshPrivateKey="${13}" # unused
|
||||
|
||||
if $enableGpu; then
|
||||
# Custom Ubuntu 18.04 LTS image with CUDA 9.2 and CUDA 10.0 installed
|
||||
|
@ -176,6 +202,10 @@ cloud_CreateInstances() {
|
|||
# shellcheck disable=SC2206 # Do not want to quote $imageName as it may contain extra args
|
||||
args+=(--image $imageName)
|
||||
|
||||
if [[ $optionalPreemptible = true ]]; then
|
||||
args+=(--preemptible)
|
||||
fi
|
||||
|
||||
# shellcheck disable=SC2206 # Do not want to quote $machineType as it may contain extra args
|
||||
for word in $machineType; do
|
||||
# Special handling for the "--min-cpu-platform" argument which may contain a
|
||||
|
|
Loading…
Reference in New Issue