Specify machine type without necessarily enabling GPU (#6529)

* Specify machine type without necessarily enabling GPU

* Make long arg, extend --enable-gpu to automation

* Set machine types only in one place

* Fixup

* Fixup flag in automation

* Typo

* shellcheck
This commit is contained in:
Dan Albert 2019-10-24 15:12:25 -06:00 committed by GitHub
parent 2de2fbd5e3
commit dadcb632d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 49 additions and 23 deletions

View File

@ -14,8 +14,6 @@ gce)
cpuBootstrapLeaderMachineType="--custom-cpu 12 --custom-memory 32GB --min-cpu-platform Intel%20Skylake"
gpuBootstrapLeaderMachineType="$cpuBootstrapLeaderMachineType --accelerator count=1,type=nvidia-tesla-p100"
bootstrapLeaderMachineType=$cpuBootstrapLeaderMachineType
validatorMachineType=$cpuBootstrapLeaderMachineType
clientMachineType="--custom-cpu 16 --custom-memory 20GB"
blockstreamerMachineType="--machine-type n1-standard-8"
archiverMachineType="--custom-cpu 4 --custom-memory 16GB"
@ -30,8 +28,6 @@ ec2)
# AVX-512 support. The default, p2.xlarge, does not support
# AVX-512
gpuBootstrapLeaderMachineType=p2.xlarge
bootstrapLeaderMachineType=$cpuBootstrapLeaderMachineType
validatorMachineType=$cpuBootstrapLeaderMachineType
clientMachineType=c5.2xlarge
blockstreamerMachineType=c5.2xlarge
archiverMachineType=c5.xlarge
@ -43,8 +39,6 @@ azure)
# TODO: Dial in machine types for Azure
cpuBootstrapLeaderMachineType=Standard_D16s_v3
gpuBootstrapLeaderMachineType=Standard_NC12
bootstrapLeaderMachineType=$cpuBootstrapLeaderMachineType
validatorMachineType=$cpuBootstrapLeaderMachineType
clientMachineType=Standard_D16s_v3
blockstreamerMachineType=Standard_D16s_v3
archiverMachineType=Standard_D4s_v3
@ -55,8 +49,6 @@ colo)
cpuBootstrapLeaderMachineType=0
gpuBootstrapLeaderMachineType=1
bootstrapLeaderMachineType=$cpuBootstrapLeaderMachineType
validatorMachineType=$cpuBootstrapLeaderMachineType
clientMachineType=0
blockstreamerMachineType=0
archiverMachineType=0
@ -84,6 +76,7 @@ evalInfo=false
publicNetwork=false
letsEncryptDomainName=
enableGpu=false
customMachineType=
customAddress=
zones=()
@ -131,8 +124,9 @@ Manage testnet instances
-r [number] - Number of archiver nodes (default: $archiverNodeCount)
-u - Include a Blockstreamer (default: $blockstreamer)
-P - Use public network IP addresses (default: $publicNetwork)
-g - Enable GPU (default: $enableGpu)
-G - Enable GPU, and set count/type of GPUs to use
-g - Enable GPU and automatically set validator machine types to $gpuBootstrapLeaderMachineType
(default: $enableGpu)
-G - Enable GPU, and set custom GPU machine type to use
(e.g $gpuBootstrapLeaderMachineType)
-a [address] - Address to be be assigned to the Blockstreamer if present,
otherwise the bootstrap validator.
@ -141,9 +135,14 @@ Manage testnet instances
* For EC2, [address] is the "allocation ID" of the desired
Elastic IP.
-d [disk-type] - Specify a boot disk type (default None) Use pd-ssd to get ssd on GCE.
--letsencrypt [dns name] - Attempt to generate a TLS certificate using this
DNS name (useful only when the -a and -P options
are also provided)
--letsencrypt [dns name]
- Attempt to generate a TLS certificate using this
DNS name (useful only when the -a and -P options
are also provided)
--custom-machine-type
- Set a custom machine type without assuming whether or not
GPU is enabled. Set this explicitly with --enable-gpu/-g to call out the presence of GPUs.
--enable-gpu - Use with --custom-machine-type to specify whether or not GPUs should be used/enabled
--validator-additional-disk-size-gb [number]
- Add an additional [number] GB SSD to all validators to store the config directory.
If not set, config will be written to the boot disk by default.
@ -195,6 +194,12 @@ while [[ -n $1 ]]; do
elif [[ $1 == --eval ]]; then
evalInfo=true
shift
elif [[ $1 == --enable-gpu ]]; then
enableGpu=true
shift
elif [[ $1 = --custom-machine-type ]]; then
customMachineType="$2"
shift 2
else
usage "Unknown long option: $1"
fi
@ -230,15 +235,10 @@ while getopts "h?p:Pn:c:r:z:gG:a:d:uxf" opt "${shortArgs[@]}"; do
;;
g)
enableGpu=true
bootstrapLeaderMachineType=$gpuBootstrapLeaderMachineType
validatorMachineType=$bootstrapLeaderMachineType
blockstreamerMachineType=$bootstrapLeaderMachineType
;;
G)
enableGpu=true
bootstrapLeaderMachineType="$OPTARG"
validatorMachineType=$bootstrapLeaderMachineType
blockstreamerMachineType=$bootstrapLeaderMachineType
customMachineType="$OPTARG"
;;
a)
customAddress=$OPTARG
@ -258,6 +258,16 @@ while getopts "h?p:Pn:c:r:z:gG:a:d:uxf" opt "${shortArgs[@]}"; do
esac
done
if [[ -n "$customMachineType" ]] ; then
bootstrapLeaderMachineType="$customMachineType"
elif [[ "$enableGpu" = "true" ]] ; then
bootstrapLeaderMachineType="$gpuBootstrapLeaderMachineType"
else
bootstrapLeaderMachineType="$cpuBootstrapLeaderMachineType"
fi
validatorMachineType=$bootstrapLeaderMachineType
blockstreamerMachineType=$bootstrapLeaderMachineType
[[ ${#zones[@]} -gt 0 ]] || zones+=("$(cloud_DefaultZone)")
[[ -z $1 ]] || usage "Unexpected argument: $1"

View File

@ -5,6 +5,7 @@ steps:
UPLOAD_RESULTS_TO_SLACK: "true"
CLOUD_PROVIDER: "colo"
TESTNET_TAG: "colo-edge-perf-gpu-enabled"
ENABLE_GPU: "true"
RAMP_UP_TIME: 0
TEST_DURATION_SECONDS: 600
NUMBER_OF_VALIDATOR_NODES: 4

View File

@ -8,6 +8,7 @@ steps:
RAMP_UP_TIME: 60
TEST_DURATION_SECONDS: 300
NUMBER_OF_VALIDATOR_NODES: 10
ENABLE_GPU: "false"
VALIDATOR_NODE_MACHINE_TYPE: "--machine-type n1-standard-16"
NUMBER_OF_CLIENT_NODES: 1
CLIENT_OPTIONS: "bench-tps=1=--tx_count 80000 --thread-batch-sleep-ms 1000"

View File

@ -8,6 +8,8 @@ steps:
RAMP_UP_TIME: 0
TEST_DURATION_SECONDS: 600
NUMBER_OF_VALIDATOR_NODES: 5
ENABLE_GPU: "false"
VALIDATOR_NODE_MACHINE_TYPE: "--machine-type n1-standard-16"
NUMBER_OF_CLIENT_NODES: 2
CLIENT_OPTIONS: "bench-tps=2=--tx_count 15000 --thread-batch-sleep-ms 250"
TESTNET_ZONES: "us-west1-a,us-west1-b,us-central1-a,europe-west4-a"

View File

@ -8,6 +8,7 @@ steps:
RAMP_UP_TIME: 0
TEST_DURATION_SECONDS: 600
NUMBER_OF_VALIDATOR_NODES: 5
ENABLE_GPU: "true"
VALIDATOR_NODE_MACHINE_TYPE: "--machine-type n1-standard-16 --accelerator count=2,type=nvidia-tesla-v100"
NUMBER_OF_CLIENT_NODES: 2
CLIENT_OPTIONS: "bench-tps=2=--tx_count 15000 --thread-batch-sleep-ms 250"

View File

@ -8,10 +8,11 @@ steps:
RAMP_UP_TIME: 0
TEST_DURATION_SECONDS: 600
NUMBER_OF_VALIDATOR_NODES: 50
ENABLE_GPU: "true"
VALIDATOR_NODE_MACHINE_TYPE: "--machine-type n1-standard-16 --accelerator count=2,type=nvidia-tesla-v100"
NUMBER_OF_CLIENT_NODES: 2
CLIENT_OPTIONS: "bench-tps=2=--tx_count 15000 --thread-batch-sleep-ms 250"
TESTNET_ZONES: "us-west1-a,us-west1-b,us-central1-a,europe-west4-a"
ADDITIONAL_FLAGS: ""
ADDITIONAL_FLAGS: "--dedicated --allow-boot-failures"
agents:
- "queue=testnet-deploy"

View File

@ -74,16 +74,18 @@ function launchTestnet() {
case $CLOUD_PROVIDER in
gce)
# shellcheck disable=SC2068
# shellcheck disable=SC2086
net/gce.sh create \
-d pd-ssd \
-n "$NUMBER_OF_VALIDATOR_NODES" -c "$NUMBER_OF_CLIENT_NODES" \
"$maybeMachineType" "$VALIDATOR_NODE_MACHINE_TYPE" \
$maybeCustomMachineType $VALIDATOR_NODE_MACHINE_TYPE "$maybeEnableGpu" \
-p "$TESTNET_TAG" ${TESTNET_CLOUD_ZONES[@]/#/"-z "} ${ADDITIONAL_FLAGS[@]/#/" "}
;;
colo)
# shellcheck disable=SC2068
# shellcheck disable=SC2086
net/colo.sh create \
-n "$NUMBER_OF_VALIDATOR_NODES" -c "$NUMBER_OF_CLIENT_NODES" -g \
-n "$NUMBER_OF_VALIDATOR_NODES" -c "$NUMBER_OF_CLIENT_NODES" "$maybeEnableGpu" \
-p "$TESTNET_TAG" ${ADDITIONAL_FLAGS[@]/#/" "}
;;
*)
@ -169,6 +171,13 @@ if [[ -z $NUMBER_OF_VALIDATOR_NODES ]] ; then
exit 1
fi
if [[ -z $ENABLE_GPU ]] ; then
ENABLE_GPU=false
fi
if [[ "$ENABLE_GPU" = "true" ]] ; then
maybeEnableGpu="--enable-gpu"
fi
if [[ -z $NUMBER_OF_CLIENT_NODES ]] ; then
echo NUMBER_OF_CLIENT_NODES not defined
exit 1
@ -193,7 +202,7 @@ source ci/upload-ci-artifact.sh
source system-test/testnet-performance/upload_results_to_slack.sh
maybeClientOptions=${CLIENT_OPTIONS:+"-c"}
maybeMachineType=${VALIDATOR_NODE_MACHINE_TYPE:+"-G"}
maybeCustomMachineType=${VALIDATOR_NODE_MACHINE_TYPE:+"--custom-machine-type"}
IFS=, read -r -a TESTNET_CLOUD_ZONES <<<"${TESTNET_ZONES}"
@ -203,6 +212,7 @@ RESULT_DETAILS="Test failed to finish"
TEST_PARAMS_TO_DISPLAY=(CLOUD_PROVIDER \
NUMBER_OF_VALIDATOR_NODES \
ENABLE_GPU \
VALIDATOR_NODE_MACHINE_TYPE \
NUMBER_OF_CLIENT_NODES \
CLIENT_OPTIONS \