Specify machine type without necessarily enabling GPU (#6529)

* Specify machine type without necessarily enabling GPU

* Make long arg, extend --enable-gpu to automation

* Set machine types only in one place

* Fixup

* Fixup flag in automation

* Typo

* shellcheck
This commit is contained in:
Dan Albert 2019-10-24 15:12:25 -06:00 committed by GitHub
parent 2de2fbd5e3
commit dadcb632d8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 49 additions and 23 deletions

View File

@ -14,8 +14,6 @@ gce)
cpuBootstrapLeaderMachineType="--custom-cpu 12 --custom-memory 32GB --min-cpu-platform Intel%20Skylake" cpuBootstrapLeaderMachineType="--custom-cpu 12 --custom-memory 32GB --min-cpu-platform Intel%20Skylake"
gpuBootstrapLeaderMachineType="$cpuBootstrapLeaderMachineType --accelerator count=1,type=nvidia-tesla-p100" gpuBootstrapLeaderMachineType="$cpuBootstrapLeaderMachineType --accelerator count=1,type=nvidia-tesla-p100"
bootstrapLeaderMachineType=$cpuBootstrapLeaderMachineType
validatorMachineType=$cpuBootstrapLeaderMachineType
clientMachineType="--custom-cpu 16 --custom-memory 20GB" clientMachineType="--custom-cpu 16 --custom-memory 20GB"
blockstreamerMachineType="--machine-type n1-standard-8" blockstreamerMachineType="--machine-type n1-standard-8"
archiverMachineType="--custom-cpu 4 --custom-memory 16GB" archiverMachineType="--custom-cpu 4 --custom-memory 16GB"
@ -30,8 +28,6 @@ ec2)
# AVX-512 support. The default, p2.xlarge, does not support # AVX-512 support. The default, p2.xlarge, does not support
# AVX-512 # AVX-512
gpuBootstrapLeaderMachineType=p2.xlarge gpuBootstrapLeaderMachineType=p2.xlarge
bootstrapLeaderMachineType=$cpuBootstrapLeaderMachineType
validatorMachineType=$cpuBootstrapLeaderMachineType
clientMachineType=c5.2xlarge clientMachineType=c5.2xlarge
blockstreamerMachineType=c5.2xlarge blockstreamerMachineType=c5.2xlarge
archiverMachineType=c5.xlarge archiverMachineType=c5.xlarge
@ -43,8 +39,6 @@ azure)
# TODO: Dial in machine types for Azure # TODO: Dial in machine types for Azure
cpuBootstrapLeaderMachineType=Standard_D16s_v3 cpuBootstrapLeaderMachineType=Standard_D16s_v3
gpuBootstrapLeaderMachineType=Standard_NC12 gpuBootstrapLeaderMachineType=Standard_NC12
bootstrapLeaderMachineType=$cpuBootstrapLeaderMachineType
validatorMachineType=$cpuBootstrapLeaderMachineType
clientMachineType=Standard_D16s_v3 clientMachineType=Standard_D16s_v3
blockstreamerMachineType=Standard_D16s_v3 blockstreamerMachineType=Standard_D16s_v3
archiverMachineType=Standard_D4s_v3 archiverMachineType=Standard_D4s_v3
@ -55,8 +49,6 @@ colo)
cpuBootstrapLeaderMachineType=0 cpuBootstrapLeaderMachineType=0
gpuBootstrapLeaderMachineType=1 gpuBootstrapLeaderMachineType=1
bootstrapLeaderMachineType=$cpuBootstrapLeaderMachineType
validatorMachineType=$cpuBootstrapLeaderMachineType
clientMachineType=0 clientMachineType=0
blockstreamerMachineType=0 blockstreamerMachineType=0
archiverMachineType=0 archiverMachineType=0
@ -84,6 +76,7 @@ evalInfo=false
publicNetwork=false publicNetwork=false
letsEncryptDomainName= letsEncryptDomainName=
enableGpu=false enableGpu=false
customMachineType=
customAddress= customAddress=
zones=() zones=()
@ -131,8 +124,9 @@ Manage testnet instances
-r [number] - Number of archiver nodes (default: $archiverNodeCount) -r [number] - Number of archiver nodes (default: $archiverNodeCount)
-u - Include a Blockstreamer (default: $blockstreamer) -u - Include a Blockstreamer (default: $blockstreamer)
-P - Use public network IP addresses (default: $publicNetwork) -P - Use public network IP addresses (default: $publicNetwork)
-g - Enable GPU (default: $enableGpu) -g - Enable GPU and automatically set validator machine types to $gpuBootstrapLeaderMachineType
-G - Enable GPU, and set count/type of GPUs to use (default: $enableGpu)
-G - Enable GPU, and set custom GPU machine type to use
(e.g $gpuBootstrapLeaderMachineType) (e.g $gpuBootstrapLeaderMachineType)
-a [address] - Address to be be assigned to the Blockstreamer if present, -a [address] - Address to be be assigned to the Blockstreamer if present,
otherwise the bootstrap validator. otherwise the bootstrap validator.
@ -141,9 +135,14 @@ Manage testnet instances
* For EC2, [address] is the "allocation ID" of the desired * For EC2, [address] is the "allocation ID" of the desired
Elastic IP. Elastic IP.
-d [disk-type] - Specify a boot disk type (default None) Use pd-ssd to get ssd on GCE. -d [disk-type] - Specify a boot disk type (default None) Use pd-ssd to get ssd on GCE.
--letsencrypt [dns name] - Attempt to generate a TLS certificate using this --letsencrypt [dns name]
DNS name (useful only when the -a and -P options - Attempt to generate a TLS certificate using this
are also provided) DNS name (useful only when the -a and -P options
are also provided)
--custom-machine-type
- Set a custom machine type without assuming whether or not
GPU is enabled. Set this explicitly with --enable-gpu/-g to call out the presence of GPUs.
--enable-gpu - Use with --custom-machine-type to specify whether or not GPUs should be used/enabled
--validator-additional-disk-size-gb [number] --validator-additional-disk-size-gb [number]
- Add an additional [number] GB SSD to all validators to store the config directory. - Add an additional [number] GB SSD to all validators to store the config directory.
If not set, config will be written to the boot disk by default. If not set, config will be written to the boot disk by default.
@ -195,6 +194,12 @@ while [[ -n $1 ]]; do
elif [[ $1 == --eval ]]; then elif [[ $1 == --eval ]]; then
evalInfo=true evalInfo=true
shift shift
elif [[ $1 == --enable-gpu ]]; then
enableGpu=true
shift
elif [[ $1 = --custom-machine-type ]]; then
customMachineType="$2"
shift 2
else else
usage "Unknown long option: $1" usage "Unknown long option: $1"
fi fi
@ -230,15 +235,10 @@ while getopts "h?p:Pn:c:r:z:gG:a:d:uxf" opt "${shortArgs[@]}"; do
;; ;;
g) g)
enableGpu=true enableGpu=true
bootstrapLeaderMachineType=$gpuBootstrapLeaderMachineType
validatorMachineType=$bootstrapLeaderMachineType
blockstreamerMachineType=$bootstrapLeaderMachineType
;; ;;
G) G)
enableGpu=true enableGpu=true
bootstrapLeaderMachineType="$OPTARG" customMachineType="$OPTARG"
validatorMachineType=$bootstrapLeaderMachineType
blockstreamerMachineType=$bootstrapLeaderMachineType
;; ;;
a) a)
customAddress=$OPTARG customAddress=$OPTARG
@ -258,6 +258,16 @@ while getopts "h?p:Pn:c:r:z:gG:a:d:uxf" opt "${shortArgs[@]}"; do
esac esac
done done
if [[ -n "$customMachineType" ]] ; then
bootstrapLeaderMachineType="$customMachineType"
elif [[ "$enableGpu" = "true" ]] ; then
bootstrapLeaderMachineType="$gpuBootstrapLeaderMachineType"
else
bootstrapLeaderMachineType="$cpuBootstrapLeaderMachineType"
fi
validatorMachineType=$bootstrapLeaderMachineType
blockstreamerMachineType=$bootstrapLeaderMachineType
[[ ${#zones[@]} -gt 0 ]] || zones+=("$(cloud_DefaultZone)") [[ ${#zones[@]} -gt 0 ]] || zones+=("$(cloud_DefaultZone)")
[[ -z $1 ]] || usage "Unexpected argument: $1" [[ -z $1 ]] || usage "Unexpected argument: $1"

View File

@ -5,6 +5,7 @@ steps:
UPLOAD_RESULTS_TO_SLACK: "true" UPLOAD_RESULTS_TO_SLACK: "true"
CLOUD_PROVIDER: "colo" CLOUD_PROVIDER: "colo"
TESTNET_TAG: "colo-edge-perf-gpu-enabled" TESTNET_TAG: "colo-edge-perf-gpu-enabled"
ENABLE_GPU: "true"
RAMP_UP_TIME: 0 RAMP_UP_TIME: 0
TEST_DURATION_SECONDS: 600 TEST_DURATION_SECONDS: 600
NUMBER_OF_VALIDATOR_NODES: 4 NUMBER_OF_VALIDATOR_NODES: 4

View File

@ -8,6 +8,7 @@ steps:
RAMP_UP_TIME: 60 RAMP_UP_TIME: 60
TEST_DURATION_SECONDS: 300 TEST_DURATION_SECONDS: 300
NUMBER_OF_VALIDATOR_NODES: 10 NUMBER_OF_VALIDATOR_NODES: 10
ENABLE_GPU: "false"
VALIDATOR_NODE_MACHINE_TYPE: "--machine-type n1-standard-16" VALIDATOR_NODE_MACHINE_TYPE: "--machine-type n1-standard-16"
NUMBER_OF_CLIENT_NODES: 1 NUMBER_OF_CLIENT_NODES: 1
CLIENT_OPTIONS: "bench-tps=1=--tx_count 80000 --thread-batch-sleep-ms 1000" CLIENT_OPTIONS: "bench-tps=1=--tx_count 80000 --thread-batch-sleep-ms 1000"

View File

@ -8,6 +8,8 @@ steps:
RAMP_UP_TIME: 0 RAMP_UP_TIME: 0
TEST_DURATION_SECONDS: 600 TEST_DURATION_SECONDS: 600
NUMBER_OF_VALIDATOR_NODES: 5 NUMBER_OF_VALIDATOR_NODES: 5
ENABLE_GPU: "false"
VALIDATOR_NODE_MACHINE_TYPE: "--machine-type n1-standard-16"
NUMBER_OF_CLIENT_NODES: 2 NUMBER_OF_CLIENT_NODES: 2
CLIENT_OPTIONS: "bench-tps=2=--tx_count 15000 --thread-batch-sleep-ms 250" CLIENT_OPTIONS: "bench-tps=2=--tx_count 15000 --thread-batch-sleep-ms 250"
TESTNET_ZONES: "us-west1-a,us-west1-b,us-central1-a,europe-west4-a" TESTNET_ZONES: "us-west1-a,us-west1-b,us-central1-a,europe-west4-a"

View File

@ -8,6 +8,7 @@ steps:
RAMP_UP_TIME: 0 RAMP_UP_TIME: 0
TEST_DURATION_SECONDS: 600 TEST_DURATION_SECONDS: 600
NUMBER_OF_VALIDATOR_NODES: 5 NUMBER_OF_VALIDATOR_NODES: 5
ENABLE_GPU: "true"
VALIDATOR_NODE_MACHINE_TYPE: "--machine-type n1-standard-16 --accelerator count=2,type=nvidia-tesla-v100" VALIDATOR_NODE_MACHINE_TYPE: "--machine-type n1-standard-16 --accelerator count=2,type=nvidia-tesla-v100"
NUMBER_OF_CLIENT_NODES: 2 NUMBER_OF_CLIENT_NODES: 2
CLIENT_OPTIONS: "bench-tps=2=--tx_count 15000 --thread-batch-sleep-ms 250" CLIENT_OPTIONS: "bench-tps=2=--tx_count 15000 --thread-batch-sleep-ms 250"

View File

@ -8,10 +8,11 @@ steps:
RAMP_UP_TIME: 0 RAMP_UP_TIME: 0
TEST_DURATION_SECONDS: 600 TEST_DURATION_SECONDS: 600
NUMBER_OF_VALIDATOR_NODES: 50 NUMBER_OF_VALIDATOR_NODES: 50
ENABLE_GPU: "true"
VALIDATOR_NODE_MACHINE_TYPE: "--machine-type n1-standard-16 --accelerator count=2,type=nvidia-tesla-v100" VALIDATOR_NODE_MACHINE_TYPE: "--machine-type n1-standard-16 --accelerator count=2,type=nvidia-tesla-v100"
NUMBER_OF_CLIENT_NODES: 2 NUMBER_OF_CLIENT_NODES: 2
CLIENT_OPTIONS: "bench-tps=2=--tx_count 15000 --thread-batch-sleep-ms 250" CLIENT_OPTIONS: "bench-tps=2=--tx_count 15000 --thread-batch-sleep-ms 250"
TESTNET_ZONES: "us-west1-a,us-west1-b,us-central1-a,europe-west4-a" TESTNET_ZONES: "us-west1-a,us-west1-b,us-central1-a,europe-west4-a"
ADDITIONAL_FLAGS: "" ADDITIONAL_FLAGS: "--dedicated --allow-boot-failures"
agents: agents:
- "queue=testnet-deploy" - "queue=testnet-deploy"

View File

@ -74,16 +74,18 @@ function launchTestnet() {
case $CLOUD_PROVIDER in case $CLOUD_PROVIDER in
gce) gce)
# shellcheck disable=SC2068 # shellcheck disable=SC2068
# shellcheck disable=SC2086
net/gce.sh create \ net/gce.sh create \
-d pd-ssd \ -d pd-ssd \
-n "$NUMBER_OF_VALIDATOR_NODES" -c "$NUMBER_OF_CLIENT_NODES" \ -n "$NUMBER_OF_VALIDATOR_NODES" -c "$NUMBER_OF_CLIENT_NODES" \
"$maybeMachineType" "$VALIDATOR_NODE_MACHINE_TYPE" \ $maybeCustomMachineType $VALIDATOR_NODE_MACHINE_TYPE "$maybeEnableGpu" \
-p "$TESTNET_TAG" ${TESTNET_CLOUD_ZONES[@]/#/"-z "} ${ADDITIONAL_FLAGS[@]/#/" "} -p "$TESTNET_TAG" ${TESTNET_CLOUD_ZONES[@]/#/"-z "} ${ADDITIONAL_FLAGS[@]/#/" "}
;; ;;
colo) colo)
# shellcheck disable=SC2068 # shellcheck disable=SC2068
# shellcheck disable=SC2086
net/colo.sh create \ net/colo.sh create \
-n "$NUMBER_OF_VALIDATOR_NODES" -c "$NUMBER_OF_CLIENT_NODES" -g \ -n "$NUMBER_OF_VALIDATOR_NODES" -c "$NUMBER_OF_CLIENT_NODES" "$maybeEnableGpu" \
-p "$TESTNET_TAG" ${ADDITIONAL_FLAGS[@]/#/" "} -p "$TESTNET_TAG" ${ADDITIONAL_FLAGS[@]/#/" "}
;; ;;
*) *)
@ -169,6 +171,13 @@ if [[ -z $NUMBER_OF_VALIDATOR_NODES ]] ; then
exit 1 exit 1
fi fi
if [[ -z $ENABLE_GPU ]] ; then
ENABLE_GPU=false
fi
if [[ "$ENABLE_GPU" = "true" ]] ; then
maybeEnableGpu="--enable-gpu"
fi
if [[ -z $NUMBER_OF_CLIENT_NODES ]] ; then if [[ -z $NUMBER_OF_CLIENT_NODES ]] ; then
echo NUMBER_OF_CLIENT_NODES not defined echo NUMBER_OF_CLIENT_NODES not defined
exit 1 exit 1
@ -193,7 +202,7 @@ source ci/upload-ci-artifact.sh
source system-test/testnet-performance/upload_results_to_slack.sh source system-test/testnet-performance/upload_results_to_slack.sh
maybeClientOptions=${CLIENT_OPTIONS:+"-c"} maybeClientOptions=${CLIENT_OPTIONS:+"-c"}
maybeMachineType=${VALIDATOR_NODE_MACHINE_TYPE:+"-G"} maybeCustomMachineType=${VALIDATOR_NODE_MACHINE_TYPE:+"--custom-machine-type"}
IFS=, read -r -a TESTNET_CLOUD_ZONES <<<"${TESTNET_ZONES}" IFS=, read -r -a TESTNET_CLOUD_ZONES <<<"${TESTNET_ZONES}"
@ -203,6 +212,7 @@ RESULT_DETAILS="Test failed to finish"
TEST_PARAMS_TO_DISPLAY=(CLOUD_PROVIDER \ TEST_PARAMS_TO_DISPLAY=(CLOUD_PROVIDER \
NUMBER_OF_VALIDATOR_NODES \ NUMBER_OF_VALIDATOR_NODES \
ENABLE_GPU \
VALIDATOR_NODE_MACHINE_TYPE \ VALIDATOR_NODE_MACHINE_TYPE \
NUMBER_OF_CLIENT_NODES \ NUMBER_OF_CLIENT_NODES \
CLIENT_OPTIONS \ CLIENT_OPTIONS \