From 5ac173d2089e40a70f84192a1e31801d43b101c8 Mon Sep 17 00:00:00 2001
From: Dan Albert
Date: Fri, 12 Jul 2019 09:38:47 -0600
Subject: [PATCH] Enable GPUs and secondary disks for TdS net, pull external account file (#5031)

* Enable V100 GPUs over 3 regions for TdS cluster

* Turn on secondary config-local drive for tds net

* Enable long args bypass for GPU machine details

* bypass quoted long arg

* Pull external account file from wget

* typo

* Symlink config-local instead of changing the path variables

* Fix link path
---
 ci/testnet-deploy.sh     |  3 +++
 ci/testnet-manager.sh    | 20 ++++++++++++++++----
 multinode-demo/common.sh |  9 ++++++---
 net/gce.sh               |  3 +++
 4 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/ci/testnet-deploy.sh b/ci/testnet-deploy.sh
index 8a8bcb6151..39bf332847 100755
--- a/ci/testnet-deploy.sh
+++ b/ci/testnet-deploy.sh
@@ -119,6 +119,9 @@ while [[ -n $1 ]]; do
     elif [[ $1 = --fullnode-additional-disk-size-gb ]]; then
       maybeFullnodeAdditionalDiskSize="$1 $2"
       shift 2
+    elif [[ $1 == --machine-type* ]]; then # Bypass quoted long args for GPUs
+      shortArgs+=("$1")
+      shift
     else
       usage "Unknown long option: $1"
     fi
diff --git a/ci/testnet-manager.sh b/ci/testnet-manager.sh
index a37d381122..ca06fa558e 100755
--- a/ci/testnet-manager.sh
+++ b/ci/testnet-manager.sh
@@ -469,12 +469,23 @@ deploy() {
   tds)
     (
       set -x
-      # TODO: Should we spread the few nodes around multiple zones?
+
+      EXTERNAL_ACCOUNTS_FILE_URL=https://raw.githubusercontent.com/solana-labs/tour-de-sol/master/stage1/validator.yml
+      EXTERNAL_ACCOUNTS_FILE=/tmp/validator.yml
+
+      wget ${EXTERNAL_ACCOUNTS_FILE_URL} -O ${EXTERNAL_ACCOUNTS_FILE}
+
+      # Multiple V100 GPUs are available in us-west1, us-central1 and europe-west4
       # shellcheck disable=SC2068
       # shellcheck disable=SC2086
       NO_LEDGER_VERIFY=1 \
       NO_VALIDATOR_SANITY=1 \
-        ci/testnet-deploy.sh -p tds-solana-com -C gce ${GCE_ZONE_ARGS[0]} \
+        ci/testnet-deploy.sh -p tds-solana-com -C gce \
+          -G "--machine-type n1-standard-16 --accelerator count=2,type=nvidia-tesla-v100" \
+          -d pd-ssd \
+          -z us-west1-a \
+          -z us-central1-a \
+          -z europe-west4-a \
           -t "$CHANNEL_OR_TAG" -n "$GCE_NODE_COUNT" -c 1 -P -u \
           -a tds-solana-com --letsencrypt tds.solana.com \
           --hashes-per-tick auto \
@@ -483,9 +494,10 @@ deploy() {
           ${maybeStop:+-S} \
           ${maybeDelete:+-D} \
           --stake-internal-nodes 1000000000000 \
-          --external-accounts-file /tmp/stakes.yml \
+          --external-accounts-file "$EXTERNAL_ACCOUNTS_FILE" \
           --lamports 8589934592000000000 \
-          --skip-deploy-update
+          --skip-deploy-update \
+          --fullnode-additional-disk-size-gb 32000
     )
     ;;
 
diff --git a/multinode-demo/common.sh b/multinode-demo/common.sh
index 139a9556f6..eefbcdb7b6 100644
--- a/multinode-demo/common.sh
+++ b/multinode-demo/common.sh
@@ -70,11 +70,14 @@ source "$SOLANA_ROOT"/scripts/configure-metrics.sh
 SOLANA_RSYNC_CONFIG_DIR=$SOLANA_ROOT/config
 
 # Configuration that remains local
+SOLANA_CONFIG_DIR=$SOLANA_ROOT/config-local
+
+# If there is a secondary disk, symlink the config-local dir there
 SECONDARY_DISK_MOUNT_POINT=/mnt/extra-disk
 if [[ -d $SECONDARY_DISK_MOUNT_POINT ]]; then
-  SOLANA_CONFIG_DIR=$SECONDARY_DISK_MOUNT_POINT/config-local
-else
-  SOLANA_CONFIG_DIR=$SOLANA_ROOT/config-local
+  mkdir -p $SECONDARY_DISK_MOUNT_POINT/config-local
+  mkdir -p "$SOLANA_ROOT"
+  ln -s $SECONDARY_DISK_MOUNT_POINT/config-local "$SOLANA_ROOT"
 fi
 
 default_arg() {
diff --git a/net/gce.sh b/net/gce.sh
index 09c7834d57..a71fb9549c 100755
--- a/net/gce.sh
+++ b/net/gce.sh
@@ -159,6 +159,9 @@ while [[ -n $1 ]]; do
     elif [[ $1 = --fullnode-additional-disk-size-gb ]]; then
       fullNodeAdditionalDiskSizeInGb="$2"
       shift 2
+    elif [[ $1 == --machine-type* ]]; then # Bypass quoted long args for GPUs
+      shortArgs+=("$1")
+      shift
     else
       usage "Unknown long option: $1"
     fi