Add AWS-based nets

This commit is contained in:
Michael Vines 2018-11-06 19:23:20 -08:00
parent 2af5aad032
commit dd4fb7aa90
5 changed files with 159 additions and 37 deletions

View File

@ -21,12 +21,13 @@ usage() {
echo "Error: $*" echo "Error: $*"
fi fi
cat <<EOF cat <<EOF
usage: $0 [name] [zone] [options...] usage: $0 [name] [cloud] [zone] [options...]
Deploys a CD testnet Deploys a CD testnet
name - name of the network name - name of the network
zone - zone to deploy the network into cloud - cloud provider to use (gce, ec2)
zone - cloud provider zone to deploy the network into
options: options:
-s edge|beta|stable - Deploy the specified Snap release channel -s edge|beta|stable - Deploy the specified Snap release channel
@ -48,10 +49,12 @@ EOF
} }
netName=$1 netName=$1
zone=$2 cloudProvider=$2
zone=$3
[[ -n $netName ]] || usage [[ -n $netName ]] || usage
[[ -n $cloudProvider ]] || usage "Cloud provider not specified"
[[ -n $zone ]] || usage "Zone not specified" [[ -n $zone ]] || usage "Zone not specified"
shift 2 shift 3
while getopts "h?p:Pn:c:s:t:gG:a:d" opt; do while getopts "h?p:Pn:c:s:t:gG:a:d" opt; do
case $opt in case $opt in
@ -108,7 +111,7 @@ while getopts "h?p:Pn:c:s:t:gG:a:d" opt; do
done done
gce_create_args=( create_args=(
-a "$leaderAddress" -a "$leaderAddress"
-c "$clientNodeCount" -c "$clientNodeCount"
-n "$validatorNodeCount" -n "$validatorNodeCount"
@ -118,26 +121,26 @@ gce_create_args=(
if $enableGpu; then if $enableGpu; then
if [[ -z $leaderMachineType ]]; then if [[ -z $leaderMachineType ]]; then
gce_create_args+=(-g) create_args+=(-g)
else else
gce_create_args+=(-G "$leaderMachineType") create_args+=(-G "$leaderMachineType")
fi fi
fi fi
if $publicNetwork; then if $publicNetwork; then
gce_create_args+=(-P) create_args+=(-P)
fi fi
set -x set -x
echo --- gce.sh delete echo "--- $cloudProvider.sh delete"
time net/gce.sh delete -z "$zone" -p "$netName" time net/"$cloudProvider".sh delete -z "$zone" -p "$netName"
if $delete; then if $delete; then
exit 0 exit 0
fi fi
echo --- gce.sh create echo "--- $cloudProvider.sh create"
time net/gce.sh create "${gce_create_args[@]}" time net/"$cloudProvider".sh create "${create_args[@]}"
net/init-metrics.sh -e net/init-metrics.sh -e
echo --- net.sh start echo --- net.sh start

View File

@ -8,7 +8,7 @@ if [[ -z $BUILDKITE ]]; then
fi fi
if [[ -z $SOLANA_METRICS_PARTIAL_CONFIG ]]; then if [[ -z $SOLANA_METRICS_PARTIAL_CONFIG ]]; then
echo SOLANA_METRICS_CONFIG not defined echo SOLANA_METRICS_PARTIAL_CONFIG not defined
exit 1 exit 1
fi fi
@ -37,6 +37,14 @@ steps:
value: "testnet-master" value: "testnet-master"
- label: "testnet-master-perf" - label: "testnet-master-perf"
value: "testnet-master-perf" value: "testnet-master-perf"
- label: "testnet-edge"
value: "testnet-edge"
- label: "testnet-edge-perf"
value: "testnet-edge-perf"
- label: "testnet-beta"
value: "testnet-beta"
- label: "testnet-beta-perf"
value: "testnet-beta-perf"
- select: "Operation" - select: "Operation"
key: "testnet-operation" key: "testnet-operation"
default: "sanity-or-restart" default: "sanity-or-restart"
@ -64,10 +72,14 @@ ci/channel-info.sh
eval "$(ci/channel-info.sh)" eval "$(ci/channel-info.sh)"
case $TESTNET in case $TESTNET in
testnet-master|testnet-master-perf) testnet-edge|testnet-edge-perf|testnet-master|testnet-master-perf)
CHANNEL_OR_TAG=edge CHANNEL_OR_TAG=edge
CHANNEL_BRANCH=$EDGE_CHANNEL CHANNEL_BRANCH=$EDGE_CHANNEL
;; ;;
testnet-beta|testnet-beta-perf)
# Both beta testnets track the "beta" channel.
# NOTE(review): BETA_CHANNEL is presumably exported by the preceding
# `eval "$(ci/channel-info.sh)"` -- confirm against that script.
CHANNEL_OR_TAG=beta
CHANNEL_BRANCH=$BETA_CHANNEL
;;
testnet|testnet-perf) testnet|testnet-perf)
if [[ -n $BETA_CHANNEL_LATEST_TAG ]]; then if [[ -n $BETA_CHANNEL_LATEST_TAG ]]; then
CHANNEL_OR_TAG=$BETA_CHANNEL_LATEST_TAG CHANNEL_OR_TAG=$BETA_CHANNEL_LATEST_TAG
@ -103,20 +115,67 @@ fi
sanity() { sanity() {
echo "--- sanity $TESTNET" echo "--- sanity $TESTNET"
case $TESTNET in case $TESTNET in
testnet-master-perf) testnet-edge)
# shellcheck disable=2030
# shellcheck disable=2031
( (
set -ex set -ex
ci/testnet-sanity.sh master-perf-testnet-solana-com us-west1-b export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-sanity.sh edge-testnet-solana-com ec2 us-east-1b
) )
;; ;;
testnet-master) testnet-edge-perf)
# shellcheck disable=2030 # shellcheck disable=2030
# shellcheck disable=2031
( (
set -ex set -ex
export REJECT_EXTRA_NODES=1 export REJECT_EXTRA_NODES=1
export NO_LEDGER_VERIFY=1 export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1 export NO_VALIDATOR_SANITY=1
ci/testnet-sanity.sh master-testnet-solana-com us-west1-b ci/testnet-sanity.sh edge-perf-testnet-solana-com ec2 us-east-1a
)
;;
testnet-beta)
# Sanity check the beta testnet: network name beta-testnet-solana-com,
# cloud provider ec2, zone us-west-1a (argument order: name, cloud, zone).
# The subshell keeps `set -ex` and the exports below from leaking into the
# caller, which is why shellcheck 2030/2031 are silenced.
# NOTE(review): NO_LEDGER_VERIFY / NO_VALIDATOR_SANITY are read by scripts
# not visible here; presumably they skip those sanity stages -- confirm.
# shellcheck disable=2030
# shellcheck disable=2031
(
set -ex
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-sanity.sh beta-testnet-solana-com ec2 us-west-1a
)
;;
testnet-beta-perf)
# Sanity check the beta perf testnet: network name
# beta-perf-testnet-solana-com, cloud provider ec2, zone us-west-2b
# (argument order: name, cloud, zone).
# The subshell keeps `set -ex` and the exports below from leaking into the
# caller, which is why shellcheck 2030/2031 are silenced.
# NOTE(review): REJECT_EXTRA_NODES / NO_LEDGER_VERIFY / NO_VALIDATOR_SANITY
# are read by scripts not visible here -- confirm their exact effect.
# shellcheck disable=2030
# shellcheck disable=2031
(
set -ex
export REJECT_EXTRA_NODES=1
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-sanity.sh beta-perf-testnet-solana-com ec2 us-west-2b
)
;;
testnet-master)
# Sanity check the master testnet: network name master-testnet-solana-com,
# cloud provider gce, zone us-west1-b (argument order: name, cloud, zone).
# The subshell keeps `set -ex` and the exports below from leaking into the
# caller, which is why shellcheck 2030/2031 are silenced.
# NOTE(review): NO_LEDGER_VERIFY / NO_VALIDATOR_SANITY are read by scripts
# not visible here; presumably they skip those sanity stages -- confirm.
# shellcheck disable=2030
# shellcheck disable=2031
(
set -ex
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-sanity.sh master-testnet-solana-com gce us-west1-b
)
;;
testnet-master-perf)
# shellcheck disable=2030
# shellcheck disable=2031
(
set -ex
export REJECT_EXTRA_NODES=1
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-sanity.sh master-perf-testnet-solana-com gce us-west1-b
) )
;; ;;
testnet) testnet)
@ -126,7 +185,7 @@ sanity() {
set -ex set -ex
export NO_LEDGER_VERIFY=1 export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1 export NO_VALIDATOR_SANITY=1
ci/testnet-sanity.sh testnet-solana-com us-east1-c ci/testnet-sanity.sh testnet-solana-com gce us-east1-c
) )
;; ;;
testnet-perf) testnet-perf)
@ -137,7 +196,7 @@ sanity() {
export REJECT_EXTRA_NODES=1 export REJECT_EXTRA_NODES=1
export NO_LEDGER_VERIFY=1 export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1 export NO_VALIDATOR_SANITY=1
ci/testnet-sanity.sh perf-testnet-solana-com us-west1-b ci/testnet-sanity.sh perf-testnet-solana-com gce us-west1-b
) )
;; ;;
*) *)
@ -157,12 +216,51 @@ start() {
fi fi
case $TESTNET in case $TESTNET in
testnet-master-perf) testnet-edge)
# shellcheck disable=2030
# shellcheck disable=2031
( (
set -ex set -ex
ci/testnet-deploy.sh master-perf-testnet-solana-com us-west1-b \ export NO_LEDGER_VERIFY=1
-G "n1-standard-16 --accelerator count=2,type=nvidia-tesla-v100" \ export NO_VALIDATOR_SANITY=1
-t "$CHANNEL_OR_TAG" -c 2 \ ci/testnet-deploy.sh edge-testnet-solana-com ec2 us-east-1b \
-s "$CHANNEL_OR_TAG" -n 3 -c 0 -P \
${maybeDelete:+-d}
)
;;
testnet-edge-perf)
# (Re)deploy the edge perf testnet: network name
# edge-perf-testnet-solana-com, cloud provider ec2, zone us-east-1a.
# ${maybeDelete:+-d} expands to `-d` only when maybeDelete is set and
# non-empty; otherwise it expands to nothing.
# NOTE(review): -g presumably enables GPU with the provider's default
# leader machine type, -t selects the release by channel/tag and -c is the
# client node count -- confirm against ci/testnet-deploy.sh's option parser.
# shellcheck disable=2030
# shellcheck disable=2031
(
set -ex
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-deploy.sh edge-perf-testnet-solana-com ec2 us-east-1a \
-g -t "$CHANNEL_OR_TAG" -c 2 \
${maybeDelete:+-d}
)
;;
testnet-beta)
# (Re)deploy the beta testnet: network name beta-testnet-solana-com, cloud
# provider ec2, zone us-west-1a. The name must match the one the sanity
# step checks for testnet-beta; deploying under the edge network's name
# (edge-testnet-solana-com) would leave the beta sanity check without a
# network and would collide with the testnet-edge deployment's name.
# ${maybeDelete:+-d} expands to `-d` only when maybeDelete is set and
# non-empty; otherwise it expands to nothing.
# NOTE(review): -s selects the Snap release channel, -n/-c are presumably
# the validator/client node counts and -P requests a public network --
# confirm against ci/testnet-deploy.sh's option parser.
# shellcheck disable=2030
# shellcheck disable=2031
(
set -ex
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-deploy.sh beta-testnet-solana-com ec2 us-west-1a \
-s "$CHANNEL_OR_TAG" -n 3 -c 0 -P \
${maybeDelete:+-d}
)
;;
testnet-beta-perf)
# shellcheck disable=2030
# shellcheck disable=2031
(
set -ex
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-deploy.sh beta-perf-testnet-solana-com ec2 us-west-2b \
-g -t "$CHANNEL_OR_TAG" -c 2 \
${maybeDelete:+-d} ${maybeDelete:+-d}
) )
;; ;;
@ -173,11 +271,24 @@ start() {
set -ex set -ex
export NO_LEDGER_VERIFY=1 export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1 export NO_VALIDATOR_SANITY=1
ci/testnet-deploy.sh master-testnet-solana-com us-west1-b \ ci/testnet-deploy.sh master-testnet-solana-com gce us-west1-b \
-s "$CHANNEL_OR_TAG" -n 3 -c 0 -P -a master-testnet-solana-com \ -s "$CHANNEL_OR_TAG" -n 3 -c 0 -P -a master-testnet-solana-com \
${maybeDelete:+-d} ${maybeDelete:+-d}
) )
;; ;;
testnet-master-perf)
# (Re)deploy the master perf testnet: network name
# master-perf-testnet-solana-com, cloud provider gce, zone us-west1-b.
# -G passes an explicit leader machine type string (here a GCE machine type
# plus accelerator flags) instead of relying on a default.
# ${maybeDelete:+-d} expands to `-d` only when maybeDelete is set and
# non-empty; otherwise it expands to nothing.
# NOTE(review): -t presumably selects the release by channel/tag and -c the
# client node count -- confirm against ci/testnet-deploy.sh's option parser.
# shellcheck disable=2030
# shellcheck disable=2031
(
set -ex
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-deploy.sh master-perf-testnet-solana-com gce us-west1-b \
-G "n1-standard-16 --accelerator count=2,type=nvidia-tesla-v100" \
-t "$CHANNEL_OR_TAG" -c 2 \
${maybeDelete:+-d}
)
;;
testnet) testnet)
# shellcheck disable=2030 # shellcheck disable=2030
# shellcheck disable=2031 # shellcheck disable=2031
@ -185,7 +296,7 @@ start() {
set -ex set -ex
export NO_LEDGER_VERIFY=1 export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1 export NO_VALIDATOR_SANITY=1
ci/testnet-deploy.sh testnet-solana-com us-east1-c \ ci/testnet-deploy.sh testnet-solana-com gce us-east1-c \
-s "$CHANNEL_OR_TAG" -n 3 -g -c 0 -P -a testnet-solana-com \ -s "$CHANNEL_OR_TAG" -n 3 -g -c 0 -P -a testnet-solana-com \
${maybeDelete:+-d} ${maybeDelete:+-d}
) )
@ -197,7 +308,7 @@ start() {
set -ex set -ex
export NO_LEDGER_VERIFY=1 export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1 export NO_VALIDATOR_SANITY=1
ci/testnet-deploy.sh perf-testnet-solana-com us-west1-b \ ci/testnet-deploy.sh perf-testnet-solana-com gce us-west1-b \
-G "n1-standard-16 --accelerator count=2,type=nvidia-tesla-v100" \ -G "n1-standard-16 --accelerator count=2,type=nvidia-tesla-v100" \
-t "$CHANNEL_OR_TAG" -c 2 \ -t "$CHANNEL_OR_TAG" -c 2 \
${maybeDelete:+-d} ${maybeDelete:+-d}

View File

@ -9,12 +9,13 @@ usage() {
echo "Error: $*" echo "Error: $*"
fi fi
cat <<EOF cat <<EOF
usage: $0 [name] [zone] usage: $0 [name] [cloud] [zone]
Sanity check a CD testnet Sanity check a CD testnet
name - name of the network name - name of the network
zone - zone of the network cloud - cloud provider to use (gce, ec2)
zone - cloud provider zone of the network
Note: the SOLANA_METRICS_CONFIG environment variable is used to configure Note: the SOLANA_METRICS_CONFIG environment variable is used to configure
metrics metrics
@ -23,13 +24,15 @@ EOF
} }
netName=$1 netName=$1
zone=$2 cloudProvider=$2
zone=$3
[[ -n $netName ]] || usage "" [[ -n $netName ]] || usage ""
[[ -n $cloudProvider ]] || usage "Cloud provider not specified"
[[ -n $zone ]] || usage "Zone not specified" [[ -n $zone ]] || usage "Zone not specified"
set -x set -x
echo --- gce.sh config echo "--- $cloudProvider.sh config"
net/gce.sh config -p "$netName" -z "$zone" net/"$cloudProvider".sh config -p "$netName" -z "$zone"
net/init-metrics.sh -e net/init-metrics.sh -e
echo --- net.sh sanity echo --- net.sh sanity
net/net.sh sanity \ net/net.sh sanity \

View File

@ -36,8 +36,8 @@ ec2)
cpuLeaderMachineType=m4.4xlarge cpuLeaderMachineType=m4.4xlarge
gpuLeaderMachineType=p2.xlarge gpuLeaderMachineType=p2.xlarge
leaderMachineType=$cpuLeaderMachineType leaderMachineType=$cpuLeaderMachineType
validatorMachineType=m4.4xlarge validatorMachineType=m4.2xlarge
clientMachineType=m4.4xlarge clientMachineType=m4.2xlarge
;; ;;
*) *)
echo "Error: Unknown cloud provider: $cloudProvider" echo "Error: Unknown cloud provider: $cloudProvider"
@ -218,13 +218,16 @@ EOF
echo "Waiting for $name to finish booting..." echo "Waiting for $name to finish booting..."
( (
for i in $(seq 1 30); do set -x
if (set -x; timeout 20s ssh "${sshOptions[@]}" "$publicIp" "test -f /.instance-startup-complete"); then for i in $(seq 1 45); do
break if timeout 20s ssh "${sshOptions[@]}" "$publicIp" "test -f /.instance-startup-complete"; then
exit 0
fi fi
sleep 2 sleep 2
echo "Retry $i..." echo "Retry $i..."
done done
echo "$name failed to boot."
exit 1
) )
echo "$name has booted." echo "$name has booted."
} }
@ -389,6 +392,7 @@ $(
install-libssl-compatability.sh \ install-libssl-compatability.sh \
install-rsync.sh \ install-rsync.sh \
network-config.sh \ network-config.sh \
) )
cat > /etc/motd <<EOM cat > /etc/motd <<EOM

View File

@ -85,6 +85,7 @@ local|tar)
./fetch-perf-libs.sh ./fetch-perf-libs.sh
export LD_LIBRARY_PATH="$PWD/target/perf-libs:$LD_LIBRARY_PATH" export LD_LIBRARY_PATH="$PWD/target/perf-libs:$LD_LIBRARY_PATH"
echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
scripts/oom-monitor.sh > oom-monitor.log 2>&1 & scripts/oom-monitor.sh > oom-monitor.log 2>&1 &
scripts/net-stats.sh > net-stats.log 2>&1 & scripts/net-stats.sh > net-stats.log 2>&1 &