Add AWS-based nets

This commit is contained in:
Michael Vines 2018-11-06 19:23:20 -08:00
parent 2af5aad032
commit dd4fb7aa90
5 changed files with 159 additions and 37 deletions

View File

@ -21,12 +21,13 @@ usage() {
echo "Error: $*"
fi
cat <<EOF
usage: $0 [name] [zone] [options...]
usage: $0 [name] [cloud] [zone] [options...]
Deploys a CD testnet
name - name of the network
zone - zone to deploy the network into
cloud - cloud provider to use (gce, ec2)
zone - cloud provider zone to deploy the network into
options:
-s edge|beta|stable - Deploy the specified Snap release channel
@ -48,10 +49,12 @@ EOF
}
netName=$1
zone=$2
cloudProvider=$2
zone=$3
[[ -n $netName ]] || usage
[[ -n $cloudProvider ]] || usage "Cloud provider not specified"
[[ -n $zone ]] || usage "Zone not specified"
shift 2
shift 3
while getopts "h?p:Pn:c:s:t:gG:a:d" opt; do
case $opt in
@ -108,7 +111,7 @@ while getopts "h?p:Pn:c:s:t:gG:a:d" opt; do
done
gce_create_args=(
create_args=(
-a "$leaderAddress"
-c "$clientNodeCount"
-n "$validatorNodeCount"
@ -118,26 +121,26 @@ gce_create_args=(
if $enableGpu; then
if [[ -z $leaderMachineType ]]; then
gce_create_args+=(-g)
create_args+=(-g)
else
gce_create_args+=(-G "$leaderMachineType")
create_args+=(-G "$leaderMachineType")
fi
fi
if $publicNetwork; then
gce_create_args+=(-P)
create_args+=(-P)
fi
set -x
echo --- gce.sh delete
time net/gce.sh delete -z "$zone" -p "$netName"
echo "--- $cloudProvider.sh delete"
time net/"$cloudProvider".sh delete -z "$zone" -p "$netName"
if $delete; then
exit 0
fi
echo --- gce.sh create
time net/gce.sh create "${gce_create_args[@]}"
echo "--- $cloudProvider.sh create"
time net/"$cloudProvider".sh create "${create_args[@]}"
net/init-metrics.sh -e
echo --- net.sh start

View File

@ -8,7 +8,7 @@ if [[ -z $BUILDKITE ]]; then
fi
if [[ -z $SOLANA_METRICS_PARTIAL_CONFIG ]]; then
echo SOLANA_METRICS_CONFIG not defined
echo SOLANA_METRICS_PARTIAL_CONFIG not defined
exit 1
fi
@ -37,6 +37,14 @@ steps:
value: "testnet-master"
- label: "testnet-master-perf"
value: "testnet-master-perf"
- label: "testnet-edge"
value: "testnet-edge"
- label: "testnet-edge-perf"
value: "testnet-edge-perf"
- label: "testnet-beta"
value: "testnet-beta"
- label: "testnet-beta-perf"
value: "testnet-beta-perf"
- select: "Operation"
key: "testnet-operation"
default: "sanity-or-restart"
@ -64,10 +72,14 @@ ci/channel-info.sh
eval "$(ci/channel-info.sh)"
case $TESTNET in
testnet-master|testnet-master-perf)
testnet-edge|testnet-edge-perf|testnet-master|testnet-master-perf)
CHANNEL_OR_TAG=edge
CHANNEL_BRANCH=$EDGE_CHANNEL
;;
testnet-beta|testnet-beta-perf)
CHANNEL_OR_TAG=beta
CHANNEL_BRANCH=$BETA_CHANNEL
;;
testnet|testnet-perf)
if [[ -n $BETA_CHANNEL_LATEST_TAG ]]; then
CHANNEL_OR_TAG=$BETA_CHANNEL_LATEST_TAG
@ -103,20 +115,67 @@ fi
sanity() {
echo "--- sanity $TESTNET"
case $TESTNET in
testnet-master-perf)
testnet-edge)
# shellcheck disable=2030
# shellcheck disable=2031
(
set -ex
ci/testnet-sanity.sh master-perf-testnet-solana-com us-west1-b
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-sanity.sh edge-testnet-solana-com ec2 us-east-1b
)
;;
testnet-master)
testnet-edge-perf)
# shellcheck disable=2030
# shellcheck disable=2031
(
set -ex
export REJECT_EXTRA_NODES=1
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-sanity.sh master-testnet-solana-com us-west1-b
ci/testnet-sanity.sh edge-perf-testnet-solana-com ec2 us-east-1a
)
;;
testnet-beta)
# shellcheck disable=2030
# shellcheck disable=2031
(
set -ex
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-sanity.sh beta-testnet-solana-com ec2 us-west-1a
)
;;
testnet-beta-perf)
# shellcheck disable=2030
# shellcheck disable=2031
(
set -ex
export REJECT_EXTRA_NODES=1
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-sanity.sh beta-perf-testnet-solana-com ec2 us-west-2b
)
;;
testnet-master)
# shellcheck disable=2030
# shellcheck disable=2031
(
set -ex
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-sanity.sh master-testnet-solana-com gce us-west1-b
)
;;
testnet-master-perf)
# shellcheck disable=2030
# shellcheck disable=2031
(
set -ex
export REJECT_EXTRA_NODES=1
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-sanity.sh master-perf-testnet-solana-com gce us-west1-b
)
;;
testnet)
@ -126,7 +185,7 @@ sanity() {
set -ex
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-sanity.sh testnet-solana-com us-east1-c
ci/testnet-sanity.sh testnet-solana-com gce us-east1-c
)
;;
testnet-perf)
@ -137,7 +196,7 @@ sanity() {
export REJECT_EXTRA_NODES=1
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-sanity.sh perf-testnet-solana-com us-west1-b
ci/testnet-sanity.sh perf-testnet-solana-com gce us-west1-b
)
;;
*)
@ -157,12 +216,51 @@ start() {
fi
case $TESTNET in
testnet-master-perf)
testnet-edge)
# shellcheck disable=2030
# shellcheck disable=2031
(
set -ex
ci/testnet-deploy.sh master-perf-testnet-solana-com us-west1-b \
-G "n1-standard-16 --accelerator count=2,type=nvidia-tesla-v100" \
-t "$CHANNEL_OR_TAG" -c 2 \
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-deploy.sh edge-testnet-solana-com ec2 us-east-1b \
-s "$CHANNEL_OR_TAG" -n 3 -c 0 -P \
${maybeDelete:+-d}
)
;;
testnet-edge-perf)
# shellcheck disable=2030
# shellcheck disable=2031
(
set -ex
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-deploy.sh edge-perf-testnet-solana-com ec2 us-east-1a \
-g -t "$CHANNEL_OR_TAG" -c 2 \
${maybeDelete:+-d}
)
;;
testnet-beta)
# Deploy the beta testnet on EC2. The network name must be
# beta-testnet-solana-com: that is the name sanity() checks for this
# testnet in the same zone (ec2 us-west-1a). The edge name belongs to the
# testnet-edge arm, which deploys to us-east-1b.
# shellcheck disable=2030
# shellcheck disable=2031
(
set -ex
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-deploy.sh beta-testnet-solana-com ec2 us-west-1a \
-s "$CHANNEL_OR_TAG" -n 3 -c 0 -P \
${maybeDelete:+-d}
)
;;
testnet-beta-perf)
# shellcheck disable=2030
# shellcheck disable=2031
(
set -ex
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-deploy.sh beta-perf-testnet-solana-com ec2 us-west-2b \
-g -t "$CHANNEL_OR_TAG" -c 2 \
${maybeDelete:+-d}
)
;;
@ -173,11 +271,24 @@ start() {
set -ex
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-deploy.sh master-testnet-solana-com us-west1-b \
ci/testnet-deploy.sh master-testnet-solana-com gce us-west1-b \
-s "$CHANNEL_OR_TAG" -n 3 -c 0 -P -a master-testnet-solana-com \
${maybeDelete:+-d}
)
;;
testnet-master-perf)
# shellcheck disable=2030
# shellcheck disable=2031
(
set -ex
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-deploy.sh master-perf-testnet-solana-com gce us-west1-b \
-G "n1-standard-16 --accelerator count=2,type=nvidia-tesla-v100" \
-t "$CHANNEL_OR_TAG" -c 2 \
${maybeDelete:+-d}
)
;;
testnet)
# shellcheck disable=2030
# shellcheck disable=2031
@ -185,7 +296,7 @@ start() {
set -ex
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-deploy.sh testnet-solana-com us-east1-c \
ci/testnet-deploy.sh testnet-solana-com gce us-east1-c \
-s "$CHANNEL_OR_TAG" -n 3 -g -c 0 -P -a testnet-solana-com \
${maybeDelete:+-d}
)
@ -197,7 +308,7 @@ start() {
set -ex
export NO_LEDGER_VERIFY=1
export NO_VALIDATOR_SANITY=1
ci/testnet-deploy.sh perf-testnet-solana-com us-west1-b \
ci/testnet-deploy.sh perf-testnet-solana-com gce us-west1-b \
-G "n1-standard-16 --accelerator count=2,type=nvidia-tesla-v100" \
-t "$CHANNEL_OR_TAG" -c 2 \
${maybeDelete:+-d}

View File

@ -9,12 +9,13 @@ usage() {
echo "Error: $*"
fi
cat <<EOF
usage: $0 [name] [zone]
usage: $0 [name] [cloud] [zone]
Sanity check a CD testnet
name - name of the network
zone - zone of the network
cloud - cloud provider to use (gce, ec2)
zone - cloud provider zone of the network
Note: the SOLANA_METRICS_CONFIG environment variable is used to configure
metrics
@ -23,13 +24,15 @@ EOF
}
netName=$1
zone=$2
cloudProvider=$2
zone=$3
[[ -n $netName ]] || usage ""
[[ -n $cloudProvider ]] || usage "Cloud provider not specified"
[[ -n $zone ]] || usage "Zone not specified"
set -x
echo --- gce.sh config
net/gce.sh config -p "$netName" -z "$zone"
echo "--- $cloudProvider.sh config"
net/"$cloudProvider".sh config -p "$netName" -z "$zone"
net/init-metrics.sh -e
echo --- net.sh sanity
net/net.sh sanity \

View File

@ -36,8 +36,8 @@ ec2)
cpuLeaderMachineType=m4.4xlarge
gpuLeaderMachineType=p2.xlarge
leaderMachineType=$cpuLeaderMachineType
validatorMachineType=m4.4xlarge
clientMachineType=m4.4xlarge
validatorMachineType=m4.2xlarge
clientMachineType=m4.2xlarge
;;
*)
echo "Error: Unknown cloud provider: $cloudProvider"
@ -218,13 +218,16 @@ EOF
echo "Waiting for $name to finish booting..."
(
for i in $(seq 1 30); do
if (set -x; timeout 20s ssh "${sshOptions[@]}" "$publicIp" "test -f /.instance-startup-complete"); then
break
set -x
for i in $(seq 1 45); do
if timeout 20s ssh "${sshOptions[@]}" "$publicIp" "test -f /.instance-startup-complete"; then
exit 0
fi
sleep 2
echo "Retry $i..."
done
echo "$name failed to boot."
exit 1
)
echo "$name has booted."
}
@ -389,6 +392,7 @@ $(
install-libssl-compatability.sh \
install-rsync.sh \
network-config.sh \
)
cat > /etc/motd <<EOM

View File

@ -85,6 +85,7 @@ local|tar)
./fetch-perf-libs.sh
export LD_LIBRARY_PATH="$PWD/target/perf-libs:$LD_LIBRARY_PATH"
echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH"
scripts/oom-monitor.sh > oom-monitor.log 2>&1 &
scripts/net-stats.sh > net-stats.log 2>&1 &