testnet-demo now runs across more GCE zones (#4053)

* testnet-demo now runs across more GCE zones

* Save zone info to config file

* Add geoip whitelist for common data centers

* Skip more of start

* Include -x for config

* Fetch private key from first validator node if necessary

* Correct -r propagation
This commit is contained in:
Michael Vines 2019-04-28 19:50:02 -07:00
parent 4e5677f116
commit 0cc3956693
No known key found for this signature in database
GPG Key ID: 33F4FDEC4E0E88BD
5 changed files with 215 additions and 69 deletions

View File

@ -11,7 +11,8 @@ clientNodeCount=0
additionalFullNodeCount=10
publicNetwork=false
stopNetwork=false
skipSetup=false
reuseLedger=false
skipCreate=false
skipStart=false
externalNode=false
failOnValidatorBootupFailure=true
@ -56,6 +57,7 @@ Deploys a CD testnet
-r - Reuse existing node/ledger configuration from a
previous |start| (ie, don't run ./multinode-demo/setup.sh).
-x - External node. Default: false
-e - Skip create. Assume the nodes have already been created
-s - Skip start. Nodes will still be created or configured, but network software will not be started.
-S - Stop network software without tearing down nodes.
-f - Discard validator nodes that didn't bootup successfully
@ -68,7 +70,7 @@ EOF
zone=()
while getopts "h?p:Pn:c:t:gG:a:Dbd:rusxz:p:C:Sf" opt; do
while getopts "h?p:Pn:c:t:gG:a:Dbd:rusxz:p:C:Sfe" opt; do
case $opt in
h | \?)
usage
@ -121,7 +123,10 @@ while getopts "h?p:Pn:c:t:gG:a:Dbd:rusxz:p:C:Sf" opt; do
delete=true
;;
r)
skipSetup=true
reuseLedger=true
;;
e)
skipCreate=true
;;
s)
skipStart=true
@ -175,15 +180,15 @@ for val in "${zone[@]}"; do
done
if $stopNetwork; then
skipSetup=true
skipCreate=true
fi
if $delete; then
skipSetup=false
skipCreate=false
fi
# Create the network
if ! $skipSetup; then
if ! $skipCreate; then
echo "--- $cloudProvider.sh delete"
# shellcheck disable=SC2068
time net/"$cloudProvider".sh delete ${zone_args[@]} -p "$netName" ${externalNode:+-x}
@ -245,6 +250,10 @@ else
config_args+=(-P)
fi
if $externalNode; then
config_args+=(-x)
fi
if ! $failOnValidatorBootupFailure; then
config_args+=(-f)
fi
@ -262,35 +271,35 @@ if $stopNetwork; then
exit 0
fi
echo --- net.sh start
maybeRejectExtraNodes=
if ! $publicNetwork; then
maybeRejectExtraNodes="-o rejectExtraNodes"
fi
maybeNoValidatorSanity=
if [[ -n $NO_VALIDATOR_SANITY ]]; then
maybeNoValidatorSanity="-o noValidatorSanity"
fi
maybeNoLedgerVerify=
if [[ -n $NO_LEDGER_VERIFY ]]; then
maybeNoLedgerVerify="-o noLedgerVerify"
fi
maybeSkipSetup=
if $skipSetup; then
maybeSkipSetup="-r"
fi
ok=true
if ! $skipStart; then
(
if $skipSetup; then
if $skipCreate; then
# TODO: Enable rolling updates
#op=update
op=restart
else
op=start
fi
echo "--- net.sh $op"
maybeRejectExtraNodes=
if ! $publicNetwork; then
maybeRejectExtraNodes="-o rejectExtraNodes"
fi
maybeNoValidatorSanity=
if [[ -n $NO_VALIDATOR_SANITY ]]; then
maybeNoValidatorSanity="-o noValidatorSanity"
fi
maybeNoLedgerVerify=
if [[ -n $NO_LEDGER_VERIFY ]]; then
maybeNoLedgerVerify="-o noLedgerVerify"
fi
maybeReuseLedger=
if $reuseLedger; then
maybeReuseLedger="-r"
fi
maybeUpdateManifestKeypairFile=
# shellcheck disable=SC2154 # SOLANA_INSTALL_UPDATE_MANIFEST_KEYPAIR_x86_64_unknown_linux_gnu comes from .buildkite/env/
@ -302,7 +311,7 @@ if ! $skipStart; then
# shellcheck disable=SC2086 # Don't want to double quote the $maybeXYZ variables
time net/net.sh $op -t "$tarChannelOrTag" \
$maybeUpdateManifestKeypairFile \
$maybeSkipSetup \
$maybeReuseLedger \
$maybeRejectExtraNodes \
$maybeNoValidatorSanity \
$maybeNoLedgerVerify

View File

@ -81,11 +81,38 @@ eval "$(ci/channel-info.sh)"
EC2_ZONES=(us-west-1a sa-east-1a ap-northeast-2a eu-central-1a ca-central-1a)
# GCE zones with _lots_ of quota
GCE_ZONES=(
us-west1-a us-west1-b us-west1-c
us-central1-a us-central1-b
us-east1-b us-east1-c us-east1-d
europe-west4-a europe-west4-b europe-west4-c
us-west1-a
us-central1-a
us-east1-b
europe-west4-a
us-west1-b
us-central1-b
us-east1-c
europe-west4-b
us-west1-c
us-east1-d
europe-west4-c
)
# GCE zones with enough quota for one CPU-only fullnode
GCE_LOW_QUOTA_ZONES=(
asia-east2-a
asia-northeast1-b
asia-northeast2-b
asia-south1-c
asia-southeast1-b
australia-southeast1-b
europe-north1-a
europe-west2-b
europe-west3-c
europe-west6-a
northamerica-northeast1-a
southamerica-east1-b
)
case $TESTNET in
@ -113,7 +140,8 @@ testnet-perf)
testnet-demo)
CHANNEL_OR_TAG=beta
CHANNEL_BRANCH=$BETA_CHANNEL
: "${GCE_NODE_COUNT:=200}"
: "${GCE_NODE_COUNT:=186}"
: "${GCE_LOW_QUOTA_NODE_COUNT:=12}"
: "${TESTNET_DB_HOST:=https://clocktower-f1d56615.influxcloud.net:8086}"
;;
*)
@ -130,6 +158,10 @@ GCE_ZONE_ARGS=()
for val in "${GCE_ZONES[@]}"; do
GCE_ZONE_ARGS+=("-z $val")
done
GCE_LOW_QUOTA_ZONE_ARGS=()
for val in "${GCE_LOW_QUOTA_ZONES[@]}"; do
GCE_LOW_QUOTA_ZONE_ARGS+=("-z $val")
done
if [[ -n $TESTNET_DB_HOST ]]; then
SOLANA_METRICS_PARTIAL_CONFIG="host=$TESTNET_DB_HOST,$SOLANA_METRICS_PARTIAL_CONFIG"
@ -158,6 +190,7 @@ steps:
TESTNET_DB_HOST: "$TESTNET_DB_HOST"
EC2_NODE_COUNT: "$EC2_NODE_COUNT"
GCE_NODE_COUNT: "$GCE_NODE_COUNT"
GCE_LOW_QUOTA_NODE_COUNT: "$GCE_LOW_QUOTA_NODE_COUNT"
EOF
) | buildkite-agent pipeline upload
exit 0
@ -277,7 +310,7 @@ deploy() {
set -x
ci/testnet-deploy.sh -p edge-testnet-solana-com -C ec2 -z us-west-1a \
-t "$CHANNEL_OR_TAG" -n 3 -c 0 -u -P -a eipalloc-0ccd4f2239886fa94 \
${skipCreate:+-r} \
${skipCreate:+-e} \
${skipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D}
@ -292,7 +325,7 @@ deploy() {
ci/testnet-deploy.sh -p edge-perf-testnet-solana-com -C ec2 -z us-west-2b \
-g -t "$CHANNEL_OR_TAG" -c 2 \
-b \
${skipCreate:+-r} \
${skipCreate:+-e} \
${skipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D}
@ -305,7 +338,7 @@ deploy() {
ci/testnet-deploy.sh -p beta-testnet-solana-com -C ec2 -z us-west-1a \
-t "$CHANNEL_OR_TAG" -n 3 -c 0 -u -P -a eipalloc-0f286cf8a0771ce35 \
-b \
${skipCreate:+-r} \
${skipCreate:+-e} \
${skipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D}
@ -320,7 +353,7 @@ deploy() {
ci/testnet-deploy.sh -p beta-perf-testnet-solana-com -C ec2 -z us-west-2b \
-g -t "$CHANNEL_OR_TAG" -c 2 \
-b \
${skipCreate:+-r} \
${skipCreate:+-e} \
${skipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D}
@ -337,7 +370,7 @@ deploy() {
# shellcheck disable=SC2068
ci/testnet-deploy.sh -p testnet-solana-com -C ec2 ${EC2_ZONE_ARGS[@]} \
-t "$CHANNEL_OR_TAG" -n "$EC2_NODE_COUNT" -c 0 -u -P -a eipalloc-0fa502bf95f6f18b2 \
${skipCreate:+-r} \
${skipCreate:+-e} \
${maybeSkipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D}
@ -346,11 +379,11 @@ deploy() {
# shellcheck disable=SC2068
ci/testnet-deploy.sh -p testnet-solana-com -C gce ${GCE_ZONE_ARGS[@]} \
-t "$CHANNEL_OR_TAG" -n "$GCE_NODE_COUNT" -c 0 -P \
${skipCreate:+-r} \
${skipCreate:+-e} \
${skipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D} \
${EC2_NODE_COUNT:+-x}
-x
fi
)
;;
@ -365,7 +398,7 @@ deploy() {
-t "$CHANNEL_OR_TAG" -c 2 \
-b \
-d pd-ssd \
${skipCreate:+-r} \
${skipCreate:+-e} \
${skipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D}
@ -374,12 +407,25 @@ deploy() {
testnet-demo)
(
set -x
if [[ -n $GCE_NODE_COUNT ]]; then
if [[ -n $GCE_LOW_QUOTA_NODE_COUNT ]] || [[ -n $skipStart ]]; then
maybeSkipStart="skip"
fi
# shellcheck disable=SC2068
ci/testnet-deploy.sh -p demo-testnet-solana-com -C gce ${GCE_ZONE_ARGS[@]} \
-t "$CHANNEL_OR_TAG" -n "$GCE_NODE_COUNT" -c 0 -P -u -f \
-a demo-testnet-solana-com \
${skipCreate:+-e} \
${maybeSkipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D}
if [[ -n $GCE_LOW_QUOTA_NODE_COUNT ]]; then
# shellcheck disable=SC2068
ci/testnet-deploy.sh -p testnet-demo -C gce ${GCE_ZONE_ARGS[@]} \
-t "$CHANNEL_OR_TAG" -n "$GCE_NODE_COUNT" -c 0 -P -u -f \
-a demo-testnet-solana-com \
${skipCreate:+-r} \
ci/testnet-deploy.sh -p demo-testnet-solana-com2 -C gce ${GCE_LOW_QUOTA_ZONE_ARGS[@]} \
-t "$CHANNEL_OR_TAG" -n "$GCE_LOW_QUOTA_NODE_COUNT" -c 0 -P -f -x \
${skipCreate:+-e} \
${skipStart:+-s} \
${maybeStop:+-S} \
${maybeDelete:+-D}

View File

@ -19,6 +19,7 @@ mkdir -p "$netConfigDir" "$netLogDir"
source "$(dirname "${BASH_SOURCE[0]}")"/../scripts/configure-metrics.sh
configFile="$netConfigDir/config"
geoipConfigFile="$netConfigDir/geoip.yml"
entrypointIp=
publicNetwork=
@ -28,10 +29,13 @@ externalNodeSshKey=
sshOptions=()
fullnodeIpList=()
fullnodeIpListPrivate=()
fullnodeIpListZone=()
clientIpList=()
clientIpListPrivate=()
clientIpListZone=()
blockstreamerIpList=()
blockstreamerIpListPrivate=()
blockstreamerIpListZone=()
leaderRotation=
buildSshOptions() {

View File

@ -227,6 +227,7 @@ esac
# name - name of the instance
# publicIp - The public IP address of this instance
# privateIp - The private IP address of this instance
# zone - Zone of this instance
# count - Monotonically increasing count for each
# invocation of cmd, starting at 1
# ... - Extra args to cmd..
@ -242,11 +243,70 @@ cloud_ForEachInstance() {
declare name publicIp privateIp
IFS=: read -r name publicIp privateIp zone < <(echo "$info")
eval "$cmd" "$name" "$publicIp" "$privateIp" "$count" "$@"
eval "$cmd" "$name" "$publicIp" "$privateIp" "$zone" "$count" "$@"
count=$((count + 1))
done
}
# Given a cloud provider zone, return an approximate lat,long location for the
# data center. Normal geoip lookups for cloud provider IP addresses are
# sometimes widely inaccurate.
zoneLocation() {
declare zone="$1"
case "$zone" in
us-west1*)
echo "[45.5946, -121.1787]"
;;
us-central1*)
echo "[41.2619, -95.8608]"
;;
us-east1*)
echo "[33.1960, -80.0131]"
;;
asia-east2*)
echo "[22.3193, 114.1694]"
;;
asia-northeast1*)
echo "[35.6762, 139.6503]"
;;
asia-northeast2*)
echo "[34.6937, 135.5023]"
;;
asia-south1*)
echo "[19.0760, 72.8777]"
;;
asia-southeast1*)
echo "[1.3404, 103.7090]"
;;
australia-southeast1*)
echo "[-33.8688, 151.2093]"
;;
europe-north1*)
echo "[60.5693, 27.1878]"
;;
europe-west2*)
echo "[51.5074, -0.1278]"
;;
europe-west3*)
echo "[50.1109, 8.6821]"
;;
europe-west4*)
echo "[53.4386, 6.8355]"
;;
europe-west6*)
echo "[47.3769, 8.5417]"
;;
northamerica-northeast1*)
echo "[45.5017, -73.5673]"
;;
southamerica-east1*)
echo "[-23.5505, -46.6333]"
;;
*)
;;
esac
}
prepareInstancesAndWriteConfigFile() {
$metricsWriteDatapoint "testnet-deploy net-config-begin=1"
@ -254,6 +314,7 @@ prepareInstancesAndWriteConfigFile() {
echo "Appending to existing config file"
echo "externalNodeSshKey=$sshPrivateKey" >> "$configFile"
else
rm -f "$geoipConfigFile"
cat >> "$configFile" <<EOF
# autogenerated at $(date)
netBasename=$prefix
@ -262,6 +323,7 @@ sshPrivateKey=$sshPrivateKey
leaderRotation=$leaderRotation
EOF
fi
touch "$geoipConfigFile"
buildSshOptions
@ -269,11 +331,13 @@ EOF
declare name="$1"
declare publicIp="$2"
declare privateIp="$3"
declare zone="$4"
#declare index="$5"
declare failOnFailure="$5"
declare arrayName="$6"
declare failOnFailure="$6"
declare arrayName="$7"
# This check should eventually be moved to cloud provider specific script
# This check should eventually be moved to cloud provider specific script
if [ "$publicIp" = "TERMINATED" ] || [ "$privateIp" = "TERMINATED" ]; then
if $failOnFailure; then
exit 1
@ -286,7 +350,7 @@ EOF
echo "Waiting for $name to finish booting..."
(
set -x +e
for i in $(seq 1 20); do
for i in $(seq 1 30); do
timeout --preserve-status --foreground 20s ssh "${sshOptions[@]}" "$publicIp" "ls -l /.instance-startup-complete"
ret=$?
if [[ $ret -eq 0 ]]; then
@ -305,21 +369,21 @@ EOF
exit 1
fi
else
echo "$arrayName+=($publicIp) # $name" >> "$configFile"
echo "${arrayName}Private+=($privateIp) # $name" >> "$configFile"
{
echo "$arrayName+=($publicIp) # $name"
echo "${arrayName}Private+=($privateIp) # $name"
echo "${arrayName}Zone+=($zone) # $name"
} >> "$configFile"
declare latlng=
latlng=$(zoneLocation "$zone")
if [[ -n $latlng ]]; then
echo "$publicIp: $latlng" >> "$geoipConfigFile"
fi
fi
}
if $externalNodes; then
echo "Bootstrap leader is already configured"
else
echo "Looking for bootstrap leader instance..."
cloud_FindInstance "$prefix-bootstrap-leader"
[[ ${#instances[@]} -eq 1 ]] || {
echo "Unable to find bootstrap leader"
exit 1
}
fetchPrivateKey() {
(
declare nodeName
declare nodeIp
@ -338,7 +402,9 @@ EOF
set -x -o pipefail
for i in $(seq 1 30); do
if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa "$sshPrivateKey" "$nodeZone"; then
break
if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa.pub "$sshPrivateKey.pub" "$nodeZone"; then
break
fi
fi
sleep 1
@ -350,6 +416,20 @@ EOF
fi
)
}
if $externalNodes; then
echo "Bootstrap leader is already configured"
else
echo "Looking for bootstrap leader instance..."
cloud_FindInstance "$prefix-bootstrap-leader"
[[ ${#instances[@]} -eq 1 ]] || {
echo "Unable to find bootstrap leader"
exit 1
}
fetchPrivateKey
echo "fullnodeIpList=()" >> "$configFile"
echo "fullnodeIpListPrivate=()" >> "$configFile"
cloud_ForEachInstance recordInstanceIp true fullnodeIpList
@ -363,6 +443,8 @@ EOF
echo "Unable to find additional fullnodes"
exit 1
}
fetchPrivateKey
cloud_ForEachInstance recordInstanceIp "$failOnValidatorBootupFailure" fullnodeIpList
done
fi
@ -588,29 +670,33 @@ info)
declare nodeType=$1
declare ip=$2
declare ipPrivate=$3
printf " %-16s | %-15s | %-15s\n" "$nodeType" "$ip" "$ipPrivate"
declare zone=$4
printf " %-16s | %-15s | %-15s | %s\n" "$nodeType" "$ip" "$ipPrivate" "$zone"
}
printNode "Node Type" "Public IP" "Private IP"
echo "-------------------+-----------------+-----------------"
printNode "Node Type" "Public IP" "Private IP" "Zone"
echo "-------------------+-----------------+-----------------+--------------"
nodeType=bootstrap-leader
for i in $(seq 0 $(( ${#fullnodeIpList[@]} - 1)) ); do
ipAddress=${fullnodeIpList[$i]}
ipAddressPrivate=${fullnodeIpListPrivate[$i]}
printNode $nodeType "$ipAddress" "$ipAddressPrivate"
zone=${fullnodeIpListZone[$i]}
printNode $nodeType "$ipAddress" "$ipAddressPrivate" "$zone"
nodeType=fullnode
done
for i in $(seq 0 $(( ${#clientIpList[@]} - 1)) ); do
ipAddress=${clientIpList[$i]}
ipAddressPrivate=${clientIpListPrivate[$i]}
printNode bench-tps "$ipAddress" "$ipAddressPrivate"
zone=${clientIpListZone[$i]}
printNode bench-tps "$ipAddress" "$ipAddressPrivate" "$zone"
done
for i in $(seq 0 $(( ${#blockstreamerIpList[@]} - 1)) ); do
ipAddress=${blockstreamerIpList[$i]}
ipAddressPrivate=${blockstreamerIpListPrivate[$i]}
printNode blockstreamer "$ipAddress" "$ipAddressPrivate"
zone=${blockstreamerIpListZone[$i]}
printNode blockstreamer "$ipAddress" "$ipAddressPrivate" "$zone"
done
;;
*)

View File

@ -135,6 +135,7 @@ local|tar)
scp "$entrypointIp":~/solana/config-local/mint-id.json config-local/
./multinode-demo/drone.sh > drone.log 2>&1 &
export BLOCKEXPLORER_GEOIP_WHITELIST=$PWD/net/config/geoip.yml
npm install @solana/blockexplorer@1
npx solana-blockexplorer > blockexplorer.log 2>&1 &