Support for configuring testnet nodes across multiple cloud services

This commit is contained in:
Pankaj Garg 2019-04-01 22:54:41 +00:00
parent 25a2f08f8d
commit 97bd7a00f1
3 changed files with 76 additions and 44 deletions

View File

@ -24,6 +24,7 @@ entrypointIp=
publicNetwork=
netBasename=
sshPrivateKey=
# Optional extra SSH identity file. When non-empty, buildSshOptions appends it
# to sshOptions as an additional IdentityFile.
externalNodeSshKey=
sshOptions=()
fullnodeIpList=()
fullnodeIpListPrivate=()
@ -41,8 +42,9 @@ buildSshOptions() {
-o "User=solana"
-o "IdentityFile=$sshPrivateKey"
-o "LogLevel=ERROR"
-F /dev/null
)
# Add the external-node key as an extra identity only when one is configured;
# an empty externalNodeSshKey leaves sshOptions unchanged.
[[ -z $externalNodeSshKey ]] || sshOptions+=(-o "IdentityFile=$externalNodeSshKey")
}
loadConfigFile() {

View File

@ -42,6 +42,7 @@ clientNodeCount=1
blockstreamer=false
fullNodeBootDiskSizeInGb=1000
clientBootDiskSizeInGb=75
# Set to true by the -x option. When true, the bootstrap leader create/lookup
# steps are skipped and the existing config file is kept: it is not deleted and
# its client/blockstreamer lists are not reset.
externalNodes=false
publicNetwork=false
enableGpu=false
@ -114,7 +115,7 @@ shift
[[ $command = create || $command = config || $command = info || $command = delete ]] ||
usage "Invalid command: $command"
while getopts "h?p:Pn:c:z:gG:a:d:bu" opt; do
while getopts "h?p:Pn:c:z:gG:a:d:bux" opt; do
case $opt in
h | \?)
usage
@ -157,6 +158,9 @@ while getopts "h?p:Pn:c:z:gG:a:d:bu" opt; do
u)
blockstreamer=true
;;
x)
externalNodes=true
;;
*)
usage "unhandled option: $opt"
;;
@ -219,13 +223,18 @@ cloud_ForEachInstance() {
prepareInstancesAndWriteConfigFile() {
$metricsWriteDatapoint "testnet-deploy net-config-begin=1"
cat >> "$configFile" <<EOF
if $externalNodes; then
echo "Appending to existing config file"
echo "externalNodeSshKey=$sshPrivateKey" >> "$configFile"
else
cat >> "$configFile" <<EOF
# autogenerated at $(date)
netBasename=$prefix
publicNetwork=$publicNetwork
sshPrivateKey=$sshPrivateKey
leaderRotation=$leaderRotation
EOF
fi
buildSshOptions
@ -262,46 +271,50 @@ EOF
echo "$name has booted."
}
echo "Looking for bootstrap leader instance..."
cloud_FindInstance "$prefix-bootstrap-leader"
[[ ${#instances[@]} -eq 1 ]] || {
echo "Unable to find bootstrap leader"
exit 1
}
if $externalNodes; then
echo "Bootstrap leader is already configured"
else
echo "Looking for bootstrap leader instance..."
cloud_FindInstance "$prefix-bootstrap-leader"
[[ ${#instances[@]} -eq 1 ]] || {
echo "Unable to find bootstrap leader"
exit 1
}
(
declare nodeName
declare nodeIp
declare nodeZone
IFS=: read -r nodeName nodeIp _ nodeZone < <(echo "${instances[0]}")
(
declare nodeName
declare nodeIp
declare nodeZone
IFS=: read -r nodeName nodeIp _ nodeZone < <(echo "${instances[0]}")
# Try to ping the machine first.
timeout 90s bash -c "set -o pipefail; until ping -c 3 $nodeIp | tr - _; do echo .; done"
# Try to ping the machine first.
timeout 90s bash -c "set -o pipefail; until ping -c 3 $nodeIp | tr - _; do echo .; done"
if [[ ! -r $sshPrivateKey ]]; then
echo "Fetching $sshPrivateKey from $nodeName"
if [[ ! -r $sshPrivateKey ]]; then
echo "Fetching $sshPrivateKey from $nodeName"
# Try to scp in a couple times, sshd may not yet be up even though the
# machine can be pinged...
set -x -o pipefail
for i in $(seq 1 30); do
if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa "$sshPrivateKey" "$nodeZone"; then
break
fi
# Try to scp in a couple times, sshd may not yet be up even though the
# machine can be pinged...
set -x -o pipefail
for i in $(seq 1 30); do
if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa "$sshPrivateKey" "$nodeZone"; then
break
fi
sleep 1
echo "Retry $i..."
done
sleep 1
echo "Retry $i..."
done
chmod 400 "$sshPrivateKey"
ls -l "$sshPrivateKey"
fi
)
chmod 400 "$sshPrivateKey"
ls -l "$sshPrivateKey"
fi
)
echo "fullnodeIpList=()" >> "$configFile"
echo "fullnodeIpListPrivate=()" >> "$configFile"
cloud_ForEachInstance recordInstanceIp fullnodeIpList
cloud_ForEachInstance waitForStartupComplete
echo "fullnodeIpList=()" >> "$configFile"
echo "fullnodeIpListPrivate=()" >> "$configFile"
cloud_ForEachInstance recordInstanceIp fullnodeIpList
cloud_ForEachInstance waitForStartupComplete
fi
echo "Looking for additional fullnode instances..."
for zone in "${zones[@]}"; do
@ -314,8 +327,12 @@ EOF
cloud_ForEachInstance waitForStartupComplete
done
echo "clientIpList=()" >> "$configFile"
echo "clientIpListPrivate=()" >> "$configFile"
if $externalNodes; then
echo "Let's not reset the current client configuration"
else
echo "clientIpList=()" >> "$configFile"
echo "clientIpListPrivate=()" >> "$configFile"
fi
echo "Looking for client bencher instances..."
cloud_FindInstances "$prefix-client"
[[ ${#instances[@]} -eq 0 ]] || {
@ -323,8 +340,12 @@ EOF
cloud_ForEachInstance waitForStartupComplete
}
echo "blockstreamerIpList=()" >> "$configFile"
echo "blockstreamerIpListPrivate=()" >> "$configFile"
if $externalNodes; then
echo "Let's not reset the current blockstream configuration"
else
echo "blockstreamerIpList=()" >> "$configFile"
echo "blockstreamerIpListPrivate=()" >> "$configFile"
fi
echo "Looking for blockstreamer instances..."
cloud_FindInstances "$prefix-blockstreamer"
[[ ${#instances[@]} -eq 0 ]] || {
@ -360,7 +381,11 @@ delete() {
cloud_DeleteInstances true
fi
done
rm -f "$configFile"
if $externalNodes; then
echo "Let's not delete the current configuration file"
else
rm -f "$configFile"
fi
$metricsWriteDatapoint "testnet-deploy net-delete-complete=1"
@ -474,9 +499,13 @@ EOF
cloud_Initialize "$prefix" "$zone"
done
cloud_CreateInstances "$prefix" "$prefix-bootstrap-leader" 1 \
"$enableGpu" "$bootstrapLeaderMachineType" "${zones[0]}" "$fullNodeBootDiskSizeInGb" \
"$startupScript" "$bootstrapLeaderAddress" "$bootDiskType"
if $externalNodes; then
echo "Bootstrap leader is already configured"
else
cloud_CreateInstances "$prefix" "$prefix-bootstrap-leader" 1 \
"$enableGpu" "$bootstrapLeaderMachineType" "${zones[0]}" "$fullNodeBootDiskSizeInGb" \
"$startupScript" "$bootstrapLeaderAddress" "$bootDiskType"
fi
num_zones=${#zones[@]}
numNodesPerZone=$((additionalFullNodeCount / num_zones))

View File

@ -163,6 +163,7 @@ startCommon() {
mkdir -p ~/.cargo/bin
"
fi
# Install the external-node key on the remote host only when one is configured.
# externalNodeSshKey is empty unless the config file was written in external
# node mode, and an unguarded call would run ssh-copy-id with an empty -i
# argument for every node of an ordinary deployment.
[[ -z "$externalNodeSshKey" ]] || ssh-copy-id -f -i "$externalNodeSshKey" "${sshOptions[@]}" "solana@$ipAddress"
rsync -vPrc -e "ssh ${sshOptions[*]}" \
"$SOLANA_ROOT"/{fetch-perf-libs.sh,scripts,net,multinode-demo} \
"$ipAddress":~/solana/