net/ improvements (#4257)

automerge
This commit is contained in:
Michael Vines 2019-05-11 22:54:50 -07:00 committed by Grimes
parent dd12db2f06
commit 50f79e495e
2 changed files with 53 additions and 58 deletions

View File

@ -327,6 +327,40 @@ EOF
buildSshOptions
# Fetch the ssh key pair from the first entry of the global `instances` array
# into "$sshPrivateKey" and "$sshPrivateKey.pub", unless "$sshPrivateKey" is
# already readable locally.
#
# Globals:   instances (read)     - entries are colon-separated; fields 1, 2
#                                   and 4 are used as name, ip and zone
#            sshPrivateKey (read) - local destination path of the private key
# Returns:   0 if the key was already readable or both files were fetched,
#            non-zero if every fetch attempt failed
fetchPrivateKey() {
  declare nodeName
  declare nodeIp
  declare nodeZone
  # Declared locally so the timeout does not leak into the caller's scope
  declare timeout_sec=90

  IFS=: read -r nodeName nodeIp _ nodeZone < <(echo "${instances[0]}")

  # Make sure the machine is alive or pingable
  cloud_WaitForInstanceReady "$nodeName" "$nodeIp" "$nodeZone" "$timeout_sec"

  if [[ ! -r $sshPrivateKey ]]; then
    echo "Fetching $sshPrivateKey from $nodeName"

    # Try to scp in a couple times, sshd may not yet be up even though the
    # machine can be pinged...
    #
    # The subshell keeps `set -x`/`pipefail` from leaking out, and its exit
    # status tells us whether BOTH files were fetched: previously a run where
    # the .pub fetch never succeeded still fell through and reported success.
    if ! (
      set -o pipefail
      for i in $(seq 1 30); do
        set -x
        cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa "$sshPrivateKey" "$nodeZone" &&
          cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa.pub "$sshPrivateKey.pub" "$nodeZone" &&
          exit 0
        set +x

        sleep 1
        echo "Retry $i..."
      done
      # Every attempt failed
      exit 1
    ); then
      echo "Error: unable to fetch $sshPrivateKey from $nodeName" >&2
      return 1
    fi

    chmod 400 "$sshPrivateKey"
    ls -l "$sshPrivateKey"
  fi
}
recordInstanceIp() {
declare name="$1"
declare publicIp="$2"
@ -349,15 +383,19 @@ EOF
ok=true
echo "Waiting for $name to finish booting..."
(
set -x +e
for i in $(seq 1 60); do
timeout --preserve-status --foreground 20s ssh "${sshOptions[@]}" "$publicIp" "ls -l /.instance-startup-complete"
set +e
fetchPrivateKey || exit 1
for i in $(seq 1 30); do
(
set -x
timeout --preserve-status --foreground 20s ssh "${sshOptions[@]}" "$publicIp" "ls -l /.instance-startup-complete"
)
ret=$?
if [[ $ret -eq 0 ]]; then
echo "$name has booted."
exit 0
fi
sleep 2
sleep 5
echo "Retry $i..."
done
echo "$name failed to boot."
@ -383,41 +421,6 @@ EOF
fi
}
# Fetch the ssh key pair from the first entry of the global `instances` array
# into "$sshPrivateKey" and "$sshPrivateKey.pub", unless "$sshPrivateKey" is
# already readable locally. The whole body runs in a subshell, so the shell
# options and variables set here do not affect the caller.
#
# Globals:   instances (read)     - entries are colon-separated; fields 1, 2
#                                   and 4 are used as name, ip and zone
#            sshPrivateKey (read) - local destination path of the private key
# Returns:   0 if the key was already readable or both files were fetched,
#            non-zero if every fetch attempt failed
fetchPrivateKey() {
  (
    declare nodeName
    declare nodeIp
    declare nodeZone
    declare fetched=false
    IFS=: read -r nodeName nodeIp _ nodeZone < <(echo "${instances[0]}")

    # Make sure the machine is alive or pingable
    timeout_sec=90
    cloud_WaitForInstanceReady "$nodeName" "$nodeIp" "$nodeZone" "$timeout_sec"

    if [[ ! -r $sshPrivateKey ]]; then
      echo "Fetching $sshPrivateKey from $nodeName"

      # Try to scp in a couple times, sshd may not yet be up even though the
      # machine can be pinged...
      set -x -o pipefail
      for i in $(seq 1 30); do
        if cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa "$sshPrivateKey" "$nodeZone" &&
          cloud_FetchFile "$nodeName" "$nodeIp" /solana-id_ecdsa.pub "$sshPrivateKey.pub" "$nodeZone"; then
          fetched=true
          break
        fi

        sleep 1
        echo "Retry $i..."
      done

      # Previously a run where the .pub fetch never succeeded fell through to
      # chmod/ls and reported success with an incomplete key pair.
      if ! $fetched; then
        echo "Error: unable to fetch $sshPrivateKey from $nodeName" >&2
        exit 1
      fi

      chmod 400 "$sshPrivateKey"
      ls -l "$sshPrivateKey"
    fi
  )
}
if $externalNodes; then
echo "Bootstrap leader is already configured"
else
@ -428,8 +431,6 @@ EOF
exit 1
}
fetchPrivateKey
echo "fullnodeIpList=()" >> "$configFile"
echo "fullnodeIpListPrivate=()" >> "$configFile"
cloud_ForEachInstance recordInstanceIp true fullnodeIpList
@ -440,7 +441,6 @@ EOF
echo "Looking for additional fullnode instances in $zone ..."
cloud_FindInstances "$prefix-$zone-fullnode"
if [[ ${#instances[@]} -gt 0 ]]; then
fetchPrivateKey
cloud_ForEachInstance recordInstanceIp "$failOnValidatorBootupFailure" fullnodeIpList
else
echo "Unable to find additional fullnodes"
@ -697,7 +697,7 @@ info)
ipAddress=${clientIpList[$i]}
ipAddressPrivate=${clientIpListPrivate[$i]}
zone=${clientIpListZone[$i]}
printNode bench-tps "$ipAddress" "$ipAddressPrivate" "$zone"
printNode client "$ipAddress" "$ipAddressPrivate" "$zone"
done
for i in $(seq 0 $(( ${#blockstreamerIpList[@]} - 1)) ); do

View File

@ -121,20 +121,15 @@ cloud_Initialize() {
region=$(__cloud_GetRegion "$zone")
__cloud_SshPrivateKeyCheck
(
set -x
aws ec2 delete-key-pair --region "$region" --key-name "$networkName"
aws ec2 import-key-pair --region "$region" --key-name "$networkName" \
--public-key-material file://"${sshPrivateKey}".pub
)
aws ec2 delete-key-pair --region "$region" --key-name "$networkName"
aws ec2 import-key-pair --region "$region" --key-name "$networkName" \
--public-key-material file://"${sshPrivateKey}".pub
(
set -x
aws ec2 delete-security-group --region "$region" --group-name "$networkName" || true
aws ec2 create-security-group --region "$region" --group-name "$networkName" --description "Created automatically by $0"
rules=$(cat "$(dirname "${BASH_SOURCE[0]}")"/ec2-security-group-config.json)
aws ec2 authorize-security-group-ingress --region "$region" --group-name "$networkName" --cli-input-json "$rules"
)
declare rules
rules=$(cat "$(dirname "${BASH_SOURCE[0]}")"/ec2-security-group-config.json)
aws ec2 delete-security-group --region "$region" --group-name "$networkName" || true
aws ec2 create-security-group --region "$region" --group-name "$networkName" --description "Created automatically by $0"
aws ec2 authorize-security-group-ingress --output table --region "$region" --group-name "$networkName" --cli-input-json "$rules"
}
#
@ -273,7 +268,7 @@ cloud_CreateInstances() {
(
set -x
aws ec2 run-instances "${args[@]}"
aws ec2 run-instances --output table "${args[@]}"
)
if [[ -n $optionalAddress ]]; then
@ -318,7 +313,7 @@ cloud_DeleteInstances() {
region=$(__cloud_GetRegion "$zone")
(
set -x
aws ec2 terminate-instances --region "$region" --instance-ids "$name"
aws ec2 terminate-instances --output table --region "$region" --instance-ids "$name"
)
done