Use a common solana user on all testnet instances
This commit is contained in:
parent
7029e4395c
commit
ebcac3c2d1
|
@ -7,8 +7,12 @@
|
|||
# shellcheck disable=2034
|
||||
#
|
||||
|
||||
netConfigDir="$(dirname "${BASH_SOURCE[0]}")"/config
|
||||
netLogDir="$(dirname "${BASH_SOURCE[0]}")"/log
|
||||
netDir=$(
|
||||
cd "$(dirname "${BASH_SOURCE[0]}")" || exit
|
||||
echo "$PWD"
|
||||
)
|
||||
netConfigDir="$netDir"/config
|
||||
netLogDir="$netDir"/log
|
||||
mkdir -p "$netConfigDir" "$netLogDir"
|
||||
|
||||
# shellcheck source=scripts/configure-metrics.sh
|
||||
|
@ -21,7 +25,6 @@ publicNetwork=
|
|||
leaderIp=
|
||||
netBasename=
|
||||
sshPrivateKey=
|
||||
sshUsername=
|
||||
clientIpList=()
|
||||
sshOptions=()
|
||||
validatorIpList=()
|
||||
|
@ -31,9 +34,10 @@ buildSshOptions() {
|
|||
-o "BatchMode=yes"
|
||||
-o "StrictHostKeyChecking=no"
|
||||
-o "UserKnownHostsFile=/dev/null"
|
||||
-o "User=$sshUsername"
|
||||
-o "User=solana"
|
||||
-o "IdentityFile=$sshPrivateKey"
|
||||
-o "LogLevel=ERROR"
|
||||
-F /dev/null
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -47,7 +51,6 @@ loadConfigFile() {
|
|||
[[ -n "$leaderIp" ]] || usage "Config file invalid, leaderIp unspecified: $configFile"
|
||||
[[ -n "$netBasename" ]] || usage "Config file invalid, netBasename unspecified: $configFile"
|
||||
[[ -n $sshPrivateKey ]] || usage "Config file invalid, sshPrivateKey unspecified: $configFile"
|
||||
[[ -n $sshUsername ]] || usage "Config file invalid, sshUsername unspecified: $configFile"
|
||||
[[ ${#validatorIpList[@]} -gt 0 ]] || usage "Config file invalid, validatorIpList unspecified: $configFile"
|
||||
|
||||
buildSshOptions
|
||||
|
|
88
net/gce.sh
88
net/gce.sh
|
@ -106,6 +106,7 @@ done
|
|||
shift $((OPTIND - 1))
|
||||
|
||||
[[ -z $1 ]] || usage "Unexpected argument: $1"
|
||||
sshPrivateKey="$netConfigDir/id_$prefix"
|
||||
|
||||
prepareInstancesAndWriteConfigFile() {
|
||||
$metricsWriteDatapoint "testnet-deploy net-config-begin=1"
|
||||
|
@ -114,15 +115,10 @@ prepareInstancesAndWriteConfigFile() {
|
|||
# autogenerated at $(date)
|
||||
netBasename=$prefix
|
||||
publicNetwork=$publicNetwork
|
||||
sshPrivateKey=$sshPrivateKey
|
||||
EOF
|
||||
|
||||
declare sshPrivateKey="$netConfigDir/id_$prefix"
|
||||
rm -rf "$sshPrivateKey"{,.pub}
|
||||
(
|
||||
set -x
|
||||
ssh-keygen -t ecdsa -N '' -f "$sshPrivateKey"
|
||||
)
|
||||
echo "sshPrivateKey=$sshPrivateKey" >> "$configFile"
|
||||
buildSshOptions
|
||||
|
||||
recordInstanceIp() {
|
||||
declare name="$1"
|
||||
|
@ -141,38 +137,79 @@ EOF
|
|||
fi
|
||||
}
|
||||
|
||||
#
# waitForStartupComplete [name] [unused] [publicIp]
#
# Blocks until the instance's startup script has finished, which is signalled
# by the existence of /.gce-startup-complete on the instance.  The instance is
# probed over ssh using the global `sshOptions` array, up to 30 times with a
# 2 second pause between attempts.
#
# name     - instance name, only used in progress messages
# publicIp - address to ssh to (third argument; the second is ignored)
#
# Returns 0 once the marker file is found, 1 if every attempt failed.
#
waitForStartupComplete() {
  declare name="$1"
  declare publicIp="$3"
  declare attempt

  echo "Waiting for $name to finish booting..."
  for ((attempt = 1; attempt <= 30; attempt++)); do
    # The probe runs in its own subshell so that tracing stays confined to it
    if (set -x; ssh "${sshOptions[@]}" "$publicIp" "test -f /.gce-startup-complete"); then
      return 0
    fi
    sleep 2
    echo "Retry $attempt..."
  done

  # Running out of attempts used to be reported as success; surface it instead
  # so callers do not continue with an instance that never became ready.
  echo "Error: $name did not finish booting" >&2
  return 1
}
|
||||
|
||||
echo "Looking for leader instance..."
|
||||
gcloud_FindInstances "name=$prefix-leader" show
|
||||
[[ ${#instances[@]} -eq 1 ]] || {
|
||||
echo "Unable to start leader"
|
||||
echo "Unable to find leader"
|
||||
exit 1
|
||||
}
|
||||
gcloud_FigureRemoteUsername "${instances[0]}"
|
||||
sshUsername=$gcloud_username
|
||||
echo "sshUsername=$sshUsername" >> "$configFile"
|
||||
buildSshOptions
|
||||
|
||||
gcloud_PrepInstancesForSsh "$gcloud_username" "$sshPrivateKey"
|
||||
echo "Fetching $sshPrivateKey from $leaderName"
|
||||
(
|
||||
rm -rf "$sshPrivateKey"{,pub}
|
||||
|
||||
declare leaderName
|
||||
declare leaderZone
|
||||
declare leaderIp
|
||||
IFS=: read -r leaderName leaderZone leaderIp _ < <(echo "${instances[0]}")
|
||||
|
||||
set -x
|
||||
|
||||
# Try to ping the machine first. There can be a delay between when the
|
||||
# instance is reported as RUNNING and when it's reachable over the network
|
||||
timeout 30s bash -c "set -o pipefail; until ping -c 3 $leaderIp | tr - _; do echo .; done"
|
||||
|
||||
# Try to scp in a couple times, sshd may not yet be up even though the
|
||||
# machine can be pinged...
|
||||
set -o pipefail
|
||||
for i in $(seq 1 10); do
|
||||
if gcloud compute scp --zone "$leaderZone" \
|
||||
"$leaderName:/solana-id_ecdsa" "$sshPrivateKey"; then
|
||||
break
|
||||
fi
|
||||
sleep 1
|
||||
echo "Retry $i..."
|
||||
done
|
||||
|
||||
chmod 400 "$sshPrivateKey"
|
||||
)
|
||||
|
||||
echo "leaderIp=()" >> "$configFile"
|
||||
gcloud_ForEachInstance recordInstanceIp leaderIp
|
||||
gcloud_ForEachInstance waitForStartupComplete
|
||||
|
||||
echo "Looking for validator instances..."
|
||||
gcloud_FindInstances "name~^$prefix-validator" show
|
||||
[[ ${#instances[@]} -gt 0 ]] || {
|
||||
echo "Unable to start validators"
|
||||
echo "Unable to find validators"
|
||||
exit 1
|
||||
}
|
||||
echo "validatorIpList=()" >> "$configFile"
|
||||
gcloud_PrepInstancesForSsh "$gcloud_username" "$sshPrivateKey"
|
||||
gcloud_ForEachInstance recordInstanceIp validatorIpList
|
||||
gcloud_ForEachInstance waitForStartupComplete
|
||||
|
||||
echo "clientIpList=()" >> "$configFile"
|
||||
echo "Looking for client instances..."
|
||||
gcloud_FindInstances "name~^$prefix-client" show
|
||||
[[ ${#instances[@]} -eq 0 ]] || {
|
||||
gcloud_PrepInstancesForSsh "$gcloud_username" "$sshPrivateKey"
|
||||
gcloud_ForEachInstance recordInstanceIp clientIpList
|
||||
gcloud_ForEachInstance waitForStartupComplete
|
||||
}
|
||||
|
||||
echo "Wrote $configFile"
|
||||
|
@ -206,6 +243,9 @@ create)
|
|||
|
||||
$metricsWriteDatapoint "testnet-deploy net-create-begin=1"
|
||||
|
||||
rm -rf "$sshPrivateKey"{,.pub}
|
||||
ssh-keygen -t ecdsa -N '' -f "$sshPrivateKey"
|
||||
|
||||
printNetworkInfo() {
|
||||
cat <<EOF
|
||||
========================================================================================
|
||||
|
@ -233,15 +273,29 @@ cat > /etc/motd <<EOM
|
|||
See "startup-script" log messages in /var/log/syslog for status:
|
||||
$ sudo cat /var/log/syslog | grep startup-script
|
||||
|
||||
To block until setup is complete, run:
|
||||
$ until [[ -f /.gce-startup-complete ]]; do sleep 1; done
|
||||
|
||||
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
EOM
|
||||
|
||||
# Place the generated private key at /solana-id_ecdsa so it's retrievable by anybody
|
||||
# who is able to log into this machine
|
||||
cat > /solana-id_ecdsa <<EOK
|
||||
$(cat "$sshPrivateKey")
|
||||
EOK
|
||||
cat > /solana-id_ecdsa.pub <<EOK
|
||||
$(cat "$sshPrivateKey.pub")
|
||||
EOK
|
||||
chmod 444 /solana-id_ecdsa
|
||||
|
||||
USER=\$(id -un)
|
||||
|
||||
$(
|
||||
cd "$here"/scripts/
|
||||
cat \
|
||||
disable-background-upgrades.sh \
|
||||
create-solana-user.sh \
|
||||
install-earlyoom.sh \
|
||||
install-rsync.sh \
|
||||
install-libssl-compatability.sh \
|
||||
|
@ -251,6 +305,8 @@ cat > /etc/motd <<EOM
|
|||
$(printNetworkInfo)
|
||||
EOM
|
||||
|
||||
touch /.gce-startup-complete
|
||||
|
||||
EOF
|
||||
|
||||
gcloud_CreateInstances "$prefix-leader" 1 "$zone" \
|
||||
|
|
16
net/net.sh
16
net/net.sh
|
@ -116,16 +116,7 @@ build() {
|
|||
startCommon() {
|
||||
declare ipAddress=$1
|
||||
test -d "$SOLANA_ROOT"
|
||||
ssh "${sshOptions[@]}" "$ipAddress" "
|
||||
mkdir -p ~/solana ~/.cargo/bin
|
||||
|
||||
# Help other users of the machine locate network logs
|
||||
[[ -d /tmp/solana/ ]] || {
|
||||
mkdir /tmp/solana/
|
||||
chmod go+w /tmp/solana/
|
||||
}
|
||||
ln -sfT ~/solana /tmp/solana/=
|
||||
"
|
||||
ssh "${sshOptions[@]}" "$ipAddress" "mkdir -p ~/solana ~/.cargo/bin"
|
||||
rsync -vPrc -e "ssh ${sshOptions[*]}" \
|
||||
"$SOLANA_ROOT"/{fetch-perf-libs.sh,scripts,net,multinode-demo} \
|
||||
"$ipAddress":~/solana/
|
||||
|
@ -231,7 +222,10 @@ start() {
|
|||
"
|
||||
)
|
||||
else
|
||||
snap download --channel="$snapChannel" solana
|
||||
(
|
||||
cd "$SOLANA_ROOT"
|
||||
snap download --channel="$snapChannel" solana
|
||||
)
|
||||
fi
|
||||
snapFilename="$(echo "$SOLANA_ROOT"/solana_*.snap)"
|
||||
[[ -r $snapFilename ]] || {
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
cd "$(dirname "$0")"/../..
|
||||
|
||||
echo "$(date) | $0 $*" > client.log
|
||||
|
||||
deployMethod="$1"
|
||||
entrypointIp="$2"
|
||||
numNodes="$3"
|
||||
|
|
|
@ -0,0 +1,27 @@
|
|||
#!/bin/bash -ex
#
# Creates the common "solana" user, grants it passwordless sudo and installs
# the shared ssh keypair found at /solana-id_ecdsa{,.pub} into its ~/.ssh so
# that the user can be logged into, and can log into other machines, with
# that key.
#
# Must run as root on Linux.  Safe to run more than once.
#

[[ $(uname) = Linux ]] || exit 1
[[ $USER = root ]] || exit 1

# Only create the account when it is missing; adduser fails on an existing user
if ! id solana > /dev/null 2>&1; then
  adduser solana --gecos "" --disabled-password --quiet
fi
adduser solana sudo

# Append the sudoers rule once rather than on every run
grep -qxF "solana ALL=(ALL) NOPASSWD:ALL" /etc/sudoers ||
  echo "solana ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
id solana

[[ -r /solana-id_ecdsa ]] || exit 1
[[ -r /solana-id_ecdsa.pub ]] || exit 1

# id_ecdsa and config are created read-only (umask 377), so they are removed
# first to allow a later run to recreate them.  set -e makes any failed step
# fail the whole command instead of being silently ignored.
sudo -u solana bash -c "
  set -e
  mkdir -p /home/solana/.ssh/
  cd /home/solana/.ssh/
  cp /solana-id_ecdsa.pub authorized_keys
  rm -f id_ecdsa config
  umask 377
  cp /solana-id_ecdsa id_ecdsa
  echo \"
    Host *
    BatchMode yes
    IdentityFile ~/.ssh/id_ecdsa
    StrictHostKeyChecking no
  \" > config
"
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
#!/bin/bash -ex
|
||||
|
||||
#
|
||||
# Prevent background upgrades that block |apt-get|
|
||||
#
|
||||
# TODO: This approach is pretty uncompromising. An alternative solution that
|
||||
|
@ -18,4 +18,3 @@ while fuser /var/lib/dpkg/lock; do
|
|||
sleep 1
|
||||
done
|
||||
|
||||
|
||||
|
|
|
@ -185,149 +185,3 @@ gcloud_DeleteInstances() {
|
|||
)
|
||||
}
|
||||
|
||||
#
# gcloud_FigureRemoteUsername [instanceInfo]
#
# The remote username when ssh-ing into GCP instances tends to not be the same
# as the user's local username, so it is discovered by ssh-ing into an
# instance and asking the remote shell for $USER.
#
# On success the gcloud_username global variable is updated.  If
# gcloud_username is already set the function returns immediately.  If the
# username cannot be determined the shell exits with status 1.
#
# instanceInfo - an entry from the `instances` array, "name:zone:publicIp:..."
#
# example:
#   gcloud_FigureRemoteUsername "name:zone:..."
#
gcloud_FigureRemoteUsername() {
  if [[ -n $gcloud_username ]]; then
    return
  fi

  declare instanceInfo="$1"
  declare name zone publicIp
  IFS=: read -r name zone publicIp _ <<< "$instanceInfo"

  echo "Detecting remote username using $name in $zone:"

  # Scratch file for the remote output; mktemp avoids a predictable /tmp name
  declare whoamiFile
  whoamiFile=$(mktemp) || {
    echo Unable to create temporary file
    exit 1
  }

  # Figure the gcp ssh username
  (
    set -x

    # Try to ping the machine first.  There can be a delay between when the
    # instance is reported as RUNNING and when it's reachable over the network
    timeout 30s bash -c "set -o pipefail; until ping -c 3 $publicIp | tr - _; do echo .; done"

    # Try to ssh in a couple times, sshd may not yet be up even though the
    # machine can be pinged...
    set -o pipefail
    for ((i = 1; i <= 10; i++)); do
      # The username is wrapped in whoami:...:iamwho markers so it can be told
      # apart from any other output; carriage returns and spaces are dropped.
      if gcloud compute ssh "$name" \
        --zone "$zone" -- "echo whoami:\$USER:iamwho" \
        | tr -d $'\r ' | tee "$whoamiFile"; then
        break
      fi
      sleep 1
      echo "Retry $i..."
    done
  )

  # Parse into locals and publish the username only when both markers match,
  # so unrelated output can never end up in gcloud_username.  The trailing
  # test lets a final line without a newline be examined as well.
  declare marker user trailer
  while IFS=: read -r marker user trailer || [[ -n $marker ]]; do
    if [[ $marker == "whoami" && $trailer == "iamwho" ]]; then
      gcloud_username=$user
      break
    fi
  done < "$whoamiFile"
  rm -f "$whoamiFile"

  if [[ -z $gcloud_username ]]; then
    echo Unable to figure remote user name
    exit 1
  fi

  echo "Remote username: $gcloud_username"
}
|
||||
|
||||
#
# gcloud_PrepInstancesForSsh [username] [privateKey]
#
# Prepares all the instances in the `instances` array for ssh with the specified
# keypair.  This eliminates the need to use the restrictive |gcloud compute ssh|,
# use plain |ssh| instead.
#
# For every instance the public key is appended to ~/.ssh/authorized_keys and
# an ssh config is written via |gcloud compute ssh|; the private key is then
# copied to ~/.ssh/id_testnet with |scp|.  Output of both steps is collected in
# log/gcloud_PrepInstancesForSsh-<instance name>.log (relative to the current
# directory).  If a copy fails its log is printed and the shell exits with
# status 1.
#
# username   - gcp ssh username as computed by gcloud_FigureRemoteUsername
# privateKey - private key to install on all the instances; the matching public
#              key is expected next to it as <privateKey>.pub
#
gcloud_PrepInstancesForSsh() {
  declare username="$1"
  declare privateKey="$2"
  declare publicKey="$privateKey".pub
  declare logDir=log

  mkdir -p "$logDir"
  rm -rf "$logDir"/gcloud_PrepInstancesForSsh-*

  [[ -r $publicKey ]] || {
    echo "Unable to read public key: $publicKey"
    exit 1
  }

  [[ -r $privateKey ]] || {
    echo "Unable to read private key: $privateKey"
    exit 1
  }

  [[ -d $logDir ]] || {
    echo "logDir does not exist: $logDir"
    exit 1
  }

  # pids[i] is the background copy whose output is in logFiles[i].  Keeping the
  # pairing in arrays avoids needing |ln -T|, which is a GNU-only option.
  declare -a pids=()
  declare -a logFiles=()
  declare instanceInfo name zone publicIp logFile
  for instanceInfo in "${instances[@]}"; do
    IFS=: read -r name zone publicIp _ <<< "$instanceInfo"

    logFile="$logDir/gcloud_PrepInstancesForSsh-$name.log"

    # TODO: This next subshell runs in series because for unknown reason running
    # multiple |gcloud compute ssh| commands in parallel cause the macOS
    # terminal to misbehave
    (
      set -x

      # Try to ping the machine first.  There can be a delay between when the
      # instance is reported as RUNNING and when it's reachable over the network
      timeout 60s bash -c "set -o pipefail; until ping -c 3 $publicIp | tr - _; do echo .; done"

      gcloud compute ssh --zone "$zone" "$name" -- "
        set -x;
        mkdir -p .ssh;
        echo \"$(cat "$publicKey")\" >> .ssh/authorized_keys;
        echo \"
          Host *
          BatchMode yes
          IdentityFile ~/.ssh/id_testnet
          StrictHostKeyChecking no
        \" > .ssh/config;
      "
    ) >> "$logFile" 2>&1

    # The key copies use plain scp and run in parallel in the background
    (
      set -x
      scp \
        -o StrictHostKeyChecking=no \
        -o UserKnownHostsFile=/dev/null \
        -i "$privateKey" \
        "$privateKey" "$username@$publicIp:.ssh/id_testnet"
    ) >> "$logFile" 2>&1 &
    pids+=("$!")
    logFiles+=("$logFile")
  done

  # Reap every copy; on the first failure show its log and give up
  declare i
  for i in "${!pids[@]}"; do
    if ! wait "${pids[$i]}"; then
      cat "${logFiles[$i]}"
      echo ^^^ +++
      exit 1
    fi
  done
}
|
||||
|
|
|
@ -46,7 +46,7 @@ fi
|
|||
#
# printNode [nodeType] [ip]
#
# Prints one line for a node: the command to reach it (this script followed by
# the node's address), and the command that tails the node's log file.
#
# nodeType - base name of the node's log file, i.e. solana/<nodeType>.log
# ip       - address of the node
#
printNode() {
  declare nodeType=$1
  declare ip=$2
  # Values are passed as arguments rather than placed in the format string so
  # that a % or backslash in them is printed literally.
  printf " %-25s | For logs run: %s %s tail -f solana/%s.log\n" \
    "$0 $ip" "$0" "$ip" "$nodeType"
}
|
||||
|
||||
echo Leader:
|
||||
|
|
Loading…
Reference in New Issue