Streamline multinode-demo/ restart logic (#7094)

* bootstrap-leader.sh will now restart the node automatically by default
* Streamline validator restart
This commit is contained in:
Michael Vines 2019-11-22 09:44:16 -07:00 committed by GitHub
parent ef55c15537
commit 68bad56e7d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 57 additions and 15 deletions

View File

@ -75,6 +75,7 @@ source multinode-demo/common.sh
nodes=(
"multinode-demo/drone.sh"
"multinode-demo/bootstrap-leader.sh \
--no-restart \
--init-complete-file init-complete-node1.log \
--dynamic-port-range 8000-8050"
"multinode-demo/validator.sh \

View File

@ -19,6 +19,8 @@ else
program=$solana_validator
fi
no_restart=0
args=()
while [[ -n $1 ]]; do
if [[ ${1:0:1} = - ]]; then
@ -43,6 +45,9 @@ while [[ -n $1 ]]; do
elif [[ $1 = --log ]]; then
args+=("$1" "$2")
shift 2
elif [[ $1 = --no-restart ]]; then
no_restart=1
shift
else
echo "Unknown argument: $1"
$program --help
@ -87,6 +92,46 @@ args+=(
default_arg --gossip-port 8001
default_arg --log -
set -x
# shellcheck disable=SC2086 # Don't want to double quote $program
exec $program "${args[@]}"
pid=
kill_node() {
  # Stops the background node recorded in the global $pid and reaps it.
  # Globals: pid (read, then emptied before the signal is sent).
  # Returns: 0 in every case; failures of kill/wait are deliberately ignored.
  #
  # Keep this function silent: nothing may be echoed here, so that $pid is
  # still killed when stdout/stderr have been redirected.
  #
  # errexit and xtrace are switched off here and are not switched back on.
  set +ex

  # Nothing is being tracked, so there is nothing to stop.
  [[ -n $pid ]] || return 0

  # Forget the pid first, then signal and reap the saved copy.
  local target=$pid
  pid=
  kill "$target" || true
  wait "$target" || true
}
# Signal/error handler: stops the running node through kill_node and then
# terminates this script. It is the command registered with `trap` for
# INT, TERM and ERR.
# `exit` is invoked without an explicit status argument.
kill_node_and_exit() {
kill_node
exit
}
# Route INT, TERM and ERR to kill_node_and_exit so the background node is
# stopped before this script goes away.
trap 'kill_node_and_exit' INT TERM ERR
# Supervisor loop: every pass launches $program in the background, then either
# waits for it once (when --no-restart set no_restart=1) or polls it and
# launches it again after it has gone away.
while true; do
echo "$program ${args[*]}"
# $program is intentionally left unquoted, so its value is word-split into
# the command name plus any embedded arguments.
$program "${args[@]}" &
pid=$!
echo "pid: $pid"
if ((no_restart)); then
# Restart disabled: block until the node exits, then exit with wait's status.
# NOTE(review): a non-zero status from `wait` may fire the ERR trap before
# `exit $?` is reached - confirm the resulting exit code is the intended one.
wait "$pid"
exit $?
fi
# Poll once per second. `kill -0` sends no signal; it only reports whether
# $pid can still be signalled.
while true; do
if [[ -z $pid ]] || ! kill -0 "$pid"; then
echo "############## validator exited, restarting ##############"
break
fi
sleep 1
done
# kill_node waits on the child and empties $pid before the next launch.
kill_node
done

View File

@ -278,14 +278,15 @@ setup_validator_accounts() {
return 0
}
rpc_url=$($solana_gossip get-rpc-url --entrypoint "$gossip_entrypoint")
[[ -r "$identity_keypair_path" ]] || $solana_keygen new -o "$identity_keypair_path"
[[ -r "$voting_keypair_path" ]] || $solana_keygen new -o "$voting_keypair_path"
[[ -r "$storage_keypair_path" ]] || $solana_keygen new -o "$storage_keypair_path"
setup_validator_accounts "$node_lamports"
while true; do
rpc_url=$($solana_gossip get-rpc-url --entrypoint "$gossip_entrypoint")
[[ -r "$identity_keypair_path" ]] || $solana_keygen new -o "$identity_keypair_path"
[[ -r "$voting_keypair_path" ]] || $solana_keygen new -o "$voting_keypair_path"
[[ -r "$storage_keypair_path" ]] || $solana_keygen new -o "$storage_keypair_path"
setup_validator_accounts "$node_lamports"
echo "$PS4$program ${args[*]}"
$program "${args[@]}" &
@ -306,9 +307,4 @@ while true; do
done
kill_node
# give the cluster time to come back up
(
set -x
sleep 60
)
done