From 68bad56e7d1ec95d122eb0ae1f0c6f53716cfb15 Mon Sep 17 00:00:00 2001
From: Michael Vines
Date: Fri, 22 Nov 2019 09:44:16 -0700
Subject: [PATCH] Streamline multinode-demo/ restart logic (#7094)

* bootstrap-leader.sh will now restart the node automatically by default

* Streamline validator restart
---
 ci/localnet-sanity.sh              |  1 +
 multinode-demo/bootstrap-leader.sh | 51 ++++++++++++++++++++++++++++--
 multinode-demo/validator.sh        | 20 +++++-------
 3 files changed, 57 insertions(+), 15 deletions(-)

diff --git a/ci/localnet-sanity.sh b/ci/localnet-sanity.sh
index dc3e187bd..17a2c5db1 100755
--- a/ci/localnet-sanity.sh
+++ b/ci/localnet-sanity.sh
@@ -75,6 +75,7 @@ source multinode-demo/common.sh
 nodes=(
   "multinode-demo/drone.sh"
   "multinode-demo/bootstrap-leader.sh \
+    --no-restart \
     --init-complete-file init-complete-node1.log \
     --dynamic-port-range 8000-8050"
   "multinode-demo/validator.sh \
diff --git a/multinode-demo/bootstrap-leader.sh b/multinode-demo/bootstrap-leader.sh
index 22baa0c85..75b1e1f86 100755
--- a/multinode-demo/bootstrap-leader.sh
+++ b/multinode-demo/bootstrap-leader.sh
@@ -19,6 +19,8 @@ else
   program=$solana_validator
 fi
 
+no_restart=0
+
 args=()
 while [[ -n $1 ]]; do
   if [[ ${1:0:1} = - ]]; then
@@ -43,6 +45,9 @@ while [[ -n $1 ]]; do
     elif [[ $1 = --log ]]; then
       args+=("$1" "$2")
       shift 2
+    elif [[ $1 = --no-restart ]]; then
+      no_restart=1
+      shift
     else
       echo "Unknown argument: $1"
       $program --help
@@ -87,6 +92,46 @@ args+=(
 default_arg --gossip-port 8001
 default_arg --log -
 
-set -x
-# shellcheck disable=SC2086 # Don't want to double quote $program
-exec $program "${args[@]}"
+
+
+pid=
+kill_node() {
+  # Note: do not echo anything from this function to ensure $pid is actually
+  # killed when stdout/stderr are redirected
+  set +ex
+  if [[ -n $pid ]]; then
+    declare _pid=$pid
+    pid=
+    kill "$_pid" || true
+    wait "$_pid" || true
+  fi
+}
+
+kill_node_and_exit() {
+  kill_node
+  exit
+}
+
+trap 'kill_node_and_exit' INT TERM ERR
+
+while true; do
+  echo "$program ${args[*]}"
+  $program "${args[@]}" &
+  pid=$!
+  echo "pid: $pid"
+
+  if ((no_restart)); then
+    wait "$pid"
+    exit $?
+  fi
+
+  while true; do
+    if [[ -z $pid ]] || ! kill -0 "$pid"; then
+      echo "############## validator exited, restarting ##############"
+      break
+    fi
+    sleep 1
+  done
+
+  kill_node
+done
diff --git a/multinode-demo/validator.sh b/multinode-demo/validator.sh
index 4936931d5..a1d4a54d1 100755
--- a/multinode-demo/validator.sh
+++ b/multinode-demo/validator.sh
@@ -278,14 +278,15 @@ setup_validator_accounts() {
   return 0
 }
 
+rpc_url=$($solana_gossip get-rpc-url --entrypoint "$gossip_entrypoint")
+
+[[ -r "$identity_keypair_path" ]] || $solana_keygen new -o "$identity_keypair_path"
+[[ -r "$voting_keypair_path" ]] || $solana_keygen new -o "$voting_keypair_path"
+[[ -r "$storage_keypair_path" ]] || $solana_keygen new -o "$storage_keypair_path"
+
+setup_validator_accounts "$node_lamports"
+
 while true; do
-  rpc_url=$($solana_gossip get-rpc-url --entrypoint "$gossip_entrypoint")
-
-  [[ -r "$identity_keypair_path" ]] || $solana_keygen new -o "$identity_keypair_path"
-  [[ -r "$voting_keypair_path" ]] || $solana_keygen new -o "$voting_keypair_path"
-  [[ -r "$storage_keypair_path" ]] || $solana_keygen new -o "$storage_keypair_path"
-
-  setup_validator_accounts "$node_lamports"
 
   echo "$PS4$program ${args[*]}"
   $program "${args[@]}" &
@@ -306,9 +307,4 @@ while true; do
   done
 
   kill_node
-  # give the cluster time to come back up
-  (
-    set -x
-    sleep 60
-  )
 done