From 76413cbfac389051a99cb179787380e71c81e6ad Mon Sep 17 00:00:00 2001
From: Michael Vines
Date: Fri, 28 Jun 2019 14:24:44 -0700
Subject: [PATCH] Ensure validator process is killed when fullnode.sh is killed (#4869)

automerge
---
 install/src/command.rs     |  3 ++-
 multinode-demo/fullnode.sh | 24 +++++++++++++++++++-----
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/install/src/command.rs b/install/src/command.rs
index 0babe37d8a..972cc74052 100644
--- a/install/src/command.rs
+++ b/install/src/command.rs
@@ -793,7 +793,8 @@ pub fn run(
                 Ok(true) => {
                     // Update successful, kill current process so it will be restart
                     if let Some(ref mut child) = child_option {
-                        println!("Killing program: {:?}", child.kill());
+                        let id = child.id();
+                        println!("Killing pid {}: {:?}", id, child.kill());
                     }
                 }
                 Ok(false) => {} // No update available
diff --git a/multinode-demo/fullnode.sh b/multinode-demo/fullnode.sh
index f28c5f3c05..6e5dd9abc9 100755
--- a/multinode-demo/fullnode.sh
+++ b/multinode-demo/fullnode.sh
@@ -402,8 +402,20 @@ new_gensis_block() {
 
 set -e
 PS4="$(basename "$0"): "
+
 pid=
-trap '[[ -n $pid ]] && kill "$pid" >/dev/null 2>&1 && wait "$pid"' INT TERM ERR
+kill_fullnode() {
+  if [[ -n $pid ]]; then
+    declare _pid=$pid
+    pid=
+    echo "killing pid $_pid"
+    kill "$_pid" || true
+    wait "$_pid" || true
+    echo "$_pid killed"
+  fi
+}
+trap 'kill_fullnode' INT TERM ERR
+
 while true; do
   if new_gensis_block; then
     # If the genesis block has changed remove the now stale ledger and vote
@@ -464,6 +476,7 @@ while true; do
   echo "$PS4$program ${args[*]}"
   $program "${args[@]}" &
   pid=$!
+  echo "pid: $pid"
   oom_score_adj "$pid" 1000
 
   if ((no_restart)); then
@@ -517,15 +530,16 @@ while true; do
       ) || (
         echo "Error: failed to rsync ledger"
       )
-      new_gensis_block && break
+      if new_gensis_block; then
+        echo "############## New genesis detected, restarting $node_type ##############"
+        break
+      fi
 
       secs_to_next_genesis_poll=60
     fi
   done
 
-  echo "############## New genesis detected, restarting $node_type ##############"
-  kill "$pid" || true
-  wait "$pid" || true
+  kill_fullnode
   # give the cluster time to come back up
   (
     set -x