diff --git a/net/gce.sh b/net/gce.sh index 9fee5cab6..df3552487 100755 --- a/net/gce.sh +++ b/net/gce.sh @@ -598,6 +598,11 @@ EOF # autogenerated at $(date) set -ex +if [[ -f /solana-scratch/.instance-startup-complete ]]; then + # Skip on instance reboot + exit 0 +fi + cat > /etc/motd < client.log - deployMethod="$1" entrypointIp="$2" clientToRun="$3" @@ -49,17 +47,6 @@ skip) exit 1 esac -( - sudo SOLANA_METRICS_CONFIG="$SOLANA_METRICS_CONFIG" scripts/oom-monitor.sh -) > oom-monitor.log 2>&1 & -echo $! > oom-monitor.pid -scripts/fd-monitor.sh > fd-monitor.log 2>&1 & -echo $! > fd-monitor.pid -scripts/net-stats.sh > net-stats.log 2>&1 & -echo $! > net-stats.pid - -! tmux list-sessions || tmux kill-session - case $clientToRun in solana-bench-tps) net/scripts/rsync-retry.sh -vPrc \ @@ -97,6 +84,26 @@ solana-bench-exchange) exit 1 esac + +cat > ~/solana/on-reboot <> client.log + +( + sudo SOLANA_METRICS_CONFIG="$SOLANA_METRICS_CONFIG" scripts/oom-monitor.sh +) > oom-monitor.log 2>&1 & +echo $! > oom-monitor.pid +scripts/fd-monitor.sh > fd-monitor.log 2>&1 & +echo $! > fd-monitor.pid +scripts/net-stats.sh > net-stats.log 2>&1 & +echo $! > net-stats.pid +! tmux list-sessions || tmux kill-session + tmux new -s "$clientToRun" -d " while true; do echo === Client start: \$(date) | tee -a client.log @@ -106,5 +113,11 @@ tmux new -s "$clientToRun" -d " $metricsWriteDatapoint 'testnet-deploy client-complete=1' done " +EOF +chmod +x ~/solana/on-reboot +echo "@reboot ~/solana/on-reboot" | crontab - + +~/solana/on-reboot + sleep 1 tmux capture-pane -t "$clientToRun" -p -S -100 diff --git a/net/remote/remote-node.sh b/net/remote/remote-node.sh index 63b6e40a1..0222847d1 100755 --- a/net/remote/remote-node.sh +++ b/net/remote/remote-node.sh @@ -59,11 +59,19 @@ genesisOptions="$genesisOptions" airdropsEnabled=$airdropsEnabled EOF -source scripts/oom-score-adj.sh source net/common.sh loadConfigFile initCompleteFile=init-complete-node.log + +cat > ~/solana/on-reboot <> ~/solana/on-reboot < net-stats.log 2>&1 & echo $! > net-stats.pid + if [[ -e /dev/nvidia0 && -x ~/.cargo/bin/solana-validator-cuda ]]; then + echo Selecting solana-validator-cuda + export SOLANA_CUDA=1 + fi +EOF + case $nodeType in bootstrap-leader) - if [[ -e /dev/nvidia0 && -x ~/.cargo/bin/solana-validator-cuda ]]; then - echo Selecting solana-validator-cuda - export SOLANA_CUDA=1 - fi set -x if [[ $skipSetup != true ]]; then rm -rf ./solana-node-keys @@ -175,16 +192,24 @@ EOF ) if [[ $airdropsEnabled = true ]]; then +cat >> ~/solana/on-reboot < drone.log 2>&1 & +EOF fi # shellcheck disable=SC2206 # Don't want to double quote $extraNodeArgs args+=($extraNodeArgs) - nohup ./multinode-demo/bootstrap-leader.sh "${args[@]}" > fullnode.log 2>&1 & - pid=$! - oom_score_adj "$pid" 1000 + +cat >> ~/solana/on-reboot < fullnode.log 2>&1 & + pid=\$! + oom_score_adj "\$pid" 1000 + disown +EOF + ~/solana/on-reboot waitForNodeToInit - solana --url http://"$entrypointIp":8899 -k ~/solana/fullnode-identity.json \ + solana --url http://"$entrypointIp":8899 \ + --keypair ~/solana/config/bootstrap-leader/identity-keypair.json \ validator-info publish "$(hostname)" -n team/solana --force || true ;; validator|blockstreamer) @@ -197,11 +222,6 @@ EOF "$entrypointIp":~/solana/solana-node-keys/"$nodeIndex" ~/solana/fullnode-identity.json fi - if [[ -e /dev/nvidia0 && -x ~/.cargo/bin/solana-validator-cuda ]]; then - echo Selecting solana-validator-cuda - export SOLANA_CUDA=1 - fi - args=( --entrypoint "$entrypointIp:8001" --gossip-port 8001 @@ -240,7 +260,9 @@ EOF # a location that somebody would expect to be able to airdrop from scp "$entrypointIp":~/solana/config/mint-keypair.json config/ if [[ $airdropsEnabled = true ]]; then +cat >> ~/solana/on-reboot < drone.log 2>&1 & +EOF fi # Grab the TLS cert generated by /certbot-restore.sh @@ -249,30 +271,39 @@ EOF ls -l .cert.pem .key.pem fi - export BLOCKEXPLORER_GEOIP_WHITELIST=$PWD/net/config/geoip.yml npm install @solana/blockexplorer@1 - npx solana-blockexplorer > blockexplorer.log 2>&1 & - # Confirm the blockexplorer is accessible - curl --head --retry 3 --retry-connrefused http://localhost:5000/ +cat >> ~/solana/on-reboot < blockexplorer.log 2>&1 & # Redirect port 80 to port 5000 sudo iptables -A INPUT -p tcp --dport 80 -j ACCEPT sudo iptables -A INPUT -p tcp --dport 5000 -j ACCEPT sudo iptables -A PREROUTING -t nat -p tcp --dport 80 -j REDIRECT --to-port 5000 - - # Confirm the blockexplorer is now globally accessible - curl --head "$(curl ifconfig.io)" +EOF fi args+=(--init-complete-file "$initCompleteFile") # shellcheck disable=SC2206 # Don't want to double quote $extraNodeArgs args+=($extraNodeArgs) - nohup ./multinode-demo/validator.sh "${args[@]}" > fullnode.log 2>&1 & - pid=$! - oom_score_adj "$pid" 1000 +cat >> ~/solana/on-reboot < fullnode.log 2>&1 & + pid=\$! + oom_score_adj "\$pid" 1000 + disown +EOF + ~/solana/on-reboot waitForNodeToInit + if [[ $nodeType = blockstreamer ]]; then + # Confirm the blockexplorer is accessible + curl --head --retry 3 --retry-connrefused http://localhost:5000/ + + # Confirm the blockexplorer is now globally accessible + curl --head "$(curl ifconfig.io)" + fi + if [[ $skipSetup != true && $nodeType != blockstreamer ]]; then args=( --url http://"$entrypointIp":8899 @@ -289,7 +320,8 @@ EOF ./multinode-demo/delegate-stake.sh "${args[@]}" fi - solana --url http://"$entrypointIp":8899 -k ~/solana/fullnode-identity.json \ + solana --url http://"$entrypointIp":8899 \ + --keypair ~/solana/fullnode-identity.json \ validator-info publish "$(hostname)" -n team/solana --force || true ;; replicator) @@ -308,9 +340,13 @@ EOF exit 1 fi - nohup ./multinode-demo/replicator.sh "${args[@]}" > fullnode.log 2>&1 & - pid=$! - oom_score_adj "$pid" 1000 +cat >> ~/solana/on-reboot < fullnode.log 2>&1 & + pid=\$! + oom_score_adj "\$pid" 1000 + disown +EOF + ~/solana/on-reboot sleep 1 ;; *) @@ -318,9 +354,9 @@ EOF exit 1 ;; esac - disown ;; *) echo "Unknown deployment method: $deployMethod" exit 1 esac + diff --git a/net/scripts/gce-provider.sh b/net/scripts/gce-provider.sh index 1621dcd74..e52264db8 100755 --- a/net/scripts/gce-provider.sh +++ b/net/scripts/gce-provider.sh @@ -167,8 +167,6 @@ cloud_CreateInstances() { --tags testnet --metadata "testnet=$networkName" --image "$imageName" - --maintenance-policy TERMINATE - --no-restart-on-failure ) # shellcheck disable=SC2206 # Do not want to quote $imageName as it may contain extra args