net.sh: Add support for selecting validator GPU mode (#6326)

automerge
This commit is contained in:
Trent Nelson 2019-10-14 10:33:32 -06:00 committed by Grimes
parent 79e32c92c1
commit 82fea9ce73
4 changed files with 58 additions and 2 deletions

View File

@ -8,6 +8,11 @@ here=$(dirname "$0")
# shellcheck source=multinode-demo/common.sh
source "$here"/common.sh
# Refuse to start when the environment reports that a required GPU is absent.
# SOLANA_GPU_MISSING is compared numerically; an empty value evaluates to 0,
# so the guard only fires when the variable is explicitly 1.
# NOTE(review): the only writer visible in this change is the generated
# ~/solana/on-reboot script, which exports SOLANA_GPU_MISSING=1 when a GPU is
# required but /dev/nvidia0 does not exist — confirm that is how it reaches here.
if [[ "$SOLANA_GPU_MISSING" -eq 1 ]]; then
echo "Testnet requires GPUs, but none were found! Aborting..."
exit 1
fi
if [[ -n $SOLANA_CUDA ]]; then
program=$solana_validator_cuda
else

View File

@ -152,6 +152,11 @@ while [[ -n $1 ]]; do
fi
done
# Once option parsing has finished, abort if the environment reports that a
# required GPU is absent. The comparison is numeric; an empty
# SOLANA_GPU_MISSING evaluates to 0, so only an explicit 1 triggers the exit.
# NOTE(review): presumably exported by the generated ~/solana/on-reboot script
# (the only place this change sets it) — confirm against the launch path.
if [[ "$SOLANA_GPU_MISSING" -eq 1 ]]; then
echo "Testnet requires GPUs, but none were found! Aborting..."
exit 1
fi
if [[ ${#positional_args[@]} -gt 1 ]]; then
usage "$@"
fi

View File

@ -49,7 +49,13 @@ Operate a configured testnet
This will start 2 bench-tps clients, and supply "--tx_count 25000"
to the bench-tps client.
-n NUM_FULL_NODES - Number of fullnodes to apply command to.
--gpu-mode GPU_MODE - Specify GPU mode to launch validators with (default: $gpuMode).
GPU_MODE must be one of
on - GPU *required*, any vendor *
off - No GPU, CPU-only
auto - Use GPU if available, any vendor *
cuda - GPU *required*, Nvidia CUDA only
* Currently, Nvidia CUDA is the only supported GPU vendor
--hashes-per-tick NUM_HASHES|sleep|auto
- Override the default --hashes-per-tick for the cluster
--no-airdrop
@ -130,6 +136,7 @@ maybeSkipLedgerVerify=""
maybeDisableAirdrops=""
debugBuild=false
doBuild=true
gpuMode=auto
command=$1
[[ -n $command ]] || usage
@ -187,6 +194,17 @@ while [[ -n $1 ]]; do
elif [[ $1 = --debug ]]; then
debugBuild=true
shift 1
elif [[ $1 = --gpu-mode ]]; then
gpuMode=$2
case "$gpuMode" in
on|off|auto|cuda)
;;
*)
echo "Unexpected GPU mode: \"$gpuMode\""
exit 1
;;
esac
shift 2
else
usage "Unknown long option: $1"
fi
@ -424,6 +442,7 @@ startBootstrapLeader() {
$numBenchExchangeClients \"$benchExchangeExtraArgs\" \
\"$genesisOptions\" \
\"$maybeNoSnapshot $maybeSkipLedgerVerify $maybeLimitLedgerSize\" \
\"$gpuMode\" \
"
) >> "$logFile" 2>&1 || {
cat "$logFile"
@ -488,6 +507,7 @@ startNode() {
$numBenchExchangeClients \"$benchExchangeExtraArgs\" \
\"$genesisOptions\" \
\"$maybeNoSnapshot $maybeSkipLedgerVerify $maybeLimitLedgerSize\" \
\"$gpuMode\" \
"
) >> "$logFile" 2>&1 &
declare pid=$!

View File

@ -24,6 +24,7 @@ numBenchExchangeClients="${15}"
benchExchangeExtraArgs="${16}"
genesisOptions="${17}"
extraNodeArgs="${18}"
gpuMode="${19:-auto}"
set +x
# Use a very large stake (relative to the default multinode-demo/ stake of 42)
@ -75,6 +76,28 @@ EOF
chmod +x ~/solana/on-reboot
echo "@reboot ~/solana/on-reboot" | crontab -
# Translate the requested gpuMode into two boolean flags:
#   GPU_CUDA_OK      - "true" when the CUDA validator may be selected
#   GPU_FAIL_IF_NONE - "true" when the absence of a GPU must be reported
# Both values are the literal words true/false so they can be expanded
# directly as commands inside the generated on-reboot script.
case "$gpuMode" in
  on|cuda)
    # GPU *required*. "on" accepts any vendor and "cuda" is CUDA-only, but
    # both map to the same flags here.
    GPU_CUDA_OK=true
    GPU_FAIL_IF_NONE=true
    ;;
  auto)
    # Use a GPU if one is installed, otherwise carry on without it.
    GPU_CUDA_OK=true
    GPU_FAIL_IF_NONE=false
    ;;
  off)
    # CPU-only: never select the CUDA validator, never complain.
    GPU_CUDA_OK=false
    GPU_FAIL_IF_NONE=false
    ;;
  *)
    echo "Unexpected gpuMode: \"$gpuMode\""
    exit 1
    ;;
esac
waitForNodeToInit() {
echo "--- waiting for node to boot up"
SECONDS=
@ -113,9 +136,12 @@ cat >> ~/solana/on-reboot <<EOF
scripts/net-stats.sh > net-stats.log 2>&1 &
echo \$! > net-stats.pid
if [[ -e /dev/nvidia0 ]]; then
if ${GPU_CUDA_OK} && [[ -e /dev/nvidia0 ]]; then
echo Selecting solana-validator-cuda
export SOLANA_CUDA=1
elif ${GPU_FAIL_IF_NONE} ; then
echo "Expected GPU, found none!"
export SOLANA_GPU_MISSING=1
fi
EOF