net/ can now deploy Snaps

This commit is contained in:
Michael Vines 2018-09-04 09:21:03 -07:00
parent 2cb1375217
commit bcaa0fdcb1
6 changed files with 152 additions and 90 deletions

View File

@ -169,6 +169,7 @@ delete)
exit 0
fi
gcloud_DeleteInstances
rm -f "$configFile"
;;
create)

View File

@ -17,9 +17,10 @@ usage: $0 [start|stop]
Operate a configured testnet
start - Start the network
sanity - Sanity check the network
stop - Stop the network
start - Start the network
sanity - Sanity check the network
stop - Stop the network
restart - Shortcut for stop then start
start-specific options:
-S snapFilename - Deploy the specified Snap file
@ -29,7 +30,7 @@ Operate a configured testnet
Note: if RUST_LOG is set in the environment it will be propagated into the
network nodes.
sanity-specific options:
sanity/start-specific options:
-o noLedgerVerify - Skip ledger verification
-o noValidatorSanity - Skip validator sanity
@ -49,8 +50,6 @@ sanityExtraArgs=
command=$1
[[ -n $command ]] || usage
shift
[[ $command = start || $command = sanity || $command = stop ]] ||
usage "Invalid command: $command"
while getopts "h?S:s:a:o:" opt; do
case $opt in
@ -58,7 +57,6 @@ while getopts "h?S:s:a:o:" opt; do
usage
;;
S)
[[ $command = start ]] || usage "-s is only valid with the 'start' command"
snapFilename=$OPTARG
[[ -f $snapFilename ]] || usage "Snap not readable: $snapFilename"
deployMethod=snap
@ -67,6 +65,7 @@ while getopts "h?S:s:a:o:" opt; do
case $OPTARG in
edge|beta|stable)
snapChannel=$OPTARG
deployMethod=snap
;;
*)
usage "Invalid snap channel: $OPTARG"
@ -94,6 +93,7 @@ while getopts "h?S:s:a:o:" opt; do
done
loadConfigFile
expectedNodeCount=$((${#validatorIpList[@]} + 1))
build() {
declare MAYBE_DOCKER=
@ -103,8 +103,7 @@ build() {
SECONDS=0
(
cd "$SOLANA_ROOT"
echo "****************"
echo "Build started at $(date)"
echo "--- Build started at $(date)"
set -x
rm -rf farf
@ -121,17 +120,16 @@ common_start_setup() {
set -x
test -d "$SOLANA_ROOT"
ssh "${sshOptions[@]}" "$ipAddress" "mkdir -p ~/solana ~/.cargo/bin"
rsync -vPrz -e "ssh ${sshOptions[*]}" \
rsync -vPr -e "ssh ${sshOptions[*]}" \
"$SOLANA_ROOT"/{fetch-perf-libs.sh,scripts,net,multinode-demo} \
"$ipAddress":~/solana/
) >> "$logFile"
) >> "$logFile" 2>&1
}
startLeader() {
declare ipAddress=$1
declare logFile="$2"
echo "****************"
echo "Starting leader: $leaderIp"
echo "--- Starting leader: $leaderIp"
common_start_setup "$ipAddress" "$logFile"
@ -141,58 +139,58 @@ startLeader() {
set -x
case $deployMethod in
snap)
rsync -vPrz -e "ssh ${sshOptions[*]}" "$snapFilename" "$ipAddress:~/solana/solana.snap"
rsync -vPr -e "ssh ${sshOptions[*]}" "$snapFilename" "$ipAddress:~/solana/solana.snap"
;;
local)
rsync -vPrz -e "ssh ${sshOptions[*]}" "$SOLANA_ROOT"/farf/bin/* "$ipAddress:~/.cargo/bin/"
rsync -vPr -e "ssh ${sshOptions[*]}" "$SOLANA_ROOT"/farf/bin/* "$ipAddress:~/.cargo/bin/"
;;
*)
usage "Internal error: invalid deployMethod: $deployMethod"
;;
esac
ssh "${sshOptions[@]}" -f "$ipAddress" \
"./solana/net/remote/remote_node.sh $deployMethod leader $leaderIp \"$nodeSetupArgs\" \"$RUST_LOG\""
) >> "$logFile"
ssh "${sshOptions[@]}" -n "$ipAddress" \
"./solana/net/remote/remote_node.sh $deployMethod leader $leaderIp $expectedNodeCount \"$nodeSetupArgs\" \"$RUST_LOG\""
) >> "$logFile" 2>&1
}
startValidator() {
declare ipAddress=$1
declare logFile="$2"
echo "*******************"
echo "Starting validator: $leaderIp"
common_start_setup "$ipAddress" "$logFile"
echo "--- Starting validator: $leaderIp"
(
common_start_setup "$ipAddress" /dev/stdout
set -x
ssh "${sshOptions[@]}" -f "$ipAddress" \
"./solana/net/remote/remote_node.sh $deployMethod validator $leaderIp \"$nodeSetupArgs\" \"$RUST_LOG\""
) >> "$logFile"
ssh "${sshOptions[@]}" -n "$ipAddress" \
"./solana/net/remote/remote_node.sh $deployMethod validator $leaderIp $expectedNodeCount \"$nodeSetupArgs\" \"$RUST_LOG\""
) >> "$netLogDir/validator-$ipAddress.log" 2>&1 &
declare pid=$!
ln -sfT "validator-$ipAddress.log" "$netLogDir/validator-$pid.log"
pids+=("$pid")
}
startClient() {
declare ipAddress=$1
declare logFile="$2"
echo "****************"
echo "Starting client: $leaderIp"
echo "--- Starting client: $leaderIp"
common_start_setup "$ipAddress" "$logFile"
declare expectedNodeCount=$((${#validatorIpList[@]} + 1))
(
set -x
ssh "${sshOptions[@]}" -f "$ipAddress" \
"./solana/net/remote/remote_client.sh $deployMethod $leaderIp $expectedNodeCount \"$RUST_LOG\""
) >> "$logFile"
) >> "$logFile" 2>&1
}
sanity() {
declare expectedNodeCount=$((${#validatorIpList[@]} + 1))
echo "--- Sanity"
(
set -x
# shellcheck disable=SC2029 # remote_client.sh are expanded on client side intentionally...
# shellcheck disable=SC2029 # remote_sanity.sh args are expanded on client side intentionally
ssh "${sshOptions[@]}" "$leaderIp" \
"./solana/net/remote/remote_sanity.sh $deployMethod $leaderIp $expectedNodeCount $sanityExtraArgs"
"./solana/net/remote/remote_sanity.sh $sanityExtraArgs"
)
}
@ -200,13 +198,25 @@ start() {
case $deployMethod in
snap)
if [[ -n $snapChannel ]]; then
rm -f "$SOLANA_ROOT"/solana_*.snap
if [[ $(uname) != Linux ]]; then
echo Error: snap channel deployment only supported in Linux
exit 1
(
set -x
SOLANA_DOCKER_RUN_NOSETUID=1 "$SOLANA_ROOT"/ci/docker-run.sh ubuntu:18.04 bash -c "
set -ex;
apt-get -qq update;
apt-get -qq -y install snapd;
snap download --channel=$snapChannel solana;
"
)
else
snap download --channel="$snapChannel" solana
fi
usage "TODO: the snap download command below is probably wrong..."
snap download --"$snapChannel" solana
snapFilename=solana.snap
snapFilename="$(echo "$SOLANA_ROOT"/solana_*.snap)"
[[ -r $snapFilename ]] || {
echo "Error: Snap not readable: $snapFilename"
exit 1
}
fi
;;
local)
@ -226,10 +236,21 @@ start() {
leaderDeployTime=$SECONDS
SECONDS=0
pids=()
for ipAddress in "${validatorIpList[@]}"; do
startValidator "$ipAddress" "$netLogDir/validator-$ipAddress.log" &
startValidator "$ipAddress"
done
wait
for pid in "${pids[@]}"; do
declare ok=true
wait "$pid" || ok=false
if ! $ok; then
cat "$netLogDir/validator-$pid.log"
echo ^^^ +++
exit 1
fi
done
validatorDeployTime=$SECONDS
sanity
@ -239,7 +260,6 @@ start() {
startClient "$ipAddress" "$netLogDir/client-$ipAddress.log"
done
clientDeployTime=$SECONDS
wait
if [[ $deployMethod = "snap" ]]; then
IFS=\ read -r _ networkVersion _ < <(
@ -264,8 +284,7 @@ start() {
stop_node() {
local ipAddress=$1
echo "**************"
echo "Stopping node: $ipAddress"
echo "--- Stopping node: $ipAddress"
(
set -x
ssh "${sshOptions[@]}" "$ipAddress" "
@ -273,8 +292,8 @@ stop_node() {
if snap list solana; then
sudo snap set solana mode=;
sudo snap remove solana;
fi; \
pkill -9 solana- remote_ oom-monitor;
fi;
for pattern in solana- remote_ oom-monitor; do pkill -9 \$pattern; done;
"
) || true
}
@ -294,10 +313,13 @@ stop() {
}
case $command in
start)
restart)
stop
start
;;
start)
start
;;
sanity)
sanity
;;

25
net/remote/remote_client.sh Executable file → Normal file
View File

@ -1,14 +1,15 @@
#!/bin/bash -e
cd "$(dirname "$0")"/../..
deployMethod="$1"
netEntrypoint="$2"
leaderIp="$2"
numNodes="$3"
RUST_LOG="$4"
[[ -n $deployMethod ]] || exit
[[ -n $netEntrypoint ]] || exit
[[ -n $leaderIp ]] || exit
[[ -n $numNodes ]] || exit
cd "$(dirname "$0")"/../..
source net/common.sh
loadConfigFile
@ -17,14 +18,19 @@ if [[ $threadCount -gt 4 ]]; then
threadCount=4
fi
./script/install-earlyoom.sh
scripts/install-earlyoom.sh
case $deployMethod in
snap)
rsync -vPr "$leaderIp:~/solana/solana.snap" .
sudo snap install solana.snap --devmode --dangerous
rm solana.snap
sudo snap set solana metrics-config="$SOLANA_METRICS_CONFIG" rust-log="$RUST_LOG"
sudo snap set solana "\
leader-ip=$leaderIp \
metrics-config=$SOLANA_METRICS_CONFIG \
rust-log=$RUST_LOG \
"
solana_bench_tps=/snap/bin/solana.bench-tps
;;
local)
@ -32,20 +38,19 @@ local)
export USE_INSTALL=1
export RUST_LOG
rsync -vPrz "$netEntrypoint:~/.cargo/bin/solana*" ~/.cargo/bin/
solana_bench_tps=multinode-demo/client.sh
netEntrypoint="$:~/solana"
rsync -vPr "$leaderIp:~/.cargo/bin/solana*" ~/.cargo/bin/
solana_bench_tps="multinode-demo/client.sh $leaderIp:~/solana"
;;
*)
echo "Unknown deployment method: $deployMethod"
exit 1
esac
./scripts/oom-monitor.sh > oom-monitor.log 2>&1 &
scripts/oom-monitor.sh > oom-monitor.log 2>&1 &
while true; do
echo "=== Client start: $(date)" >> client.log
clientCommand="$solana_bench_tps $netEntrypoint $numNodes --loop -s 600 --sustained -t threadCount"
clientCommand="$solana_bench_tps --num-nodes $numNodes --loop -s 600 --sustained -t threadCount"
echo "$ $clientCommand" >> client.log
$clientCommand >> client.log 2>&1

View File

@ -1,37 +1,51 @@
#!/bin/bash -e
cd "$(dirname "$0")"/../..
deployMethod="$1"
nodeType="$2"
netEntrypoint="$3"
setupArgs="$4"
RUST_LOG="$5"
leaderIp="$3"
numNodes="$4"
setupArgs="$5"
RUST_LOG="$6"
cat > deployConfig <<EOF
deployMethod="$deployMethod"
leaderIp="$leaderIp"
numNodes="$numNodes"
EOF
[[ -n $deployMethod ]] || exit
[[ -n $nodeType ]] || exit
[[ -n $netEntrypoint ]] || exit
[[ -n $leaderIp ]] || exit
cd "$(dirname "$0")"/../..
source net/common.sh
loadConfigFile
./script/install-earlyoom.sh
scripts/install-earlyoom.sh
case $deployMethod in
snap)
SECONDS=0
rsync -vPr "$leaderIp:~/solana/solana.snap" .
sudo snap install solana.snap --devmode --dangerous
rm solana.snap
commonNodeConfig="\
rust-log=$RUST_LOG \
leader-ip=$leaderIp \
metrics-config=$SOLANA_METRICS_CONFIG \
rust-log=$RUST_LOG \
setup-args=$setupArgs \
enable-cuda=1 \
"
if [[ -e /dev/nvidia0 ]]; then
commonNodeConfig="$commonNodeConfig enable-cuda=1"
fi
if [[ $nodeType = leader ]]; then
nodeConfig="mode=leader+drone $commonNodeConfig"
else
nodeConfig="mode=validator leader-address=$netEntrypoint $commonNodeConfig"
nodeConfig="mode=validator $commonNodeConfig"
fi
logmarker="solana deploy $(date)/$RANDOM"
@ -50,11 +64,13 @@ snap)
local)
PATH="$HOME"/.cargo/bin:"$PATH"
export USE_INSTALL=1
export SOLANA_CUDA=1
export RUST_LOG=1
export RUST_LOG
if [[ -e /dev/nvidia0 ]]; then
export SOLANA_CUDA=1
fi
./fetch-perf-libs.sh
./scripts/oom-monitor.sh > oom-monitor.log 2>&1 &
scripts/oom-monitor.sh > oom-monitor.log 2>&1 &
case $nodeType in
leader)
@ -64,11 +80,11 @@ local)
./multinode-demo/leader.sh > leader.log 2>&1 &
;;
validator)
rsync -vPrz "$netEntrypoint:~/.cargo/bin/solana*" ~/.cargo/bin/
rsync -vPr "$leaderIp:~/.cargo/bin/solana*" ~/.cargo/bin/
# shellcheck disable=SC2086 # Don't want to double quote "$setupArgs"
./multinode-demo/setup.sh -t validator -p $setupArgs
./multinode-demo/validator.sh "$netEntrypoint":~/solana "$netEntrypoint" >validator.log 2>&1 &
./multinode-demo/validator.sh "$leaderIp":~/solana "$leaderIp" >validator.log 2>&1 &
;;
*)
echo "Error: unknown node type: $nodeType"
@ -80,4 +96,3 @@ local)
echo "Unknown deployment method: $deployMethod"
exit 1
esac

View File

@ -1,14 +1,25 @@
#!/bin/bash -e
deployMethod="$1"
netEntrypoint="$2"
numNodes="$3"
cd "$(dirname "$0")"/../..
[[ -n $deployMethod ]] || exit
[[ -n $netEntrypoint ]] || exit
[[ -n $numNodes ]] || exit
deployMethod=
leaderIp=
numNodes=
# shellcheck source=/dev/null # deployConfig is written by remote_node.sh
source deployConfig
shift 3
[[ -n $deployMethod ]] || {
echo "deployMethod empty"
exit 1
}
[[ -n $leaderIp ]] || {
echo "leaderIp empty"
exit 1
}
[[ -n $numNodes ]] || {
echo "numNodes empty"
exit 1
}
ledgerVerify=true
validatorSanity=true
@ -29,26 +40,25 @@ while [[ $1 = "-o" ]]; do
esac
done
cd "$(dirname "$0")"/../..
source net/common.sh
loadConfigFile
case $deployMethod in
snap)
PATH="/snap/bin:$PATH"
export USE_SNAP=1
solana_bench_tps=/snap/bin/solana.bench-tps
solana_ledger_tool=/snap/bin/solana.ledger-tool
solana_bench_tps=solana.bench-tps
solana_ledger_tool=solana.ledger-tool
ledger=/var/snap/solana/current/config/ledger
;;
local)
PATH="$HOME"/.cargo/bin:"$PATH"
export USE_INSTALL=1
solana_bench_tps=multinode-demo/client.sh
solana_bench_tps="multinode-demo/client.sh $leaderIp:~/solana"
solana_ledger_tool=solana-ledger-tool
ledger=config/ledger
netEntrypoint="$:~/solana"
;;
*)
echo "Unknown deployment method: $deployMethod"
@ -56,19 +66,19 @@ local)
esac
echo "--- $netEntrypoint: wallet sanity"
echo "--- $leaderIp: wallet sanity"
(
set -x
multinode-demo/test/wallet-sanity.sh "$netEntrypoint"
multinode-demo/test/wallet-sanity.sh "$leaderIp"
)
echo "--- $netEntrypoint: node count"
echo "--- $leaderIp: node count"
(
set -x
$solana_bench_tps "$netEntrypoint" "$numNodes" -c
$solana_bench_tps --num-nodes "$numNodes" --converge-only
)
echo "--- $netEntrypoint: verify ledger"
echo "--- $leaderIp: verify ledger"
if $ledgerVerify; then
if [[ -d $ledger ]]; then
(
@ -87,12 +97,12 @@ else
fi
echo "--- $netEntrypoint: validator sanity"
echo "--- $leaderIp: validator sanity"
if $validatorSanity; then
(
./multinode-demo/setup.sh -t validator
set -e pipefail
timeout 10s ./multinode-demo/validator.sh "$netEntrypoint" 2>&1 | tee validator.log
timeout 10s ./multinode-demo/validator.sh "$leaderIp" 2>&1 | tee validator.log
)
wc -l validator.log
if grep -C100 panic validator.log; then

View File

@ -9,3 +9,12 @@ systemctl disable apt-daily.service # disable run when system boot
systemctl disable apt-daily.timer # disable timer run
apt-get --assume-yes install rsync libssl-dev
cat > /etc/rsyncd.conf <<-EOF
[config]
path = /var/snap/solana/current/config
hosts allow = *
read only = true
EOF
systemctl enable rsync
systemctl start rsync