net/ can now deploy Snaps

This commit is contained in:
Michael Vines 2018-09-04 09:21:03 -07:00
parent 2cb1375217
commit bcaa0fdcb1
6 changed files with 152 additions and 90 deletions

View File

@ -169,6 +169,7 @@ delete)
exit 0 exit 0
fi fi
gcloud_DeleteInstances gcloud_DeleteInstances
rm -f "$configFile"
;; ;;
create) create)

View File

@ -17,9 +17,10 @@ usage: $0 [start|stop]
Operate a configured testnet Operate a configured testnet
start - Start the network start - Start the network
sanity - Sanity check the network sanity - Sanity check the network
stop - Stop the network stop - Stop the network
restart - Shortcut for stop then start
start-specific options: start-specific options:
-S snapFilename - Deploy the specified Snap file -S snapFilename - Deploy the specified Snap file
@ -29,7 +30,7 @@ Operate a configured testnet
Note: if RUST_LOG is set in the environment it will be propogated into the Note: if RUST_LOG is set in the environment it will be propogated into the
network nodes. network nodes.
sanity-specific options: sanity/start-specific options:
-o noLedgerVerify - Skip ledger verification -o noLedgerVerify - Skip ledger verification
-o noValidatorSanity - Skip validatory sanity -o noValidatorSanity - Skip validatory sanity
@ -49,8 +50,6 @@ sanityExtraArgs=
command=$1 command=$1
[[ -n $command ]] || usage [[ -n $command ]] || usage
shift shift
[[ $command = start || $command = sanity || $command = stop ]] ||
usage "Invalid command: $command"
while getopts "h?S:s:a:o:" opt; do while getopts "h?S:s:a:o:" opt; do
case $opt in case $opt in
@ -58,7 +57,6 @@ while getopts "h?S:s:a:o:" opt; do
usage usage
;; ;;
S) S)
[[ $command = start ]] || usage "-s is only valid with the 'start' command"
snapFilename=$OPTARG snapFilename=$OPTARG
[[ -f $snapFilename ]] || usage "Snap not readable: $snapFilename" [[ -f $snapFilename ]] || usage "Snap not readable: $snapFilename"
deployMethod=snap deployMethod=snap
@ -67,6 +65,7 @@ while getopts "h?S:s:a:o:" opt; do
case $OPTARG in case $OPTARG in
edge|beta|stable) edge|beta|stable)
snapChannel=$OPTARG snapChannel=$OPTARG
deployMethod=snap
;; ;;
*) *)
usage "Invalid snap channel: $OPTARG" usage "Invalid snap channel: $OPTARG"
@ -94,6 +93,7 @@ while getopts "h?S:s:a:o:" opt; do
done done
loadConfigFile loadConfigFile
expectedNodeCount=$((${#validatorIpList[@]} + 1))
build() { build() {
declare MAYBE_DOCKER= declare MAYBE_DOCKER=
@ -103,8 +103,7 @@ build() {
SECONDS=0 SECONDS=0
( (
cd "$SOLANA_ROOT" cd "$SOLANA_ROOT"
echo "****************" echo "--- Build started at $(date)"
echo "Build started at $(date)"
set -x set -x
rm -rf farf rm -rf farf
@ -121,17 +120,16 @@ common_start_setup() {
set -x set -x
test -d "$SOLANA_ROOT" test -d "$SOLANA_ROOT"
ssh "${sshOptions[@]}" "$ipAddress" "mkdir -p ~/solana ~/.cargo/bin" ssh "${sshOptions[@]}" "$ipAddress" "mkdir -p ~/solana ~/.cargo/bin"
rsync -vPrz -e "ssh ${sshOptions[*]}" \ rsync -vPr -e "ssh ${sshOptions[*]}" \
"$SOLANA_ROOT"/{fetch-perf-libs.sh,scripts,net,multinode-demo} \ "$SOLANA_ROOT"/{fetch-perf-libs.sh,scripts,net,multinode-demo} \
"$ipAddress":~/solana/ "$ipAddress":~/solana/
) >> "$logFile" ) >> "$logFile" 2>&1
} }
startLeader() { startLeader() {
declare ipAddress=$1 declare ipAddress=$1
declare logFile="$2" declare logFile="$2"
echo "****************" echo "--- Starting leader: $leaderIp"
echo "Starting leader: $leaderIp"
common_start_setup "$ipAddress" "$logFile" common_start_setup "$ipAddress" "$logFile"
@ -141,58 +139,58 @@ startLeader() {
set -x set -x
case $deployMethod in case $deployMethod in
snap) snap)
rsync -vPrz -e "ssh ${sshOptions[*]}" "$snapFilename" "$ipAddress:~/solana/solana.snap" rsync -vPr -e "ssh ${sshOptions[*]}" "$snapFilename" "$ipAddress:~/solana/solana.snap"
;; ;;
local) local)
rsync -vPrz -e "ssh ${sshOptions[*]}" "$SOLANA_ROOT"/farf/bin/* "$ipAddress:~/.cargo/bin/" rsync -vPr -e "ssh ${sshOptions[*]}" "$SOLANA_ROOT"/farf/bin/* "$ipAddress:~/.cargo/bin/"
;; ;;
*) *)
usage "Internal error: invalid deployMethod: $deployMethod" usage "Internal error: invalid deployMethod: $deployMethod"
;; ;;
esac esac
ssh "${sshOptions[@]}" -f "$ipAddress" \ ssh "${sshOptions[@]}" -n "$ipAddress" \
"./solana/net/remote/remote_node.sh $deployMethod leader $leaderIp \"$nodeSetupArgs\" \"$RUST_LOG\"" "./solana/net/remote/remote_node.sh $deployMethod leader $leaderIp $expectedNodeCount \"$nodeSetupArgs\" \"$RUST_LOG\""
) >> "$logFile" ) >> "$logFile" 2>&1
} }
startValidator() { startValidator() {
declare ipAddress=$1 declare ipAddress=$1
declare logFile="$2" declare logFile="$2"
echo "*******************"
echo "Starting validator: $leaderIp"
common_start_setup "$ipAddress" "$logFile"
echo "--- Starting validator: $leaderIp"
( (
common_start_setup "$ipAddress" /dev/stdout
set -x set -x
ssh "${sshOptions[@]}" -f "$ipAddress" \ ssh "${sshOptions[@]}" -n "$ipAddress" \
"./solana/net/remote/remote_node.sh $deployMethod validator $leaderIp \"$nodeSetupArgs\" \"$RUST_LOG\"" "./solana/net/remote/remote_node.sh $deployMethod validator $leaderIp $expectedNodeCount \"$nodeSetupArgs\" \"$RUST_LOG\""
) >> "$logFile" ) >> "$netLogDir/validator-$ipAddress.log" 2>&1 &
declare pid=$!
ln -sfT "validator-$ipAddress.log" "$netLogDir/validator-$pid.log"
pids+=("$pid")
} }
startClient() { startClient() {
declare ipAddress=$1 declare ipAddress=$1
declare logFile="$2" declare logFile="$2"
echo "****************" echo "--- Starting client: $leaderIp"
echo "Starting client: $leaderIp"
common_start_setup "$ipAddress" "$logFile" common_start_setup "$ipAddress" "$logFile"
declare expectedNodeCount=$((${#validatorIpList[@]} + 1))
( (
set -x set -x
ssh "${sshOptions[@]}" -f "$ipAddress" \ ssh "${sshOptions[@]}" -f "$ipAddress" \
"./solana/net/remote/remote_client.sh $deployMethod $leaderIp $expectedNodeCount \"$RUST_LOG\"" "./solana/net/remote/remote_client.sh $deployMethod $leaderIp $expectedNodeCount \"$RUST_LOG\""
) >> "$logFile" ) >> "$logFile" 2>&1
} }
sanity() { sanity() {
declare expectedNodeCount=$((${#validatorIpList[@]} + 1)) declare expectedNodeCount=$((${#validatorIpList[@]} + 1))
echo "--- Sanity"
( (
set -x set -x
# shellcheck disable=SC2029 # remote_client.sh are expanded on client side intentionally... # shellcheck disable=SC2029 # remote_client.sh args are expanded on client side intentionally
ssh "${sshOptions[@]}" "$leaderIp" \ ssh "${sshOptions[@]}" "$leaderIp" \
"./solana/net/remote/remote_sanity.sh $deployMethod $leaderIp $expectedNodeCount $sanityExtraArgs" "./solana/net/remote/remote_sanity.sh $sanityExtraArgs"
) )
} }
@ -200,13 +198,25 @@ start() {
case $deployMethod in case $deployMethod in
snap) snap)
if [[ -n $snapChannel ]]; then if [[ -n $snapChannel ]]; then
rm -f "$SOLANA_ROOT"/solana_*.snap
if [[ $(uname) != Linux ]]; then if [[ $(uname) != Linux ]]; then
echo Error: snap channel deployment only supported in Linux (
exit 1 set -x
SOLANA_DOCKER_RUN_NOSETUID=1 "$SOLANA_ROOT"/ci/docker-run.sh ubuntu:18.04 bash -c "
set -ex;
apt-get -qq update;
apt-get -qq -y install snapd;
snap download --channel=$snapChannel solana;
"
)
else
snap download --channel="$snapChannel" solana
fi fi
usage "TODO: the snap download command below is probably wrong..." snapFilename="$(echo "$SOLANA_ROOT"/solana_*.snap)"
snap download --"$snapChannel" solana [[ -r $snapFilename ]] || {
snapFilename=solana.snap echo "Error: Snap not readable: $snapFilename"
exit 1
}
fi fi
;; ;;
local) local)
@ -226,10 +236,21 @@ start() {
leaderDeployTime=$SECONDS leaderDeployTime=$SECONDS
SECONDS=0 SECONDS=0
pids=()
for ipAddress in "${validatorIpList[@]}"; do for ipAddress in "${validatorIpList[@]}"; do
startValidator "$ipAddress" "$netLogDir/validator-$ipAddress.log" & startValidator "$ipAddress"
done done
wait
for pid in "${pids[@]}"; do
declare ok=true
wait "$pid" || ok=false
if ! $ok; then
cat "$netLogDir/validator-$pid.log"
echo ^^^ +++
exit 1
fi
done
validatorDeployTime=$SECONDS validatorDeployTime=$SECONDS
sanity sanity
@ -239,7 +260,6 @@ start() {
startClient "$ipAddress" "$netLogDir/client-$ipAddress.log" startClient "$ipAddress" "$netLogDir/client-$ipAddress.log"
done done
clientDeployTime=$SECONDS clientDeployTime=$SECONDS
wait
if [[ $deployMethod = "snap" ]]; then if [[ $deployMethod = "snap" ]]; then
IFS=\ read -r _ networkVersion _ < <( IFS=\ read -r _ networkVersion _ < <(
@ -264,8 +284,7 @@ start() {
stop_node() { stop_node() {
local ipAddress=$1 local ipAddress=$1
echo "**************" echo "--- Stopping node: $ipAddress"
echo "Stopping node: $ipAddress"
( (
set -x set -x
ssh "${sshOptions[@]}" "$ipAddress" " ssh "${sshOptions[@]}" "$ipAddress" "
@ -273,8 +292,8 @@ stop_node() {
if snap list solana; then if snap list solana; then
sudo snap set solana mode=; sudo snap set solana mode=;
sudo snap remove solana; sudo snap remove solana;
fi; \ fi;
pkill -9 solana- remote_ oom-monitor; for pattern in solana- remote_ oom-monitor; do pkill -9 \$pattern; done;
" "
) || true ) || true
} }
@ -294,10 +313,13 @@ stop() {
} }
case $command in case $command in
start) restart)
stop stop
start start
;; ;;
start)
start
;;
sanity) sanity)
sanity sanity
;; ;;

25
net/remote/remote_client.sh Executable file → Normal file
View File

@ -1,14 +1,15 @@
#!/bin/bash -e #!/bin/bash -e
cd "$(dirname "$0")"/../..
deployMethod="$1" deployMethod="$1"
netEntrypoint="$2" leaderIp="$2"
numNodes="$3" numNodes="$3"
RUST_LOG="$4" RUST_LOG="$4"
[[ -n $deployMethod ]] || exit [[ -n $deployMethod ]] || exit
[[ -n $netEntrypoint ]] || exit [[ -n $leaderIp ]] || exit
[[ -n $numNodes ]] || exit [[ -n $numNodes ]] || exit
cd "$(dirname "$0")"/../..
source net/common.sh source net/common.sh
loadConfigFile loadConfigFile
@ -17,14 +18,19 @@ if [[ $threadCount -gt 4 ]]; then
threadCount=4 threadCount=4
fi fi
./script/install-earlyoom.sh scripts/install-earlyoom.sh
case $deployMethod in case $deployMethod in
snap) snap)
rsync -vPr "$leaderIp:~/solana/solana.snap" .
sudo snap install solana.snap --devmode --dangerous sudo snap install solana.snap --devmode --dangerous
rm solana.snap rm solana.snap
sudo snap set solana metrics-config="$SOLANA_METRICS_CONFIG" rust-log="$RUST_LOG" sudo snap set solana "\
leader-ip=$leaderIp \
metrics-config=$SOLANA_METRICS_CONFIG \
rust-log=$RUST_LOG \
"
solana_bench_tps=/snap/bin/solana.bench-tps solana_bench_tps=/snap/bin/solana.bench-tps
;; ;;
local) local)
@ -32,20 +38,19 @@ local)
export USE_INSTALL=1 export USE_INSTALL=1
export RUST_LOG export RUST_LOG
rsync -vPrz "$netEntrypoint:~/.cargo/bin/solana*" ~/.cargo/bin/ rsync -vPr "$leaderIp:~/.cargo/bin/solana*" ~/.cargo/bin/
solana_bench_tps=multinode-demo/client.sh solana_bench_tps="multinode-demo/client.sh $leaderIp:~/solana"
netEntrypoint="$:~/solana"
;; ;;
*) *)
echo "Unknown deployment method: $deployMethod" echo "Unknown deployment method: $deployMethod"
exit 1 exit 1
esac esac
./scripts/oom-monitor.sh > oom-monitor.log 2>&1 & scripts/oom-monitor.sh > oom-monitor.log 2>&1 &
while true; do while true; do
echo "=== Client start: $(date)" >> client.log echo "=== Client start: $(date)" >> client.log
clientCommand="$solana_bench_tps $netEntrypoint $numNodes --loop -s 600 --sustained -t threadCount" clientCommand="$solana_bench_tps --num-nodes $numNodes --loop -s 600 --sustained -t threadCount"
echo "$ $clientCommand" >> client.log echo "$ $clientCommand" >> client.log
$clientCommand >> client.log 2>&1 $clientCommand >> client.log 2>&1

View File

@ -1,37 +1,51 @@
#!/bin/bash -e #!/bin/bash -e
cd "$(dirname "$0")"/../..
deployMethod="$1" deployMethod="$1"
nodeType="$2" nodeType="$2"
netEntrypoint="$3" leaderIp="$3"
setupArgs="$4" numNodes="$4"
RUST_LOG="$5" setupArgs="$5"
RUST_LOG="$6"
cat > deployConfig <<EOF
deployMethod="$deployMethod"
leaderIp="$leaderIp"
numNodes="$numNodes"
EOF
[[ -n $deployMethod ]] || exit [[ -n $deployMethod ]] || exit
[[ -n $nodeType ]] || exit [[ -n $nodeType ]] || exit
[[ -n $netEntrypoint ]] || exit [[ -n $leaderIp ]] || exit
cd "$(dirname "$0")"/../..
source net/common.sh source net/common.sh
loadConfigFile loadConfigFile
./script/install-earlyoom.sh scripts/install-earlyoom.sh
case $deployMethod in case $deployMethod in
snap) snap)
SECONDS=0 SECONDS=0
rsync -vPr "$leaderIp:~/solana/solana.snap" .
sudo snap install solana.snap --devmode --dangerous sudo snap install solana.snap --devmode --dangerous
rm solana.snap
commonNodeConfig="\ commonNodeConfig="\
rust-log=$RUST_LOG \ leader-ip=$leaderIp \
metrics-config=$SOLANA_METRICS_CONFIG \ metrics-config=$SOLANA_METRICS_CONFIG \
rust-log=$RUST_LOG \
setup-args=$setupArgs \ setup-args=$setupArgs \
enable-cuda=1 \
" "
if [[ -e /dev/nvidia0 ]]; then
commonNodeConfig="$commonNodeConfig enable-cuda=1"
fi
if [[ $nodeType = leader ]]; then if [[ $nodeType = leader ]]; then
nodeConfig="mode=leader+drone $commonNodeConfig" nodeConfig="mode=leader+drone $commonNodeConfig"
else else
nodeConfig="mode=validator leader-address=$netEntrypoint $commonNodeConfig" nodeConfig="mode=validator $commonNodeConfig"
fi fi
logmarker="solana deploy $(date)/$RANDOM" logmarker="solana deploy $(date)/$RANDOM"
@ -50,11 +64,13 @@ snap)
local) local)
PATH="$HOME"/.cargo/bin:"$PATH" PATH="$HOME"/.cargo/bin:"$PATH"
export USE_INSTALL=1 export USE_INSTALL=1
export SOLANA_CUDA=1 export RUST_LOG
export RUST_LOG=1 if [[ -e /dev/nvidia0 ]]; then
export SOLANA_CUDA=1
fi
./fetch-perf-libs.sh ./fetch-perf-libs.sh
./scripts/oom-monitor.sh > oom-monitor.log 2>&1 & scripts/oom-monitor.sh > oom-monitor.log 2>&1 &
case $nodeType in case $nodeType in
leader) leader)
@ -64,11 +80,11 @@ local)
./multinode-demo/leader.sh > leader.log 2>&1 & ./multinode-demo/leader.sh > leader.log 2>&1 &
;; ;;
validator) validator)
rsync -vPrz "$netEntrypoint:~/.cargo/bin/solana*" ~/.cargo/bin/ rsync -vPr "$leaderIp:~/.cargo/bin/solana*" ~/.cargo/bin/
# shellcheck disable=SC2086 # Don't want to double quote "$setupArgs" # shellcheck disable=SC2086 # Don't want to double quote "$setupArgs"
./multinode-demo/setup.sh -t validator -p $setupArgs ./multinode-demo/setup.sh -t validator -p $setupArgs
./multinode-demo/validator.sh "$netEntrypoint":~/solana "$netEntrypoint" >validator.log 2>&1 & ./multinode-demo/validator.sh "$leaderIp":~/solana "$leaderIp" >validator.log 2>&1 &
;; ;;
*) *)
echo "Error: unknown node type: $nodeType" echo "Error: unknown node type: $nodeType"
@ -80,4 +96,3 @@ local)
echo "Unknown deployment method: $deployMethod" echo "Unknown deployment method: $deployMethod"
exit 1 exit 1
esac esac

View File

@ -1,14 +1,25 @@
#!/bin/bash -e #!/bin/bash -e
deployMethod="$1" cd "$(dirname "$0")"/../..
netEntrypoint="$2"
numNodes="$3"
[[ -n $deployMethod ]] || exit deployMethod=
[[ -n $netEntrypoint ]] || exit leaderIp=
[[ -n $numNodes ]] || exit numNodes=
# shellcheck source=/dev/null # deployConfig is written by remote_node.sh
source deployConfig
shift 3 [[ -n $deployMethod ]] || {
echo "deployMethod empty"
exit 1
}
[[ -n $leaderIp ]] || {
echo "leaderIp empty"
exit 1
}
[[ -n $numNodes ]] || {
echo "numNodes empty"
exit 1
}
ledgerVerify=true ledgerVerify=true
validatorSanity=true validatorSanity=true
@ -29,26 +40,25 @@ while [[ $1 = "-o" ]]; do
esac esac
done done
cd "$(dirname "$0")"/../..
source net/common.sh source net/common.sh
loadConfigFile loadConfigFile
case $deployMethod in case $deployMethod in
snap) snap)
PATH="/snap/bin:$PATH"
export USE_SNAP=1 export USE_SNAP=1
solana_bench_tps=/snap/bin/solana.bench-tps
solana_ledger_tool=/snap/bin/solana.ledger-tool solana_bench_tps=solana.bench-tps
solana_ledger_tool=solana.ledger-tool
ledger=/var/snap/solana/current/config/ledger ledger=/var/snap/solana/current/config/ledger
;; ;;
local) local)
PATH="$HOME"/.cargo/bin:"$PATH" PATH="$HOME"/.cargo/bin:"$PATH"
export USE_INSTALL=1 export USE_INSTALL=1
solana_bench_tps=multinode-demo/client.sh solana_bench_tps="multinode-demo/client.sh $leaderIp:~/solana"
solana_ledger_tool=solana-ledger-tool solana_ledger_tool=solana-ledger-tool
ledger=config/ledger ledger=config/ledger
netEntrypoint="$:~/solana"
;; ;;
*) *)
echo "Unknown deployment method: $deployMethod" echo "Unknown deployment method: $deployMethod"
@ -56,19 +66,19 @@ local)
esac esac
echo "--- $netEntrypoint: wallet sanity" echo "--- $leaderIp: wallet sanity"
( (
set -x set -x
multinode-demo/test/wallet-sanity.sh "$netEntrypoint" multinode-demo/test/wallet-sanity.sh "$leaderIp"
) )
echo "--- $netEntrypoint: node count" echo "--- $leaderIp: node count"
( (
set -x set -x
$solana_bench_tps "$netEntrypoint" "$numNodes" -c $solana_bench_tps --num-nodes "$numNodes" --converge-only
) )
echo "--- $netEntrypoint: verify ledger" echo "--- $leaderIp: verify ledger"
if $ledgerVerify; then if $ledgerVerify; then
if [[ -d $ledger ]]; then if [[ -d $ledger ]]; then
( (
@ -87,12 +97,12 @@ else
fi fi
echo "--- $netEntrypoint: validator sanity" echo "--- $leaderIp: validator sanity"
if $validatorSanity; then if $validatorSanity; then
( (
./multinode-demo/setup.sh -t validator ./multinode-demo/setup.sh -t validator
set -e pipefail set -e pipefail
timeout 10s ./multinode-demo/validator.sh "$netEntrypoint" 2>&1 | tee validator.log timeout 10s ./multinode-demo/validator.sh "$leaderIp" 2>&1 | tee validator.log
) )
wc -l validator.log wc -l validator.log
if grep -C100 panic validator.log; then if grep -C100 panic validator.log; then

View File

@ -9,3 +9,12 @@ systemctl disable apt-daily.service # disable run when system boot
systemctl disable apt-daily.timer # disable timer run systemctl disable apt-daily.timer # disable timer run
apt-get --assume-yes install rsync libssl-dev apt-get --assume-yes install rsync libssl-dev
cat > /etc/rsyncd.conf <<-EOF
[config]
path = /var/snap/solana/current/config
hosts allow = *
read only = true
EOF
systemctl enable rsync
systemctl start rsync