Partner node setup tweaks (#5715)

automerge
This commit is contained in:
Trent Nelson 2019-09-03 08:45:20 -06:00 committed by Grimes
parent 5840e3bbdf
commit a82754913f
6 changed files with 89 additions and 22 deletions

View File

@ -6,6 +6,20 @@ nodes. They have been tested as working from a clean installation of Ubuntu
# Installation
Both installation methods need the NVIDIA proprietary driver installer
programs to be present alongside [setup-cuda.sh](./setup-cuda.sh). If they are
missing at runtime, the script attempts to download them automatically. To
avoid downloading the installers at runtime, download them in advance
and place them as siblings to [setup-cuda.sh](./setup-cuda.sh).
For up-to-date NVIDIA driver version requirements, see [setup-cuda.sh](./setup-cuda.sh).
## Datacenter Node
1) `sudo ./setup-dc-node-1.sh`
2) `sudo reboot`
3) `sudo ./setup-dc-node-2.sh`
## Partner Node
1) `sudo ./setup-partner-node.sh`

View File

@ -1,5 +1,10 @@
#!/usr/bin/env bash

# Download URLs of the CUDA installers to run, in installation order.
# https://developer.nvidia.com/cuda-toolkit-archive
VERSIONS=(
  "https://developer.nvidia.com/compute/cuda/10.0/Prod/local_installers/cuda_10.0.130_410.48_linux"
  "https://developer.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda_10.1.168_418.67_linux.run"
)

# Directory containing this script; installers are looked up next to it.
HERE=$(dirname "$0")
# shellcheck source=net/datacenter-node-install/utils.sh
@ -9,8 +14,37 @@ ensure_env || exit 1
set -xe
# Installers available locally (already present or freshly downloaded).
RUN_FILES=()
# URLs whose download failed.
FAILED=()

#######################################
# Download one installer without ever leaving a partial file at the
# destination. wget -O creates its output file before any data arrives, so
# a failed transfer written straight to the destination would pass the
# "already downloaded" check on the next run. The transfer goes to
# "<dest>.part" and is renamed into place only when wget succeeds.
# Arguments: $1 - URL to fetch
#            $2 - final path of the installer
# Returns:   0 on success, 1 if the download or the rename failed
#######################################
download_installer() {
  local url=$1
  local dest=$2
  local partial="${dest}.part"

  if wget --read-timeout=180 --tries=3 -O "$partial" "$url" \
    && mv -f -- "$partial" "$dest"; then
    return 0
  fi
  rm -f -- "$partial"
  return 1
}

for URL in "${VERSIONS[@]}"; do
  RUN_FILE="$(basename "$URL")"
  DEST="${HERE}/${RUN_FILE}"
  if [[ -f "$DEST" ]]; then
    # Already placed next to this script; no download needed.
    RUN_FILES+=( "$DEST" )
  else
    echo -ne "Downloading ${RUN_FILE}:\t"
    if download_installer "$URL" "$DEST"; then
      echo "OK"
      RUN_FILES+=( "$DEST" )
    else
      # wget has already used up its own --tries; nothing retries after this.
      echo "FAILED"
      FAILED+=( "$URL" )
    fi
  fi
done

# Report every failed download at once, then stop before installing anything.
if (( ${#FAILED[@]} > 0 )); then
  for f in "${FAILED[@]}"; do
    echo "Failed to download required resource: $f"
  done
  echo "Please manually download the above resources, save them to \"${HERE}\" and rerun $0"
  exit 1
fi
# Build prerequisites for the NVIDIA driver/toolkit installers.
apt update
apt install -y gcc make dkms

# Run every installer collected in RUN_FILES by its full path. The installers
# are not also invoked by hard-coded file name: those names were resolved
# against the current directory and the 10.0 name did not match the basename
# of its URL in VERSIONS, so the script stopped there under `set -e`.
for rf in "${RUN_FILES[@]}"; do
  sh "$rf" --silent --driver --toolkit
done

View File

@ -7,8 +7,6 @@ source "$HERE"/utils.sh
ensure_env || exit 1
exit
if [[ -n "$1" ]]; then
PUBKEY_FILE="$1"
else
@ -49,21 +47,7 @@ chown "$SETUP_USER:$SETUP_USER" "${BASE_SSH_DIR}/.ssh/authorized_keys"
# Install supporting packages via the shared install scripts.
"$HERE"/../scripts/install-redis.sh
"$HERE"/../scripts/install-rsync.sh
"$HERE"/../scripts/install-libssl-compatability.sh

# Kernel constants and the open-file limit are configured by the helper
# scripts below. They are not repeated inline here, so the settings have a
# single definition and the limits.conf entry is only inserted by one place.
"$HERE"/setup-procfs-knobs.sh
"$HERE"/setup-limits.sh

echo "Please reboot then run setup-dc-node-2.sh"

View File

@ -0,0 +1,12 @@
#!/usr/bin/env bash
#
# Raises the soft limit on open files for all users by adding a "nofile"
# entry to /etc/security/limits.conf. Safe to run more than once.

HERE="$(dirname "$0")"

# shellcheck source=net/datacenter-node-install/utils.sh
source "$HERE"/utils.sh

# ensure_env is presumably defined in utils.sh; stop if it reports failure.
ensure_env || exit 1

LIMITS_CONF=/etc/security/limits.conf
NOFILE_ENTRY='* soft nofile 65535'

# Allow more files to be opened by a user
if grep -qxF -- "$NOFILE_ENTRY" "$LIMITS_CONF"; then
  # Entry already present (e.g. from an earlier run); inserting it again
  # would only pile up duplicate lines.
  :
elif grep -q '^# End of file' "$LIMITS_CONF"; then
  # Keep the entry above the trailing "# End of file" marker.
  sed -i 's/^\(# End of file\)/* soft nofile 65535\n\n\1/' "$LIMITS_CONF"
else
  # No marker to anchor on: append, so the limit is not silently skipped.
  printf '\n%s\n' "$NOFILE_ENTRY" >> "$LIMITS_CONF"
fi

View File

@ -9,11 +9,12 @@ ensure_env || exit 1
# Trace every command (-x) and abort on the first one that fails (-e).
set -xe
# Run the setup steps in order; each is a sibling script located via $HERE.
"$HERE"/disable-nouveau.sh
"$HERE"/disable-networkd-wait.sh
"$HERE"/setup-grub.sh
"$HERE"/setup-cuda.sh
"$HERE"/setup-procfs-knobs.sh
"$HERE"/setup-limits.sh
# Read 9 random bytes and base64-encode them (12 characters, no padding).
PASSWORD="$(dd if=/dev/urandom bs=1 count=9 status=none | base64)"
# Print the generated value.
# NOTE(review): with `set -x` active the value also appears in the trace
# output — confirm that is acceptable.
echo "$PASSWORD"

View File

@ -0,0 +1,22 @@
#!/usr/bin/env bash
#
# Writes the sysctl drop-in file holding the kernel settings for a Solana node.

HERE="$(dirname "$0")"

# shellcheck source=net/datacenter-node-install/utils.sh
source "$HERE"/utils.sh

# ensure_env is presumably defined in utils.sh; stop if it reports failure.
ensure_env || exit 1

SYSCTL_CONF=/etc/sysctl.d/20-solana-node.conf

# Setup kernel constants.
# The here-doc delimiter is unquoted on purpose: the arithmetic expansion is
# evaluated while the file is written, OR-ing 64 into the current sysrq value.
cat > "$SYSCTL_CONF" <<EOF
# Solana networking requirements
net.core.rmem_default=1610612736
net.core.rmem_max=1610612736
net.core.wmem_default=1610612736
net.core.wmem_max=1610612736
# Solana earlyoom setup
kernel.sysrq=$(( $(< /proc/sys/kernel/sysrq) | 64 ))
EOF