Partner node setup tweaks (#5715)

automerge
This commit is contained in:
Trent Nelson 2019-09-03 08:45:20 -06:00 committed by Grimes
parent 5840e3bbdf
commit a82754913f
6 changed files with 89 additions and 22 deletions

View File

@ -6,6 +6,20 @@ nodes. They have been tested as working from a clean installation of Ubuntu
# Installation
Both installation methods need the NVIDIA proprietary driver installer
programs to be present alongside [setup-cuda.sh](./setup-cuda.sh). If they are
missing at runtime, the script attempts to download them automatically. To
avoid downloading the installers at runtime, download them in advance and
place them in the same directory as [setup-cuda.sh](./setup-cuda.sh).
For up-to-date NVIDIA driver version requirements, see [setup-cuda.sh](./setup-cuda.sh).
## Datacenter Node
1) `sudo ./setup-dc-node-1.sh`
2) `sudo reboot`
3) `sudo ./setup-dc-node-2.sh`
## Partner Node
1) `sudo ./setup-partner-node.sh`

View File

@ -1,5 +1,10 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# CUDA installer download URLs. The archive of published releases lives at
# https://developer.nvidia.com/cuda-toolkit-archive
VERSIONS=(
  "https://developer.nvidia.com/compute/cuda/10.0/Prod/local_installers/cuda_10.0.130_410.48_linux"
  "https://developer.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda_10.1.168_418.67_linux.run"
)
HERE="$(dirname "$0")" HERE="$(dirname "$0")"
# shellcheck source=net/datacenter-node-install/utils.sh # shellcheck source=net/datacenter-node-install/utils.sh
@ -9,8 +14,37 @@ ensure_env || exit 1
set -xe set -xe
# Installers that are present locally and ready to run.
RUN_FILES=()
# URLs whose download failed.
FAILED=()

#######################################
# Make one installer available next to this script, downloading it if needed.
# Globals:   HERE (read), RUN_FILES (appended on success),
#            FAILED (appended on failure)
# Arguments: $1 - installer URL; its last path component is the file name
# Outputs:   progress messages on stdout
# Returns:   0 always; the outcome is recorded in RUN_FILES or FAILED
#######################################
fetch_installer() {
  local url=$1
  local run_file dest partial
  run_file="$(basename "$url")"
  dest="${HERE}/${run_file}"
  partial="${dest}.part"

  # A file already in place (e.g. downloaded in advance) is used as-is.
  if [[ -f "$dest" ]]; then
    RUN_FILES+=( "$dest" )
    return 0
  fi

  echo -ne "Downloading ${run_file}:\t"
  # wget -O creates its output file even when the transfer fails, so download
  # to a scratch name and only move it into place on success. Otherwise a
  # truncated file would pass the -f check above on the next run and be
  # handed to the installer step.
  if wget --read-timeout=180 --tries=3 -O "$partial" "$url" &&
    mv -f -- "$partial" "$dest"; then
    echo "OK"
    RUN_FILES+=( "$dest" )
  else
    # wget has already used up its --tries; nothing is retried from here.
    echo "FAILED"
    rm -f -- "$partial"
    FAILED+=( "$url" )
  fi
  return 0
}

for url in "${VERSIONS[@]}"; do
  fetch_installer "$url"
done

# Stop before any installation work if a required installer is missing.
if [[ 0 -ne ${#FAILED[@]} ]]; then
  for f in "${FAILED[@]}"; do
    echo "Failed to download required resource: $f"
  done
  echo "Please manually download the above resources, save them to \"${HERE}\" and rerun $0"
  exit 1
fi
apt update apt update
apt install -y gcc make dkms apt install -y gcc make dkms
sh cuda_10.0.130_410.48_linux.run --silent --driver --toolkit for rf in "${RUN_FILES[@]}"; do
sh cuda_10.1.168_418.67_linux.run --silent --driver --toolkit sh "$rf" --silent --driver --toolkit
done

View File

@ -7,8 +7,6 @@ source "$HERE"/utils.sh
ensure_env || exit 1 ensure_env || exit 1
exit
if [[ -n "$1" ]]; then if [[ -n "$1" ]]; then
PUBKEY_FILE="$1" PUBKEY_FILE="$1"
else else
@ -49,21 +47,7 @@ chown "$SETUP_USER:$SETUP_USER" "${BASE_SSH_DIR}/.ssh/authorized_keys"
"$HERE"/../scripts/install-redis.sh "$HERE"/../scripts/install-redis.sh
"$HERE"/../scripts/install-rsync.sh "$HERE"/../scripts/install-rsync.sh
"$HERE"/../scripts/install-libssl-compatability.sh "$HERE"/../scripts/install-libssl-compatability.sh
"$HERE"/setup-procfs-knobs.sh
# Setup kernel constants "$HERE"/setup-limits.sh
cat > /etc/sysctl.d/20-solana-node.conf <<EOF
# Solana networking requirements
net.core.rmem_default=1610612736
net.core.rmem_max=1610612736
net.core.wmem_default=1610612736
net.core.wmem_max=1610612736
# Solana earlyoom setup
kernel.sysrq=$(( $(cat /proc/sys/kernel/sysrq) | 64 ))
EOF
# Allow more files to be opened by a user
sed -i 's/^\(# End of file\)/* soft nofile 65535\n\n\1/' /etc/security/limits.conf
echo "Please reboot then run setup-dc-node-2.sh" echo "Please reboot then run setup-dc-node-2.sh"

View File

@ -0,0 +1,12 @@
#!/usr/bin/env bash
#
# Adds a "* soft nofile 65535" entry to /etc/security/limits.conf.

HERE="$(dirname "$0")"

# shellcheck source=net/datacenter-node-install/utils.sh
source "$HERE"/utils.sh

ensure_env || exit 1

# Allow more files to be opened by a user.
# The entry is inserted just ahead of the "# End of file" marker line; if that
# marker is absent, sed changes nothing. The grep guard skips the edit when the
# exact entry is already present, so rerunning this script does not pile up
# duplicate lines.
if ! grep -qxF -- '* soft nofile 65535' /etc/security/limits.conf; then
  sed -i 's/^\(# End of file\)/* soft nofile 65535\n\n\1/' /etc/security/limits.conf
fi

View File

@ -9,11 +9,12 @@ ensure_env || exit 1
set -xe set -xe
"$HERE"/disable-nouveau.sh
"$HERE"/disable-networkd-wait.sh "$HERE"/disable-networkd-wait.sh
"$HERE"/setup-grub.sh "$HERE"/setup-grub.sh
"$HERE"/setup-cuda.sh "$HERE"/setup-cuda.sh
"$HERE"/setup-procfs-knobs.sh
"$HERE"/setup-limits.sh
PASSWORD="$(dd if=/dev/urandom bs=1 count=9 status=none | base64)" PASSWORD="$(dd if=/dev/urandom bs=1 count=9 status=none | base64)"
echo "$PASSWORD" echo "$PASSWORD"

View File

@ -0,0 +1,22 @@
#!/usr/bin/env bash
#
# Writes kernel settings to /etc/sysctl.d/20-solana-node.conf.

HERE="$(dirname "$0")"

# shellcheck source=net/datacenter-node-install/utils.sh
. "$HERE"/utils.sh

if ! ensure_env; then
  exit 1
fi

# Kernel constants: socket buffer sizes, plus the current kernel.sysrq value
# with the 64 bit OR-ed in. The heredoc delimiter is unquoted so the arithmetic
# expansion is evaluated when the file is written.
cat <<EOF > /etc/sysctl.d/20-solana-node.conf
# Solana networking requirements
net.core.rmem_default=1610612736
net.core.rmem_max=1610612736
net.core.wmem_default=1610612736
net.core.wmem_max=1610612736
# Solana earlyoom setup
kernel.sysrq=$(( $(< /proc/sys/kernel/sysrq) | 64 ))
EOF