parent
5840e3bbdf
commit
a82754913f
|
@ -6,6 +6,20 @@ nodes. They have been tested as working from a clean installation of Ubuntu
|
|||
|
||||
# Installation
|
||||
|
||||
Both installation methods require that the NVIDIA proprietary driver installer
|
||||
programs be downloaded alongside [setup-cuda.sh](./setup-cuda.sh). If they do
|
||||
not exist at runtime, an attempt will be made to download them automatically. To
|
||||
avoid downloading the installers at runtime, they may be downloaded in advance
|
||||
and placed as siblings to [setup-cuda.sh](./setup-cuda.sh).
|
||||
|
||||
For up-to-date NVIDIA driver version requirements, see [setup-cuda.sh](./setup-cuda.sh).
|
||||
|
||||
## Datacenter Node
|
||||
|
||||
1) `sudo ./setup-dc-node-1.sh`
|
||||
2) `sudo reboot`
|
||||
3) `sudo ./setup-dc-node-2.sh`
|
||||
|
||||
## Partner Node
|
||||
|
||||
1) `sudo ./setup-partner-node.sh`
|
||||
|
|
|
@ -1,5 +1,10 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# Download URLs of the CUDA installers this script fetches (when they are not
# already present) and then runs.
# https://developer.nvidia.com/cuda-toolkit-archive
|
||||
VERSIONS=()
|
||||
VERSIONS+=("https://developer.nvidia.com/compute/cuda/10.0/Prod/local_installers/cuda_10.0.130_410.48_linux")
|
||||
VERSIONS+=("https://developer.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda_10.1.168_418.67_linux.run")
|
||||

||||
# Directory part of the path this script was invoked with; installers are
# looked up in, and downloaded to, this directory.
HERE="$(dirname "$0")"
|
||||
|
||||
# shellcheck source=net/datacenter-node-install/utils.sh
|
||||
|
@ -9,8 +14,37 @@ ensure_env || exit 1
|
|||
|
||||
set -xe

#######################################
# Download one installer into place without ever exposing a partial file.
# The payload is written to "<dest>.part" and renamed to <dest> only after
# wget reports success. A failed or interrupted transfer therefore cannot
# leave a truncated file that a later run would accept as an installer that
# was already downloaded.
# Arguments: $1 - URL to fetch
#            $2 - final path of the installer
# Returns:   0 if <dest> now holds the completed download, 1 otherwise
#######################################
download_installer() {
  local url=$1
  local dest=$2
  local partial="${dest}.part"

  if wget --read-timeout=180 --tries=3 -O "$partial" "$url" \
    && mv -f -- "$partial" "$dest"; then
    return 0
  fi

  rm -f -- "$partial"
  return 1
}

# RUN_FILES: installers that are ready to run.
# FAILED:    URLs that could not be downloaded.
RUN_FILES=()
FAILED=()
for URL in "${VERSIONS[@]}"; do
  RUN_FILE="$(basename "$URL")"
  DEST="${HERE}/${RUN_FILE}"
  if [[ -f "$DEST" ]]; then
    # Already present next to the script (e.g. downloaded in advance).
    RUN_FILES+=( "$DEST" )
  else
    echo -ne "Downloading ${RUN_FILE}:\t"
    if download_installer "$URL" "$DEST"; then
      echo "OK"
      RUN_FILES+=( "$DEST" )
    else
      # wget has already used up its own --tries here; nothing retries later.
      echo "FAILED"
      FAILED+=( "$URL" )
    fi
  fi
done
|
||||
|
||||
#######################################
# Print one diagnostic per resource that could not be downloaded, followed by
# instructions for fetching them manually. Everything goes to stderr so that
# it is not mixed into regular output.
# Globals:   HERE (read) - directory the resources must be saved to
# Arguments: the URLs that failed to download
# Outputs:   diagnostics on stderr
#######################################
report_failed_downloads() {
  local url
  for url in "$@"; do
    echo "Failed to download required resource: $url" >&2
  done
  echo "Please manually download the above resources, save them to \"${HERE}\" and rerun $0" >&2
}

# Abort before installing anything if at least one installer is missing.
if (( ${#FAILED[@]} > 0 )); then
  report_failed_downloads "${FAILED[@]}"
  exit 1
fi
|
||||
|
||||
# Packages installed before the installers are run. apt-get is used because
# the apt front end does not promise a stable command-line interface for
# scripts.
apt-get update
apt-get install -y gcc make dkms

# Run every installer collected in RUN_FILES, addressed by the path it was
# found at or downloaded to. Installers are deliberately not invoked by a
# hard-coded, working-directory-relative file name as well: that would run
# each of them twice and fail whenever the script is started from another
# directory.
for rf in "${RUN_FILES[@]}"; do
  sh "$rf" --silent --driver --toolkit
done
|
||||
|
|
|
@ -7,8 +7,6 @@ source "$HERE"/utils.sh
|
|||
|
||||
# Stop unless the environment check from utils.sh passes. There must be no
# unconditional `exit` after this check, otherwise none of the setup steps
# below would ever run.
ensure_env || exit 1
|
||||
|
||||
if [[ -n "$1" ]]; then
|
||||
PUBKEY_FILE="$1"
|
||||
else
|
||||
|
@ -49,21 +47,7 @@ chown "$SETUP_USER:$SETUP_USER" "${BASE_SSH_DIR}/.ssh/authorized_keys"
|
|||
"$HERE"/../scripts/install-redis.sh
|
||||
"$HERE"/../scripts/install-rsync.sh
|
||||
"$HERE"/../scripts/install-libssl-compatability.sh
|
||||
|
||||
# Kernel constants and the open-file limit are applied by the helper scripts
# invoked below, not inline in this script. Doing both would apply the same
# settings twice and insert the "nofile" entry into
# /etc/security/limits.conf a second time.
# NOTE(review): assumes setup-procfs-knobs.sh and setup-limits.sh carry the
# sysctl and limits.conf changes that used to live here - confirm.
"$HERE"/setup-procfs-knobs.sh
"$HERE"/setup-limits.sh

echo "Please reboot then run setup-dc-node-2.sh"
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
#!/usr/bin/env bash
#
# Raise the soft limit on open files by adding a "nofile" entry to
# /etc/security/limits.conf. Safe to run more than once.

HERE="$(dirname "$0")"

# shellcheck source=net/datacenter-node-install/utils.sh
source "$HERE"/utils.sh

ensure_env || exit 1

LIMITS_CONF=/etc/security/limits.conf
NOFILE_ENTRY='* soft nofile 65535'

# Allow more files to be opened by a user.
# The entry is inserted just before the "# End of file" marker line. It is
# skipped when the exact line is already present, so running this script again
# (or from more than one setup script) does not stack duplicate entries.
if ! grep -qxF -- "$NOFILE_ENTRY" "$LIMITS_CONF"; then
  sed -i 's/^\(# End of file\)/* soft nofile 65535\n\n\1/' "$LIMITS_CONF"
fi
|
||||
|
|
@ -9,11 +9,12 @@ ensure_env || exit 1
|
|||
|
||||
set -xe

# Run the individual setup scripts that live next to this one, in this exact
# order. With `set -e` active the first failing step aborts the run.
for step in \
  disable-nouveau \
  disable-networkd-wait \
  setup-grub \
  setup-cuda \
  setup-procfs-knobs \
  setup-limits; do
  "${HERE}/${step}.sh"
done

# Generate a password from 9 random bytes, base64-encoded, and print it.
PASSWORD="$(head -c 9 /dev/urandom | base64)"
printf '%s\n' "$PASSWORD"
|
|
@ -0,0 +1,22 @@
|
|||
#!/usr/bin/env bash
#
# Write the sysctl drop-in /etc/sysctl.d/20-solana-node.conf containing the
# network buffer sizes and the sysrq mask.

HERE="$(dirname "$0")"

# shellcheck source=net/datacenter-node-install/utils.sh
source "${HERE}/utils.sh"

if ! ensure_env; then
  exit 1
fi

# Setup kernel constants
sysctl_conf=/etc/sysctl.d/20-solana-node.conf
{
  printf '\n# Solana networking requirements\n'
  # All four socket buffer knobs get the same value.
  for knob in rmem_default rmem_max wmem_default wmem_max; do
    printf 'net.core.%s=%s\n' "$knob" 1610612736
  done

  printf '\n# Solana earlyoom setup\n'
  # Keep whatever sysrq bits are currently enabled and additionally set bit 64.
  printf 'kernel.sysrq=%s\n' "$(( $(< /proc/sys/kernel/sysrq) | 64 ))"
} > "$sysctl_conf"
|
||||
|
Loading…
Reference in New Issue