diff --git a/net/datacenter-node-install/README.md b/net/datacenter-node-install/README.md
index 78ac41a43d..1d204ddf89 100644
--- a/net/datacenter-node-install/README.md
+++ b/net/datacenter-node-install/README.md
@@ -6,6 +6,20 @@ nodes. They have been tested as working from a clean installation of Ubuntu
 
 # Installation
 
+Both installation methods require that the NVIDIA proprietary driver installer
+programs be downloaded alongside [setup-cuda.sh](./setup-cuda.sh). If they do
+not exist at runtime, an attempt will be made to download them automatically. To
+avoid downloading the installers at runtime, they may be downloaded in advance
+and placed as siblings to [setup-cuda.sh](./setup-cuda.sh).
+
+For up-to-date NVIDIA driver version requirements, see [setup-cuda.sh](./setup-cuda.sh).
+
+## Datacenter Node
+
 1) `sudo ./setup-dc-node-1.sh`
 2) `sudo reboot`
 3) `sudo ./setup-dc-node-2.sh`
+
+## Partner Node
+
+1) `sudo ./setup-partner-node.sh`
diff --git a/net/datacenter-node-install/setup-cuda.sh b/net/datacenter-node-install/setup-cuda.sh
index 76d2d6a9b9..f2fbbfb967 100644
--- a/net/datacenter-node-install/setup-cuda.sh
+++ b/net/datacenter-node-install/setup-cuda.sh
@@ -1,5 +1,10 @@
 #!/usr/bin/env bash
 
+# Installer download URLs; see https://developer.nvidia.com/cuda-toolkit-archive
+VERSIONS=()
+VERSIONS+=("https://developer.nvidia.com/compute/cuda/10.0/Prod/local_installers/cuda_10.0.130_410.48_linux")
+VERSIONS+=("https://developer.nvidia.com/compute/cuda/10.1/Prod/local_installers/cuda_10.1.168_418.67_linux.run")
+
 HERE="$(dirname "$0")"
 
 # shellcheck source=net/datacenter-node-install/utils.sh
@@ -9,8 +14,46 @@ ensure_env || exit 1
 
 set -xe
 
+# Collect each installer from beside this script, downloading any that are
+# missing. URLs that cannot be fetched are reported together at the end.
+RUN_FILES=()
+FAILED=()
+for URL in "${VERSIONS[@]}"; do
+  RUN_FILE="$(basename "$URL")"
+  DEST="${HERE}/${RUN_FILE}"
+  if [[ -f "$DEST" ]]; then
+    RUN_FILES+=( "$DEST" )
+    continue
+  fi
+
+  echo -ne "Downloading ${RUN_FILE}:\t"
+  # Fetch to a temporary name first: wget -O creates its output file even when
+  # the transfer fails, and a leftover partial file would otherwise pass the
+  # -f check above on the next run and be executed as an installer.
+  if wget --read-timeout=180 --tries=3 -O "${DEST}.part" "$URL"; then
+    mv -- "${DEST}.part" "$DEST"
+    echo "OK"
+    RUN_FILES+=( "$DEST" )
+  else
+    rm -f -- "${DEST}.part"
+    # wget has already used all of its --tries; nothing is retried here.
+    echo "FAILED"
+    FAILED+=( "$URL" )
+  fi
+done
+
+if [[ 0 -ne ${#FAILED[@]} ]]; then
+  for f in "${FAILED[@]}"; do
+    echo "Failed to download required resource: $f" >&2
+  done
+  echo "Please manually download the above resources, save them to \"${HERE}\" and rerun $0" >&2
+  exit 1
+fi
+
 apt update
 apt install -y gcc make dkms
 
-sh cuda_10.0.130_410.48_linux.run --silent --driver --toolkit
-sh cuda_10.1.168_418.67_linux.run --silent --driver --toolkit
+# Run every installer that was found or downloaded, in VERSIONS order.
+for rf in "${RUN_FILES[@]}"; do
+  sh "$rf" --silent --driver --toolkit
+done
diff --git a/net/datacenter-node-install/setup-dc-node-1.sh b/net/datacenter-node-install/setup-dc-node-1.sh
index be43a588d8..7dc91ca6b3 100644
--- a/net/datacenter-node-install/setup-dc-node-1.sh
+++ b/net/datacenter-node-install/setup-dc-node-1.sh
@@ -7,8 +7,6 @@ source "$HERE"/utils.sh
 
 ensure_env || exit 1
 
-exit
-
 if [[ -n "$1" ]]; then
   PUBKEY_FILE="$1"
 else
@@ -49,21 +47,7 @@ chown "$SETUP_USER:$SETUP_USER" "${BASE_SSH_DIR}/.ssh/authorized_keys"
 "$HERE"/../scripts/install-redis.sh
 "$HERE"/../scripts/install-rsync.sh
 "$HERE"/../scripts/install-libssl-compatability.sh
-
-# Setup kernel constants
-cat > /etc/sysctl.d/20-solana-node.conf < /etc/sysctl.d/20-solana-node.conf <