From b4da83a3ab516ebfdfffe26b82f19f9972bdf6b3 Mon Sep 17 00:00:00 2001 From: Michael Vines Date: Thu, 26 Sep 2019 13:36:51 -0700 Subject: [PATCH] Remove CUDA feature (#6094) --- Cargo.lock | 33 +++- Cargo.toml | 62 ------- .../running-validator/validator-software.md | 10 - book/src/running-validator/validator-start.md | 4 +- ci/publish-tarball.sh | 50 +---- ci/test-checks.sh | 2 +- ci/test-stable.sh | 19 +- ci/testnet-automation.sh | 4 +- core/Cargo.toml | 3 +- core/build.rs | 50 ----- core/src/chacha_cuda.rs | 33 ++-- core/src/cuda_runtime.rs | 61 ++++--- core/src/entry.rs | 26 +-- core/src/lib.rs | 5 +- core/src/perf_libs.rs | 171 ++++++++++++++++++ core/src/sigverify.rs | 104 ++--------- core/src/sigverify_stage.rs | 16 +- core/src/storage_stage.rs | 36 ++-- core/src/validator.rs | 9 +- fetch-perf-libs.sh | 85 ++------- local_cluster/Cargo.toml | 4 - multinode-demo/common.sh | 7 +- net/README.md | 6 - net/net.sh | 13 +- net/remote/remote-client.sh | 7 +- net/remote/remote-node.sh | 5 +- net/remote/remote-sanity.sh | 5 - scripts/cargo-install-all.sh | 24 +-- scripts/coverage.sh | 2 +- validator-cuda/.gitignore | 2 - validator-cuda/Cargo.toml | 14 -- validator-cuda/src/main.rs | 3 - validator/src/lib.rs | 12 +- 33 files changed, 375 insertions(+), 512 deletions(-) delete mode 100644 core/build.rs create mode 100644 core/src/perf_libs.rs delete mode 100644 validator-cuda/.gitignore delete mode 100644 validator-cuda/Cargo.toml delete mode 100644 validator-cuda/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 996343d22..d90c5b4e0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -844,6 +844,27 @@ dependencies = [ "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "dlopen" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "dlopen_derive 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "dlopen_derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.15.42 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "docopt" version = "1.1.0" @@ -3245,6 +3266,8 @@ dependencies = [ "crc 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam-channel 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", "dir-diff 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "dlopen 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", + "dlopen_derive 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", "fs_extra 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "hex-literal 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "indexmap 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -3890,14 +3913,6 @@ dependencies = [ "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "solana-validator-cuda" -version = "0.20.0" -dependencies = [ - "solana-core 0.20.0", - "solana-validator 0.20.0", -] - [[package]] name = "solana-vote-api" version = "0.20.0" @@ -5436,6 +5451,8 @@ dependencies = [ "checksum dirs 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3fd78930633bd1c6e35c4b42b1df7b0cbc6bc191146e512bb3bedf243fcc3901" "checksum dirs 2.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "13aea89a5c93364a98e9b37b2fa237effbb694d5cfe01c5b70941f7eb087d5e3" "checksum dirs-sys 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "afa0b23de8fd801745c471deffa6e12d248f962c9fd4b4c33787b055599bde7b" +"checksum dlopen 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "71e80ad39f814a9abe68583cd50a2d45c8a67561c3361ab8da240587dda80937" +"checksum dlopen_derive 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "f236d9e1b1fbd81cea0f9cbdc8dcc7e8ebcd80e6659cd7cb2ad5f6c05946c581" "checksum docopt 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7f525a586d310c87df72ebcd98009e57f1cc030c8c268305287a476beb653969" "checksum dtoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "ea57b42383d091c85abcc2706240b94ab2a8fa1fc81c10ff23c4de06e2a90b5e" "checksum ed25519-dalek 1.0.0-pre.1 (registry+https://github.com/rust-lang/crates.io-index)" = "81956bcf7ef761fb4e1d88de3fa181358a0d26cbcb9755b587a08f9119824b86" diff --git a/Cargo.toml b/Cargo.toml index 1e5b36f53..d7efbbc03 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,65 +1,4 @@ [workspace] -# The members list excluding the `validator-cuda` crate -default-members = [ - "bench-exchange", - "bench-streamer", - "bench-tps", - "banking_bench", - "chacha-sys", - "client", - "core", - "drone", - "validator", - "genesis", - "genesis_programs", - "gossip", - "install", - "keygen", - "ledger-tool", - "local_cluster", - "logger", - "merkle-tree", - "measure", - "metrics", - "programs/bpf_loader_api", - "programs/bpf_loader_program", - "programs/budget_api", - "programs/budget_program", - "programs/btc_spv_program", - "programs/btc_spv_api", - "programs/btc_spv_bin", - "programs/config_api", - "programs/config_program", - "programs/config_tests", - "programs/exchange_api", - "programs/exchange_program", - "programs/failure_program", - "programs/move_loader_api", - "programs/move_loader_program", - "programs/librapay_api", - "programs/noop_program", - "programs/stake_api", - "programs/stake_program", - "programs/stake_tests", - "programs/storage_api", - "programs/storage_program", - "programs/token_api", - "programs/token_program", - "programs/vote_api", - "programs/vote_program", - "replicator", - "runtime", - "sdk", - "sdk-c", - "upload-perf", - "netutil", - "fixed-buf", - "vote-signer", - "cli", - "rayon-threadlimit", -] - -# The default-members list and the `validator-cuda` crate members = [ "bench-exchange", "bench-streamer", @@ -117,7 +56,6 @@ members = [ "vote-signer", "cli", "rayon-threadlimit", - "validator-cuda", ] exclude = [ diff --git a/book/src/running-validator/validator-software.md b/book/src/running-validator/validator-software.md index b4ba4ea99..7525c7049 100644 --- a/book/src/running-validator/validator-software.md +++ b/book/src/running-validator/validator-software.md @@ -48,13 +48,3 @@ If you are unable to use the prebuilt binaries or prefer to build it yourself fr $ ./scripts/cargo-install-all.sh . $ export PATH=$PWD/bin:$PATH ``` - -If building for CUDA \(Linux only\), fetch the perf-libs first then include the `cuda` feature flag when building: - -```bash -$ ./fetch-perf-libs.sh -$ source target/perf-libs/env.sh -$ ./scripts/cargo-install-all.sh . cuda -$ export PATH=$PWD/bin:$PATH -``` - diff --git a/book/src/running-validator/validator-start.md b/book/src/running-validator/validator-start.md index 2bebd8012..81b566354 100644 --- a/book/src/running-validator/validator-start.md +++ b/book/src/running-validator/validator-start.md @@ -93,9 +93,9 @@ $ NDEBUG=1 USE_INSTALL=1 ./multinode-demo/validator.sh --identity ~/validator-ke ### Enabling CUDA -If your machine has a GPU with CUDA installed \(Linux-only currently\), use the `solana-validator-cuda` executable instead of `solana-validator`. +If your machine has a GPU with CUDA installed \(Linux-only currently\), include the `--cuda` argument to `solana-validator`. -Or if you built from source, define the SOLANA\_CUDA flag in your environment _before_ running any of the previusly mentioned commands +Or if you built from source, define the SOLANA\_CUDA flag in your environment _before_ running any of the previously mentioned commands ```bash $ export SOLANA_CUDA=1 diff --git a/ci/publish-tarball.sh b/ci/publish-tarball.sh index e456c5adf..f978181b0 100755 --- a/ci/publish-tarball.sh +++ b/ci/publish-tarball.sh @@ -37,14 +37,12 @@ if [[ -z $CHANNEL_OR_TAG ]]; then exit 1 fi -maybeCUDA= case "$CI_OS_NAME" in osx) TARGET=x86_64-apple-darwin ;; linux) TARGET=x86_64-unknown-linux-gnu - maybeCUDA=cuda ;; windows) TARGET=x86_64-pc-windows-msvc @@ -70,55 +68,17 @@ echo --- Creating tarball ) > solana-release/version.yml source ci/rust-version.sh stable - scripts/cargo-install-all.sh +"$rust_stable" solana-release $maybeCUDA + scripts/cargo-install-all.sh +"$rust_stable" solana-release # Reduce the Windows archive size until # https://github.com/appveyor/ci/issues/2997 is fixed if [[ -n $APPVEYOR ]]; then - rm -f solana-release/bin/solana-validator.exe solana-release/bin/solana-bench-exchange.exe + rm -f \ + solana-release/bin/solana-validator.exe \ + solana-release/bin/solana-bench-exchange.exe \ + fi - if [[ -n $maybeCUDA ]]; then - # Wrap `solana-validator-cuda` with a script that loads perf-libs - # automatically if possible - mkdir -p solana-release/target - cp -a target/perf-libs solana-release/target/perf-libs - mkdir -p solana-release/bin/_ - cp solana-release/bin/solana-validator-cuda solana-release/bin/_/solana-validator-cuda - cp -a solana-release/bin/deps solana-release/bin/_/deps - cat > solana-release/bin/solana-validator-cuda <<'EOF' -#!/usr/bin/env bash -set -e -SOLANA_ROOT="$(dirname "$0")"/.. -if [[ -f "$SOLANA_ROOT"/target/perf-libs/env.sh ]]; then - source "$SOLANA_ROOT"/target/perf-libs/env.sh -fi -if [[ -z $SOLANA_PERF_LIBS_CUDA ]]; then - echo - echo Error: SOLANA_PERF_LIBS_CUDA environment variable undefined - exit 1 -fi -exec "$SOLANA_ROOT"/bin/_/solana-validator-cuda "$@" -EOF - chmod +x solana-release/bin/solana-validator-cuda - fi - - # TODO: Remove scripts/ and multinode/... from tarball - cp -a scripts multinode-demo solana-release/ - - # Add a wrapper script for validator.sh - # TODO: Remove multinode/... from tarball - cat > solana-release/bin/validator.sh <<'EOF' -#!/usr/bin/env bash -set -e -cd "$(dirname "$0")"/.. -export USE_INSTALL=1 -export REQUIRE_LEDGER_DIR=1 -export REQUIRE_KEYPAIRS=1 -exec multinode-demo/validator.sh "$@" -EOF - chmod +x solana-release/bin/validator.sh - tar cvf solana-release-$TARGET.tar solana-release bzip2 solana-release-$TARGET.tar cp solana-release/bin/solana-install-init solana-install-init-$TARGET diff --git a/ci/test-checks.sh b/ci/test-checks.sh index 014546156..90336334e 100755 --- a/ci/test-checks.sh +++ b/ci/test-checks.sh @@ -15,7 +15,7 @@ _ cargo +"$rust_stable" fmt --all -- --check # Clippy gets stuck for unknown reasons if sdk-c is included in the build, so check it separately. # See https://github.com/solana-labs/solana/issues/5503 _ cargo +"$rust_stable" clippy --version -_ cargo +"$rust_stable" clippy --all --exclude solana-sdk-c --exclude solana-validator-cuda -- --deny=warnings +_ cargo +"$rust_stable" clippy --all --exclude solana-sdk-c -- --deny=warnings _ cargo +"$rust_stable" clippy --manifest-path sdk-c/Cargo.toml -- --deny=warnings _ cargo +"$rust_stable" audit --version diff --git a/ci/test-stable.sh b/ci/test-stable.sh index 9ab1c7438..96a0e84a3 100755 --- a/ci/test-stable.sh +++ b/ci/test-stable.sh @@ -33,7 +33,7 @@ test-stable) echo "Executing $testName" _ cargo +"$rust_stable" build --tests --bins ${V:+--verbose} - _ cargo +"$rust_stable" test --all --exclude solana-local-cluster --exclude solana-validator-cuda ${V:+--verbose} -- --nocapture + _ cargo +"$rust_stable" test --all --exclude solana-local-cluster ${V:+--verbose} -- --nocapture ;; test-stable-perf) echo "Executing $testName" @@ -61,8 +61,6 @@ test-stable-perf) --manifest-path programs/bpf/Cargo.toml \ --no-default-features --features=bpf_c,bpf_rust - # Run root package tests with these features - maybeCuda= if [[ $(uname) = Linux ]]; then # Enable persistence mode to keep the CUDA kernel driver loaded, avoiding a # lengthy and unexpected delay the first time CUDA is involved when the driver @@ -71,19 +69,20 @@ test-stable-perf) rm -rf target/perf-libs ./fetch-perf-libs.sh - # shellcheck source=/dev/null - source ./target/perf-libs/env.sh - maybeCuda=--features=cuda + + # Force CUDA for solana-core unit tests + export TEST_PERF_LIBS_CUDA=1 + + # Force CUDA in ci/localnet-sanity.sh export SOLANA_CUDA=1 fi - # Run root package library tests - _ cargo +"$rust_stable" build --tests --bins ${V:+--verbose} - _ cargo +"$rust_stable" test --all --manifest-path=core/Cargo.toml ${V:+--verbose} $maybeCuda --exclude solana-local-cluster -- --nocapture + _ cargo +"$rust_stable" build --bins ${V:+--verbose} + _ cargo +"$rust_stable" test --package solana-core --lib ${V:+--verbose} -- --nocapture ;; test-local-cluster) echo "Executing $testName" - _ cargo +"$rust_stable" build --release --tests --bins ${V:+--verbose} + _ cargo +"$rust_stable" build --release --bins ${V:+--verbose} _ cargo +"$rust_stable" test --release --package solana-local-cluster ${V:+--verbose} -- --nocapture exit 0 ;; diff --git a/ci/testnet-automation.sh b/ci/testnet-automation.sh index e1434877c..814aaa150 100755 --- a/ci/testnet-automation.sh +++ b/ci/testnet-automation.sh @@ -39,9 +39,9 @@ launchTestnet() { echo --- start "$nodeCount" node test if [[ -n $USE_PREBUILT_CHANNEL_TARBALL ]]; then - net/net.sh start -f "cuda" -o noValidatorSanity -t "$CHANNEL" + net/net.sh start -o noValidatorSanity -t "$CHANNEL" else - net/net.sh start -f "cuda" -o noValidatorSanity -T solana-release*.tar.bz2 + net/net.sh start -o noValidatorSanity -T solana-release*.tar.bz2 fi echo --- wait "$ITERATION_WAIT" seconds to complete test diff --git a/core/Cargo.toml b/core/Cargo.toml index 8ca11f7a2..a8e3a0736 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -14,7 +14,6 @@ edition = "2018" codecov = { repository = "solana-labs/solana", branch = "master", service = "github" } [features] -cuda = [] pin_gpu_memory = [] [dependencies] @@ -27,6 +26,8 @@ core_affinity = "0.5.9" crc = { version = "1.8.1", optional = true } crossbeam-channel = "0.3" dir-diff = "0.3.1" +dlopen = "0.1.8" +dlopen_derive = "0.1.4" fs_extra = "1.1.0" indexmap = "1.1" itertools = "0.8.0" diff --git a/core/build.rs b/core/build.rs deleted file mode 100644 index 9e1b51072..000000000 --- a/core/build.rs +++ /dev/null @@ -1,50 +0,0 @@ -use std::env; -use std::fs; -use std::path::Path; -use std::process::exit; - -fn main() { - println!("cargo:rerun-if-changed=build.rs"); - - if env::var("CARGO_FEATURE_CUDA").is_ok() { - if cfg!(not(target_os = "linux")) { - eprintln!("Error: CUDA feature is only available on Linux"); - exit(1); - } - println!("cargo:rustc-cfg=cuda"); - - let perf_libs_dir = { - let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); - let mut path = Path::new(&manifest_dir); - path = path.parent().unwrap(); - let mut path = path.join(Path::new("target/perf-libs")); - path.push(env::var("SOLANA_PERF_LIBS_CUDA").unwrap_or_else(|err| { - eprintln!("Error: SOLANA_PERF_LIBS_CUDA not defined: {}", err); - exit(1); - })); - path - }; - let perf_libs_dir = perf_libs_dir.to_str().unwrap(); - - // Ensure `perf_libs_dir` exists. It's been observed that - // a cargo:rerun-if-changed= directive with a non-existent - // directory triggers a rebuild on every |cargo build| invocation - fs::create_dir_all(&perf_libs_dir).unwrap_or_else(|err| { - if err.kind() != std::io::ErrorKind::AlreadyExists { - panic!("Unable to create {}: {:?}", perf_libs_dir, err); - } - }); - println!("cargo:rerun-if-changed={}", perf_libs_dir); - println!("cargo:rustc-link-search=native={}", perf_libs_dir); - if cfg!(windows) { - println!("cargo:rerun-if-changed={}/libcuda-crypt.dll", perf_libs_dir); - } else if cfg!(target_os = "macos") { - println!( - "cargo:rerun-if-changed={}/libcuda-crypt.dylib", - perf_libs_dir - ); - } else { - println!("cargo:rerun-if-changed={}/libcuda-crypt.so", perf_libs_dir); - } - } -} diff --git a/core/src/chacha_cuda.rs b/core/src/chacha_cuda.rs index 1bc8d83db..aeac34c4d 100644 --- a/core/src/chacha_cuda.rs +++ b/core/src/chacha_cuda.rs @@ -1,11 +1,8 @@ -// Module used by validators to approve storage mining proofs -// // in parallel using the GPU +// Module used by validators to approve storage mining proofs in parallel using the GPU use crate::blocktree::Blocktree; use crate::chacha::{CHACHA_BLOCK_SIZE, CHACHA_KEY_SIZE}; -use crate::sigverify::{ - chacha_cbc_encrypt_many_sample, chacha_end_sha_state, chacha_init_sha_state, -}; +use crate::perf_libs; use solana_sdk::hash::Hash; use std::io; use std::mem::size_of; @@ -22,6 +19,7 @@ pub fn chacha_cbc_encrypt_file_many_keys( ivecs: &mut [u8], samples: &[u64], ) -> io::Result> { + let api = perf_libs::api().expect("no perf libs"); if ivecs.len() % CHACHA_BLOCK_SIZE != 0 { return Err(io::Error::new( io::ErrorKind::Other, @@ -45,7 +43,7 @@ pub fn chacha_cbc_encrypt_file_many_keys( let mut total_size = 0; let mut time: f32 = 0.0; unsafe { - chacha_init_sha_state(int_sha_states.as_mut_ptr(), num_keys as u32); + (api.chacha_init_sha_state)(int_sha_states.as_mut_ptr(), num_keys as u32); } loop { match blocktree.get_data_shreds(current_slot, start_index, std::u64::MAX, &mut buffer) { @@ -73,7 +71,7 @@ pub fn chacha_cbc_encrypt_file_many_keys( } unsafe { - chacha_cbc_encrypt_many_sample( + (api.chacha_cbc_encrypt_many_sample)( buffer[..size].as_ptr(), int_sha_states.as_mut_ptr(), size, @@ -97,7 +95,7 @@ pub fn chacha_cbc_encrypt_file_many_keys( } } unsafe { - chacha_end_sha_state( + (api.chacha_end_sha_state)( int_sha_states.as_ptr(), sha_states.as_mut_ptr(), num_keys as u32, @@ -114,22 +112,23 @@ pub fn chacha_cbc_encrypt_file_many_keys( #[cfg(test)] mod tests { + use super::*; use crate::blocktree::get_tmp_ledger_path; - use crate::blocktree::Blocktree; use crate::chacha::chacha_cbc_encrypt_ledger; - use crate::chacha_cuda::chacha_cbc_encrypt_file_many_keys; use crate::entry::create_ticks; use crate::replicator::sample_file; use solana_sdk::clock::DEFAULT_SLOTS_PER_SEGMENT; - use solana_sdk::hash::Hash; use solana_sdk::signature::{Keypair, KeypairUtil}; use std::fs::{remove_dir_all, remove_file}; use std::path::Path; - use std::sync::Arc; #[test] fn test_encrypt_file_many_keys_single() { solana_logger::setup(); + if perf_libs::api().is_none() { + info!("perf-libs unavailable, skipped"); + return; + } let slots_per_segment = 32; let entries = create_ticks(slots_per_segment, Hash::default()); @@ -189,6 +188,10 @@ mod tests { #[test] fn test_encrypt_file_many_keys_multiple_keys() { solana_logger::setup(); + if perf_libs::api().is_none() { + info!("perf-libs unavailable, skipped"); + return; + } let entries = create_ticks(32, Hash::default()); let ledger_dir = "test_encrypt_file_many_keys_multiple"; @@ -255,6 +258,12 @@ mod tests { #[test] fn test_encrypt_file_many_keys_bad_key_length() { + solana_logger::setup(); + if perf_libs::api().is_none() { + info!("perf-libs unavailable, skipped"); + return; + } + let mut keys = hex!("abc123"); let ledger_dir = "test_encrypt_file_many_keys_bad_key_length"; let ledger_path = get_tmp_ledger_path(ledger_dir); diff --git a/core/src/cuda_runtime.rs b/core/src/cuda_runtime.rs index c554f2a78..252e81b3e 100644 --- a/core/src/cuda_runtime.rs +++ b/core/src/cuda_runtime.rs @@ -5,48 +5,55 @@ // copies from host memory to GPU memory unless the memory is page-pinned and // cannot be paged to disk. The cuda driver provides these interfaces to pin and unpin memory. +#[cfg(feature = "pin_gpu_memory")] +use crate::perf_libs; use crate::recycler::Reset; - -#[cfg(all(feature = "cuda", feature = "pin_gpu_memory"))] -use crate::sigverify::{cuda_host_register, cuda_host_unregister}; use std::ops::{Deref, DerefMut}; -#[cfg(all(feature = "cuda", feature = "pin_gpu_memory"))] +#[cfg(feature = "pin_gpu_memory")] use std::os::raw::c_int; -#[cfg(all(feature = "cuda", feature = "pin_gpu_memory"))] +#[cfg(feature = "pin_gpu_memory")] const CUDA_SUCCESS: c_int = 0; pub fn pin(_mem: &mut Vec) { - #[cfg(all(feature = "cuda", feature = "pin_gpu_memory"))] - unsafe { - use core::ffi::c_void; - use std::mem::size_of; + #[cfg(feature = "pin_gpu_memory")] + { + if let Some(api) = perf_libs::api() { + unsafe { + use core::ffi::c_void; + use std::mem::size_of; - let err = cuda_host_register( - _mem.as_mut_ptr() as *mut c_void, - _mem.capacity() * size_of::(), - 0, - ); - if err != CUDA_SUCCESS { - error!( - "cudaHostRegister error: {} ptr: {:?} bytes: {}", - err, - _mem.as_ptr(), - _mem.capacity() * size_of::() - ); + let err = (api.cuda_host_register)( + _mem.as_mut_ptr() as *mut c_void, + _mem.capacity() * size_of::(), + 0, + ); + if err != CUDA_SUCCESS { + error!( + "cudaHostRegister error: {} ptr: {:?} bytes: {}", + err, + _mem.as_ptr(), + _mem.capacity() * size_of::() + ); + } + } } } } pub fn unpin(_mem: *mut T) { - #[cfg(all(feature = "cuda", feature = "pin_gpu_memory"))] - unsafe { - use core::ffi::c_void; + #[cfg(feature = "pin_gpu_memory")] + { + if let Some(api) = perf_libs::api() { + unsafe { + use core::ffi::c_void; - let err = cuda_host_unregister(_mem as *mut c_void); - if err != CUDA_SUCCESS { - error!("cudaHostUnregister returned: {} ptr: {:?}", err, _mem); + let err = (api.cuda_host_unregister)(_mem as *mut c_void); + if err != CUDA_SUCCESS { + error!("cudaHostUnregister returned: {} ptr: {:?}", err, _mem); + } + } } } } diff --git a/core/src/entry.rs b/core/src/entry.rs index 16e55211d..ffc2f47d9 100644 --- a/core/src/entry.rs +++ b/core/src/entry.rs @@ -3,6 +3,7 @@ //! transactions within it. Entries cannot be reordered, and its field `num_hashes` //! represents an approximate amount of time since the last Entry was created. use crate::packet::{Blob, SharedBlob}; +use crate::perf_libs; use crate::poh::Poh; use crate::result::Result; use bincode::{deserialize, serialized_size}; @@ -10,20 +11,14 @@ use rayon::prelude::*; use rayon::ThreadPool; use solana_merkle_tree::MerkleTree; use solana_metrics::inc_new_counter_warn; +use solana_rayon_threadlimit::get_thread_count; use solana_sdk::hash::Hash; use solana_sdk::timing; use solana_sdk::transaction::Transaction; use std::borrow::Borrow; use std::cell::RefCell; use std::sync::mpsc::{Receiver, Sender}; -use std::sync::{Arc, RwLock}; - -#[cfg(feature = "cuda")] -use crate::sigverify::poh_verify_many; -use solana_rayon_threadlimit::get_thread_count; -#[cfg(feature = "cuda")] -use std::sync::Mutex; -#[cfg(feature = "cuda")] +use std::sync::{Arc, Mutex, RwLock}; use std::thread; use std::time::Instant; @@ -257,13 +252,12 @@ impl EntrySlice for [Entry] { res } - #[cfg(not(feature = "cuda"))] - fn verify(&self, start_hash: &Hash) -> bool { - self.verify_cpu(start_hash) - } - - #[cfg(feature = "cuda")] fn verify(&self, start_hash: &Hash) -> bool { + let api = perf_libs::api(); + if api.is_none() { + return self.verify_cpu(start_hash); + } + let api = api.unwrap(); inc_new_counter_warn!("entry_verify-num_entries", self.len() as usize); // Use CPU verify if the batch length is < 1K @@ -287,7 +281,7 @@ impl EntrySlice for [Entry] { .collect(); let num_hashes_vec: Vec = self - .into_iter() + .iter() .map(|entry| entry.num_hashes.saturating_sub(1)) .collect(); @@ -300,7 +294,7 @@ impl EntrySlice for [Entry] { let mut hashes = hashes_clone.lock().unwrap(); let res; unsafe { - res = poh_verify_many( + res = (api.poh_verify_many)( hashes.as_mut_ptr() as *mut u8, num_hashes_vec.as_ptr(), length, diff --git a/core/src/lib.rs b/core/src/lib.rs index 2283cb32b..3d74112e5 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -10,10 +10,10 @@ pub mod banking_stage; pub mod blob_fetch_stage; pub mod broadcast_stage; pub mod chacha; -#[cfg(cuda)] pub mod chacha_cuda; pub mod cluster_info_vote_listener; pub mod confidence; +pub mod perf_libs; pub mod recycler; #[macro_use] pub mod contact_info; @@ -75,6 +75,9 @@ pub(crate) mod version; pub mod weighted_shuffle; pub mod window_service; +#[macro_use] +extern crate dlopen_derive; + #[macro_use] extern crate solana_budget_program; diff --git a/core/src/perf_libs.rs b/core/src/perf_libs.rs new file mode 100644 index 000000000..0337f12ec --- /dev/null +++ b/core/src/perf_libs.rs @@ -0,0 +1,171 @@ +use crate::packet::Packet; +use core::ffi::c_void; +use dlopen::symbor::{Container, SymBorApi, Symbol}; +use std::env; +use std::ffi::OsStr; +use std::fs; +use std::os::raw::{c_int, c_uint}; +use std::path::{Path, PathBuf}; +use std::sync::Once; + +#[repr(C)] +pub struct Elems { + pub elems: *const Packet, + pub num: u32, +} + +#[derive(SymBorApi)] +pub struct Api<'a> { + pub ed25519_init: Symbol<'a, unsafe extern "C" fn() -> bool>, + pub ed25519_set_verbose: Symbol<'a, unsafe extern "C" fn(val: bool)>, + + #[allow(clippy::type_complexity)] + pub ed25519_verify_many: Symbol< + 'a, + unsafe extern "C" fn( + vecs: *const Elems, + num: u32, //number of vecs + message_size: u32, //size of each element inside the elems field of the vec + total_packets: u32, + total_signatures: u32, + message_lens: *const u32, + pubkey_offsets: *const u32, + signature_offsets: *const u32, + signed_message_offsets: *const u32, + out: *mut u8, //combined length of all the items in vecs + use_non_default_stream: u8, + ) -> u32, + >, + + pub chacha_cbc_encrypt_many_sample: Symbol< + 'a, + unsafe extern "C" fn( + input: *const u8, + sha_state: *mut u8, + in_len: usize, + keys: *const u8, + ivec: *mut u8, + num_keys: u32, + samples: *const u64, + num_samples: u32, + starting_block: u64, + time_us: *mut f32, + ), + >, + + pub chacha_init_sha_state: Symbol<'a, unsafe extern "C" fn(sha_state: *mut u8, num_keys: u32)>, + pub chacha_end_sha_state: + Symbol<'a, unsafe extern "C" fn(sha_state_in: *const u8, out: *mut u8, num_keys: u32)>, + + pub poh_verify_many: Symbol< + 'a, + unsafe extern "C" fn( + hashes: *mut u8, + num_hashes_arr: *const u64, + num_elems: usize, + use_non_default_stream: u8, + ) -> c_int, + >, + + pub cuda_host_register: + Symbol<'a, unsafe extern "C" fn(ptr: *mut c_void, size: usize, flags: c_uint) -> c_int>, + + pub cuda_host_unregister: Symbol<'a, unsafe extern "C" fn(ptr: *mut c_void) -> c_int>, +} + +static mut API: Option> = None; + +fn init(name: &OsStr) { + static INIT_HOOK: Once = Once::new(); + + info!("Loading {:?}", name); + unsafe { + INIT_HOOK.call_once(|| { + API = Some(Container::load(name).unwrap_or_else(|err| { + error!("Unable to load {:?}: {}", name, err); + std::process::exit(1); + })); + }) + } +} + +fn locate_perf_libs() -> Option { + let exe = env::current_exe().expect("Unable to get executable path"); + let perf_libs = exe.parent().unwrap().join("perf-libs"); + if perf_libs.is_dir() { + info!("perf-libs found at {:?}", perf_libs); + return Some(perf_libs); + } + warn!("{:?} does not exist", perf_libs); + None +} + +fn find_cuda_home(perf_libs_path: &Path) -> Option { + // Search /usr/local for a `cuda-` directory that matches a perf-libs subdirectory + for entry in fs::read_dir(&perf_libs_path).unwrap() { + if let Ok(entry) = entry { + let path = entry.path(); + if !path.is_dir() { + continue; + } + let dir_name = path.file_name().unwrap().to_str().unwrap_or(""); + if !dir_name.starts_with("cuda-") { + continue; + } + + let cuda_home: PathBuf = ["/", "usr", "local", dir_name].iter().collect(); + if !cuda_home.is_dir() { + continue; + } + + return Some(cuda_home); + } + } + None +} + +pub fn init_cuda() { + if let Some(perf_libs_path) = locate_perf_libs() { + if let Some(cuda_home) = find_cuda_home(&perf_libs_path) { + info!("CUDA installation found at {:?}", cuda_home); + + let cuda_lib64_dir = cuda_home.join("lib64"); + if cuda_lib64_dir.is_dir() { + let ld_library_path = cuda_lib64_dir.to_str().unwrap_or("").to_string() + + ":" + + &env::var("LD_LIBRARY_PATH").unwrap_or_else(|_| "".to_string()); + info!("LD_LIBRARY_PATH set to {:?}", ld_library_path); + + // Prefix LD_LIBRARY_PATH with $CUDA_HOME/lib64 directory + // to ensure the correct CUDA version is used + env::set_var("LD_LIBRARY_PATH", ld_library_path) + } else { + warn!("{:?} does not exist", cuda_lib64_dir); + } + + let libcuda_crypt = perf_libs_path + .join(cuda_home.file_name().unwrap()) + .join("libcuda-crypt.so"); + return init(libcuda_crypt.as_os_str()); + } else { + warn!("CUDA installation not found"); + } + } + + // Last resort! Blindly load the shared object and hope it all works out + init(OsStr::new("libcuda-crypt.so")) +} + +pub fn api() -> Option<&'static Container>> { + #[cfg(test)] + { + static INIT_HOOK: Once = Once::new(); + INIT_HOOK.call_once(|| { + if std::env::var("TEST_PERF_LIBS_CUDA").is_ok() { + init_cuda(); + } + }) + } + + unsafe { API.as_ref() } +} diff --git a/core/src/sigverify.rs b/core/src/sigverify.rs index 579f9c830..ef9b2699a 100644 --- a/core/src/sigverify.rs +++ b/core/src/sigverify.rs @@ -1,11 +1,12 @@ //! The `sigverify` module provides digital signature verification functions. //! By default, signatures are verified in parallel using all available CPU -//! cores. When `--features=cuda` is enabled, signature verification is -//! offloaded to the GPU. +//! cores. When perf-libs are available signature verification is offloaded +//! to the GPU. //! use crate::cuda_runtime::PinnedVec; use crate::packet::{Packet, Packets}; +use crate::perf_libs; use crate::recycler::Recycler; use crate::result::Result; use bincode::serialized_size; @@ -19,11 +20,7 @@ use solana_sdk::signature::Signature; use solana_sdk::transaction::Transaction; use std::mem::size_of; -#[cfg(feature = "cuda")] -use core::ffi::c_void; use solana_rayon_threadlimit::get_thread_count; -#[cfg(feature = "cuda")] -use std::os::raw::{c_int, c_uint}; pub const NUM_THREADS: u32 = 10; use std::cell::RefCell; @@ -36,62 +33,16 @@ pub type TxOffset = PinnedVec; type TxOffsets = (TxOffset, TxOffset, TxOffset, TxOffset, Vec>); -#[cfg(feature = "cuda")] -#[repr(C)] -struct Elems { - elems: *const Packet, - num: u32, -} - -#[cfg(feature = "cuda")] -#[link(name = "cuda-crypt")] -extern "C" { - fn ed25519_init() -> bool; - fn ed25519_set_verbose(val: bool); - fn ed25519_verify_many( - vecs: *const Elems, - num: u32, //number of vecs - message_size: u32, //size of each element inside the elems field of the vec - total_packets: u32, - total_signatures: u32, - message_lens: *const u32, - pubkey_offsets: *const u32, - signature_offsets: *const u32, - signed_message_offsets: *const u32, - out: *mut u8, //combined length of all the items in vecs - use_non_default_stream: u8, - ) -> u32; - - pub fn chacha_cbc_encrypt_many_sample( - input: *const u8, - sha_state: *mut u8, - in_len: usize, - keys: *const u8, - ivec: *mut u8, - num_keys: u32, - samples: *const u64, - num_samples: u32, - starting_block: u64, - time_us: *mut f32, - ); - - pub fn chacha_init_sha_state(sha_state: *mut u8, num_keys: u32); - pub fn chacha_end_sha_state(sha_state_in: *const u8, out: *mut u8, num_keys: u32); - - pub fn poh_verify_many( - hashes: *mut u8, - num_hashes_arr: *const u64, - num_elems: usize, - use_non_default_stream: u8, - ) -> c_int; - - pub fn cuda_host_register(ptr: *mut c_void, size: usize, flags: c_uint) -> c_int; - pub fn cuda_host_unregister(ptr: *mut c_void) -> c_int; -} - -#[cfg(not(feature = "cuda"))] pub fn init() { - // stub + if let Some(api) = perf_libs::api() { + unsafe { + (api.ed25519_set_verbose)(true); + if !(api.ed25519_init)() { + panic!("ed25519_init() failed"); + } + (api.ed25519_set_verbose)(false); + } + } } fn verify_packet(packet: &Packet) -> u8 { @@ -130,15 +81,6 @@ fn batch_size(batches: &[Packets]) -> usize { batches.iter().map(|p| p.packets.len()).sum() } -#[cfg(not(feature = "cuda"))] -pub fn ed25519_verify( - batches: &[Packets], - _recycler: &Recycler, - _recycler_out: &Recycler>, -) -> Vec> { - ed25519_verify_cpu(batches) -} - pub fn get_packet_offsets(packet: &Packet, current_offset: u32) -> (u32, u32, u32, u32) { let (sig_len, sig_size) = decode_len(&packet.data); let msg_start_offset = sig_size + sig_len * size_of::(); @@ -235,23 +177,17 @@ pub fn ed25519_verify_disabled(batches: &[Packets]) -> Vec> { rv } -#[cfg(feature = "cuda")] -pub fn init() { - unsafe { - ed25519_set_verbose(true); - if !ed25519_init() { - panic!("ed25519_init() failed"); - } - ed25519_set_verbose(false); - } -} - -#[cfg(feature = "cuda")] pub fn ed25519_verify( batches: &[Packets], recycler: &Recycler, recycler_out: &Recycler>, ) -> Vec> { + let api = perf_libs::api(); + if api.is_none() { + return ed25519_verify_cpu(batches); + } + let api = api.unwrap(); + use crate::packet::PACKET_DATA_SIZE; let count = batch_size(batches); @@ -276,7 +212,7 @@ pub fn ed25519_verify( let mut num_packets = 0; for p in batches { - elems.push(Elems { + elems.push(perf_libs::Elems { elems: p.packets.as_ptr(), num: p.packets.len() as u32, }); @@ -292,7 +228,7 @@ pub fn ed25519_verify( trace!("len offset: {}", PACKET_DATA_SIZE as u32); const USE_NON_DEFAULT_STREAM: u8 = 1; unsafe { - let res = ed25519_verify_many( + let res = (api.ed25519_verify_many)( elems.as_ptr(), elems.len() as u32, size_of::() as u32, diff --git a/core/src/sigverify_stage.rs b/core/src/sigverify_stage.rs index 8155db2e0..5918be3f2 100644 --- a/core/src/sigverify_stage.rs +++ b/core/src/sigverify_stage.rs @@ -3,10 +3,11 @@ //! top-level list with a list of booleans, telling the next stage whether the //! signature in that packet is valid. It assumes each packet contains one //! transaction. All processing is done on the CPU by default and on a GPU -//! if the `cuda` feature is enabled with `--features=cuda`. +//! if perf-libs are available use crate::cuda_runtime::PinnedVec; use crate::packet::Packets; +use crate::perf_libs; use crate::recycler::Recycler; use crate::result::{Error, Result}; use crate::service::Service; @@ -21,11 +22,8 @@ use std::sync::mpsc::{Receiver, RecvTimeoutError}; use std::sync::{Arc, Mutex}; use std::thread::{self, Builder, JoinHandle}; -#[cfg(feature = "cuda")] -const RECV_BATCH_MAX: usize = 5_000; - -#[cfg(not(feature = "cuda"))] -const RECV_BATCH_MAX: usize = 1000; +const RECV_BATCH_MAX_CPU: usize = 1_000; +const RECV_BATCH_MAX_GPU: usize = 5_000; pub type VerifiedPackets = Vec<(Packets, Vec)>; @@ -70,7 +68,11 @@ impl SigVerifyStage { ) -> Result<()> { let (batch, len, recv_time) = streamer::recv_batch( &recvr.lock().expect("'recvr' lock in fn verifier"), - RECV_BATCH_MAX, + if perf_libs::api().is_some() { + RECV_BATCH_MAX_GPU + } else { + RECV_BATCH_MAX_CPU + }, )?; inc_new_counter_info!("sigverify_stage-packets_received", len); diff --git a/core/src/storage_stage.rs b/core/src/storage_stage.rs index 220f3cc3d..d9d102af1 100644 --- a/core/src/storage_stage.rs +++ b/core/src/storage_stage.rs @@ -4,7 +4,6 @@ use crate::bank_forks::BankForks; use crate::blocktree::Blocktree; -#[cfg(cuda)] use crate::chacha_cuda::chacha_cbc_encrypt_file_many_keys; use crate::cluster_info::ClusterInfo; use crate::result::{Error, Result}; @@ -408,11 +407,11 @@ impl StorageStage { samples.push(rng.gen_range(0, 10)); } debug!("generated samples: {:?}", samples); + // TODO: cuda required to generate the reference values // but if it is missing, then we need to take care not to // process storage mining results. - #[cfg(cuda)] - { + if crate::perf_libs::api().is_some() { // Lock the keys, since this is the IV memory, // it will be updated in-place by the encryption. // Should be overwritten by the proof signatures which replace the @@ -729,10 +728,8 @@ mod tests { let keypair = Keypair::new(); let hash = Hash::default(); let signature = keypair.sign_message(&hash.as_ref()); - #[cfg(feature = "cuda")] + let mut result = storage_state.get_mining_result(&signature); - #[cfg(not(feature = "cuda"))] - let result = storage_state.get_mining_result(&signature); assert_eq!(result, Hash::default()); @@ -752,26 +749,27 @@ mod tests { .collect::>(); bank_sender.send(rooted_banks).unwrap(); - #[cfg(feature = "cuda")] - for _ in 0..5 { - result = storage_state.get_mining_result(&signature); - if result != Hash::default() { - info!("found result = {:?} sleeping..", result); - break; + if crate::perf_libs::api().is_some() { + for _ in 0..5 { + result = storage_state.get_mining_result(&signature); + if result != Hash::default() { + info!("found result = {:?} sleeping..", result); + break; + } + info!("result = {:?} sleeping..", result); + sleep(Duration::new(1, 0)); } - info!("result = {:?} sleeping..", result); - sleep(Duration::new(1, 0)); } info!("joining..?"); exit.store(true, Ordering::Relaxed); storage_stage.join().unwrap(); - #[cfg(not(cuda))] - assert_eq!(result, Hash::default()); - - #[cfg(cuda)] - assert_ne!(result, Hash::default()); + if crate::perf_libs::api().is_some() { + assert_ne!(result, Hash::default()); + } else { + assert_eq!(result, Hash::default()); + } remove_dir_all(ledger_path).unwrap(); } diff --git a/core/src/validator.rs b/core/src/validator.rs index 6843da2b6..723b9fbbe 100644 --- a/core/src/validator.rs +++ b/core/src/validator.rs @@ -118,7 +118,14 @@ impl Validator { warn!("identity pubkey: {:?}", id); warn!("vote pubkey: {:?}", vote_account); - warn!("CUDA is {}abled", if cfg!(cuda) { "en" } else { "dis" }); + warn!( + "CUDA is {}abled", + if crate::perf_libs::api().is_some() { + "en" + } else { + "dis" + } + ); info!("entrypoint: {:?}", entrypoint_info_option); Self::print_node_info(&node); diff --git a/fetch-perf-libs.sh b/fetch-perf-libs.sh index 2b907ec32..d7c2edb80 100755 --- a/fetch-perf-libs.sh +++ b/fetch-perf-libs.sh @@ -1,19 +1,20 @@ #!/usr/bin/env bash PERF_LIBS_VERSION=v0.15.0 +VERSION=$PERF_LIBS_VERSION-1 set -e cd "$(dirname "$0")" -if [[ ! -f target/perf-libs/.$PERF_LIBS_VERSION ]]; then +if [[ ! -f target/perf-libs/.$VERSION ]]; then if [[ $(uname) != Linux ]]; then - echo Performance libraries are only available for Linux - exit 1 + echo Note: Performance libraries are only available for Linux + exit 0 fi if [[ $(uname -m) != x86_64 ]]; then - echo Performance libraries are only available for x86_64 architecture - exit 1 + echo Note: Performance libraries are only available for x86_64 architecture + exit 0 fi mkdir -p target/perf-libs @@ -24,72 +25,16 @@ if [[ ! -f target/perf-libs/.$PERF_LIBS_VERSION ]]; then https://github.com/solana-labs/solana-perf-libs/releases/download/$PERF_LIBS_VERSION/solana-perf.tgz tar zxvf solana-perf.tgz rm -f solana-perf.tgz - touch .$PERF_LIBS_VERSION + touch .$VERSION ) - echo + + # Setup symlinks so the perf-libs/ can be found from all binaries run out of + # target/ + for dir in target/{debug,release}/{,deps/}; do + mkdir -p $dir + ln -sfT ../perf-libs ${dir}perf-libs + done + fi -cat > target/perf-libs/env.sh <<'EOF' -if [[ -n $SOLANA_PERF_LIBS ]]; then - echo "solana-perf-libs version: $(cat $SOLANA_PERF_LIBS/solana-perf-HEAD.txt)" - return -fi -SOLANA_PERF_LIBS="$(cd $(dirname "${BASH_SOURCE[0]}"); pwd)" - -SOLANA_PERF_LIBS_CUDA= -for _supported_cuda in $(cd $SOLANA_PERF_LIBS; find . -maxdepth 1 -type d -regex './cuda-.*' | sort -r); do - _supported_cuda=$(basename "$_supported_cuda") - CUDA_HOME=/usr/local/$_supported_cuda - [[ -d $CUDA_HOME ]] || { - echo "$_supported_cuda not detected: $CUDA_HOME directory does not exist" - continue - } - [[ -r $CUDA_HOME/version.txt ]] || { - echo "$_supported_cuda not detected: $CUDA_HOME/version.txt does not exist" - continue - } - echo - cat "$CUDA_HOME/version.txt" - echo "CUDA_HOME=$CUDA_HOME" - SOLANA_PERF_LIBS_CUDA=$_supported_cuda - export CUDA_HOME - export SOLANA_PERF_LIBS_CUDA - break -done - -if [[ -z $SOLANA_PERF_LIBS_CUDA ]]; then - echo No supported CUDA versions detected - echo - echo LD_LIBRARY_PATH="$SOLANA_PERF_LIBS:$LD_LIBRARY_PATH" - export LD_LIBRARY_PATH="$SOLANA_PERF_LIBS:$LD_LIBRARY_PATH" -else - echo - echo LD_LIBRARY_PATH="$SOLANA_PERF_LIBS:$SOLANA_PERF_LIBS/$SOLANA_PERF_LIBS_CUDA:$CUDA_HOME/lib64:$LD_LIBRARY_PATH" - export LD_LIBRARY_PATH="$SOLANA_PERF_LIBS:$SOLANA_PERF_LIBS/$SOLANA_PERF_LIBS_CUDA:$CUDA_HOME/lib64:$LD_LIBRARY_PATH" - - echo PATH="$SOLANA_PERF_LIBS/$SOLANA_PERF_LIBS_CUDA:$CUDA_HOME/bin:$PATH" - export PATH="$SOLANA_PERF_LIBS/$SOLANA_PERF_LIBS_CUDA:$CUDA_HOME/bin:$PATH" - - if [[ -r "$CUDA_HOME"/version.txt && -r $SOLANA_PERF_LIBS/$SOLANA_PERF_LIBS_CUDA/cuda-version.txt ]]; then - if ! diff "$CUDA_HOME"/version.txt "$SOLANA_PERF_LIBS/$SOLANA_PERF_LIBS_CUDA"/cuda-version.txt > /dev/null; then - echo ============================================== - echo "Warning: possible CUDA version mismatch with $CUDA_HOME" - echo - echo "Expected version: $(cat "$SOLANA_PERF_LIBS/$SOLANA_PERF_LIBS_CUDA"/cuda-version.txt)" - echo "Detected version: $(cat "$CUDA_HOME"/version.txt)" - echo ============================================== - fi - else - echo ============================================== - echo Warning: unable to validate CUDA version - echo ============================================== - fi -fi -echo -echo "solana-perf-libs version: $(cat $SOLANA_PERF_LIBS/solana-perf-HEAD.txt)" - -EOF - -echo "Setup shell environment with:" -echo " source $PWD/target/perf-libs/env.sh" exit 0 diff --git a/local_cluster/Cargo.toml b/local_cluster/Cargo.toml index 20a91d0a0..fda0422c7 100644 --- a/local_cluster/Cargo.toml +++ b/local_cluster/Cargo.toml @@ -34,7 +34,3 @@ solana-rayon-threadlimit = { path = "../rayon-threadlimit", version = "0.20.0" } [dev-dependencies] serial_test = "0.2.0" serial_test_derive = "0.2.0" - -[features] -cuda = ["solana-core/cuda"] - diff --git a/multinode-demo/common.sh b/multinode-demo/common.sh index cbcb8e0a1..c89c306ef 100644 --- a/multinode-demo/common.sh +++ b/multinode-demo/common.sh @@ -18,11 +18,6 @@ if [[ $(uname) != Linux ]]; then fi fi -if [[ -f "$SOLANA_ROOT"/target/perf-libs/env.sh ]]; then - # shellcheck source=/dev/null - source "$SOLANA_ROOT"/target/perf-libs/env.sh -fi - if [[ -n $USE_INSTALL || ! -f "$SOLANA_ROOT"/Cargo.toml ]]; then solana_program() { declare program="$1" @@ -57,7 +52,7 @@ fi solana_bench_tps=$(solana_program bench-tps) solana_drone=$(solana_program drone) solana_validator=$(solana_program validator) -solana_validator_cuda=$(solana_program validator-cuda) +solana_validator_cuda="$solana_validator --cuda" solana_genesis=$(solana_program genesis) solana_gossip=$(solana_program gossip) solana_keygen=$(solana_program keygen) diff --git a/net/README.md b/net/README.md index 9c8db55be..f95fd457d 100644 --- a/net/README.md +++ b/net/README.md @@ -73,12 +73,6 @@ $ ./ec2.sh create -g ... If deploying a tarball-based network nothing further is required, as GPU presence is detected at runtime and the CUDA build is auto selected. -If deploying a locally-built network, first run `./fetch-perf-libs.sh` then -ensure the `cuda` feature is specified at network start: -```bash -$ ./net.sh start -f "cuda" -``` - ### How to interact with a CD testnet deployed by ci/testnet-deploy.sh **AWS-Specific Extra Setup**: Follow the steps in diff --git a/net/net.sh b/net/net.sh index 7512cb950..a360ee822 100755 --- a/net/net.sh +++ b/net/net.sh @@ -32,8 +32,6 @@ Operate a configured testnet -t edge|beta|stable|vX.Y.Z - Deploy the latest tarball release for the specified release channel (edge|beta|stable) or release tag (vX.Y.Z) - -f [cargoFeatures] - List of |cargo --feaures=| to activate - (ignored if -s or -S is specified) -r / --skip-setup - Reuse existing node/ledger configuration from a previous |start| (ie, don't run ./multinode-demo/setup.sh). -d / --debug - Build/deploy the testnet with debug binaries @@ -111,7 +109,6 @@ releaseChannel= deployMethod=local deployIfNewer= sanityExtraArgs= -cargoFeatures= skipSetup=false customPrograms= updatePlatforms= @@ -220,9 +217,6 @@ while getopts "h?T:t:o:f:rD:c:Fn:i:d" opt "${shortArgs[@]}"; do ;; esac ;; - f) - cargoFeatures=$OPTARG - ;; n) numFullnodesRequested=$OPTARG ;; @@ -340,11 +334,6 @@ build() { set -x rm -rf farf - if [[ -r target/perf-libs/env.sh ]]; then - # shellcheck source=/dev/null - source target/perf-libs/env.sh - fi - buildVariant= if $debugBuild; then buildVariant=debug @@ -352,7 +341,7 @@ build() { $MAYBE_DOCKER bash -c " set -ex - scripts/cargo-install-all.sh farf \"$cargoFeatures\" \"$buildVariant\" + scripts/cargo-install-all.sh farf \"$buildVariant\" if [[ -n \"$customPrograms\" ]]; then scripts/cargo-install-custom-programs.sh farf $customPrograms fi diff --git a/net/remote/remote-client.sh b/net/remote/remote-client.sh index 397d75723..59b3e0266 100755 --- a/net/remote/remote-client.sh +++ b/net/remote/remote-client.sh @@ -33,12 +33,7 @@ case $deployMethod in local|tar) PATH="$HOME"/.cargo/bin:"$PATH" export USE_INSTALL=1 - - ./fetch-perf-libs.sh - # shellcheck source=/dev/null - source ./target/perf-libs/env.sh - - net/scripts/rsync-retry.sh -vPrc "$entrypointIp:~/.cargo/bin/solana*" ~/.cargo/bin/ + net/scripts/rsync-retry.sh -vPrc "$entrypointIp:~/.cargo/bin/*" ~/.cargo/bin/ ;; skip) ;; diff --git a/net/remote/remote-node.sh b/net/remote/remote-node.sh index 69f28f26c..e63f62200 100755 --- a/net/remote/remote-node.sh +++ b/net/remote/remote-node.sh @@ -93,15 +93,12 @@ local|tar|skip) export USE_INSTALL=1 ./fetch-perf-libs.sh - # shellcheck source=/dev/null - source ./target/perf-libs/env.sh cat >> ~/solana/on-reboot <> ~/solana/on-reboot < net-stats.log 2>&1 & echo \$! > net-stats.pid - if [[ -e /dev/nvidia0 && -x ~/.cargo/bin/solana-validator-cuda ]]; then + if [[ -e /dev/nvidia0 ]]; then echo Selecting solana-validator-cuda export SOLANA_CUDA=1 fi diff --git a/net/remote/remote-sanity.sh b/net/remote/remote-sanity.sh index c916d1b38..1f0c07967 100755 --- a/net/remote/remote-sanity.sh +++ b/net/remote/remote-sanity.sh @@ -67,11 +67,6 @@ case $deployMethod in local|tar|skip) PATH="$HOME"/.cargo/bin:"$PATH" export USE_INSTALL=1 - if [[ -r target/perf-libs/env.sh ]]; then - # shellcheck source=/dev/null - source target/perf-libs/env.sh - fi - solana_gossip=solana-gossip solana_install=solana-install ;; diff --git a/scripts/cargo-install-all.sh b/scripts/cargo-install-all.sh index 61333f81a..7606eba3b 100755 --- a/scripts/cargo-install-all.sh +++ b/scripts/cargo-install-all.sh @@ -17,13 +17,7 @@ fi installDir="$(mkdir -p "$1"; cd "$1"; pwd)" cargo=cargo -cargoFeatures="$2" -debugBuild="$3" - -if [[ -n $cargoFeatures && $cargoFeatures != cuda ]]; then - echo "Unsupported feature flag: $cargoFeatures" - exit 1 -fi +debugBuild="$2" buildVariant=release maybeReleaseFlag=--release @@ -35,6 +29,7 @@ fi echo "Install location: $installDir ($buildVariant)" cd "$(dirname "$0")"/.. +./fetch-perf-libs.sh SECONDS=0 @@ -78,19 +73,8 @@ for bin in "${BINS[@]}"; do cp -fv "target/$buildVariant/$bin" "$installDir"/bin done - -if [[ "$cargoFeatures" = cuda ]]; then - ( - set -x - ./fetch-perf-libs.sh - - # shellcheck source=/dev/null - source ./target/perf-libs/env.sh - - # shellcheck disable=SC2086 # Don't want to double quote $rust_version - cargo $rust_version build $maybeReleaseFlag --package solana-validator-cuda - ) - cp -fv "target/$buildVariant/solana-validator-cuda" "$installDir"/bin +if [[ -d target/perf-libs ]]; then + cp -a target/perf-libs "$installDir"/bin/perf-libs fi for dir in programs/*; do diff --git a/scripts/coverage.sh b/scripts/coverage.sh index 886e72419..3574895f4 100755 --- a/scripts/coverage.sh +++ b/scripts/coverage.sh @@ -15,7 +15,7 @@ reportName="lcov-${CI_COMMIT:0:9}" if [[ -n $1 ]]; then crate="--package $1" else - crate="--all --exclude solana-local-cluster --exclude solana-validator-cuda" + crate="--all --exclude solana-local-cluster" fi coverageFlags=(-Zprofile) # Enable coverage diff --git a/validator-cuda/.gitignore b/validator-cuda/.gitignore deleted file mode 100644 index 5404b132d..000000000 --- a/validator-cuda/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/target/ -/farf/ diff --git a/validator-cuda/Cargo.toml b/validator-cuda/Cargo.toml deleted file mode 100644 index fb3c33309..000000000 --- a/validator-cuda/Cargo.toml +++ /dev/null @@ -1,14 +0,0 @@ -[package] -authors = ["Solana Maintainers "] -edition = "2018" -name = "solana-validator-cuda" -description = "Blockchain, Rebuilt for Scale" -version = "0.20.0" -repository = "https://github.com/solana-labs/solana" -license = "Apache-2.0" -homepage = "https://solana.com/" -publish = false - -[dependencies] -solana-core = { path = "../core", version = "0.20.0", features=["cuda"] } -solana-validator = { path = "../validator", version = "0.20.0" } diff --git a/validator-cuda/src/main.rs b/validator-cuda/src/main.rs deleted file mode 100644 index 1bba484af..000000000 --- a/validator-cuda/src/main.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - solana_validator::main() -} diff --git a/validator/src/lib.rs b/validator/src/lib.rs index fff52f087..a6fdb54d5 100644 --- a/validator/src/lib.rs +++ b/validator/src/lib.rs @@ -390,7 +390,17 @@ pub fn main() { .takes_value(false) .help("Skip ledger verification at node bootup"), ) - .get_matches(); + .arg( + clap::Arg::with_name("cuda") + .long("cuda") + .takes_value(false) + .help("Use CUDA"), + ) + .get_matches(); + + if matches.is_present("cuda") { + solana_core::perf_libs::init_cuda(); + } let mut validator_config = ValidatorConfig::default(); let keypair = if let Some(identity) = matches.value_of("identity") {