From 9f03110f3256e81173c65c2ed1810efaa74fbc5b Mon Sep 17 00:00:00 2001 From: Jeremy Rubin Date: Sat, 7 Jan 2017 16:34:50 -0500 Subject: [PATCH 1/2] Add Basic CheckQueue Benchmark --- src/Makefile.bench.include | 1 + src/bench/checkqueue.cpp | 94 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 src/bench/checkqueue.cpp diff --git a/src/Makefile.bench.include b/src/Makefile.bench.include index e58bd9dfb..8c699c2f8 100644 --- a/src/Makefile.bench.include +++ b/src/Makefile.bench.include @@ -15,6 +15,7 @@ bench_bench_bitcoin_SOURCES = \ bench/bench.cpp \ bench/bench.h \ bench/checkblock.cpp \ + bench/checkqueue.cpp \ bench/Examples.cpp \ bench/rollingbloom.cpp \ bench/crypto_hash.cpp \ diff --git a/src/bench/checkqueue.cpp b/src/bench/checkqueue.cpp new file mode 100644 index 000000000..280d24a5e --- /dev/null +++ b/src/bench/checkqueue.cpp @@ -0,0 +1,94 @@ +// Copyright (c) 2015 The Bitcoin Core developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include "bench.h" +#include "util.h" +#include "validation.h" +#include "checkqueue.h" +#include "prevector.h" +#include +#include +#include "random.h" + + +// This Benchmark tests the CheckQueue with the lightest +// weight Checks, so it should make any lock contention +// particularly visible +static void CCheckQueueSpeed(benchmark::State& state) +{ + struct FakeJobNoWork { + bool operator()() + { + return true; + } + void swap(FakeJobNoWork& x){}; + }; + CCheckQueue queue {128}; + boost::thread_group tg; + for (auto x = 0; x < std::max(2, GetNumCores()); ++x) { + tg.create_thread([&]{queue.Thread();}); + } + while (state.KeepRunning()) { + CCheckQueueControl control(&queue); + // We can make vChecks out of the loop because calling Add doesn't + // change the size of the vector. + std::vector vChecks; + vChecks.resize(30); + + // We call Add a number of times to simulate the behavior of adding + // a block of transactions at once. + for (size_t j = 0; j < 101; ++j) { + control.Add(vChecks); + } + // control waits for completion by RAII, but + // it is done explicitly here for clarity + control.Wait(); + } + tg.interrupt_all(); + tg.join_all(); +} + +// This Benchmark tests the CheckQueue with a slightly realistic workload, +// where checks all contain a prevector that is indirect 50% of the time +// and there is a little bit of work done between calls to Add. +static void CCheckQueueSpeedPrevectorJob(benchmark::State& state) +{ + struct PrevectorJob { + prevector<28, uint8_t> p; + PrevectorJob(){ + } + PrevectorJob(FastRandomContext& insecure_rand){ + p.resize(insecure_rand.rand32() % 56); + } + bool operator()() + { + return true; + } + void swap(PrevectorJob& x){p.swap(x.p);}; + }; + CCheckQueue queue {128}; + boost::thread_group tg; + for (auto x = 0; x < std::max(2, GetNumCores()); ++x) { + tg.create_thread([&]{queue.Thread();}); + } + while (state.KeepRunning()) { + // Make insecure_rand here so that each iteration is identical. + FastRandomContext insecure_rand(true); + CCheckQueueControl control(&queue); + for (size_t j = 0; j < 101; ++j) { + std::vector vChecks; + vChecks.reserve(30); + for (auto x = 0; x < 30; ++x) + vChecks.emplace_back(insecure_rand); + control.Add(vChecks); + } + // control waits for completion by RAII, but + // it is done explicitly here for clarity + control.Wait(); + } + tg.interrupt_all(); + tg.join_all(); +} +BENCHMARK(CCheckQueueSpeed); +BENCHMARK(CCheckQueueSpeedPrevectorJob); From aad4cb50599b15d6fc439e465a616635e96466d1 Mon Sep 17 00:00:00 2001 From: Jeremy Rubin Date: Wed, 11 Jan 2017 17:04:59 -0500 Subject: [PATCH 2/2] Address ryanofsky feedback on CCheckQueue benchmarks. Eliminated magic numbers, fixed scoping of vectors (and memory movement component of benchmark). --- src/bench/checkqueue.cpp | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/src/bench/checkqueue.cpp b/src/bench/checkqueue.cpp index 280d24a5e..6fa9fe4fe 100644 --- a/src/bench/checkqueue.cpp +++ b/src/bench/checkqueue.cpp @@ -15,6 +15,11 @@ // This Benchmark tests the CheckQueue with the lightest // weight Checks, so it should make any lock contention // particularly visible +static const int MIN_CORES = 2; +static const size_t BATCHES = 101; +static const size_t BATCH_SIZE = 30; +static const int PREVECTOR_SIZE = 28; +static const int QUEUE_BATCH_SIZE = 128; static void CCheckQueueSpeed(benchmark::State& state) { struct FakeJobNoWork { @@ -24,21 +29,25 @@ static void CCheckQueueSpeed(benchmark::State& state) } void swap(FakeJobNoWork& x){}; }; - CCheckQueue queue {128}; + CCheckQueue queue {QUEUE_BATCH_SIZE}; boost::thread_group tg; - for (auto x = 0; x < std::max(2, GetNumCores()); ++x) { + for (auto x = 0; x < std::max(MIN_CORES, GetNumCores()); ++x) { tg.create_thread([&]{queue.Thread();}); } while (state.KeepRunning()) { CCheckQueueControl control(&queue); - // We can make vChecks out of the loop because calling Add doesn't - // change the size of the vector. - std::vector vChecks; - vChecks.resize(30); // We call Add a number of times to simulate the behavior of adding // a block of transactions at once. - for (size_t j = 0; j < 101; ++j) { + + std::vector> vBatches(BATCHES); + for (auto& vChecks : vBatches) { + vChecks.resize(BATCH_SIZE); + } + for (auto& vChecks : vBatches) { + // We can't make vChecks in the inner loop because we want to measure + // the cost of getting the memory to each thread and we might get the same + // memory control.Add(vChecks); } // control waits for completion by RAII, but @@ -55,11 +64,11 @@ static void CCheckQueueSpeed(benchmark::State& state) static void CCheckQueueSpeedPrevectorJob(benchmark::State& state) { struct PrevectorJob { - prevector<28, uint8_t> p; + prevector p; PrevectorJob(){ } PrevectorJob(FastRandomContext& insecure_rand){ - p.resize(insecure_rand.rand32() % 56); + p.resize(insecure_rand.rand32() % (PREVECTOR_SIZE*2)); } bool operator()() { @@ -67,19 +76,19 @@ static void CCheckQueueSpeedPrevectorJob(benchmark::State& state) } void swap(PrevectorJob& x){p.swap(x.p);}; }; - CCheckQueue queue {128}; + CCheckQueue queue {QUEUE_BATCH_SIZE}; boost::thread_group tg; - for (auto x = 0; x < std::max(2, GetNumCores()); ++x) { + for (auto x = 0; x < std::max(MIN_CORES, GetNumCores()); ++x) { tg.create_thread([&]{queue.Thread();}); } while (state.KeepRunning()) { // Make insecure_rand here so that each iteration is identical. FastRandomContext insecure_rand(true); CCheckQueueControl control(&queue); - for (size_t j = 0; j < 101; ++j) { - std::vector vChecks; - vChecks.reserve(30); - for (auto x = 0; x < 30; ++x) + std::vector> vBatches(BATCHES); + for (auto& vChecks : vBatches) { + vChecks.reserve(BATCH_SIZE); + for (size_t x = 0; x < BATCH_SIZE; ++x) vChecks.emplace_back(insecure_rand); control.Add(vChecks); }