Implementation of a concurrent fixed-size pool that leverages thread-local caches to avoid mutexing

This commit is contained in:
Francisco 2021-03-25 14:18:00 +00:00 committed by Francisco Paisana
parent e200a3359e
commit e1523692c2
8 changed files with 344 additions and 122 deletions
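
The core idea named in the commit message fits in a few lines: each thread serves allocations from its own thread_local free list and only falls back to a mutex-protected shared list when that list runs dry. The sketch below is a generic, hedged illustration of that pattern; none of these names appear in the commit itself:

// Conceptual sketch only (not code from this commit): thread-local fast path,
// mutexed shared pool as the slow path.
#include <mutex>
#include <vector>

static std::vector<void*> shared_blocks; // pre-filled with blocks at startup
static std::mutex         shared_mutex;

void* allocate_block()
{
  thread_local std::vector<void*> local_cache; // per-thread, no locking needed
  if (local_cache.empty()) {
    // Slow path: refill a small batch from the shared pool under the mutex.
    std::lock_guard<std::mutex> lock(shared_mutex);
    for (int i = 0; i < 10 and not shared_blocks.empty(); ++i) {
      local_cache.push_back(shared_blocks.back());
      shared_blocks.pop_back();
    }
  }
  if (local_cache.empty()) {
    return nullptr; // pool exhausted
  }
  void* block = local_cache.back();
  local_cache.pop_back();
  return block;
}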

View File

@@ -13,6 +13,7 @@
#ifndef SRSRAN_TYPE_STORAGE_H
#define SRSRAN_TYPE_STORAGE_H
#include <cstdint>
#include <type_traits>
#include <utility>
@@ -20,6 +21,17 @@ namespace srsran {
namespace detail {
// NOTE: gcc 4.8.5 is missing std::max_align_t. Need to create a struct
union max_alignment_t {
char c;
float f;
uint32_t i;
uint64_t i2;
double d;
long double d2;
uint32_t* ptr;
};
template <typename T>
struct type_storage {
using value_type = T;
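
As an aside, the workaround above relies on a union's alignment being the strictest alignment among its members, so it can stand in for std::max_align_t when declaring aligned raw storage. A minimal sketch, assuming the type_storage.h header above is included (the example_block alias is illustrative, not part of the commit):

// Sketch only: raw storage sized by the caller, aligned like max_alignment_t.
#include <cstddef>
#include <type_traits>

template <std::size_t Size>
using example_block =
    typename std::aligned_storage<Size, alignof(srsran::detail::max_alignment_t)>::type;

// The union is at least as strictly aligned as its widest member.
static_assert(alignof(srsran::detail::max_alignment_t) >= alignof(long double), "unexpected alignment");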

View File

@@ -0,0 +1,113 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2021 Software Radio Systems Limited
*
* By using this file, you agree to the terms and conditions set
* forth in the LICENSE file which can be found at the top level of
* the distribution.
*
*/
#ifndef SRSRAN_FIXED_SIZE_POOL_H
#define SRSRAN_FIXED_SIZE_POOL_H
#include "memblock_cache.h"
#include "srsran/adt/circular_buffer.h"
#include "srsran/adt/singleton.h"
#include <array>
#include <cstdint>
#include <memory>
#include <mutex>
#include <thread>
#include <type_traits>
#include <vector>
namespace srsran {
/**
* Concurrent fixed-size memory pool made of blocks of equal size.
* Each worker keeps a separate, thread-local memory block cache that it uses for fast allocation/deallocation.
* When this cache gets depleted, the worker tries to obtain blocks from a shared memory block cache.
* Note: This pool does not implement stealing of blocks between workers, so it is possible that a worker cannot
*       allocate while another worker still has blocks in its own cache. This situation is mitigated by upper
*       bounding the size of each worker cache.
* Note2: Since the pool relies on thread_local storage, this class is implemented as a singleton.
* @tparam NofObjects number of objects in the pool
* @tparam ObjSize object size
*/
template <size_t NofObjects, size_t ObjSize, size_t MaxWorkerCacheSize = NofObjects / 16>
class concurrent_fixed_memory_pool : public singleton_t<concurrent_fixed_memory_pool<NofObjects, ObjSize> >
{
static_assert(NofObjects > 256, "This pool is particularly designed for a high number of objects");
static_assert(ObjSize > 256, "This pool is particularly designed for large objects");
struct obj_storage_t {
typename std::aligned_storage<ObjSize, alignof(detail::max_alignment_t)>::type buffer;
std::thread::id worker_id;
explicit obj_storage_t(std::thread::id id_) : worker_id(id_) {}
};
const static size_t batch_steal_size = 10;
protected:
// ctor only accessible from singleton
concurrent_fixed_memory_pool()
{
allocated_blocks.resize(NofObjects);
for (std::unique_ptr<obj_storage_t>& b : allocated_blocks) {
b.reset(new obj_storage_t(std::this_thread::get_id()));
srsran_assert(b.get() != nullptr, "Failed to instantiate fixed memory pool");
shared_mem_cache.push(static_cast<void*>(b.get()));
}
}
public:
static size_t size() { return NofObjects; }
void* allocate_node(size_t sz)
{
srsran_assert(sz <= ObjSize, "Allocated node size=%zd exceeds max object size=%zd", sz, ObjSize);
memblock_cache* worker_cache = get_worker_cache();
void* node = worker_cache->try_pop();
if (node == nullptr) {
// refill the thread-local cache with enough blocks for this and subsequent allocations
std::array<void*, batch_steal_size> popped_blocks;
size_t n = shared_mem_cache.try_pop(popped_blocks);
for (size_t i = 0; i < n; ++i) {
new (popped_blocks[i]) obj_storage_t(std::this_thread::get_id());
worker_cache->push(static_cast<uint8_t*>(popped_blocks[i]));
}
node = worker_cache->try_pop();
}
return node;
}
void deallocate_node(void* p)
{
srsran_assert(p != nullptr, "Deallocated nodes must have a valid address");
memblock_cache* worker_cache = get_worker_cache();
obj_storage_t* block_ptr = static_cast<obj_storage_t*>(p);
if (block_ptr->worker_id != std::this_thread::get_id() or worker_cache->size() >= MaxWorkerCacheSize) {
// if the block was allocated by a different thread, or the local cache has reached its max capacity,
// return the block to the shared container
shared_mem_cache.push(static_cast<void*>(block_ptr));
return;
}
// push to local memory block cache
worker_cache->push(static_cast<uint8_t*>(p));
}
private:
memblock_cache* get_worker_cache()
{
thread_local memblock_cache worker_cache;
return &worker_cache;
}
mutexed_memblock_cache shared_mem_cache;
std::mutex mutex;
std::vector<std::unique_ptr<obj_storage_t> > allocated_blocks;
};
} // namespace srsran
#endif // SRSRAN_FIXED_SIZE_POOL_H
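
To show the intended usage (mirroring the unit test later in this commit), a class can route its operator new/delete through the pool singleton so that every instance comes from a pre-allocated block. The my_large_obj name and the pool parameters below are illustrative assumptions, and the std::bad_alloc throw is added for exposition, not something the commit's test does:

// Sketch only: assumes fixed_size_pool.h is included.
#include <array>
#include <cstddef>
#include <cstdint>
#include <new>

struct my_large_obj {
  using pool_t = srsran::concurrent_fixed_memory_pool<1024, 512>; // 1024 blocks of 512 bytes each

  std::array<uint8_t, 400> payload; // object must fit within ObjSize (512 bytes)

  void* operator new(std::size_t sz)
  {
    void* p = pool_t::get_instance()->allocate_node(sz);
    if (p == nullptr) {
      throw std::bad_alloc(); // pool exhausted
    }
    return p;
  }
  void operator delete(void* p) { pool_t::get_instance()->deallocate_node(p); }
};

// Allocations now hit the caller's thread-local cache first and only touch the
// mutexed shared cache when that cache is empty:
//   my_large_obj* obj = new my_large_obj();
//   delete obj;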

View File

@@ -13,6 +13,7 @@
#ifndef SRSRAN_MEM_POOL_H
#define SRSRAN_MEM_POOL_H
#include "memblock_cache.h"
#include "srsran/common/thread_pool.h"
#include <cassert>
#include <cstdint>
@@ -21,121 +22,6 @@
namespace srsran {
/// Stores provided mem blocks in a stack in an non-owning manner. Not thread-safe
class memblock_stack
{
struct node {
node* prev;
explicit node(node* prev_) : prev(prev_) {}
};
public:
constexpr static size_t min_memblock_size() { return sizeof(node); }
memblock_stack() = default;
memblock_stack(const memblock_stack&) = delete;
memblock_stack(memblock_stack&& other) noexcept : head(other.head) { other.head = nullptr; }
memblock_stack& operator=(const memblock_stack&) = delete;
memblock_stack& operator=(memblock_stack&& other) noexcept
{
head = other.head;
other.head = nullptr;
return *this;
}
void push(uint8_t* block) noexcept
{
// printf("head: %ld\n", (long)head);
node* next = ::new (block) node(head);
head = next;
count++;
}
uint8_t* try_pop() noexcept
{
if (is_empty()) {
return nullptr;
}
node* last_head = head;
head = head->prev;
count--;
return (uint8_t*)last_head;
}
bool is_empty() const { return head == nullptr; }
size_t size() const { return count; }
void clear() { head = nullptr; }
private:
node* head = nullptr;
size_t count = 0;
};
/// memblock stack that mutexes pushing/popping
class mutexed_memblock_stack
{
public:
mutexed_memblock_stack() = default;
mutexed_memblock_stack(const mutexed_memblock_stack&) = delete;
mutexed_memblock_stack(mutexed_memblock_stack&& other) noexcept
{
std::unique_lock<std::mutex> lk1(other.mutex, std::defer_lock);
std::unique_lock<std::mutex> lk2(mutex, std::defer_lock);
std::lock(lk1, lk2);
stack = std::move(other.stack);
}
mutexed_memblock_stack& operator=(const mutexed_memblock_stack&) = delete;
mutexed_memblock_stack& operator=(mutexed_memblock_stack&& other) noexcept
{
std::unique_lock<std::mutex> lk1(other.mutex, std::defer_lock);
std::unique_lock<std::mutex> lk2(mutex, std::defer_lock);
std::lock(lk1, lk2);
stack = std::move(other.stack);
return *this;
}
void push(uint8_t* block) noexcept
{
std::lock_guard<std::mutex> lock(mutex);
stack.push(block);
}
uint8_t* try_pop() noexcept
{
std::lock_guard<std::mutex> lock(mutex);
uint8_t* block = stack.try_pop();
return block;
}
bool is_empty() const noexcept { return stack.is_empty(); }
size_t size() const noexcept
{
std::lock_guard<std::mutex> lock(mutex);
return stack.size();
}
void clear()
{
std::lock_guard<std::mutex> lock(mutex);
stack.clear();
}
private:
memblock_stack stack;
mutable std::mutex mutex;
};
/**
* Pool specialized for big objects. Created objects are not contiguous in memory.
* Relevant methods:
@@ -149,7 +35,7 @@ template <typename T, bool ThreadSafe = false>
class big_obj_pool
{
// memory stack type derivation (thread safe or not)
using stack_type = typename std::conditional<ThreadSafe, mutexed_memblock_stack, memblock_stack>::type;
using stack_type = typename std::conditional<ThreadSafe, mutexed_memblock_cache, memblock_cache>::type;
// memory stack to cache allocate memory chunks
stack_type stack;
@@ -161,7 +47,7 @@ public:
void* allocate_node(size_t sz)
{
assert(sz == sizeof(T));
static const size_t blocksize = std::max(sizeof(T), memblock_stack::min_memblock_size());
static const size_t blocksize = std::max(sizeof(T), memblock_cache::min_memblock_size());
uint8_t* block = stack.try_pop();
if (block == nullptr) {
block = new uint8_t[blocksize];
@@ -179,7 +65,7 @@ public:
/// Pre-reserve N memory chunks for future object allocations
void reserve(size_t N)
{
static const size_t blocksize = std::max(sizeof(T), memblock_stack::min_memblock_size());
static const size_t blocksize = std::max(sizeof(T), memblock_cache::min_memblock_size());
for (size_t i = 0; i < N; ++i) {
stack.push(new uint8_t[blocksize]);
}
@@ -284,7 +170,7 @@ private:
// memory stack to cache allocate memory chunks
std::mutex mutex;
memblock_stack obj_cache;
memblock_cache obj_cache;
std::vector<std::unique_ptr<batch_obj_t> > batches;
};
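
The stack_type alias above is a compile-time switch between the plain and the mutexed cache. A self-contained sketch of the same std::conditional pattern, assuming memblock_cache.h is included (the tiny_pool name is illustrative):

// Sketch only: selects a locked or unlocked cache at compile time, as big_obj_pool does above.
#include <type_traits>

template <bool ThreadSafe>
class tiny_pool
{
  using cache_t = typename std::conditional<ThreadSafe,
                                            srsran::mutexed_memblock_cache,
                                            srsran::memblock_cache>::type;
  cache_t cache; // protected by a mutex only when ThreadSafe == true
};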

View File

@@ -0,0 +1,151 @@
/**
*
* \section COPYRIGHT
*
* Copyright 2013-2021 Software Radio Systems Limited
*
* By using this file, you agree to the terms and conditions set
* forth in the LICENSE file which can be found at the top level of
* the distribution.
*
*/
#ifndef SRSRAN_MEMBLOCK_CACHE_H
#define SRSRAN_MEMBLOCK_CACHE_H
#include <array>
#include <cstddef>
#include <cstdint>
#include <mutex>
namespace srsran {
/// Stores provided memory blocks in a stack in a non-owning manner. Not thread-safe
class memblock_cache
{
struct node {
node* prev;
explicit node(node* prev_) : prev(prev_) {}
};
public:
constexpr static size_t min_memblock_size() { return sizeof(node); }
memblock_cache() = default;
memblock_cache(const memblock_cache&) = delete;
memblock_cache(memblock_cache&& other) noexcept : head(other.head) { other.head = nullptr; }
memblock_cache& operator=(const memblock_cache&) = delete;
memblock_cache& operator=(memblock_cache&& other) noexcept
{
head = other.head;
other.head = nullptr;
return *this;
}
void push(void* block) noexcept
{
// printf("head: %ld\n", (long)head);
node* next = ::new (block) node(head);
head = next;
count++;
}
uint8_t* try_pop() noexcept
{
if (is_empty()) {
return nullptr;
}
node* last_head = head;
head = head->prev;
count--;
return (uint8_t*)last_head;
}
bool is_empty() const { return head == nullptr; }
size_t size() const { return count; }
void clear() { head = nullptr; }
private:
node* head = nullptr;
size_t count = 0;
};
/// Memblock stack that protects pushing/popping with a mutex
class mutexed_memblock_cache
{
public:
mutexed_memblock_cache() = default;
mutexed_memblock_cache(const mutexed_memblock_cache&) = delete;
mutexed_memblock_cache(mutexed_memblock_cache&& other) noexcept
{
std::unique_lock<std::mutex> lk1(other.mutex, std::defer_lock);
std::unique_lock<std::mutex> lk2(mutex, std::defer_lock);
std::lock(lk1, lk2);
stack = std::move(other.stack);
}
mutexed_memblock_cache& operator=(const mutexed_memblock_cache&) = delete;
mutexed_memblock_cache& operator=(mutexed_memblock_cache&& other) noexcept
{
std::unique_lock<std::mutex> lk1(other.mutex, std::defer_lock);
std::unique_lock<std::mutex> lk2(mutex, std::defer_lock);
std::lock(lk1, lk2);
stack = std::move(other.stack);
return *this;
}
void push(void* block) noexcept
{
std::lock_guard<std::mutex> lock(mutex);
stack.push(block);
}
uint8_t* try_pop() noexcept
{
std::lock_guard<std::mutex> lock(mutex);
uint8_t* block = stack.try_pop();
return block;
}
template <size_t N>
size_t try_pop(std::array<void*, N>& result) noexcept
{
std::lock_guard<std::mutex> lock(mutex);
size_t i = 0;
for (; i < N; ++i) {
result[i] = stack.try_pop();
if (result[i] == nullptr) {
break;
}
}
return i;
}
bool is_empty() const noexcept { return stack.is_empty(); }
size_t size() const noexcept
{
std::lock_guard<std::mutex> lock(mutex);
return stack.size();
}
void clear()
{
std::lock_guard<std::mutex> lock(mutex);
stack.clear();
}
private:
memblock_cache stack;
mutable std::mutex mutex;
};
} // namespace srsran
#endif // SRSRAN_MEMBLOCK_CACHE_H
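
For clarity, the cache is an intrusive free list: the first bytes of every freed block are reinterpreted as a node that points to the previous head, so the cache itself never allocates. A minimal usage sketch of both variants, assuming memblock_cache.h is included (block sizes and names are illustrative):

// Sketch only: non-owning LIFO cache plus the batched pop of the mutexed variant.
#include <array>
#include <cstddef>
#include <cstdint>

void memblock_cache_example()
{
  srsran::memblock_cache cache;

  // Blocks must be at least min_memblock_size() bytes; heap blocks are suitably aligned.
  uint8_t* block1 = new uint8_t[64];
  uint8_t* block2 = new uint8_t[64];
  cache.push(block1);
  cache.push(block2);
  uint8_t* b = cache.try_pop(); // LIFO: returns block2, or nullptr when empty

  srsran::mutexed_memblock_cache shared;
  shared.push(b);
  std::array<void*, 8> batch{};
  std::size_t n = shared.try_pop(batch); // grabs up to 8 blocks under a single lock
  (void)n;                               // one block popped in this example

  // The caches are non-owning; the caller keeps ownership of the memory.
  delete[] block1;
  delete[] block2;
}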

View File

@@ -10,7 +10,8 @@
*
*/
#include "srsran/adt/mem_pool.h"
#include "srsran/adt/pool/fixed_size_pool.h"
#include "srsran/adt/pool/mem_pool.h"
#include "srsran/common/test_common.h"
class C
@@ -75,9 +76,68 @@ int test_nontrivial_obj_pool()
return SRSRAN_SUCCESS;
}
struct BigObj {
C c;
std::array<uint8_t, 500> space;
using pool_t = srsran::concurrent_fixed_memory_pool<1024, 512>;
void* operator new(size_t sz)
{
srsran_assert(sz == sizeof(BigObj), "Allocated node size and object size do not match");
return pool_t::get_instance()->allocate_node(sizeof(BigObj));
}
void* operator new(size_t sz, const std::nothrow_t& nothrow_value) noexcept
{
srsran_assert(sz == sizeof(BigObj), "Allocated node size and object size do not match");
return pool_t::get_instance()->allocate_node(sizeof(BigObj));
}
void operator delete(void* ptr) { pool_t::get_instance()->deallocate_node(ptr); }
};
void test_fixedsize_pool()
{
{
std::vector<std::unique_ptr<BigObj> > vec(BigObj::pool_t::size());
for (size_t i = 0; i < BigObj::pool_t::size(); ++i) {
vec[i].reset(new BigObj());
TESTASSERT(vec[i].get() != nullptr);
}
std::unique_ptr<BigObj> obj(new (std::nothrow) BigObj());
TESTASSERT(obj == nullptr);
vec.clear();
obj = std::unique_ptr<BigObj>(new (std::nothrow) BigObj());
TESTASSERT(obj != nullptr);
obj.reset();
}
// TEST: one thread allocates, and the other deallocates
{
std::unique_ptr<BigObj> obj;
std::atomic<bool> stop(false);
srsran::dyn_blocking_queue<std::unique_ptr<BigObj> > queue(BigObj::pool_t::size() / 2);
std::thread t([&queue, &stop]() {
while (not stop.load(std::memory_order_relaxed)) {
std::unique_ptr<BigObj> obj(new (std::nothrow) BigObj());
TESTASSERT(obj != nullptr);
queue.push_blocking(std::move(obj));
}
});
for (size_t i = 0; i < BigObj::pool_t::size() * 8; ++i) {
obj = queue.pop_blocking();
TESTASSERT(obj != nullptr);
}
stop.store(true);
t.join();
}
}
int main()
{
TESTASSERT(test_nontrivial_obj_pool() == SRSRAN_SUCCESS);
test_fixedsize_pool();
srsran::console("Success\n");
return 0;
}

View File

@@ -15,7 +15,7 @@
#include "mac_controller.h"
#include "rrc.h"
#include "srsran/adt/mem_pool.h"
#include "srsran/adt/pool/mem_pool.h"
#include "srsran/interfaces/enb_phy_interfaces.h"
#include "srsran/interfaces/pdcp_interface_types.h"

View File

@@ -14,7 +14,7 @@
#include "srsenb/hdr/stack/rrc/mac_controller.h"
#include "srsenb/hdr/stack/rrc/rrc_mobility.h"
#include "srsenb/hdr/stack/rrc/ue_rr_cfg.h"
#include "srsran/adt/mem_pool.h"
#include "srsran/adt/pool/mem_pool.h"
#include "srsran/asn1/rrc_utils.h"
#include "srsran/common/enb_events.h"
#include "srsran/common/int_helpers.h"