From 6c5e28bc195a9a4f540e66589794a8306db2927d Mon Sep 17 00:00:00 2001 From: Xavier Arteaga Date: Wed, 14 Apr 2021 19:43:29 +0200 Subject: [PATCH] Optimised AVX512 LDPC decoder hard decision --- lib/src/phy/fec/ldpc/ldpc_dec_c_avx512.c | 4 +--- lib/src/phy/fec/ldpc/ldpc_dec_c_avx512long.c | 4 +--- lib/src/phy/fec/utils_avx512.h | 17 +++++++++++++++++ 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/lib/src/phy/fec/ldpc/ldpc_dec_c_avx512.c b/lib/src/phy/fec/ldpc/ldpc_dec_c_avx512.c index 9868f7112..4bf8dedfe 100644 --- a/lib/src/phy/fec/ldpc/ldpc_dec_c_avx512.c +++ b/lib/src/phy/fec/ldpc/ldpc_dec_c_avx512.c @@ -232,9 +232,7 @@ int extract_ldpc_message_c_avx512(void* p, uint8_t* message, uint16_t liftK) int ini = 0; for (int i = 0; i < liftK; i = i + vp->ls) { - for (int k = 0; k < vp->ls; k++) { - message[i + k] = (vp->soft_bits.c[ini + k] < 0); - } + fec_avx512_hard_decision_c(&vp->soft_bits.c[ini], &message[i], vp->ls); ini = ini + SRSRAN_AVX512_B_SIZE; } diff --git a/lib/src/phy/fec/ldpc/ldpc_dec_c_avx512long.c b/lib/src/phy/fec/ldpc/ldpc_dec_c_avx512long.c index 72c247879..8c71d9cdb 100644 --- a/lib/src/phy/fec/ldpc/ldpc_dec_c_avx512long.c +++ b/lib/src/phy/fec/ldpc/ldpc_dec_c_avx512long.c @@ -292,9 +292,7 @@ int extract_ldpc_message_c_avx512long(void* p, uint8_t* message, uint16_t liftK) int ini = 0; for (int i = 0; i < liftK; i = i + vp->ls) { - for (int k = 0; k < vp->ls; k++) { - message[i + k] = (vp->soft_bits->c[ini + k] < 0); - } + fec_avx512_hard_decision_c(&vp->soft_bits->c[ini], &message[i], vp->ls); ini = ini + vp->node_size; } diff --git a/lib/src/phy/fec/utils_avx512.h b/lib/src/phy/fec/utils_avx512.h index 5b47d14be..e4173dc62 100644 --- a/lib/src/phy/fec/utils_avx512.h +++ b/lib/src/phy/fec/utils_avx512.h @@ -26,4 +26,21 @@ #define SRSRAN_AVX512_B_SIZE 64 /*!< \brief Number of packed bytes in an AVX512 instruction. */ #define SRSRAN_AVX512_B_SIZE_LOG 6 /*!< \brief \f$\log_2\f$ of \ref SRSRAN_AVX512_B_SIZE. */ +#ifdef LV_HAVE_AVX512 + +#include + +static inline void fec_avx512_hard_decision_c(const int8_t* llr, uint8_t* message, int nof_llr) +{ + int k = 0; + for (; k < nof_llr - (SRSRAN_AVX512_B_SIZE - 1); k += SRSRAN_AVX512_B_SIZE) { + __mmask64 mask = _mm512_cmpge_epi8_mask(_mm512_load_si512((__m512i*)&llr[k]), _mm512_set1_epi8(0)); + _mm512_storeu_si512((__m512i*)&message[k], _mm512_mask_blend_epi8(mask, _mm512_set1_epi8(1), _mm512_set1_epi8(0))); + } + for (; k < nof_llr; k++) { + message[k] = (llr[k] < 0); + } +} +#endif // LV_HAVE_AVX512 + #endif // SRSRAN_UTILS_AVX512_H