From 86750b2db73516e3bb6107594005c4b686135176 Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Tue, 17 Jan 2017 11:31:03 +0100 Subject: [PATCH 01/55] removed volk dependency. Checked and working --- cmake/modules/FindVolk.cmake | 144 ------------------ srslte/CMakeLists.txt | 19 --- srslte/include/srslte/utils/vector_simd.h | 20 +++ srslte/lib/CMakeLists.txt | 7 - srslte/lib/utils/vector.c | 167 ++++----------------- srslte/lib/utils/vector_simd.c | 170 ++++++++++++++++++++++ 6 files changed, 222 insertions(+), 305 deletions(-) delete mode 100644 cmake/modules/FindVolk.cmake diff --git a/cmake/modules/FindVolk.cmake b/cmake/modules/FindVolk.cmake deleted file mode 100644 index 5dbe17cd5..000000000 --- a/cmake/modules/FindVolk.cmake +++ /dev/null @@ -1,144 +0,0 @@ -INCLUDE(FindPkgConfig) -PKG_CHECK_MODULES(PC_VOLK volk QUIET) - -FIND_PATH( - VOLK_INCLUDE_DIRS - NAMES volk/volk.h - HINTS $ENV{VOLK_DIR}/include - ${CMAKE_INSTALL_PREFIX}/include - ${PC_VOLK_INCLUDE_DIR} - PATHS /usr/local/include - /usr/include -) - -FIND_LIBRARY( - VOLK_LIBRARIES - NAMES volk - HINTS $ENV{VOLK_DIR}/lib - ${CMAKE_INSTALL_PREFIX}/lib - ${CMAKE_INSTALL_PREFIX}/lib64 - ${PC_VOLK_LIBDIR} - PATHS /usr/local/lib - /usr/local/lib64 - /usr/lib - /usr/lib64 -) - -INCLUDE(FindPackageHandleStandardArgs) -FIND_PACKAGE_HANDLE_STANDARD_ARGS(VOLK DEFAULT_MSG VOLK_LIBRARIES VOLK_INCLUDE_DIRS) -MARK_AS_ADVANCED(VOLK_LIBRARIES VOLK_INCLUDE_DIRS VOLK_DEFINITIONS) - -IF(VOLK_FOUND) - SET(CMAKE_REQUIRED_LIBRARIES ${VOLK_LIBRARIES} m) - CHECK_FUNCTION_EXISTS_MATH(volk_16i_s32f_convert_32f HAVE_VOLK_CONVERT_IF_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_index_max_16u HAVE_VOLK_MAX_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_max_32f HAVE_VOLK_MAX_VEC_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_accumulator_s32f HAVE_VOLK_ACC_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_s32fc_multiply_32fc HAVE_VOLK_MULT_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_conjugate_32fc HAVE_VOLK_CONJ_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_multiply_32fc HAVE_VOLK_MULT2_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_multiply_conjugate_32fc HAVE_VOLK_MULT2_CONJ_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_32f_multiply_32fc HAVE_VOLK_MULT_REAL_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_s32f_multiply_32f HAVE_VOLK_MULT_FLOAT_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_magnitude_32f HAVE_VOLK_MAG_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_magnitude_squared_32f HAVE_VOLK_MAG_SQUARE_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_divide_32f HAVE_VOLK_DIVIDE_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_dot_prod_32fc HAVE_VOLK_DOTPROD_FC_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_32f_dot_prod_32fc HAVE_VOLK_DOTPROD_CFC_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_conjugate_dot_prod_32fc HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_dot_prod_32f HAVE_VOLK_DOTPROD_F_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_s32f_atan2_32f HAVE_VOLK_ATAN_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_s32f_convert_16i HAVE_VOLK_CONVERT_FI_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_deinterleave_32f_x2 HAVE_VOLK_DEINTERLEAVE_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_interleave_32fc HAVE_VOLK_INTERLEAVE_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_subtract_32f HAVE_VOLK_SUB_FLOAT_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_add_32f HAVE_VOLK_ADD_FLOAT_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_square_dist_32f HAVE_VOLK_SQUARE_DIST_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_deinterleave_real_32f HAVE_VOLK_DEINTERLEAVE_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32fc_index_max_16u HAVE_VOLK_MAX_ABS_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_multiply_32f HAVE_VOLK_MULT_REAL2_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_16i_max_star_16i HAVE_VOLK_MAX_STAR_S_FUNCTION) - CHECK_FUNCTION_EXISTS_MATH(volk_8i_convert_16i HAVE_VOLK_CONVERT_CI_FUNCTION) - - - - SET(VOLK_DEFINITIONS "HAVE_VOLK") - IF(${HAVE_VOLK_CONVERT_CI_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONVERT_CI_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAX_STAR_S_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_STAR_S_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAX_ABS_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_ABS_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAX_VEC_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_VEC_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAG_SQUARE_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAG_SQUARE_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_SQUARE_DIST_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_SQUARE_DIST_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DEINTERLEAVE_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DEINTERLEAVE_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_INTERLEAVE_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_INTERLEAVE_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_SUB_FLOAT_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_SUB_FLOAT_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_ADD_FLOAT_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_ADD_FLOAT_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT2_CONJ_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT2_CONJ_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DEINTERLEAVE_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DEINTERLEAVE_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_CONVERT_FI_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONVERT_FI_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAX_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_ACC_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_ACC_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_CONJ_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONJ_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT2_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT2_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT_FLOAT_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_FLOAT_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MULT_REAL_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_REAL_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_MAG_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAG_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DIVIDE_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DIVIDE_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DOTPROD_FC_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_FC_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_DOTPROD_F_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_F_FUNCTION") - ENDIF() - IF(${HAVE_VOLK_ATAN_FUNCTION}) - SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_ATAN_FUNCTION") - ENDIF() -ENDIF(VOLK_FOUND) diff --git a/srslte/CMakeLists.txt b/srslte/CMakeLists.txt index 0ecf9ddf6..82daf4df2 100644 --- a/srslte/CMakeLists.txt +++ b/srslte/CMakeLists.txt @@ -74,25 +74,6 @@ else(BLADERF_FOUND OR UHD_FOUND) add_definitions(-DDISABLE_RF) endif(BLADERF_FOUND OR UHD_FOUND) -include(CheckFunctionExistsMath) -if(${DISABLE_VOLK}) - if(${DISABLE_VOLK} EQUAL 0) - find_package(Volk) - else(${DISABLE_VOLK} EQUAL 0) - message(STATUS "VOLK library disabled (DISABLE_VOLK=1)") - endif(${DISABLE_VOLK} EQUAL 0) -else(${DISABLE_VOLK}) - find_package(Volk) -endif(${DISABLE_VOLK}) - -if(VOLK_FOUND) - include_directories(${VOLK_INCLUDE_DIRS}) - link_directories(${VOLK_LIBRARY_DIRS}) - message(STATUS " Compiling with VOLK SIMD library.") -else(VOLK_FOUND) - message(STATUS " VOLK SIMD library NOT found. Using generic implementation.") -endif(VOLK_FOUND) - ######################################################################## # Add subdirectories ######################################################################## diff --git a/srslte/include/srslte/utils/vector_simd.h b/srslte/include/srslte/utils/vector_simd.h index cd6eb4d28..cfdef5ecd 100644 --- a/srslte/include/srslte/utils/vector_simd.h +++ b/srslte/include/srslte/utils/vector_simd.h @@ -49,6 +49,26 @@ SRSLTE_API void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, SRSLTE_API void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len); +SRSLTE_API void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len); + +SRSLTE_API void srslte_vec_abs_square_cf_simd(cf_t *x, float *abs_square, uint32_t len); + +SRSLTE_API cf_t srslte_vec_dot_prod_ccc_simd(cf_t *x, cf_t *y, uint32_t len); + +SRSLTE_API void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len); + +SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, uint32_t len); + +SRSLTE_API void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len); + +SRSLTE_API void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, uint32_t len); + +SRSLTE_API float srslte_vec_acc_ff_simd(float *x, uint32_t len); + +SRSLTE_API cf_t srslte_vec_dot_prod_cfc_simd(cf_t *x, float *y, uint32_t len); + +SRSLTE_API void srslte_vec_convert_if_simd(int16_t *x, float *z, float scale, uint32_t len); + #ifdef __cplusplus } #endif diff --git a/srslte/lib/CMakeLists.txt b/srslte/lib/CMakeLists.txt index 3a73d8761..1e7b11df6 100644 --- a/srslte/lib/CMakeLists.txt +++ b/srslte/lib/CMakeLists.txt @@ -90,13 +90,6 @@ if(RF_FOUND) endif(BLADERF_FOUND) endif(RF_FOUND) -if(VOLK_FOUND) - target_link_libraries(srslte ${VOLK_LIBRARIES}) - if(NOT DisableMEX) - target_link_libraries(srslte_static ${VOLK_LIBRARIES}) - endif(NOT DisableMEX) -endif(VOLK_FOUND) - INSTALL(TARGETS srslte DESTINATION ${LIBRARY_DIR}) SRSLTE_SET_PIC(srslte) diff --git a/srslte/lib/utils/vector.c b/srslte/lib/utils/vector.c index 469320177..578a644c2 100644 --- a/srslte/lib/utils/vector.c +++ b/srslte/lib/utils/vector.c @@ -35,10 +35,6 @@ #include "srslte/utils/vector_simd.h" #include "srslte/utils/bit.h" -#ifdef HAVE_VOLK -#include "volk/volk.h" -#endif - int srslte_vec_acc_ii(int *x, uint32_t len) { int i; int z=0; @@ -48,19 +44,14 @@ int srslte_vec_acc_ii(int *x, uint32_t len) { return z; } +// Used in PRACH detector float srslte_vec_acc_ff(float *x, uint32_t len) { -#ifdef HAVE_VOLK_ACC_FUNCTION - float result; - volk_32f_accumulator_s32f(&result,x,len); - return result; -#else int i; float z=0; for (i=0;i y[i]) { @@ -752,17 +656,11 @@ void srslte_vec_max_fff(float *x, float *y, float *z, uint32_t len) { z[i] = y[i]; } } -#endif } +// CP autocorr uint32_t srslte_vec_max_abs_ci(cf_t *x, uint32_t len) { -#ifdef HAVE_VOLK_MAX_ABS_FUNCTION - uint32_t target=0; - volk_32fc_index_max_16u(&target,x,len); - return target; - -#else uint32_t i; float m=-FLT_MAX; uint32_t p=0; @@ -775,7 +673,6 @@ uint32_t srslte_vec_max_abs_ci(cf_t *x, uint32_t len) { } } return p; -#endif } void srslte_vec_quant_fuc(float *in, uint8_t *out, float gain, float offset, float clip, uint32_t len) { diff --git a/srslte/lib/utils/vector_simd.c b/srslte/lib/utils/vector_simd.c index 1612f2c07..01a3d4c64 100644 --- a/srslte/lib/utils/vector_simd.c +++ b/srslte/lib/utils/vector_simd.c @@ -280,3 +280,173 @@ void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len) } #endif } + + +// for enb no-volk +void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len) { +#ifdef LV_HAVE_SSE + unsigned int number = 0; + const unsigned int points = len / 4; + + const float* xPtr = (const float*) x; + const float* yPtr = (const float*) y; + float* zPtr = (float*) z; + + __m128 xVal, yVal, zVal; + for(;number < points; number++){ + + xVal = _mm_load_ps(xPtr); + yVal = _mm_load_ps(yPtr); + + zVal = _mm_add_ps(xVal, yVal); + + _mm_store_ps(zPtr, zVal); + + xPtr += 4; + yPtr += 4; + zPtr += 4; + } + + number = points * 4; + for(;number < len; number++){ + z[number] = x[number] + y[number]; + } +#endif +} + +static inline __m128 _mm_complexmul_ps(__m128 x, __m128 y) { + __m128 yl, yh, tmp1, tmp2; + yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr + yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di + tmp1 = _mm_mul_ps(x, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr + x = _mm_shuffle_ps(x, x, 0xB1); // Re-arrange x to be ai,ar,bi,br + tmp2 = _mm_mul_ps(x, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di + return _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di +} + +void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len) +{ +#ifdef LV_HAVE_SSE + unsigned int number = 0; + const unsigned int halfPoints = len / 2; + + __m128 xVal, yVal, zVal; + float* zPtr = (float*) z; + const float* xPtr = (const float*) x; + const float* yPtr = (const float*) y; + + for(; number < halfPoints; number++){ + xVal = _mm_load_ps(xPtr); + yVal = _mm_load_ps(yPtr); + zVal = _mm_complexmul_ps(xVal, yVal); + _mm_store_ps(zPtr, zVal); + + xPtr += 4; + yPtr += 4; + zPtr += 4; + } + + if((len % 2) != 0){ + *zPtr = (*xPtr) * (*yPtr); + } +#endif +} + +static inline __m128 _mm_complexmulconj_ps(__m128 x, __m128 y) { + const __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f); + y = _mm_xor_ps(y, conjugator); + return _mm_complexmul_ps(x, y); +} + +void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len) { +#ifdef LV_HAVE_SSE + unsigned int number = 0; + const unsigned int halfPoints = len / 2; + + __m128 xVal, yVal, zVal; + float* zPtr = (float*) z; + const float* xPtr = (const float*) x; + const float* yPtr = (const float*) y; + + for(; number < halfPoints; number++){ + xVal = _mm_load_ps(xPtr); + yVal = _mm_load_ps(yPtr); + zVal = _mm_complexmulconj_ps(xVal, yVal); + _mm_store_ps(zPtr, zVal); + + xPtr += 4; + yPtr += 4; + zPtr += 4; + } + + if((len % 2) != 0){ + *zPtr = (*xPtr) * (*yPtr); + } +#endif +} + +void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, uint32_t len) { +#ifdef LV_HAVE_SSE + unsigned int number = 0; + const unsigned int halfPoints = len / 2; + + __m128 xVal, yl, yh, zVal, tmp1, tmp2; + float* zPtr = (float*) z; + const float* xPtr = (const float*) x; + + // Set up constant scalar vector + yl = _mm_set_ps1(creal(h)); + yh = _mm_set_ps1(cimag(h)); + + for(;number < halfPoints; number++){ + + xVal = _mm_load_ps(xPtr); + tmp1 = _mm_mul_ps(xVal,yl); + xVal = _mm_shuffle_ps(xVal,xVal,0xB1); + tmp2 = _mm_mul_ps(xVal,yh); + zVal = _mm_addsub_ps(tmp1,tmp2); + _mm_storeu_ps(zPtr,zVal); + + xPtr += 4; + zPtr += 4; + } + + if((len % 2) != 0) { + *zPtr = (*xPtr) * h; + } +#endif +} + +void srslte_vec_abs_square_cf_simd(cf_t *x, float *z, uint32_t len) { +#ifdef LV_HAVE_SSE + unsigned int number = 0; + const unsigned int quarterPoints = len / 4; + + const float* xPtr = (const float*) x; + float* zPtr = z; + + __m128 xVal1, xVal2, zVal; + for(; number < quarterPoints; number++){ + xVal1 = _mm_load_ps(xPtr); + xPtr += 4; + xVal2 = _mm_load_ps(xPtr); + xPtr += 4; + xVal1 = _mm_mul_ps(xVal1, xVal1); + xVal2 = _mm_mul_ps(xVal2, xVal2); + zVal = _mm_hadd_ps(xVal1, xVal2); + _mm_store_ps(zPtr, zVal); + zPtr += 4; + } + + number = quarterPoints * 4; + for(; number < len; number++){ + float val1Real = *xPtr++; + float val1Imag = *xPtr++; + *zPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag); + } +#endif +} + + + + From 40c161c2e6f74ed62765d7a6767f8d4a98295a8f Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Fri, 20 Jan 2017 11:50:16 +0100 Subject: [PATCH 02/55] ifdef for simd functions --- srslte/lib/utils/vector_simd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/srslte/lib/utils/vector_simd.c b/srslte/lib/utils/vector_simd.c index 01a3d4c64..c150209ca 100644 --- a/srslte/lib/utils/vector_simd.c +++ b/srslte/lib/utils/vector_simd.c @@ -314,6 +314,7 @@ void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len) { #endif } +#ifdef LV_HAVE_SSE static inline __m128 _mm_complexmul_ps(__m128 x, __m128 y) { __m128 yl, yh, tmp1, tmp2; yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr @@ -323,6 +324,7 @@ static inline __m128 _mm_complexmul_ps(__m128 x, __m128 y) { tmp2 = _mm_mul_ps(x, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di return _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di } +#endif void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len) { @@ -352,11 +354,13 @@ void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len) #endif } +#ifdef LV_HAVE_SSE static inline __m128 _mm_complexmulconj_ps(__m128 x, __m128 y) { const __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f); y = _mm_xor_ps(y, conjugator); return _mm_complexmul_ps(x, y); } +#endif void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len) { #ifdef LV_HAVE_SSE From 949d4b8df84a1f7895c03375c50ce6ed77eac155 Mon Sep 17 00:00:00 2001 From: Andre Puschmann Date: Wed, 25 Jan 2017 10:18:27 +0100 Subject: [PATCH 03/55] novolk: use unaligned load/store SSE intrinsics, allow debug builds --- srslte/lib/fec/rm_turbo.c | 13 +++++++++---- srslte/lib/utils/vector_simd.c | 18 ++++++++++-------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/srslte/lib/fec/rm_turbo.c b/srslte/lib/fec/rm_turbo.c index 5856a4e46..751c9fe0c 100644 --- a/srslte/lib/fec/rm_turbo.c +++ b/srslte/lib/fec/rm_turbo.c @@ -327,8 +327,11 @@ int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len, lutVal = _mm_loadu_si128(lutPtr); for (int j=0;j<8;j++) { - int16_t x = (int16_t) _mm_extract_epi16(xVal, j); - uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, j); + // For -O0 builds: shuffle j-th element to pos 0 and extract from there + _mm_shuffle_epi8(xVal,_mm_set1_epi8(j)); + int16_t x = (int16_t) _mm_extract_epi16(xVal, 0); + _mm_shuffle_epi8(lutVal,_mm_set1_epi8(j)); + uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, 0); output[l] += x; } xPtr ++; @@ -346,8 +349,10 @@ int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len, lutVal = _mm_loadu_si128(lutPtr); for (int j=0;j<8;j++) { - int16_t x = (int16_t) _mm_extract_epi16(xVal, j); - uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, j); + _mm_shuffle_epi8(xVal,_mm_set1_epi8(j)); + int16_t x = (int16_t) _mm_extract_epi16(xVal, 0); + _mm_shuffle_epi8(lutVal,_mm_set1_epi8(j)); + uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, 0); output[l] += x; } xPtr++; diff --git a/srslte/lib/utils/vector_simd.c b/srslte/lib/utils/vector_simd.c index c150209ca..8d91c5f42 100644 --- a/srslte/lib/utils/vector_simd.c +++ b/srslte/lib/utils/vector_simd.c @@ -227,8 +227,10 @@ void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t l lutVal = _mm_load_si128(lutPtr); for (int i=0;i<8;i++) { - int16_t x = (int16_t) _mm_extract_epi16(xVal, i); - uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, i); + _mm_shuffle_epi8(xVal,_mm_set1_epi8(i)); + int16_t x = (int16_t) _mm_extract_epi16(xVal, 0); + _mm_shuffle_epi8(lutVal,_mm_set1_epi8(i)); + uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, 0); y[l] = x; } xPtr ++; @@ -295,12 +297,12 @@ void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len) { __m128 xVal, yVal, zVal; for(;number < points; number++){ - xVal = _mm_load_ps(xPtr); - yVal = _mm_load_ps(yPtr); + xVal = _mm_loadu_ps(xPtr); + yVal = _mm_loadu_ps(yPtr); zVal = _mm_add_ps(xVal, yVal); - _mm_store_ps(zPtr, zVal); + _mm_storeu_ps(zPtr, zVal); xPtr += 4; yPtr += 4; @@ -338,10 +340,10 @@ void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len) const float* yPtr = (const float*) y; for(; number < halfPoints; number++){ - xVal = _mm_load_ps(xPtr); - yVal = _mm_load_ps(yPtr); + xVal = _mm_loadu_ps(xPtr); + yVal = _mm_loadu_ps(yPtr); zVal = _mm_complexmul_ps(xVal, yVal); - _mm_store_ps(zPtr, zVal); + _mm_storeu_ps(zPtr, zVal); xPtr += 4; yPtr += 4; From 78de0c718b9f3059d7e47c8c2ce9b661fcdded8e Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Wed, 25 Jan 2017 15:20:47 +0100 Subject: [PATCH 04/55] fixed alignment problem in vec_abs_simd --- srslte/lib/utils/vector_simd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/srslte/lib/utils/vector_simd.c b/srslte/lib/utils/vector_simd.c index c150209ca..25c1d2b7d 100644 --- a/srslte/lib/utils/vector_simd.c +++ b/srslte/lib/utils/vector_simd.c @@ -431,14 +431,14 @@ void srslte_vec_abs_square_cf_simd(cf_t *x, float *z, uint32_t len) { __m128 xVal1, xVal2, zVal; for(; number < quarterPoints; number++){ - xVal1 = _mm_load_ps(xPtr); + xVal1 = _mm_loadu_ps(xPtr); xPtr += 4; - xVal2 = _mm_load_ps(xPtr); + xVal2 = _mm_loadu_ps(xPtr); xPtr += 4; xVal1 = _mm_mul_ps(xVal1, xVal1); xVal2 = _mm_mul_ps(xVal2, xVal2); zVal = _mm_hadd_ps(xVal1, xVal2); - _mm_store_ps(zPtr, zVal); + _mm_storeu_ps(zPtr, zVal); zPtr += 4; } From f2a35c6dd1ec58e3db91029b50e7252ec441e6d2 Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Wed, 25 Jan 2017 17:30:16 +0100 Subject: [PATCH 05/55] fixed tests with new simd functions --- CMakeLists.txt | 2 - srslte/include/srslte/utils/vector_simd.h | 2 + srslte/lib/utils/vector.c | 11 +-- srslte/lib/utils/vector_simd.c | 109 ++++++++++++++-------- 4 files changed, 77 insertions(+), 47 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 206139295..ae8e9c532 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -115,9 +115,7 @@ if(CMAKE_COMPILER_IS_GNUCC) if (HAVE_AVX2) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -mfpmath=sse -mavx2 -Ofast -funroll-loops -DLV_HAVE_AVX -DLV_HAVE_SSE") else (HAVE_AVX2) - message("NOT HAVE AVX2") if(HAVE_AVX) - message("HAVE AVX") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -mfpmath=sse -mavx -Ofast -funroll-loops -DLV_HAVE_AVX -DLV_HAVE_SSE") elseif(HAVE_SSE) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -mfpmath=sse -msse4.1 -Ofast -funroll-loops -DLV_HAVE_SSE") diff --git a/srslte/include/srslte/utils/vector_simd.h b/srslte/include/srslte/utils/vector_simd.h index cfdef5ecd..81aed443f 100644 --- a/srslte/include/srslte/utils/vector_simd.h +++ b/srslte/include/srslte/utils/vector_simd.h @@ -51,6 +51,8 @@ SRSLTE_API void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, ui SRSLTE_API void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len); +SRSLTE_API void srslte_vec_sc_prod_cfc_simd(cf_t *x, float h, cf_t *z, uint32_t len); + SRSLTE_API void srslte_vec_abs_square_cf_simd(cf_t *x, float *abs_square, uint32_t len); SRSLTE_API cf_t srslte_vec_dot_prod_ccc_simd(cf_t *x, cf_t *y, uint32_t len); diff --git a/srslte/lib/utils/vector.c b/srslte/lib/utils/vector.c index 578a644c2..52e8b54d0 100644 --- a/srslte/lib/utils/vector.c +++ b/srslte/lib/utils/vector.c @@ -200,20 +200,19 @@ void srslte_vec_norm_cfc(cf_t *x, float amplitude, cf_t *y, uint32_t len) { } // Used throughout -void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len) { -#ifndef LV_HAVE_SSE +void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len) { + #ifndef LV_HAVE_SSE int i; for (i=0;i Date: Wed, 25 Jan 2017 17:38:22 +0100 Subject: [PATCH 06/55] restored rm_turbo (test not passing) --- srslte/lib/fec/rm_turbo.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/srslte/lib/fec/rm_turbo.c b/srslte/lib/fec/rm_turbo.c index 751c9fe0c..31cbd82ca 100644 --- a/srslte/lib/fec/rm_turbo.c +++ b/srslte/lib/fec/rm_turbo.c @@ -327,11 +327,8 @@ int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len, lutVal = _mm_loadu_si128(lutPtr); for (int j=0;j<8;j++) { - // For -O0 builds: shuffle j-th element to pos 0 and extract from there - _mm_shuffle_epi8(xVal,_mm_set1_epi8(j)); - int16_t x = (int16_t) _mm_extract_epi16(xVal, 0); - _mm_shuffle_epi8(lutVal,_mm_set1_epi8(j)); - uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, 0); + int16_t x = (int16_t) _mm_extract_epi16(xVal, j); + uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, j); output[l] += x; } xPtr ++; @@ -349,10 +346,8 @@ int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len, lutVal = _mm_loadu_si128(lutPtr); for (int j=0;j<8;j++) { - _mm_shuffle_epi8(xVal,_mm_set1_epi8(j)); - int16_t x = (int16_t) _mm_extract_epi16(xVal, 0); - _mm_shuffle_epi8(lutVal,_mm_set1_epi8(j)); - uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, 0); + int16_t x = (int16_t) _mm_extract_epi16(xVal, j); + uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, j); output[l] += x; } xPtr++; @@ -718,4 +713,3 @@ int srslte_rm_turbo_rx(float *w_buff, uint32_t w_buff_len, float *input, uint32_ return 0; } - From 2758ba4118bc9764552a5ea59c8eebe49d3f9c6a Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Wed, 25 Jan 2017 17:40:28 +0100 Subject: [PATCH 07/55] fixed lut in vector simd (now all tests passing) --- srslte/lib/utils/vector_simd.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/srslte/lib/utils/vector_simd.c b/srslte/lib/utils/vector_simd.c index ba30e190f..16bbbc0bb 100644 --- a/srslte/lib/utils/vector_simd.c +++ b/srslte/lib/utils/vector_simd.c @@ -227,10 +227,8 @@ void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t l lutVal = _mm_loadu_si128(lutPtr); for (int i=0;i<8;i++) { - _mm_shuffle_epi8(xVal,_mm_set1_epi8(i)); - int16_t x = (int16_t) _mm_extract_epi16(xVal, 0); - _mm_shuffle_epi8(lutVal,_mm_set1_epi8(i)); - uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, 0); + int16_t x = (int16_t) _mm_extract_epi16(xVal, i); + uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, i); y[l] = x; } xPtr ++; From 9acb1002e96c6e5ed655bc885749030b75463132 Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Thu, 26 Jan 2017 17:48:32 +0100 Subject: [PATCH 08/55] added more functions to simd for UE --- srslte/include/srslte/utils/vector_simd.h | 10 +- srslte/lib/utils/vector.c | 33 ++++- srslte/lib/utils/vector_simd.c | 164 +++++++++++++++++++++- 3 files changed, 193 insertions(+), 14 deletions(-) diff --git a/srslte/include/srslte/utils/vector_simd.h b/srslte/include/srslte/utils/vector_simd.h index 81aed443f..8380a75de 100644 --- a/srslte/include/srslte/utils/vector_simd.h +++ b/srslte/include/srslte/utils/vector_simd.h @@ -53,11 +53,15 @@ SRSLTE_API void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len) SRSLTE_API void srslte_vec_sc_prod_cfc_simd(cf_t *x, float h, cf_t *z, uint32_t len); +SRSLTE_API void srslte_vec_sc_prod_fff_simd(float *x, float h, float *z, uint32_t len); + SRSLTE_API void srslte_vec_abs_square_cf_simd(cf_t *x, float *abs_square, uint32_t len); SRSLTE_API cf_t srslte_vec_dot_prod_ccc_simd(cf_t *x, cf_t *y, uint32_t len); -SRSLTE_API void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len); +SRSLTE_API void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len); + +SRSLTE_API void srslte_vec_sub_fff_simd(float *x, float *h, float *z, uint32_t len); SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, uint32_t len); @@ -65,6 +69,10 @@ SRSLTE_API void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t SRSLTE_API void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, uint32_t len); +SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, uint32_t len); + +SRSLTE_API void srslte_vec_sc_prod_cfc_simd(cf_t *x, float h, cf_t *z, uint32_t len); + SRSLTE_API float srslte_vec_acc_ff_simd(float *x, uint32_t len); SRSLTE_API cf_t srslte_vec_dot_prod_cfc_simd(cf_t *x, float *y, uint32_t len); diff --git a/srslte/lib/utils/vector.c b/srslte/lib/utils/vector.c index 52e8b54d0..10fe38165 100644 --- a/srslte/lib/utils/vector.c +++ b/srslte/lib/utils/vector.c @@ -44,7 +44,7 @@ int srslte_vec_acc_ii(int *x, uint32_t len) { return z; } -// Used in PRACH detector +// Used in PRACH detector, AGC and chest_dl for noise averaging float srslte_vec_acc_ff(float *x, uint32_t len) { int i; float z=0; @@ -79,10 +79,14 @@ void srslte_vec_square_dist(cf_t symbol, cf_t *points, float *distance, uint32_t } void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len) { +#ifndef LV_HAVE_SSE int i; for (i=0;i Date: Fri, 27 Jan 2017 11:56:59 +0000 Subject: [PATCH 09/55] Added GCC_ARCH option to manually set -march in GCC --- CMakeLists.txt | 15 ++++++++------- cmake/modules/FindSSE.cmake | 8 ++++---- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ae8e9c532..d36de57aa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,6 +54,7 @@ configure_file( option(DisableMEX "DisableMEX" ON) option(StaticMKL "StaticMKL" OFF) option(DisableBladeRF "DisableBladeRF" OFF) +set(GCC_ARCH native CACHE STRING "GCC compile for specific architecture.") ######################################################################## # Install Dirs @@ -101,24 +102,24 @@ endif(CMAKE_COMPILER_IS_GNUCXX) if(CMAKE_COMPILER_IS_GNUCC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-write-strings -Wno-format-extra-args -Winline -Wno-unused-result -Wno-format -std=c99 -D_GNU_SOURCE -g") + find_package(SSE) + if(${CMAKE_BUILD_TYPE} STREQUAL "Debug") - find_package(SSE) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O0") if(HAVE_AVX) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -mfpmath=sse -mavx -DLV_HAVE_AVX -DLV_HAVE_SSE") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${GCC_ARCH} -mfpmath=sse -mavx -DLV_HAVE_AVX -DLV_HAVE_SSE") elseif(HAVE_SSE) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -mfpmath=sse -msse4.1 -DLV_HAVE_SSE") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${GCC_ARCH} -mfpmath=sse -msse4.1 -DLV_HAVE_SSE") endif(HAVE_AVX) else(${CMAKE_BUILD_TYPE} STREQUAL "Debug") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3") - find_package(SSE) if (HAVE_AVX2) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -mfpmath=sse -mavx2 -Ofast -funroll-loops -DLV_HAVE_AVX -DLV_HAVE_SSE") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${GCC_ARCH} -mfpmath=sse -mavx2 -Ofast -funroll-loops -DLV_HAVE_AVX -DLV_HAVE_SSE") else (HAVE_AVX2) if(HAVE_AVX) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -mfpmath=sse -mavx -Ofast -funroll-loops -DLV_HAVE_AVX -DLV_HAVE_SSE") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${GCC_ARCH} -mfpmath=sse -mavx -Ofast -funroll-loops -DLV_HAVE_AVX -DLV_HAVE_SSE") elseif(HAVE_SSE) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=native -mfpmath=sse -msse4.1 -Ofast -funroll-loops -DLV_HAVE_SSE") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=${GCC_ARCH} -mfpmath=sse -msse4.1 -Ofast -funroll-loops -DLV_HAVE_SSE") endif(HAVE_AVX) endif (HAVE_AVX2) diff --git a/cmake/modules/FindSSE.cmake b/cmake/modules/FindSSE.cmake index 7b258f70f..9dbbeef3e 100644 --- a/cmake/modules/FindSSE.cmake +++ b/cmake/modules/FindSSE.cmake @@ -1,9 +1,9 @@ include(CheckCSourceRuns) -option(ENABLE_SSE "Enable compile-time SSE4.1 support." ON) -option(ENABLE_AVX "Enable compile-time AVX support." ON) -option(ENABLE_AVX2 "Enable compile-time AVX2 support." ON) +option(ENABLE_SSE "Enable compile-time SSE4.1 support." ON) +option(ENABLE_AVX "Enable compile-time AVX support." ON) +option(ENABLE_AVX2 "Enable compile-time AVX2 support." ON) if (ENABLE_SSE) # @@ -96,4 +96,4 @@ if (ENABLE_SSE) endif() -mark_as_advanced(HAVE_SSE, HAVE_AVX, HAVE_AVX2) \ No newline at end of file +mark_as_advanced(HAVE_SSE, HAVE_AVX, HAVE_AVX2) From 979a590dc92e2a83aac74eb9b7c270031528ff97 Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Fri, 7 Apr 2017 14:54:24 +0200 Subject: [PATCH 10/55] comment references to uhd::register_handler --- srslte/lib/rf/uhd_c_api.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/srslte/lib/rf/uhd_c_api.cpp b/srslte/lib/rf/uhd_c_api.cpp index 92af48f69..1b7772e68 100644 --- a/srslte/lib/rf/uhd_c_api.cpp +++ b/srslte/lib/rf/uhd_c_api.cpp @@ -9,6 +9,7 @@ extern "C" { #include "uhd_c_api.h" } +/* #if UHD_VERSION < 31100 static void (*handler)(const char*); @@ -26,6 +27,7 @@ void rf_uhd_register_msg_handler_c(void (*new_handler)(const char*)) uhd::msg::register_handler(translate_handler); #endif } +*/ void uhd_tx_metadata_set_time_spec(uhd_tx_metadata_handle *md, time_t secs, double frac_secs) { From 8e440f512ab1ac9490aad73a5688882c31fc70e1 Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Fri, 7 Apr 2017 15:06:51 +0200 Subject: [PATCH 11/55] comment references to uhd::register_handler --- srslte/lib/rf/uhd_c_api.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/srslte/lib/rf/uhd_c_api.cpp b/srslte/lib/rf/uhd_c_api.cpp index 1b7772e68..93792722d 100644 --- a/srslte/lib/rf/uhd_c_api.cpp +++ b/srslte/lib/rf/uhd_c_api.cpp @@ -19,15 +19,17 @@ void translate_handler(uhd::msg::type_t type, const std::string & msg) handler(msg.c_str()); } #endif +*/ void rf_uhd_register_msg_handler_c(void (*new_handler)(const char*)) { +/* #if UHD_VERSION < 31100 handler = new_handler; uhd::msg::register_handler(translate_handler); #endif -} */ +} void uhd_tx_metadata_set_time_spec(uhd_tx_metadata_handle *md, time_t secs, double frac_secs) { From ec34d56e77082a66a01d45c825f7dce739c4d17e Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Thu, 8 Jun 2017 15:15:00 +0200 Subject: [PATCH 12/55] final merging changes --- CMakeLists.txt | 2 - lib/src/phy/rf/uhd_c_api.cpp | 4 -- lib/src/phy/utils/vector.c | 20 ++++-- lib/src/phy/utils/vector_simd.c | 8 +-- srslte/CMakeLists.txt | 84 ---------------------- srslte/include/srslte/utils/vector_simd.h | 86 ----------------------- 6 files changed, 17 insertions(+), 187 deletions(-) delete mode 100644 srslte/CMakeLists.txt delete mode 100644 srslte/include/srslte/utils/vector_simd.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 1cc2640e2..d7871d71d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -195,8 +195,6 @@ endmacro(ADD_CXX_COMPILER_FLAG_IF_AVAILABLE) if(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=${GCC_ARCH} -Wall -Wno-comment -Wno-reorder -Wno-unused-but-set-variable -Wno-unused-variable -std=c++03") - find_package(SSE) - if(${CMAKE_BUILD_TYPE} STREQUAL "Debug") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -DDEBUG_MODE") else(${CMAKE_BUILD_TYPE} STREQUAL "Debug") diff --git a/lib/src/phy/rf/uhd_c_api.cpp b/lib/src/phy/rf/uhd_c_api.cpp index c0fb6bfd3..da348c17b 100644 --- a/lib/src/phy/rf/uhd_c_api.cpp +++ b/lib/src/phy/rf/uhd_c_api.cpp @@ -9,7 +9,6 @@ extern "C" { #include "uhd_c_api.h" } -/* #if UHD_VERSION < 31100 static void (*handler)(const char*); @@ -19,16 +18,13 @@ void translate_handler(uhd::msg::type_t type, const std::string & msg) handler(msg.c_str()); } #endif -*/ void rf_uhd_register_msg_handler_c(void (*new_handler)(const char*)) { -/* #if UHD_VERSION < 31100 handler = new_handler; uhd::msg::register_handler(translate_handler); #endif -*/ } void uhd_tx_metadata_set_time_spec(uhd_tx_metadata_handle *md, time_t secs, double frac_secs) diff --git a/lib/src/phy/utils/vector.c b/lib/src/phy/utils/vector.c index 3146c760d..daa03d0b4 100644 --- a/lib/src/phy/utils/vector.c +++ b/lib/src/phy/utils/vector.c @@ -57,12 +57,18 @@ int srslte_vec_acc_ii(int *x, uint32_t len) { // Used in PRACH detector, AGC and chest_dl for noise averaging float srslte_vec_acc_ff(float *x, uint32_t len) { - int i; - float z=0; - for (i=0;i -#include -#include "srslte/config.h" - -SRSLTE_API int srslte_vec_dot_prod_sss_simd(short *x, short *y, uint32_t len); - -SRSLTE_API void srslte_vec_sum_sss_simd(short *x, short *y, short *z, uint32_t len); - -SRSLTE_API void srslte_vec_sub_sss_simd(short *x, short *y, short *z, uint32_t len); - -SRSLTE_API void srslte_vec_prod_sss_simd(short *x, short *y, short *z, uint32_t len); - -SRSLTE_API void srslte_vec_sc_div2_sss_simd(short *x, int n_rightshift, short *z, uint32_t len); - -SRSLTE_API void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t len); - -SRSLTE_API void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len); - -SRSLTE_API void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len); - -SRSLTE_API void srslte_vec_sc_prod_cfc_simd(cf_t *x, float h, cf_t *z, uint32_t len); - -SRSLTE_API void srslte_vec_sc_prod_fff_simd(float *x, float h, float *z, uint32_t len); - -SRSLTE_API void srslte_vec_abs_square_cf_simd(cf_t *x, float *abs_square, uint32_t len); - -SRSLTE_API cf_t srslte_vec_dot_prod_ccc_simd(cf_t *x, cf_t *y, uint32_t len); - -SRSLTE_API void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len); - -SRSLTE_API void srslte_vec_sub_fff_simd(float *x, float *h, float *z, uint32_t len); - -SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, uint32_t len); - -SRSLTE_API void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len); - -SRSLTE_API void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, uint32_t len); - -SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, uint32_t len); - -SRSLTE_API void srslte_vec_sc_prod_cfc_simd(cf_t *x, float h, cf_t *z, uint32_t len); - -SRSLTE_API float srslte_vec_acc_ff_simd(float *x, uint32_t len); - -SRSLTE_API cf_t srslte_vec_dot_prod_cfc_simd(cf_t *x, float *y, uint32_t len); - -SRSLTE_API void srslte_vec_convert_if_simd(int16_t *x, float *z, float scale, uint32_t len); - -#ifdef __cplusplus -} -#endif - -#endif From 2bba9d187d32ce18fc0e752f423c637b10ab0335 Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Fri, 7 Jul 2017 16:36:27 +0200 Subject: [PATCH 13/55] fixed dotprodconj. Removed unaligned load/store --- lib/src/phy/utils/vector.c | 2 +- lib/src/phy/utils/vector_simd.c | 16 ++++++++-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/src/phy/utils/vector.c b/lib/src/phy/utils/vector.c index 61d0271b6..8ceefe3da 100644 --- a/lib/src/phy/utils/vector.c +++ b/lib/src/phy/utils/vector.c @@ -608,7 +608,7 @@ cf_t srslte_vec_dot_prod_conj_ccc(cf_t *x, cf_t *y, uint32_t len) { uint32_t i; cf_t res = 0; for (i=0;i Date: Fri, 7 Jul 2017 18:04:59 +0200 Subject: [PATCH 14/55] missing return statement --- lib/include/srslte/common/metrics_hub.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/include/srslte/common/metrics_hub.h b/lib/include/srslte/common/metrics_hub.h index 8443ef65a..9575347a2 100644 --- a/lib/include/srslte/common/metrics_hub.h +++ b/lib/include/srslte/common/metrics_hub.h @@ -34,6 +34,7 @@ public: bool init(metrics_interface *m_, float report_period_secs=1.0) { m = m_; start_periodic(report_period_secs*1e6); + return true; } void stop() { thread_cancel(); @@ -47,7 +48,7 @@ private: void run_period() { metrics_t metric; m->get_metrics(metric); - for (int i=0;iset_metrics(metric); } } From f629e10fcfdf70cbc43b0e7c00a4eb18ac15f08b Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Fri, 7 Jul 2017 18:44:17 +0200 Subject: [PATCH 15/55] fixed bug in sub_sse() and added couple of avx functions --- lib/include/srslte/phy/utils/vector_simd.h | 4 ++ lib/src/phy/utils/vector.c | 12 +++- lib/src/phy/utils/vector_simd.c | 65 +++++++++++++++++++++- 3 files changed, 76 insertions(+), 5 deletions(-) diff --git a/lib/include/srslte/phy/utils/vector_simd.h b/lib/include/srslte/phy/utils/vector_simd.h index 6fe55f89d..1010cbed6 100644 --- a/lib/include/srslte/phy/utils/vector_simd.h +++ b/lib/include/srslte/phy/utils/vector_simd.h @@ -49,8 +49,12 @@ SRSLTE_API void srslte_vec_sub_sss_avx2(short *x, short *y, short *z, uint32_t l SRSLTE_API void srslte_vec_sum_fff_sse(float *x, float *y, float *z, uint32_t len); +SRSLTE_API void srslte_vec_sum_fff_avx(float *x, float *y, float *z, uint32_t len); + SRSLTE_API void srslte_vec_sub_fff_sse(float *x, float *y, float *z, uint32_t len); +SRSLTE_API void srslte_vec_sub_fff_avx(float *x, float *y, float *z, uint32_t len); + SRSLTE_API void srslte_vec_sc_prod_fff_sse(float *x, float h, float *z, uint32_t len); SRSLTE_API void srslte_vec_sc_prod_ccc_sse(cf_t *x, cf_t h, cf_t *z, uint32_t len); diff --git a/lib/src/phy/utils/vector.c b/lib/src/phy/utils/vector.c index 8ceefe3da..2a2001ae3 100644 --- a/lib/src/phy/utils/vector.c +++ b/lib/src/phy/utils/vector.c @@ -101,9 +101,13 @@ void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len) { for (i=0;i Date: Tue, 11 Jul 2017 13:17:26 +0200 Subject: [PATCH 16/55] fixed segfault due to race condition in scrambling sequence pre-generation --- lib/include/srslte/phy/phch/pdsch.h | 3 ++- lib/include/srslte/phy/phch/pucch.h | 3 ++- lib/include/srslte/phy/phch/pusch.h | 3 ++- lib/src/phy/phch/pdsch.c | 24 +++++++++++---------- lib/src/phy/phch/pucch.c | 8 ++++--- lib/src/phy/phch/pusch.c | 33 +++++++++++++++-------------- 6 files changed, 41 insertions(+), 33 deletions(-) diff --git a/lib/include/srslte/phy/phch/pdsch.h b/lib/include/srslte/phy/phch/pdsch.h index 7730d2fa1..ad01c4ef8 100644 --- a/lib/include/srslte/phy/phch/pdsch.h +++ b/lib/include/srslte/phy/phch/pdsch.h @@ -48,7 +48,8 @@ #include "srslte/phy/phch/pdsch_cfg.h" typedef struct { - srslte_sequence_t seq[SRSLTE_NSUBFRAMES_X_FRAME]; + srslte_sequence_t seq[SRSLTE_NSUBFRAMES_X_FRAME]; + bool sequence_generated; } srslte_pdsch_user_t; /* PDSCH object */ diff --git a/lib/include/srslte/phy/phch/pucch.h b/lib/include/srslte/phy/phch/pucch.h index 3542dc53f..56d512418 100644 --- a/lib/include/srslte/phy/phch/pucch.h +++ b/lib/include/srslte/phy/phch/pucch.h @@ -80,7 +80,8 @@ typedef struct SRSLTE_API { } srslte_pucch_cfg_t; typedef struct { - srslte_sequence_t seq_f2[SRSLTE_NSUBFRAMES_X_FRAME]; + srslte_sequence_t seq_f2[SRSLTE_NSUBFRAMES_X_FRAME]; + bool sequence_generated; } srslte_pucch_user_t; /* PUCCH object */ diff --git a/lib/include/srslte/phy/phch/pusch.h b/lib/include/srslte/phy/phch/pusch.h index bf04a4781..e5ee43995 100644 --- a/lib/include/srslte/phy/phch/pusch.h +++ b/lib/include/srslte/phy/phch/pusch.h @@ -61,7 +61,8 @@ typedef struct { } srslte_pusch_hopping_cfg_t; typedef struct { - srslte_sequence_t seq[SRSLTE_NSUBFRAMES_X_FRAME]; + srslte_sequence_t seq[SRSLTE_NSUBFRAMES_X_FRAME]; + bool sequences_generated; } srslte_pusch_user_t; /* PUSCH object */ diff --git a/lib/src/phy/phch/pdsch.c b/lib/src/phy/phch/pdsch.c index 8791ac8e7..9b6128c64 100644 --- a/lib/src/phy/phch/pdsch.c +++ b/lib/src/phy/phch/pdsch.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "prb_dl.h" #include "srslte/phy/phch/pdsch.h" @@ -362,6 +363,7 @@ int srslte_pdsch_set_rnti(srslte_pdsch_t *q, uint16_t rnti) { return SRSLTE_ERROR; } } + q->users[rnti]->sequence_generated = true; } } return SRSLTE_SUCCESS; @@ -467,15 +469,15 @@ int srslte_pdsch_decode_multi(srslte_pdsch_t *q, srslte_demod_soft_demodulate_s(cfg->grant.mcs.mod, q->d, q->e, cfg->nbits.nof_re); /* descramble */ - if (!q->users[rnti]) { - srslte_sequence_t seq; + if (q->users[rnti] && q->users[rnti]->sequence_generated) { + srslte_scrambling_s_offset(&q->users[rnti]->seq[cfg->sf_idx], q->e, 0, cfg->nbits.nof_bits); + } else { + srslte_sequence_t seq; if (srslte_sequence_pdsch(&seq, rnti, 0, 2 * cfg->sf_idx, q->cell.id, cfg->nbits.nof_bits)) { - return SRSLTE_ERROR; + return SRSLTE_ERROR; } - srslte_scrambling_s_offset(&seq, q->e, 0, cfg->nbits.nof_bits); + srslte_scrambling_s_offset(&seq, q->e, 0, cfg->nbits.nof_bits); srslte_sequence_free(&seq); - } else { - srslte_scrambling_s_offset(&q->users[rnti]->seq[cfg->sf_idx], q->e, 0, cfg->nbits.nof_bits); } if (SRSLTE_VERBOSE_ISDEBUG()) { @@ -537,15 +539,15 @@ int srslte_pdsch_encode(srslte_pdsch_t *q, } /* scramble */ - if (!q->users[rnti]) { - srslte_sequence_t seq; + if (q->users[rnti] && q->users[rnti]->sequence_generated) { + srslte_scrambling_bytes(&q->users[rnti]->seq[cfg->sf_idx], (uint8_t*) q->e, cfg->nbits.nof_bits); + } else { + srslte_sequence_t seq; if (srslte_sequence_pdsch(&seq, rnti, 0, 2 * cfg->sf_idx, q->cell.id, cfg->nbits.nof_bits)) { - return SRSLTE_ERROR; + return SRSLTE_ERROR; } srslte_scrambling_bytes(&seq, (uint8_t*) q->e, cfg->nbits.nof_bits); srslte_sequence_free(&seq); - } else { - srslte_scrambling_bytes(&q->users[rnti]->seq[cfg->sf_idx], (uint8_t*) q->e, cfg->nbits.nof_bits); } srslte_mod_modulate_bytes(&q->mod[cfg->grant.mcs.mod], (uint8_t*) q->e, q->d, cfg->nbits.nof_bits); diff --git a/lib/src/phy/phch/pucch.c b/lib/src/phy/phch/pucch.c index c58f69871..6a889b89c 100644 --- a/lib/src/phy/phch/pucch.c +++ b/lib/src/phy/phch/pucch.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "srslte/phy/ch_estimation/refsignal_ul.h" #include "srslte/phy/phch/pucch.h" @@ -489,7 +490,7 @@ void srslte_pucch_clear_rnti(srslte_pucch_t *q, uint16_t rnti) { int srslte_pucch_set_crnti(srslte_pucch_t *q, uint16_t rnti) { if (!q->users[rnti]) { - q->users[rnti] = malloc(sizeof(srslte_pucch_user_t)); + q->users[rnti] = calloc(1, sizeof(srslte_pucch_user_t)); if (q->users[rnti]) { for (uint32_t sf_idx=0;sf_idxusers[rnti]->sequence_generated = true; } } return SRSLTE_SUCCESS; @@ -591,7 +593,7 @@ static int uci_mod_bits(srslte_pucch_t *q, srslte_pucch_format_t format, uint8_t case SRSLTE_PUCCH_FORMAT_2: case SRSLTE_PUCCH_FORMAT_2A: case SRSLTE_PUCCH_FORMAT_2B: - if (q->users[rnti]) { + if (q->users[rnti] && q->users[rnti]->sequence_generated) { memcpy(q->bits_scram, bits, SRSLTE_PUCCH2_NOF_BITS*sizeof(uint8_t)); srslte_scrambling_b(&q->users[rnti]->seq_f2[sf_idx], q->bits_scram); srslte_mod_modulate(&q->mod, q->bits_scram, q->d, SRSLTE_PUCCH2_NOF_BITS); @@ -796,7 +798,7 @@ int srslte_pucch_decode(srslte_pucch_t* q, srslte_pucch_format_t format, case SRSLTE_PUCCH_FORMAT_2: case SRSLTE_PUCCH_FORMAT_2A: case SRSLTE_PUCCH_FORMAT_2B: - if (q->users[rnti]) { + if (q->users[rnti] && q->users[rnti]->sequence_generated) { pucch_encode_(q, format, n_pucch, sf_idx, rnti, NULL, ref, true); srslte_vec_prod_conj_ccc(q->z, ref, q->z_tmp, SRSLTE_PUCCH_MAX_SYMBOLS); for (int i=0;imax_re * srslte_mod_bits_x_symbol(SRSLTE_MOD_64QAM))) { return SRSLTE_ERROR; } - } + } + q->users[rnti]->sequences_generated = true; } } return SRSLTE_SUCCESS; @@ -444,15 +445,15 @@ int srslte_pusch_encode(srslte_pusch_t *q, srslte_pusch_cfg_t *cfg, srslte_softb return SRSLTE_ERROR; } - if (!q->users[rnti]) { - srslte_sequence_t seq; - if (srslte_sequence_pusch(&seq, rnti, 2 * cfg->sf_idx, q->cell.id, cfg->nbits.nof_bits)) { - return SRSLTE_ERROR; - } - srslte_scrambling_bytes(&seq, (uint8_t*) q->q, cfg->nbits.nof_bits); - srslte_sequence_free(&seq); + if (q->users[rnti] && q->users[rnti]->sequences_generated) { + srslte_scrambling_bytes(&q->users[rnti]->seq[cfg->sf_idx], (uint8_t*) q->q, cfg->nbits.nof_bits); } else { - srslte_scrambling_bytes(&q->users[rnti]->seq[cfg->sf_idx], (uint8_t*) q->q, cfg->nbits.nof_bits); + srslte_sequence_t seq; + if (srslte_sequence_pusch(&seq, rnti, 2 * cfg->sf_idx, q->cell.id, cfg->nbits.nof_bits)) { + return SRSLTE_ERROR; + } + srslte_scrambling_bytes(&seq, (uint8_t*) q->q, cfg->nbits.nof_bits); + srslte_sequence_free(&seq); } // Correct UCI placeholder/repetition bits @@ -535,13 +536,13 @@ int srslte_pusch_decode(srslte_pusch_t *q, srslte_sequence_t *seq = NULL; // Create sequence if does not exist - if (!q->users[rnti]) { - seq = &q->tmp_seq; - if (srslte_sequence_pusch(seq, rnti, 2 * cfg->sf_idx, q->cell.id, cfg->nbits.nof_bits)) { - return SRSLTE_ERROR; - } + if (q->users[rnti] && q->users[rnti]->sequences_generated) { + seq = &q->users[rnti]->seq[cfg->sf_idx]; } else { - seq = &q->users[rnti]->seq[cfg->sf_idx]; + seq = &q->tmp_seq; + if (srslte_sequence_pusch(seq, rnti, 2 * cfg->sf_idx, q->cell.id, cfg->nbits.nof_bits)) { + return SRSLTE_ERROR; + } } // Decode RI/HARQ bits before descrambling @@ -553,7 +554,7 @@ int srslte_pusch_decode(srslte_pusch_t *q, // Descrambling srslte_scrambling_s_offset(seq, q->q, 0, cfg->nbits.nof_bits); - if (!q->users[rnti]) { + if (!(q->users[rnti] && q->users[rnti]->sequences_generated)) { srslte_sequence_free(seq); } From 2b775462f72602701af800ffe57376d183de53ff Mon Sep 17 00:00:00 2001 From: Xavier Arteaga Date: Mon, 25 Sep 2017 13:07:01 +0200 Subject: [PATCH 17/55] Added LV_HAVE_AVX512 to CMakeLists --- CMakeLists.txt | 5 ++++ cmake/modules/FindSSE.cmake | 46 +++++++++++++++++++++++++++++++++---- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d354a5497..40d3ef4da 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -282,6 +282,11 @@ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") endif(HAVE_AVX) endif (HAVE_AVX2) + if (HAVE_AVX512) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -DLV_HAVE_AVX512") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -DLV_HAVE_AVX512") + endif(HAVE_AVX512) + if(NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug") if(HAVE_SSE) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Ofast -funroll-loops") diff --git a/cmake/modules/FindSSE.cmake b/cmake/modules/FindSSE.cmake index de8b38d1d..4c9673a9d 100644 --- a/cmake/modules/FindSSE.cmake +++ b/cmake/modules/FindSSE.cmake @@ -4,10 +4,11 @@ include(CheckCSourceRuns) -option(ENABLE_SSE "Enable compile-time SSE4.1 support." ON) -option(ENABLE_AVX "Enable compile-time AVX support." ON) -option(ENABLE_AVX2 "Enable compile-time AVX2 support." ON) -option(ENABLE_FMA "Enable compile-time FMA support." ON) +option(ENABLE_SSE "Enable compile-time SSE4.1 support." ON) +option(ENABLE_AVX "Enable compile-time AVX support." ON) +option(ENABLE_AVX2 "Enable compile-time AVX2 support." ON) +option(ENABLE_FMA "Enable compile-time FMA support." ON) +option(ENABLE_AVX512 "Enable compile-time AVX512 support." ON) if (ENABLE_SSE) # @@ -135,6 +136,41 @@ if (ENABLE_SSE) endif() endif() + if (ENABLE_AVX512) + + # + # Check compiler for AVX intrinsics + # + if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_CLANG ) + set(CMAKE_REQUIRED_FLAGS "-mavx512f") + check_c_source_runs(" + #include + int main() + { + __m512i a, b, c; + const int src[16] = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8 , 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF}; + int dst[16]; + a = _mm512_loadu_si512( (__m512i*)src ); + b = _mm512_loadu_si512( (__m512i*)src ); + c = _mm512_add_epi32( a, b ); + _mm512_storeu_si512( (__m512i*)dst, c ); + int i = 0; + for( i = 0; i < 16; i++ ){ + if( ( src[i] + src[i] ) != dst[i] ){ + return -1; + } + } + return 0; + }" + HAVE_AVX512) + endif() + + if (HAVE_AVX512) + message(STATUS "AVX512 is enabled - target CPU must support it") + endif() + endif() + + endif() -mark_as_advanced(HAVE_SSE, HAVE_AVX, HAVE_AVX2, HAVE_FMA) +mark_as_advanced(HAVE_SSE, HAVE_AVX, HAVE_AVX2, HAVE_FMA, HAVE_AVX512) From 8078238cb59624d6c7ef5eec87195f361092c9ea Mon Sep 17 00:00:00 2001 From: Xavier Arteaga Date: Mon, 25 Sep 2017 13:08:38 +0200 Subject: [PATCH 18/55] Removed test macros from mat.h --- lib/include/srslte/phy/utils/mat.h | 8 +------- lib/src/phy/utils/mat.c | 1 + lib/src/phy/utils/test/mat_test.c | 32 +++++++++++++++++++++++++++--- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/lib/include/srslte/phy/utils/mat.h b/lib/include/srslte/phy/utils/mat.h index d960590c4..942559955 100644 --- a/lib/include/srslte/phy/utils/mat.h +++ b/lib/include/srslte/phy/utils/mat.h @@ -27,14 +27,8 @@ #ifndef SRSLTE_MAT_H #define SRSLTE_MAT_H -#include "srslte/phy/utils/simd.h" #include "srslte/config.h" - - -/* - * Generic Macros - */ -#define RANDOM_CF() (((float)rand())/((float)RAND_MAX) + _Complex_I*((float)rand())/((float)RAND_MAX)) +#include "srslte/phy/utils/simd.h" /* Generic implementation for complex reciprocal */ SRSLTE_API cf_t srslte_mat_cf_recip_gen(cf_t a); diff --git a/lib/src/phy/utils/mat.c b/lib/src/phy/utils/mat.c index 439daa2ce..bbfc38135 100644 --- a/lib/src/phy/utils/mat.c +++ b/lib/src/phy/utils/mat.c @@ -27,6 +27,7 @@ #include #include +#include #include "srslte/phy/utils/mat.h" diff --git a/lib/src/phy/utils/test/mat_test.c b/lib/src/phy/utils/test/mat_test.c index 49be5c9ae..46081da98 100644 --- a/lib/src/phy/utils/test/mat_test.c +++ b/lib/src/phy/utils/test/mat_test.c @@ -33,12 +33,18 @@ #include #include "srslte/phy/utils/mat.h" +#include "srslte/phy/utils/simd.h" +#include "srslte/phy/utils/vector.h" bool zf_solver = false; bool mmse_solver = false; bool verbose = false; +#define RANDOM_F() ((float)rand())/((float)RAND_MAX) +#define RANDOM_S() ((int16_t)(rand() && 0x800F)) +#define RANDOM_CF() (RANDOM_F() + _Complex_I*RANDOM_F()) + double elapsed_us(struct timeval *ts_start, struct timeval *ts_end) { if (ts_end->tv_usec > ts_start->tv_usec) { return ((double) ts_end->tv_sec - (double) ts_start->tv_sec) * 1000000 + @@ -49,16 +55,16 @@ double elapsed_us(struct timeval *ts_start, struct timeval *ts_end) { } } -#define NOF_REPETITIONS 1000 +#define BLOCK_SIZE 1000 #define RUN_TEST(FUNCTION) /*TYPE NAME (void)*/ { \ int i;\ struct timeval start, end;\ gettimeofday(&start, NULL); \ bool ret = true; \ - for (i = 0; i < NOF_REPETITIONS; i++) {ret &= FUNCTION ();}\ + for (i = 0; i < BLOCK_SIZE; i++) {ret &= FUNCTION ();}\ gettimeofday(&end, NULL);\ if (verbose) printf("%32s: %s ... %6.2f us/call\n", #FUNCTION, (ret)?"Pass":"Fail", \ - elapsed_us(&start, &end)/NOF_REPETITIONS);\ + elapsed_us(&start, &end)/BLOCK_SIZE);\ passed &= ret;\ } @@ -373,6 +379,24 @@ bool test_mmse_solver_avx(void) { #endif /* LV_HAVE_AVX */ +bool test_vec_dot_prod_ccc(void) { + __attribute__((aligned(256))) cf_t a[14]; + __attribute__((aligned(256))) cf_t b[14]; + cf_t res = 0, gold = 0; + + for (int i = 0; i < 14; i++) { + a[i] = RANDOM_CF(); + b[i] = RANDOM_CF(); + } + + res = srslte_vec_dot_prod_ccc(a, b, 14); + + for (int i=0;i<14;i++) { + gold += a[i]*b[i]; + } + + return (cabsf(res - gold) < 1e-3); +} int main(int argc, char **argv) { bool passed = true; @@ -405,6 +429,8 @@ int main(int argc, char **argv) { #endif /* LV_HAVE_AVX */ } + RUN_TEST(test_vec_dot_prod_ccc); + printf("%s!\n", (passed) ? "Ok" : "Failed"); if (!passed) { From 1c3b5552be004064e16527fba55e2d419a1143b2 Mon Sep 17 00:00:00 2001 From: Xavier Arteaga Date: Mon, 25 Sep 2017 13:15:59 +0200 Subject: [PATCH 19/55] added c16 type and architecture independent inline SIMD calls --- lib/include/srslte/config.h | 1 + lib/include/srslte/phy/utils/simd.h | 833 +++++++++++++++++++++++++++- 2 files changed, 832 insertions(+), 2 deletions(-) diff --git a/lib/include/srslte/config.h b/lib/include/srslte/config.h index 68076c0c8..8a988a971 100644 --- a/lib/include/srslte/config.h +++ b/lib/include/srslte/config.h @@ -59,5 +59,6 @@ // cf_t definition typedef _Complex float cf_t; +typedef _Complex short int c16_t; #endif // CONFIG_H diff --git a/lib/include/srslte/phy/utils/simd.h b/lib/include/srslte/phy/utils/simd.h index 420d07213..774dd54bd 100644 --- a/lib/include/srslte/phy/utils/simd.h +++ b/lib/include/srslte/phy/utils/simd.h @@ -27,6 +27,8 @@ #ifndef SRSLTE_SIMD_H_H #define SRSLTE_SIMD_H_H +#include + /* * SSE Macros */ @@ -44,7 +46,7 @@ /* * AVX Macros */ -#ifdef LV_HAVE_AVX +#ifdef LV_HAVE_AVX2 #define _MM256_MULJ_PS(X) _mm256_permute_ps(_MM256_CONJ_PS(X), 0b10110001) #define _MM256_CONJ_PS(X) (_mm256_xor_ps(X, _mm256_set_ps(-0.0f, 0.0f, -0.0f, 0.0f, -0.0f, 0.0f, -0.0f, 0.0f))) @@ -60,7 +62,7 @@ #define _MM256_PROD_PS(a, b) _mm256_addsub_ps(_mm256_mul_ps(a,_mm256_moveldup_ps(b)),\ _mm256_mul_ps(_mm256_shuffle_ps(a,a,0xB1),_mm256_movehdup_ps(b))) #endif /* LV_HAVE_FMA */ -#endif /* LV_HAVE_AVX */ +#endif /* LV_HAVE_AVX2 */ /* @@ -78,4 +80,831 @@ _mm256_fmsubadd_ps(_mm256_shuffle_ps(A,A,0xB1),_mm256_movehdup_ps(B), C)) #endif /* LV_HAVE_FMA */ + + +/* Memory Sizes for Single Floating Point and fixed point */ +#ifdef LV_HAVE_AVX512 + +#define SRSLTE_SIMD_F_SIZE 16 +#define SRSLTE_SIMD_CF_SIZE 16 + +#define SRSLTE_SIMD_S_SIZE 32 +#define SRSLTE_SIMD_C16_SIZE 0 + +#else +#ifdef LV_HAVE_AVX2 + +#define SRSLTE_SIMD_F_SIZE 8 +#define SRSLTE_SIMD_CF_SIZE 8 + +#define SRSLTE_SIMD_S_SIZE 16 +#define SRSLTE_SIMD_C16_SIZE 16 + +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + +#define SRSLTE_SIMD_F_SIZE 4 +#define SRSLTE_SIMD_CF_SIZE 4 + +#define SRSLTE_SIMD_S_SIZE 8 +#define SRSLTE_SIMD_C16_SIZE 8 + +#else /* LV_HAVE_SSE */ + +#define SRSLTE_SIMD_F_SIZE 0 +#define SRSLTE_SIMD_CF_SIZE 0 + +#define SRSLTE_SIMD_S_SIZE 0 +#define SRSLTE_SIMD_C16_SIZE 0 + +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + + + +#if SRSLTE_SIMD_F_SIZE + +/* Data types */ +#ifdef LV_HAVE_AVX512 +typedef __m512 simd_f_t; +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 +typedef __m256 simd_f_t; +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE +typedef __m128 simd_f_t; +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + +/* Single precision Floating point functions */ +static inline simd_f_t srslte_simd_f_load(float *ptr) { +#ifdef LV_HAVE_AVX512 + return _mm512_load_ps(ptr); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_load_ps(ptr); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_load_ps(ptr); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_loadu(float *ptr) { +#ifdef LV_HAVE_AVX512 + return _mm512_loadu_ps(ptr); +#else /* LV_HAVE_AVX512 */ + #ifdef LV_HAVE_AVX2 + return _mm256_loadu_ps(ptr); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_loadu_ps(ptr); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_f_store(float *ptr, simd_f_t simdreg) { +#ifdef LV_HAVE_AVX512 + _mm512_store_ps(ptr, simdreg); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + _mm256_store_ps(ptr, simdreg); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_store_ps(ptr, simdreg); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_f_storeu(float *ptr, simd_f_t simdreg) { +#ifdef LV_HAVE_AVX512 + _mm512_storeu_ps(ptr, simdreg); +#else /* LV_HAVE_AVX512 */ + #ifdef LV_HAVE_AVX2 + _mm256_storeu_ps(ptr, simdreg); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_storeu_ps(ptr, simdreg); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_set1(float x) { +#ifdef LV_HAVE_AVX512 + return _mm512_set1_ps(x); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_set1_ps(x); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_set1_ps(x); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_mul(simd_f_t a, simd_f_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_mul_ps(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_mul_ps(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_mul_ps(a, b); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_addsub(simd_f_t a, simd_f_t b) { +#ifdef LV_HAVE_AVX512 + __m512 r = _mm512_add_ps(a, b); + return _mm512_mask_sub_ps(r, 0b1010101010101010, a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_addsub_ps(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_addsub_ps(a, b); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_sub(simd_f_t a, simd_f_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_sub_ps(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_sub_ps(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_sub_ps(a, b); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_add(simd_f_t a, simd_f_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_add_ps(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_add_ps(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_add_ps(a, b); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_zero (void) { +#ifdef LV_HAVE_AVX512 + return _mm512_setzero_ps(); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_setzero_ps(); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_setzero_ps(); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_swap(simd_f_t a) { +#ifdef LV_HAVE_AVX512 + return _mm512_permute_ps(a, 0b10110001); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_permute_ps(a, 0b10110001); +#else /* LV_HAVE_AVX2 */ + #ifdef LV_HAVE_SSE + return _mm_shuffle_ps(a, a, 0b10110001); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_hadd(simd_f_t a, simd_f_t b) { +#ifdef LV_HAVE_AVX512 + const __m512i idx1 = _mm512_setr_epi32((0b00000), (0b00010), + (0b00100), (0b00110), + (0b01000), (0b01010), + (0b01100), (0b01110), + (0b10000), (0b10010), + (0b10100), (0b10110), + (0b11000), (0b11010), + (0b11100), (0b11110)); + const __m512i idx2 = _mm512_or_epi32(idx1, _mm512_set1_epi32(1)); + + simd_f_t a1 = _mm512_permutex2var_ps(a, idx1, b); + simd_f_t b1 = _mm512_permutex2var_ps(a, idx2, b); + return _mm512_add_ps(a1, b1); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + simd_f_t a1 = _mm256_permute2f128_ps(a, b, 0b00100000); + simd_f_t b1 = _mm256_permute2f128_ps(a, b, 0b00110001); + return _mm256_hadd_ps(a1, b1); +#else /* LV_HAVE_AVX2 */ + #ifdef LV_HAVE_SSE + return _mm_hadd_ps(a, b); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_f_t srslte_simd_f_sqrt(simd_f_t a) { +#ifdef LV_HAVE_AVX512 + return _mm512_sqrt_ps(a); +#else /* LV_HAVE_AVX512 */ + #ifdef LV_HAVE_AVX2 + return _mm256_sqrt_ps(a); +#else /* LV_HAVE_AVX2 */ + #ifdef LV_HAVE_SSE + return _mm_sqrt_ps(a); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +#endif /* SRSLTE_SIMD_F_SIZE */ + + +#if SRSLTE_SIMD_CF_SIZE + +typedef struct { + simd_f_t re; + simd_f_t im; +} simd_cf_t; + +/* Complex Single precission Floating point functions */ +static inline simd_cf_t srslte_simd_cfi_load(cf_t *ptr) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + __m512 in1 = _mm512_permute_ps(_mm512_load_ps((float*)(ptr)), 0b11011000); + __m512 in2 = _mm512_permute_ps(_mm512_load_ps((float*)(ptr + 8)), 0b11011000); + ret.re = _mm512_unpacklo_ps(in1, in2); + ret.im = _mm512_unpackhi_ps(in1, in2); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + __m256 in1 = _mm256_permute_ps(_mm256_load_ps((float*)(ptr)), 0b11011000); + __m256 in2 = _mm256_permute_ps(_mm256_load_ps((float*)(ptr + 4)), 0b11011000); + ret.re = _mm256_unpacklo_ps(in1, in2); + ret.im = _mm256_unpackhi_ps(in1, in2); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + __m128 i1 = _mm_load_ps((float*)(ptr)); + __m128 i2 = _mm_load_ps((float*)(ptr + 2)); + ret.re = _mm_shuffle_ps(i1, i2, _MM_SHUFFLE(2,0,2,0)); + ret.im = _mm_shuffle_ps(i1, i2, _MM_SHUFFLE(3,1,3,1)); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +/* Complex Single precission Floating point functions */ +static inline simd_cf_t srslte_simd_cfi_loadu(cf_t *ptr) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + __m512 in1 = _mm512_permute_ps(_mm512_loadu_ps((float*)(ptr)), 0b11011000); + __m512 in2 = _mm512_permute_ps(_mm512_loadu_ps((float*)(ptr + 8)), 0b11011000); + ret.re = _mm512_unpacklo_ps(in1, in2); + ret.im = _mm512_unpackhi_ps(in1, in2); +#else /* LV_HAVE_AVX512 */ + #ifdef LV_HAVE_AVX2 + __m256 in1 = _mm256_permute_ps(_mm256_loadu_ps((float*)(ptr)), 0b11011000); + __m256 in2 = _mm256_permute_ps(_mm256_loadu_ps((float*)(ptr + 4)), 0b11011000); + ret.re = _mm256_unpacklo_ps(in1, in2); + ret.im = _mm256_unpackhi_ps(in1, in2); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + __m128 i1 = _mm_loadu_ps((float*)(ptr)); + __m128 i2 = _mm_loadu_ps((float*)(ptr + 2)); + ret.re = _mm_shuffle_ps(i1, i2, _MM_SHUFFLE(2,0,2,0)); + ret.im = _mm_shuffle_ps(i1, i2, _MM_SHUFFLE(3,1,3,1)); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_load(float *re, float *im) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_load_ps(re); + ret.im = _mm512_load_ps(im); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + ret.re = _mm256_load_ps(re); + ret.im = _mm256_load_ps(im); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + ret.re = _mm_load_ps(re); + ret.im = _mm_load_ps(im); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_loadu(float *re, float *im) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_loadu_ps(re); + ret.im = _mm512_loadu_ps(im); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + ret.re = _mm256_loadu_ps(re); + ret.im = _mm256_loadu_ps(im); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + ret.re = _mm_loadu_ps(re); + ret.im = _mm_loadu_ps(im); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline void srslte_simd_cfi_store(cf_t *ptr, simd_cf_t simdreg) { +#ifdef LV_HAVE_AVX512 + __m512 out1 = _mm512_permute_ps(simdreg.re, 0b11011000); + __m512 out2 = _mm512_permute_ps(simdreg.im, 0b11011000); + _mm512_store_ps((float*)(ptr), _mm512_unpacklo_ps(out1, out2)); + _mm512_store_ps((float*)(ptr + 8), _mm512_unpackhi_ps(out1, out2)); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + __m256 out1 = _mm256_permute_ps(simdreg.re, 0b11011000); + __m256 out2 = _mm256_permute_ps(simdreg.im, 0b11011000); + _mm256_store_ps((float*)(ptr), _mm256_unpacklo_ps(out1, out2)); + _mm256_store_ps((float*)(ptr + 4), _mm256_unpackhi_ps(out1, out2)); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_store_ps((float*)(ptr), _mm_unpacklo_ps(simdreg.re, simdreg.im)); + _mm_store_ps((float*)(ptr + 2), _mm_unpackhi_ps(simdreg.re, simdreg.im)); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_cfi_storeu(cf_t *ptr, simd_cf_t simdreg) { +#ifdef LV_HAVE_AVX512 + __m512 out1 = _mm512_permute_ps(simdreg.re, 0b11011000); + __m512 out2 = _mm512_permute_ps(simdreg.im, 0b11011000); + _mm512_storeu_ps((float*)(ptr), _mm512_unpacklo_ps(out1, out2)); + _mm512_storeu_ps((float*)(ptr + 8), _mm512_unpackhi_ps(out1, out2)); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + __m256 out1 = _mm256_permute_ps(simdreg.re, 0b11011000); + __m256 out2 = _mm256_permute_ps(simdreg.im, 0b11011000); + _mm256_storeu_ps((float*)(ptr), _mm256_unpacklo_ps(out1, out2)); + _mm256_storeu_ps((float*)(ptr + 4), _mm256_unpackhi_ps(out1, out2)); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_storeu_ps((float*)(ptr), _mm_unpacklo_ps(simdreg.re, simdreg.im)); + _mm_storeu_ps((float*)(ptr + 2), _mm_unpackhi_ps(simdreg.re, simdreg.im)); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_cf_store(float *re, float *im, simd_cf_t simdreg) { +#ifdef LV_HAVE_AVX512 + _mm512_store_ps(re, simdreg.re); + _mm512_store_ps(im, simdreg.im); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + _mm256_store_ps((float *) re, simdreg.re); + _mm256_store_ps((float *) im, simdreg.im); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_SSE + _mm_store_ps((float *) re, simdreg.re); + _mm_store_ps((float *) im, simdreg.im); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_cf_storeu(float *re, float *im, simd_cf_t simdreg) { +#ifdef LV_HAVE_AVX512 + _mm512_storeu_ps(re, simdreg.re); + _mm512_storeu_ps(im, simdreg.im); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + _mm256_storeu_ps((float *) re, simdreg.re); + _mm256_storeu_ps((float *) im, simdreg.im); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_SSE + _mm_storeu_ps((float *) re, simdreg.re); + _mm_storeu_ps((float *) im, simdreg.im); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_cf_t srslte_simd_cf_set1 (cf_t x) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_set1_ps(__real__ x); + ret.im = _mm512_set1_ps(__imag__ x); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + ret.re = _mm256_set1_ps(__real__ x); + ret.im = _mm256_set1_ps(__imag__ x); +#else +#ifdef LV_HAVE_SSE + ret.re = _mm_set1_ps(__real__ x); + ret.im = _mm_set1_ps(__imag__ x); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_prod (simd_cf_t a, simd_cf_t b) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_sub_ps(_mm512_mul_ps(a.re, b.re), + _mm512_mul_ps(a.im, b.im)); + ret.im = _mm512_add_ps(_mm512_mul_ps(a.re, b.im), + _mm512_mul_ps(a.im, b.re)); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + ret.re = _mm256_sub_ps(_mm256_mul_ps(a.re, b.re), + _mm256_mul_ps(a.im, b.im)); + ret.im = _mm256_add_ps(_mm256_mul_ps(a.re, b.im), + _mm256_mul_ps(a.im, b.re)); +#else +#ifdef LV_HAVE_SSE + ret.re = _mm_sub_ps(_mm_mul_ps(a.re, b.re), + _mm_mul_ps(a.im, b.im)); + ret.im = _mm_add_ps(_mm_mul_ps(a.re, b.im), + _mm_mul_ps(a.im, b.re)); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_conjprod (simd_cf_t a, simd_cf_t b) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_add_ps(_mm512_mul_ps(a.re, b.re), + _mm512_mul_ps(a.im, b.im)); + ret.im = _mm512_sub_ps(_mm512_mul_ps(a.im, b.re), + _mm512_mul_ps(a.re, b.im)); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + ret.re = _mm256_add_ps(_mm256_mul_ps(a.re, b.re), + _mm256_mul_ps(a.im, b.im)); + ret.im = _mm256_sub_ps(_mm256_mul_ps(a.im, b.re), + _mm256_mul_ps(a.re, b.im)); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + ret.re = _mm_add_ps(_mm_mul_ps(a.re, b.re), + _mm_mul_ps(a.im, b.im)); + ret.im = _mm_sub_ps(_mm_mul_ps(a.im, b.re), + _mm_mul_ps(a.re, b.im)); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_add (simd_cf_t a, simd_cf_t b) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_add_ps(a.re, b.re); + ret.im = _mm512_add_ps(a.im, b.im); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + ret.re = _mm256_add_ps(a.re, b.re); + ret.im = _mm256_add_ps(a.im, b.im); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + ret.re = _mm_add_ps(a.re, b.re); + ret.im = _mm_add_ps(a.im, b.im); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_zero (void) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + ret.re = _mm512_setzero_ps(); + ret.im = _mm512_setzero_ps(); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + ret.re = _mm256_setzero_ps(); + ret.im = _mm256_setzero_ps(); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + ret.re = _mm_setzero_ps(); + ret.im = _mm_setzero_ps(); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +#endif /* SRSLTE_SIMD_CF_SIZE */ + + +#if SRSLTE_SIMD_S_SIZE + + +#ifdef LV_HAVE_AVX512 +typedef __m512i simd_s_t; +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 +typedef __m256i simd_s_t; +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE +typedef __m128i simd_s_t; +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + +static inline simd_s_t srslte_simd_s_load(int16_t *ptr) { +#ifdef LV_HAVE_AVX512 + return _mm512_load_si512(ptr); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_load_si256(ptr); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_load_si128(ptr); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_s_t srslte_simd_s_loadu(int16_t *ptr) { +#ifdef LV_HAVE_AVX512 + return _mm512_load_si512(ptr); +#else /* LV_HAVE_AVX512 */ + #ifdef LV_HAVE_AVX2 + return _mm256_load_si256(ptr); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_load_si128(ptr); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_s_store(int16_t *ptr, simd_s_t simdreg) { +#ifdef LV_HAVE_AVX512 + _mm512_store_si512(ptr, simdreg); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + _mm256_store_si256(ptr, simdreg); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_store_si128(ptr, simdreg); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_s_storeu(int16_t *ptr, simd_s_t simdreg) { +#ifdef LV_HAVE_AVX512 + _mm512_storeu_si512(ptr, simdreg); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + _mm256_storeu_si256(ptr, simdreg); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + _mm_storeu_si128(ptr, simdreg); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_s_t srslte_simd_s_zero(void) { +#ifdef LV_HAVE_AVX512 + return _mm512_setzero_si512(); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_setzero_si256(); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_setzero_si128(); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_s_t srslte_simd_s_mul(simd_s_t a, simd_s_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_mullo_epi16(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_mullo_epi16(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_mullo_epi16(a, b); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_s_t srslte_simd_s_add(simd_s_t a, simd_s_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_add_epi16(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_add_epi16(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_add_epi16(a, b); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_s_t srslte_simd_s_sub(simd_s_t a, simd_s_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_sub_epi16(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_sub_epi16(a, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + return _mm_sub_epi16(a, b); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +#endif /* SRSLTE_SIMD_S_SIZE */ + + +#if SRSLTE_SIMD_C16_SIZE + +typedef struct { +#ifdef LV_HAVE_AVX512 + union { + __m512i m512; + int16_t i16[32]; + } re; + union { + __m512i m512; + int16_t i16[32]; + } im; +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + union { + __m256i m256; + int16_t i16[16]; + } re; + union { + __m256i m256; + int16_t i16[16]; + } im; +#else +#ifdef LV_HAVE_SSE + union { + __m128i m128; + int16_t i16[8]; + } re; + union { + __m128i m128; + int16_t i16[8]; + } im; +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} simd_c16_t; + +/* Fixed point precision (16-bit) functions */ +static inline simd_c16_t srslte_simd_c16i_load(c16_t *ptr) { + simd_c16_t ret; +#ifdef LV_HAVE_AVX512 + __m512i in1 = _mm512_load_si512((__m512i*)(ptr)); + __m512i in2 = _mm512_load_si512((__m512i*)(ptr + 8)); + ret.re.m512 = _mm512_mask_blend_epi16(0xAAAAAAAA, in1,_mm512_shufflelo_epi16(_mm512_shufflehi_epi16(in2, 0b10100000), 0b10100000)); + ret.im.m512 = _mm512_mask_blend_epi16(0xAAAAAAAA, _mm512_shufflelo_epi16(_mm512_shufflehi_epi16(in1, 0b11110101), 0b11110101),in2); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_AVX2 + __m256i in1 = _mm256_load_si256((__m256i*)(ptr)); + __m256i in2 = _mm256_load_si256((__m256i*)(ptr + 8)); + ret.re.m256 = _mm256_blend_epi16(in1,_mm256_shufflelo_epi16(_mm256_shufflehi_epi16(in2, 0b10100000), 0b10100000), 0b10101010); + ret.im.m256 = _mm256_blend_epi16(_mm256_shufflelo_epi16(_mm256_shufflehi_epi16(in1, 0b11110101), 0b11110101),in2, 0b10101010); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + __m128i in1 = _mm_load_si128((__m128i*)(ptr)); + __m128i in2 = _mm_load_si128((__m128i*)(ptr + 8)); + ret.re.m128 = _mm_blend_epi16(in1,_mm_shufflelo_epi16(_mm_shufflehi_epi16(in2, 0b10100000), 0b10100000), 0b10101010); + ret.im.m128 = _mm_blend_epi16(_mm_shufflelo_epi16(_mm_shufflehi_epi16(in1, 0b11110101), 0b11110101),in2, 0b10101010); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_c16_t srslte_simd_c16_load(int16_t *re, int16_t *im) { + simd_c16_t ret; +#ifdef LV_HAVE_AVX2 + ret.re.m256 = _mm256_load_si256((__m256i*)(re)); + ret.im.m256 = _mm256_load_si256((__m256i*)(im)); +#else +#ifdef LV_HAVE_SSE + ret.re.m128 = _mm_load_si128((__m128i*)(re)); + ret.im.m128 = _mm_load_si128((__m128i*)(im)); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ + return ret; +} + +static inline void srslte_simd_c16i_store(c16_t *ptr, simd_c16_t simdreg) { +#ifdef LV_HAVE_AVX2 + __m256i re_sw = _mm256_shufflelo_epi16(_mm256_shufflehi_epi16(simdreg.re.m256, 0b10110001), 0b10110001); + __m256i im_sw = _mm256_shufflelo_epi16(_mm256_shufflehi_epi16(simdreg.im.m256, 0b10110001), 0b10110001); + _mm256_store_si256((__m256i *) (ptr), _mm256_blend_epi16(simdreg.re.m256, im_sw, 0b10101010)); + _mm256_store_si256((__m256i *) (ptr + 8), _mm256_blend_epi16(re_sw, simdreg.im.m256, 0b10101010)); +#else +#ifdef LV_HAVE_SSE + __m128i re_sw = _mm_shufflelo_epi16(_mm_shufflehi_epi16(simdreg.re.m128, 0b10110001), 0b10110001); + __m128i im_sw = _mm_shufflelo_epi16(_mm_shufflehi_epi16(simdreg.im.m128, 0b10110001), 0b10110001); + _mm_store_si128((__m128i *) (ptr), _mm_blend_epi16(simdreg.re.m128, im_sw, 0b10101010)); + _mm_store_si128((__m128i *) (ptr + 8), _mm_blend_epi16(re_sw, simdreg.im.m128, 0b10101010)); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +} + +static inline void srslte_simd_c16_store(int16_t *re, int16_t *im, simd_c16_t simdreg) { +#ifdef LV_HAVE_AVX2 + _mm256_store_si256((__m256i *) re, simdreg.re.m256); + _mm256_store_si256((__m256i *) im, simdreg.im.m256); +#else +#ifdef LV_HAVE_SSE + _mm_store_si128((__m128i *) re, simdreg.re.m128); + _mm_store_si128((__m128i *) im, simdreg.im.m128); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +} + +static inline simd_c16_t srslte_simd_c16_prod (simd_c16_t a, simd_c16_t b) { + simd_c16_t ret; +#ifdef LV_HAVE_AVX2 + ret.re.m256 = _mm256_sub_epi16(_mm256_mulhrs_epi16(a.re.m256, _mm256_slli_epi16(b.re.m256, 1)), + _mm256_mulhrs_epi16(a.im.m256, _mm256_slli_epi16(b.im.m256, 1))); + ret.im.m256 = _mm256_add_epi16(_mm256_mulhrs_epi16(a.re.m256, _mm256_slli_epi16(b.im.m256, 1)), + _mm256_mulhrs_epi16(a.im.m256, _mm256_slli_epi16(b.re.m256, 1))); +#else +#ifdef LV_HAVE_SSE + ret.re.m128 = _mm_sub_epi16(_mm_mulhrs_epi16(a.re.m128, _mm_slli_epi16(b.re.m128, 1)), + _mm_mulhrs_epi16(a.im.m128, _mm_slli_epi16(b.im.m128, 1))); + ret.im.m128 = _mm_add_epi16(_mm_mulhrs_epi16(a.re.m128, _mm_slli_epi16(b.im.m128, 1)), + _mm_mulhrs_epi16(a.im.m128, _mm_slli_epi16(b.re.m128, 1))); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ + return ret; +} + +static inline simd_c16_t srslte_simd_c16_add (simd_c16_t a, simd_c16_t b) { + simd_c16_t ret; +#ifdef LV_HAVE_AVX2 + ret.re.m256 = _mm256_add_epi16(a.re.m256, b.re.m256); + ret.im.m256 = _mm256_add_epi16(a.im.m256, b.im.m256); +#else +#ifdef LV_HAVE_SSE + ret.re.m128 = _mm_add_epi16(a.re.m128, b.re.m128); + ret.im.m128 = _mm_add_epi16(a.im.m128, b.im.m128); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ + return ret; +} + +static inline simd_c16_t srslte_simd_c16_zero (void) { + simd_c16_t ret; +#ifdef LV_HAVE_AVX2 + ret.re.m256 = _mm256_setzero_si256(); + ret.im.m256 = _mm256_setzero_si256(); +#else +#ifdef LV_HAVE_SSE + ret.re.m128 = _mm_setzero_si128(); + ret.im.m128 = _mm_setzero_si128(); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ + return ret; +} + +#endif /* SRSLTE_SIMD_C16_SIZE */ + + + #endif //SRSLTE_SIMD_H_H From c9f6bfccd47d6dfcda2930e7cb81465e38fb3b9d Mon Sep 17 00:00:00 2001 From: Xavier Arteaga Date: Mon, 25 Sep 2017 13:19:34 +0200 Subject: [PATCH 20/55] Refactored vector library with SIMD independent architecture inline functions test-benchmark --- lib/include/srslte/phy/utils/vector.h | 8 +- lib/include/srslte/phy/utils/vector_simd.h | 61 +- lib/src/phy/utils/test/CMakeLists.txt | 3 + lib/src/phy/utils/test/vector_test.c | 555 ++++++++++ lib/src/phy/utils/vector.c | 228 +--- lib/src/phy/utils/vector_simd.c | 1120 ++++++++++---------- 6 files changed, 1187 insertions(+), 788 deletions(-) create mode 100644 lib/src/phy/utils/test/vector_test.c diff --git a/lib/include/srslte/phy/utils/vector.h b/lib/include/srslte/phy/utils/vector.h index 4a55d18b6..0fadfb334 100644 --- a/lib/include/srslte/phy/utils/vector.h +++ b/lib/include/srslte/phy/utils/vector.h @@ -80,8 +80,8 @@ SRSLTE_API void srslte_vec_load_file(char *filename, void *buffer, uint32_t len) SRSLTE_API void srslte_vec_sum_ch(uint8_t *x, uint8_t *y, char *z, uint32_t len); SRSLTE_API void srslte_vec_sum_fff(float *x, float *y, float *z, uint32_t len); SRSLTE_API void srslte_vec_sum_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len); -SRSLTE_API void srslte_vec_sub_sss(short *x, short *y, short *z, uint32_t len); -SRSLTE_API void srslte_vec_sum_sss(short *x, short *y, short *z, uint32_t len); +SRSLTE_API void srslte_vec_sub_sss(int16_t *x, int16_t *y, int16_t *z, uint32_t len); +SRSLTE_API void srslte_vec_sum_sss(int16_t *x, int16_t *y, int16_t *z, uint32_t len); /* substract two vectors z=x-y */ SRSLTE_API void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len); @@ -91,7 +91,7 @@ SRSLTE_API void srslte_vec_sub_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len); SRSLTE_API void srslte_vec_ema_filter(cf_t *new_data, cf_t *average, cf_t *output, float coeff, uint32_t len); /* Square distance */ -SRSLTE_API void srslte_vec_square_dist(cf_t symbol, cf_t *points, float *distance, uint32_t npoints); +//SRSLTE_API void srslte_vec_square_dist(cf_t symbol, cf_t *points, float *distance, uint32_t npoints); /* scalar addition */ SRSLTE_API void srslte_vec_sc_add_fff(float *x, float h, float *z, uint32_t len); @@ -132,7 +132,7 @@ SRSLTE_API void srslte_vec_prod_conj_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len /* real vector product (element-wise) */ SRSLTE_API void srslte_vec_prod_fff(float *x, float *y, float *z, uint32_t len); -SRSLTE_API void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len); +SRSLTE_API void srslte_vec_prod_sss(int16_t *x, int16_t *y, int16_t *z, uint32_t len); /* Dot-product */ SRSLTE_API cf_t srslte_vec_dot_prod_cfc(cf_t *x, float *y, uint32_t len); diff --git a/lib/include/srslte/phy/utils/vector_simd.h b/lib/include/srslte/phy/utils/vector_simd.h index 1010cbed6..8ea2ce9bc 100644 --- a/lib/include/srslte/phy/utils/vector_simd.h +++ b/lib/include/srslte/phy/utils/vector_simd.h @@ -35,47 +35,66 @@ extern "C" { #include #include "srslte/config.h" -SRSLTE_API int srslte_vec_dot_prod_sss_sse(short *x, short *y, uint32_t len); +#ifdef LV_HAVE_AVX512 +#define SRSLTE_IS_ALIGNED(PTR) (((size_t)(PTR) & 0x3F) == 0) +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX +#define SRSLTE_IS_ALIGNED(PTR) (((size_t)(PTR) & 0x1F) == 0) +#else /* LV_HAVE_AVX */ +#ifdef LV_HAVE_SSE +#define SRSLTE_IS_ALIGNED(PTR) (((size_t)(PTR) & 0x0F) == 0) +#else /* LV_HAVE_SSE */ +#define SRSLTE_IS_ALIGNED(PTR) (true) +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX */ +#endif /* LV_HAVE_AVX512 */ -SRSLTE_API int srslte_vec_dot_prod_sss_avx2(short *x, short *y, uint32_t len); +SRSLTE_API int srslte_vec_dot_prod_sss_simd(int16_t *x, int16_t *y, int len); -SRSLTE_API void srslte_vec_sum_sss_sse(short *x, short *y, short *z, uint32_t len); +SRSLTE_API void srslte_vec_sum_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len); -SRSLTE_API void srslte_vec_sum_sss_avx2(short *x, short *y, short *z, uint32_t len); - -SRSLTE_API void srslte_vec_sub_sss_sse(short *x, short *y, short *z, uint32_t len); +SRSLTE_API void srslte_vec_sub_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len); SRSLTE_API void srslte_vec_sub_sss_avx2(short *x, short *y, short *z, uint32_t len); -SRSLTE_API void srslte_vec_sum_fff_sse(float *x, float *y, float *z, uint32_t len); +SRSLTE_API cf_t srslte_vec_acc_cc_simd(cf_t *x, int len); -SRSLTE_API void srslte_vec_sum_fff_avx(float *x, float *y, float *z, uint32_t len); +SRSLTE_API void srslte_vec_add_fff_simd(float *x, float *y, float *z, int len); -SRSLTE_API void srslte_vec_sub_fff_sse(float *x, float *y, float *z, uint32_t len); +SRSLTE_API void srslte_vec_sub_fff_simd(float *x, float *y, float *z, int len); -SRSLTE_API void srslte_vec_sub_fff_avx(float *x, float *y, float *z, uint32_t len); +SRSLTE_API void srslte_vec_sc_prod_fff_simd(float *x, float h, float *z, int len); -SRSLTE_API void srslte_vec_sc_prod_fff_sse(float *x, float h, float *z, uint32_t len); +SRSLTE_API void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, int len); -SRSLTE_API void srslte_vec_sc_prod_ccc_sse(cf_t *x, cf_t h, cf_t *z, uint32_t len); +SRSLTE_API void srslte_vec_prod_fff_simd(float *x, float *y, float *z, int len); -SRSLTE_API void srslte_vec_prod_ccc_sse(cf_t *x,cf_t *y, cf_t *z, uint32_t len); +SRSLTE_API void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, int len); -SRSLTE_API void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len); +SRSLTE_API void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, int len); -SRSLTE_API void srslte_vec_prod_sss_avx2(short *x, short *y, short *z, uint32_t len); +SRSLTE_API void srslte_vec_prod_ccc_cf_simd(float *a_re, float *a_im, float *b_re, float *b_im, float *r_re, float *r_im, int len); -SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_sse(cf_t *x, cf_t *y, uint32_t len); +SRSLTE_API void srslte_vec_prod_ccc_c16_simd(int16_t *a_re, int16_t *a_im, int16_t *b_re, int16_t *b_im, int16_t *r_re, + int16_t *r_im, int len); -SRSLTE_API void srslte_vec_prod_conj_ccc_sse(cf_t *x,cf_t *y, cf_t *z, uint32_t len); +SRSLTE_API void srslte_vec_prod_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len); + +SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, int len); + +SRSLTE_API cf_t srslte_vec_dot_prod_ccc_simd(cf_t *x, cf_t *y, int len); SRSLTE_API cf_t srslte_vec_dot_prod_ccc_sse(cf_t *x, cf_t *y, uint32_t len); +SRSLTE_API c16_t srslte_vec_dot_prod_ccc_c16i_simd(c16_t *x, c16_t *y, int len); + SRSLTE_API void srslte_vec_sc_div2_sss_avx2(short *x, int k, short *z, uint32_t len); -SRSLTE_API void srslte_vec_abs_square_cf_sse(cf_t *x, float *z, uint32_t len); +SRSLTE_API void srslte_vec_abs_cf_simd(cf_t *x, float *z, int len); -SRSLTE_API void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len); +SRSLTE_API void srslte_vec_abs_square_cf_simd(cf_t *x, float *z, int len); + +SRSLTE_API void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len); SRSLTE_API void srslte_vec_prod_sss_avx(short *x, short *y, short *z, uint32_t len); @@ -93,7 +112,9 @@ SRSLTE_API void srslte_vec_lut_sss_sse(short *x, unsigned short *lut, short *y, SRSLTE_API void srslte_vec_convert_fi_sse(float *x, int16_t *z, float scale, uint32_t len); -SRSLTE_API void srslte_vec_sc_prod_cfc_avx(const cf_t *x,const float h,cf_t *y,const uint32_t len); +SRSLTE_API void srslte_vec_sc_prod_cfc_simd(const cf_t *x,const float h,cf_t *y,const int len); + +SRSLTE_API void srslte_vec_cp_simd(cf_t *src, cf_t *dst, int len); #ifdef __cplusplus } diff --git a/lib/src/phy/utils/test/CMakeLists.txt b/lib/src/phy/utils/test/CMakeLists.txt index 4dccbf2a0..76df7ac59 100644 --- a/lib/src/phy/utils/test/CMakeLists.txt +++ b/lib/src/phy/utils/test/CMakeLists.txt @@ -42,3 +42,6 @@ target_link_libraries(algebra_test srslte_phy) add_test(algebra_2x2_zf_solver_test algebra_test -z) add_test(algebra_2x2_mmse_solver_test algebra_test -m) + +add_executable(vector_test vector_test.c) +target_link_libraries(vector_test srslte_phy) \ No newline at end of file diff --git a/lib/src/phy/utils/test/vector_test.c b/lib/src/phy/utils/test/vector_test.c new file mode 100644 index 000000000..e781d05b9 --- /dev/null +++ b/lib/src/phy/utils/test/vector_test.c @@ -0,0 +1,555 @@ +/** + * + * \section COPYRIGHT + * + * Copyright 2013-2015 Software Radio Systems Limited + * + * \section LICENSE + * + * This file is part of the srsLTE library. + * + * srsLTE is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of + * the License, or (at your option) any later version. + * + * srsLTE is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * A copy of the GNU Affero General Public License can be found in + * the LICENSE file in the top-level directory of this distribution + * and at http://www.gnu.org/licenses/. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "srslte/phy/utils/mat.h" +#include "srslte/phy/utils/simd.h" +#include "srslte/phy/utils/vector.h" + + +bool zf_solver = false; +bool mmse_solver = false; +bool verbose = false; + +#define MAX_MSE (1e-3) +#define NOF_REPETITIONS (1024*128) +#define MAX_FUNCTIONS (64) +#define MAX_BLOCKS (16) + +#define RANDOM_F() ((float)rand())/((float)RAND_MAX) +#define RANDOM_S() ((int16_t)(rand() && 0x800F)) +#define RANDOM_CF() (RANDOM_F() + _Complex_I*RANDOM_F()) + +#define TEST_CALL(TEST_CODE) gettimeofday(&start, NULL);\ + for (int i = 0; i < NOF_REPETITIONS; i++){TEST_CODE;}\ + gettimeofday(&end, NULL); \ + *timing = elapsed_us(&start, &end); + +#define TEST(X, CODE) static bool test_##X (char *func_name, double *timing, uint32_t block_size) {\ + struct timeval start, end;\ + float mse = 0.0f;\ + bool passed;\ + strncpy(func_name, #X, 32);\ + CODE;\ + passed = (mse < MAX_MSE);\ + printf("%32s (%5d) ... %7.1f MSamp/s ... %3s Passed\n", func_name, block_size, \ + (double) block_size*NOF_REPETITIONS/ *timing, passed?"":"Not");\ + return passed;\ +} + +#define MALLOC(TYPE, NAME) TYPE *NAME = srslte_vec_malloc(sizeof(TYPE)*block_size) + + +static double elapsed_us(struct timeval *ts_start, struct timeval *ts_end) { + if (ts_end->tv_usec > ts_start->tv_usec) { + return ((double) ts_end->tv_sec - (double) ts_start->tv_sec) * 1000000 + + (double) ts_end->tv_usec - (double) ts_start->tv_usec; + } else { + return ((double) ts_end->tv_sec - (double) ts_start->tv_sec - 1) * 1000000 + + ((double) ts_end->tv_usec + 1000000) - (double) ts_start->tv_usec; + } +} + +float squared_error (cf_t a, cf_t b) { + float diff_re = __real__ a - __real__ b; + float diff_im = __imag__ a - __imag__ b; + return diff_re*diff_re + diff_im*diff_im; +} + +TEST(srslte_vec_dot_prod_sss, + MALLOC(int16_t, x); + MALLOC(int16_t, y); + int16_t z; + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_S(); + y[i] = RANDOM_S(); + } + + TEST_CALL(z = srslte_vec_dot_prod_sss(x, y, block_size)) + + for (int i = 0; i < block_size; i++) { + gold += x[i] * y[i]; + } + + mse += cabsf(gold - z) / cabsf(gold); + + free(x); + free(y); +) + +TEST(srslte_vec_sum_sss, + MALLOC(int16_t, x); + MALLOC(int16_t, y); + MALLOC(int16_t, z); + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_S(); + y[i] = RANDOM_S(); + } + + TEST_CALL(srslte_vec_sum_sss(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] + y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(y); + free(z); +) + +TEST(srslte_vec_sub_sss, + MALLOC(int16_t, x); + MALLOC(int16_t, y); + MALLOC(int16_t, z); + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_S(); + y[i] = RANDOM_S(); + } + + TEST_CALL(srslte_vec_sub_sss(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] - y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(y); + free(z); +) + +TEST(srslte_vec_prod_sss, + MALLOC(int16_t, x); + MALLOC(int16_t, y); + MALLOC(int16_t, z); + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_S(); + y[i] = RANDOM_S(); + } + + TEST_CALL(srslte_vec_prod_sss(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(y); + free(z); +) + +TEST(srslte_vec_acc_cc, + MALLOC(cf_t, x); + cf_t z; + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_F(); + } + + TEST_CALL(z = srslte_vec_acc_cc(x, block_size)) + + for (int i = 0; i < block_size; i++) { + gold += x[i]; + } + + mse += cabsf(gold - z)/cabsf(gold); + + free(x); +) + + +TEST(srslte_vec_sum_fff, + MALLOC(float, x); + MALLOC(float, y); + MALLOC(float, z); + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_F(); + y[i] = RANDOM_F(); + } + + TEST_CALL(srslte_vec_sum_fff(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] + y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(y); +) + +TEST(srslte_vec_sub_fff, + MALLOC(float, x); + MALLOC(float, y); + MALLOC(float, z); + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_F(); + y[i] = RANDOM_F(); + } + + TEST_CALL(srslte_vec_sub_fff(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] - y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(y); +) + +TEST(srslte_vec_dot_prod_ccc, + MALLOC(cf_t, x); + MALLOC(cf_t, y); + cf_t z; + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_CF(); + } + + TEST_CALL(z = srslte_vec_dot_prod_ccc(x, y, block_size)) + + for (int i = 0; i < block_size; i++) { + gold += x[i] * y[i]; + } + + mse = cabsf(gold - z) / cabsf(gold); + + free(x); + free(y); +) + +TEST(srslte_vec_dot_prod_conj_ccc, + MALLOC(cf_t, x); + MALLOC(cf_t, y); + cf_t z; + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_CF(); + } + + TEST_CALL(z = srslte_vec_dot_prod_conj_ccc(x, y, block_size)) + + for (int i = 0; i < block_size; i++) { + gold += x[i] * conjf(y[i]); + } + + mse = cabsf(gold - z) / cabsf(gold); + + free(x); + free(y); +) + +TEST(srslte_vec_prod_ccc, + MALLOC(cf_t, x); + MALLOC(cf_t, y); + MALLOC(cf_t, z); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_prod_ccc(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(z); +) + +TEST(srslte_vec_prod_conj_ccc, + MALLOC(cf_t, x); + MALLOC(cf_t, y); + MALLOC(cf_t, z); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_prod_conj_ccc(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * conjf(y[i]); + mse += cabsf(gold - z[i]); + } + + free(x); + free(z); +) + +TEST(srslte_vec_sc_prod_ccc, + MALLOC(cf_t, x); + MALLOC(cf_t, z); + cf_t y = RANDOM_F(); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_sc_prod_ccc(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * y; + mse += cabsf(gold - z[i]); + } + + free(x); + free(z); +) + +TEST(srslte_vec_prod_fff, + MALLOC(float, x); + MALLOC(float, y); + MALLOC(float, z); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_prod_fff(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(z); +) + +TEST(srslte_vec_sc_prod_fff, + MALLOC(float, x); + MALLOC(float, z); + float y = RANDOM_F(); + + float gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_sc_prod_fff(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * y; + mse += cabsf(gold - z[i]); + } + + free(x); + free(z); +) + +TEST(srslte_vec_abs_cf, + MALLOC(cf_t, x); + MALLOC(float, z); + float gold; + + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_abs_cf(x, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = sqrtf(crealf(x[i]) * crealf(x[i]) + cimagf(x[i])*cimagf(x[i])); + mse += cabsf(gold - z[i]); + } + + free(x); + free(z); +) + +TEST(srslte_vec_abs_square_cf, + MALLOC(cf_t, x); + MALLOC(float, z); + float gold; + + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_abs_square_cf(x, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = crealf(x[i]) * crealf(x[i]) + cimagf(x[i])*cimagf(x[i]); + mse += cabsf(gold - z[i]); + } + + free(x); + free(z); +) + +TEST(srslte_vec_sc_prod_cfc, + MALLOC(cf_t, x); + MALLOC(cf_t, z); + cf_t gold; + float h = RANDOM_F(); + + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_sc_prod_cfc(x, h, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * h; + mse += cabsf(gold - z[i]); + } + + free(x); + free(z); +) + +int main(int argc, char **argv) { + char func_names[MAX_FUNCTIONS][32]; + double timmings[MAX_FUNCTIONS][MAX_BLOCKS]; + uint32_t sizes[32]; + uint32_t size_count = 0; + uint32_t func_count = 0; + bool passed = true; + + for (uint32_t block_size = 1; block_size <= 1024*16; block_size *= 2) { + func_count = 0; + + passed &= test_srslte_vec_dot_prod_sss(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed &= test_srslte_vec_sum_sss(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed &= test_srslte_vec_sub_sss(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed &= test_srslte_vec_prod_sss(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed &= test_srslte_vec_acc_cc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed &= test_srslte_vec_sum_fff(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed &= test_srslte_vec_sub_fff(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed &= test_srslte_vec_dot_prod_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed &= test_srslte_vec_dot_prod_conj_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed &= test_srslte_vec_prod_fff(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed &= test_srslte_vec_prod_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed &= test_srslte_vec_prod_conj_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed &= test_srslte_vec_sc_prod_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed &= test_srslte_vec_sc_prod_fff(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed &= test_srslte_vec_abs_cf(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed &= test_srslte_vec_abs_square_cf(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed &= test_srslte_vec_sc_prod_cfc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + sizes[size_count] = block_size; + size_count++; + } + + printf("\n"); + printf("%32s |", "Subroutine/MSps"); + for (int i = 0; i < size_count; i++) { + printf(" %7d", sizes[i]); + } + printf(" |\n"); + + for (int j = 0; j < 32; j++) { + printf("-"); + } + printf("-+-"); + for (int j = 0; j < size_count; j++) { + printf("--------"); + } + printf("-|\n"); + + for (int i = 0; i < func_count; i++) { + printf("%32s | ", func_names[i]); + for (int j = 0; j < size_count; j++) { + printf(" %7.1f", (double) NOF_REPETITIONS*(double)sizes[j]/timmings[i][j]); + } + printf(" |\n"); + } + + return (passed)?SRSLTE_SUCCESS:SRSLTE_ERROR; +} diff --git a/lib/src/phy/utils/vector.c b/lib/src/phy/utils/vector.c index 917810e92..cb21f24f1 100644 --- a/lib/src/phy/utils/vector.c +++ b/lib/src/phy/utils/vector.c @@ -36,25 +36,6 @@ #include "srslte/phy/utils/bit.h" -#ifdef LV_HAVE_SSE -#include -#endif - -#ifdef LV_HAVE_AVX -#include -#endif - - -#ifdef HAVE_VOLK -#include "volk/volk.h" -#endif - -#ifdef DEBUG_MODE -#warning FIXME: Disabling SSE/AVX vector code -#undef LV_HAVE_SSE -#undef LV_HAVE_AVX -#endif - int srslte_vec_acc_ii(int *x, uint32_t len) { int i; @@ -88,51 +69,25 @@ void srslte_vec_ema_filter(cf_t *new_data, cf_t *average, cf_t *output, float co } cf_t srslte_vec_acc_cc(cf_t *x, uint32_t len) { - int i; - cf_t z=0; - for (i=0;i #include #include #include #include - -#include "srslte/phy/utils/vector_simd.h" - #include #include -#ifdef LV_HAVE_SSE -#include -#endif - -#ifdef LV_HAVE_AVX -#include -#endif +#include +#include "srslte/phy/utils/vector_simd.h" +#include "srslte/phy/utils/simd.h" -int srslte_vec_dot_prod_sss_sse(short *x, short *y, uint32_t len) -{ - int result = 0; -#ifdef LV_HAVE_SSE - unsigned int number = 0; - const unsigned int points = len / 8; +int srslte_vec_dot_prod_sss_simd(int16_t *x, int16_t *y, int len) { + int i = 0; + int result = 0; +#if SRSLTE_SIMD_S_SIZE + simd_s_t simd_dotProdVal = srslte_simd_s_zero(); + if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y)) { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_load(&x[i]); + simd_s_t b = srslte_simd_s_load(&y[i]); - const __m128i* xPtr = (const __m128i*) x; - const __m128i* yPtr = (const __m128i*) y; - - __m128i dotProdVal = _mm_setzero_si128(); + simd_s_t z = srslte_simd_s_mul(a, b); - __m128i xVal, yVal, zVal; - for(;number < points; number++){ + simd_dotProdVal = srslte_simd_s_add(simd_dotProdVal, z); + } + } else { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_loadu(&x[i]); + simd_s_t b = srslte_simd_s_loadu(&y[i]); - xVal = _mm_load_si128(xPtr); - yVal = _mm_loadu_si128(yPtr); + simd_s_t z = srslte_simd_s_mul(a, b); - zVal = _mm_mullo_epi16(xVal, yVal); - - dotProdVal = _mm_add_epi16(dotProdVal, zVal); - - xPtr ++; - yPtr ++; + simd_dotProdVal = srslte_simd_s_add(simd_dotProdVal, z); + } } - - short dotProdVector[8]; - _mm_store_si128((__m128i*) dotProdVector, dotProdVal); - for (int i=0;i<8;i++) { - result += dotProdVector[i]; + __attribute__ ((aligned (SRSLTE_SIMD_S_SIZE*2))) short dotProdVector[SRSLTE_SIMD_S_SIZE]; + srslte_simd_s_store(dotProdVector, simd_dotProdVal); + for (int k = 0; k < SRSLTE_SIMD_S_SIZE; k++) { + result += dotProdVector[k]; + } +#endif /* SRSLTE_SIMD_S_SIZE */ + + for(; i < len; i++){ + result += (x[i] * y[i]); } - number = points * 8; - for(;number < len; number++){ - result += (x[number] * y[number]); - } - -#endif return result; } +void srslte_vec_sum_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len) { + int i = 0; +#ifdef SRSLTE_SIMD_S_SIZE + if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_load(&x[i]); + simd_s_t b = srslte_simd_s_load(&y[i]); -int srslte_vec_dot_prod_sss_avx2(short *x, short *y, uint32_t len) -{ - int result = 0; -#ifdef LV_HAVE_AVX2 - unsigned int number = 0; - const unsigned int points = len / 16; + simd_s_t r = srslte_simd_s_add(a, b); - const __m256i* xPtr = (const __m256i*) x; - const __m256i* yPtr = (const __m256i*) y; - - __m256i dotProdVal = _mm256_setzero_si256(); + srslte_simd_s_store(&z[i], r); + } + } else { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_loadu(&x[i]); + simd_s_t b = srslte_simd_s_loadu(&y[i]); - __m256i xVal, yVal, zVal; - for(;number < points; number++){ + simd_s_t r = srslte_simd_s_add(a, b); - xVal = _mm256_load_si256(xPtr); - yVal = _mm256_loadu_si256(yPtr); - zVal = _mm256_mullo_epi16(xVal, yVal); - dotProdVal = _mm256_add_epi16(dotProdVal, zVal); - xPtr ++; - yPtr ++; - } - - __attribute__ ((aligned (256))) short dotProdVector[16]; - _mm256_store_si256((__m256i*) dotProdVector, dotProdVal); - for (int i=0;i<16;i++) { - result += dotProdVector[i]; + srslte_simd_s_storeu(&z[i], r); + } } +#endif /* SRSLTE_SIMD_S_SIZE */ - number = points * 16; - for(;number < len; number++){ - result += (x[number] * y[number]); + for(; i < len; i++){ + z[i] = x[i] + y[i]; } - -#endif - return result; } +void srslte_vec_sub_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len) { + int i = 0; +#ifdef SRSLTE_SIMD_S_SIZE + if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_load(&x[i]); + simd_s_t b = srslte_simd_s_load(&y[i]); + simd_s_t r = srslte_simd_s_sub(a, b); -void srslte_vec_sum_sss_sse(short *x, short *y, short *z, uint32_t len) -{ -#ifdef LV_HAVE_SSE - unsigned int number = 0; - const unsigned int points = len / 8; + srslte_simd_s_store(&z[i], r); + } + } else { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_loadu(&x[i]); + simd_s_t b = srslte_simd_s_loadu(&y[i]); - const __m128i* xPtr = (const __m128i*) x; - const __m128i* yPtr = (const __m128i*) y; - __m128i* zPtr = (__m128i*) z; + simd_s_t r = srslte_simd_s_sub(a, b); - __m128i xVal, yVal, zVal; - for(;number < points; number++){ - - xVal = _mm_load_si128(xPtr); - yVal = _mm_load_si128(yPtr); - - zVal = _mm_add_epi16(xVal, yVal); - - _mm_store_si128(zPtr, zVal); - - xPtr ++; - yPtr ++; - zPtr ++; + srslte_simd_s_storeu(&z[i], r); + } } +#endif /* SRSLTE_SIMD_S_SIZE */ - number = points * 8; - for(;number < len; number++){ - z[number] = x[number] + y[number]; + for(; i < len; i++){ + z[i] = x[i] - y[i]; } -#endif - } -void srslte_vec_sum_sss_avx2(short *x, short *y, short *z, uint32_t len) -{ -#ifdef LV_HAVE_AVX2 - unsigned int number = 0; - const unsigned int points = len / 16; +void srslte_vec_prod_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len) { + int i = 0; +#ifdef SRSLTE_SIMD_S_SIZE + if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_load(&x[i]); + simd_s_t b = srslte_simd_s_load(&y[i]); - const __m256i* xPtr = (const __m256i*) x; - const __m256i* yPtr = (const __m256i*) y; - __m256i* zPtr = (__m256i*) z; + simd_s_t r = srslte_simd_s_mul(a, b); - __m256i xVal, yVal, zVal; - for(;number < points; number++){ + srslte_simd_s_store(&z[i], r); + } + } else { + for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { + simd_s_t a = srslte_simd_s_loadu(&x[i]); + simd_s_t b = srslte_simd_s_loadu(&y[i]); - xVal = _mm256_load_si256(xPtr); - yVal = _mm256_loadu_si256(yPtr); + simd_s_t r = srslte_simd_s_mul(a, b); - zVal = _mm256_add_epi16(xVal, yVal); - _mm256_store_si256(zPtr, zVal); - - xPtr ++; - yPtr ++; - zPtr ++; + srslte_simd_s_storeu(&z[i], r); + } } +#endif /* SRSLTE_SIMD_S_SIZE */ - number = points * 16; - for(;number < len; number++){ - z[number] = x[number] + y[number]; + for(; i < len; i++){ + z[i] = x[i] * y[i]; } -#endif - -} - - -void srslte_vec_sub_sss_sse(short *x, short *y, short *z, uint32_t len) -{ -#ifdef LV_HAVE_SSE - unsigned int number = 0; - const unsigned int points = len / 8; - - const __m128i* xPtr = (const __m128i*) x; - const __m128i* yPtr = (const __m128i*) y; - __m128i* zPtr = (__m128i*) z; - - __m128i xVal, yVal, zVal; - for(;number < points; number++){ - - xVal = _mm_load_si128(xPtr); - yVal = _mm_load_si128(yPtr); - - zVal = _mm_sub_epi16(xVal, yVal); - - _mm_store_si128(zPtr, zVal); - - xPtr ++; - yPtr ++; - zPtr ++; - } - - number = points * 8; - for(;number < len; number++){ - z[number] = x[number] - y[number]; - } -#endif -} - -void srslte_vec_sub_sss_avx2(short *x, short *y, short *z, uint32_t len) -{ -#ifdef LV_HAVE_AVX2 - unsigned int number = 0; - const unsigned int points = len / 16; - - const __m256i* xPtr = (const __m256i*) x; - const __m256i* yPtr = (const __m256i*) y; - __m256i* zPtr = (__m256i*) z; - - __m256i xVal, yVal, zVal; - for(;number < points; number++){ - - xVal = _mm256_load_si256(xPtr); - yVal = _mm256_loadu_si256(yPtr); - - zVal = _mm256_sub_epi16(xVal, yVal); - - _mm256_store_si256(zPtr, zVal); - - xPtr ++; - yPtr ++; - zPtr ++; - } - - number = points * 16; - for(;number < len; number++){ - z[number] = x[number] - y[number]; - } - #endif } -void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len) -{ -#ifdef LV_HAVE_SSE - unsigned int number = 0; - const unsigned int points = len / 8; - - const __m128i* xPtr = (const __m128i*) x; - const __m128i* yPtr = (const __m128i*) y; - __m128i* zPtr = (__m128i*) z; - - __m128i xVal, yVal, zVal; - for(;number < points; number++){ - - xVal = _mm_load_si128(xPtr); - yVal = _mm_load_si128(yPtr); - - zVal = _mm_mullo_epi16(xVal, yVal); - - _mm_store_si128(zPtr, zVal); - - xPtr ++; - yPtr ++; - zPtr ++; - } - - number = points * 8; - for(;number < len; number++){ - z[number] = x[number] * y[number]; - } -#endif -} - -void srslte_vec_prod_sss_avx2(short *x, short *y, short *z, uint32_t len) -{ -#ifdef LV_HAVE_AVX2 - unsigned int number = 0; - const unsigned int points = len / 16; - - const __m256i* xPtr = (const __m256i*) x; - const __m256i* yPtr = (const __m256i*) y; - __m256i* zPtr = (__m256i*) z; - - __m256i xVal, yVal, zVal; - for(;number < points; number++){ - - xVal = _mm256_loadu_si256(xPtr); - yVal = _mm256_loadu_si256(yPtr); - - zVal = _mm256_mullo_epi16(xVal, yVal); - - _mm256_storeu_si256(zPtr, zVal); - - xPtr ++; - yPtr ++; - zPtr ++; - } - - number = points * 16; - for(;number < len; number++){ - z[number] = x[number] * y[number]; - } -#endif -} - - - - - - +#warning remove function if it is not used +/* void srslte_vec_sc_div2_sss_sse(short *x, int k, short *z, uint32_t len) { #ifdef LV_HAVE_SSE @@ -357,8 +195,10 @@ void srslte_vec_sc_div2_sss_sse(short *x, int k, short *z, uint32_t len) z[number] = x[number] / divn; } #endif -} +}*/ +#warning remove function if it is not used +/* void srslte_vec_sc_div2_sss_avx2(short *x, int k, short *z, uint32_t len) { #ifdef LV_HAVE_AVX2 @@ -387,7 +227,7 @@ void srslte_vec_sc_div2_sss_avx2(short *x, int k, short *z, uint32_t len) z[number] = x[number] / divn; } #endif -} +}*/ @@ -531,379 +371,527 @@ void srslte_vec_sum_fff_avx(float *x, float *y, float *z, uint32_t len) { #endif } -void srslte_vec_sub_fff_sse(float *x, float *y, float *z, uint32_t len) { -#ifdef LV_HAVE_SSE - unsigned int number = 0; - const unsigned int points = len / 4; +cf_t srslte_vec_acc_cc_simd(cf_t *x, int len) { + int i = 0; + cf_t acc_sum = 0.0f; - const float* xPtr = (const float*) x; - const float* yPtr = (const float*) y; - float* zPtr = (float*) z; +#if SRSLTE_SIMD_F_SIZE + simd_f_t simd_sum = srslte_simd_f_zero(); - __m128 xVal, yVal, zVal; - for(;number < points; number++){ + if (SRSLTE_IS_ALIGNED(x)) { + for (; i < len - SRSLTE_SIMD_F_SIZE / 2 + 1; i += SRSLTE_SIMD_F_SIZE / 2) { + simd_f_t a = srslte_simd_f_load((float *) &x[i]); - xVal = _mm_loadu_ps(xPtr); - yVal = _mm_loadu_ps(yPtr); + simd_sum = srslte_simd_f_add(simd_sum, a); + } + } else { + for (; i < len - SRSLTE_SIMD_F_SIZE / 2 + 1; i += SRSLTE_SIMD_F_SIZE / 2) { + simd_f_t a = srslte_simd_f_loadu((float *) &x[i]); - zVal = _mm_sub_ps(xVal, yVal); - - _mm_storeu_ps(zPtr, zVal); - - xPtr += 4; - yPtr += 4; - zPtr += 4; + simd_sum = srslte_simd_f_add(simd_sum, a); + } } - for(number = points * 4;number < len; number++){ - z[number] = x[number] - y[number]; + __attribute__((aligned(64))) cf_t sum[SRSLTE_SIMD_F_SIZE/2]; + srslte_simd_f_store((float*)&sum, simd_sum); + for (int k = 0; k < SRSLTE_SIMD_F_SIZE/2; k++) { + acc_sum += sum[k]; } #endif + + for (; i Date: Mon, 25 Sep 2017 17:08:11 +0200 Subject: [PATCH 21/55] Solved bugs and compilation error in simd and vector_simd --- lib/include/srslte/phy/utils/simd.h | 20 ++++++++++---------- lib/include/srslte/phy/utils/vector_simd.h | 2 +- lib/src/phy/utils/test/vector_test.c | 8 ++++---- lib/src/phy/utils/vector_simd.c | 10 +++++----- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/lib/include/srslte/phy/utils/simd.h b/lib/include/srslte/phy/utils/simd.h index 774dd54bd..22d8db79d 100644 --- a/lib/include/srslte/phy/utils/simd.h +++ b/lib/include/srslte/phy/utils/simd.h @@ -226,7 +226,7 @@ static inline simd_f_t srslte_simd_f_mul(simd_f_t a, simd_f_t b) { static inline simd_f_t srslte_simd_f_addsub(simd_f_t a, simd_f_t b) { #ifdef LV_HAVE_AVX512 __m512 r = _mm512_add_ps(a, b); - return _mm512_mask_sub_ps(r, 0b1010101010101010, a, b); + return _mm512_mask_sub_ps(r, 0b0101010101010101, a, b); #else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX2 return _mm256_addsub_ps(a, b); @@ -642,10 +642,10 @@ static inline simd_s_t srslte_simd_s_load(int16_t *ptr) { return _mm512_load_si512(ptr); #else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX2 - return _mm256_load_si256(ptr); + return _mm256_load_si256((__m256i*) ptr); #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE - return _mm_load_si128(ptr); + return _mm_load_si128((__m128i*) ptr); #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -653,13 +653,13 @@ static inline simd_s_t srslte_simd_s_load(int16_t *ptr) { static inline simd_s_t srslte_simd_s_loadu(int16_t *ptr) { #ifdef LV_HAVE_AVX512 - return _mm512_load_si512(ptr); + return _mm512_loadu_si512(ptr); #else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX2 - return _mm256_load_si256(ptr); + return _mm256_loadu_si256((__m256i*) ptr); #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE - return _mm_load_si128(ptr); + return _mm_loadu_si128((__m128i*) ptr); #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -670,10 +670,10 @@ static inline void srslte_simd_s_store(int16_t *ptr, simd_s_t simdreg) { _mm512_store_si512(ptr, simdreg); #else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX2 - _mm256_store_si256(ptr, simdreg); + _mm256_store_si256((__m256i*) ptr, simdreg); #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE - _mm_store_si128(ptr, simdreg); + _mm_store_si128((__m128i*) ptr, simdreg); #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -684,10 +684,10 @@ static inline void srslte_simd_s_storeu(int16_t *ptr, simd_s_t simdreg) { _mm512_storeu_si512(ptr, simdreg); #else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX2 - _mm256_storeu_si256(ptr, simdreg); + _mm256_storeu_si256((__m256i*) ptr, simdreg); #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE - _mm_storeu_si128(ptr, simdreg); + _mm_storeu_si128((__m128i*) ptr, simdreg); #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ diff --git a/lib/include/srslte/phy/utils/vector_simd.h b/lib/include/srslte/phy/utils/vector_simd.h index 8ea2ce9bc..4ee839fab 100644 --- a/lib/include/srslte/phy/utils/vector_simd.h +++ b/lib/include/srslte/phy/utils/vector_simd.h @@ -44,7 +44,7 @@ extern "C" { #ifdef LV_HAVE_SSE #define SRSLTE_IS_ALIGNED(PTR) (((size_t)(PTR) & 0x0F) == 0) #else /* LV_HAVE_SSE */ -#define SRSLTE_IS_ALIGNED(PTR) (true) +#define SRSLTE_IS_ALIGNED(PTR) (1) #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX */ #endif /* LV_HAVE_AVX512 */ diff --git a/lib/src/phy/utils/test/vector_test.c b/lib/src/phy/utils/test/vector_test.c index e781d05b9..05dce1d35 100644 --- a/lib/src/phy/utils/test/vector_test.c +++ b/lib/src/phy/utils/test/vector_test.c @@ -45,7 +45,7 @@ bool mmse_solver = false; bool verbose = false; #define MAX_MSE (1e-3) -#define NOF_REPETITIONS (1024*128) +#define NOF_REPETITIONS (1024) #define MAX_FUNCTIONS (64) #define MAX_BLOCKS (16) @@ -70,7 +70,7 @@ bool verbose = false; return passed;\ } -#define MALLOC(TYPE, NAME) TYPE *NAME = srslte_vec_malloc(sizeof(TYPE)*block_size) +#define MALLOC(TYPE, NAME) TYPE *NAME = malloc(sizeof(TYPE)*block_size) static double elapsed_us(struct timeval *ts_start, struct timeval *ts_end) { @@ -339,7 +339,7 @@ TEST(srslte_vec_prod_conj_ccc, TEST(srslte_vec_sc_prod_ccc, MALLOC(cf_t, x); MALLOC(cf_t, z); - cf_t y = RANDOM_F(); + cf_t y = RANDOM_CF(); cf_t gold; for (int i = 0; i < block_size; i++) { @@ -469,7 +469,7 @@ int main(int argc, char **argv) { uint32_t func_count = 0; bool passed = true; - for (uint32_t block_size = 1; block_size <= 1024*16; block_size *= 2) { + for (uint32_t block_size = 1; block_size <= 1024*8; block_size *= 2) { func_count = 0; passed &= test_srslte_vec_dot_prod_sss(func_names[func_count], &timmings[func_count][size_count], block_size); diff --git a/lib/src/phy/utils/vector_simd.c b/lib/src/phy/utils/vector_simd.c index 21132390f..2eb0428b7 100644 --- a/lib/src/phy/utils/vector_simd.c +++ b/lib/src/phy/utils/vector_simd.c @@ -77,7 +77,7 @@ int srslte_vec_dot_prod_sss_simd(int16_t *x, int16_t *y, int len) { void srslte_vec_sum_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len) { int i = 0; -#ifdef SRSLTE_SIMD_S_SIZE +#if SRSLTE_SIMD_S_SIZE if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { simd_s_t a = srslte_simd_s_load(&x[i]); @@ -106,7 +106,7 @@ void srslte_vec_sum_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len) { void srslte_vec_sub_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len) { int i = 0; -#ifdef SRSLTE_SIMD_S_SIZE +#if SRSLTE_SIMD_S_SIZE if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { simd_s_t a = srslte_simd_s_load(&x[i]); @@ -135,7 +135,7 @@ void srslte_vec_sub_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len) { void srslte_vec_prod_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len) { int i = 0; -#ifdef SRSLTE_SIMD_S_SIZE +#if SRSLTE_SIMD_S_SIZE if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) { simd_s_t a = srslte_simd_s_load(&x[i]); @@ -721,14 +721,14 @@ void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, int len) { } } else { for (; i < len - SRSLTE_SIMD_F_SIZE / 2 + 1; i += SRSLTE_SIMD_F_SIZE / 2) { - simd_f_t temp = srslte_simd_f_load((float *) &x[i]); + simd_f_t temp = srslte_simd_f_loadu((float *) &x[i]); simd_f_t m1 = srslte_simd_f_mul(hre, temp); simd_f_t sw = srslte_simd_f_swap(temp); simd_f_t m2 = srslte_simd_f_mul(him, sw); simd_f_t r = srslte_simd_f_addsub(m1, m2); - srslte_simd_f_store((float *) &z[i], r); + srslte_simd_f_storeu((float *) &z[i], r); } } #endif From 9e5f999666a97b82fa0558b28294a7b9f62becbf Mon Sep 17 00:00:00 2001 From: Xavier Arteaga Date: Thu, 28 Sep 2017 11:04:26 +0200 Subject: [PATCH 22/55] Added more functions --- lib/include/srslte/phy/utils/simd.h | 237 ++++++++++ lib/include/srslte/phy/utils/vector.h | 5 +- lib/include/srslte/phy/utils/vector_simd.h | 63 +-- lib/src/phy/mimo/precoding.c | 3 +- lib/src/phy/sync/find_sss.c | 8 +- lib/src/phy/utils/test/CMakeLists.txt | 3 +- lib/src/phy/utils/test/vector_test.c | 287 +++++++++++- lib/src/phy/utils/vector.c | 134 +----- lib/src/phy/utils/vector_simd.c | 493 ++++++++++++++++----- 9 files changed, 939 insertions(+), 294 deletions(-) diff --git a/lib/include/srslte/phy/utils/simd.h b/lib/include/srslte/phy/utils/simd.h index 22d8db79d..9a5f15dbb 100644 --- a/lib/include/srslte/phy/utils/simd.h +++ b/lib/include/srslte/phy/utils/simd.h @@ -88,6 +88,8 @@ #define SRSLTE_SIMD_F_SIZE 16 #define SRSLTE_SIMD_CF_SIZE 16 +#define SRSLTE_SIMD_I_SIZE 16 + #define SRSLTE_SIMD_S_SIZE 32 #define SRSLTE_SIMD_C16_SIZE 0 @@ -97,6 +99,8 @@ #define SRSLTE_SIMD_F_SIZE 8 #define SRSLTE_SIMD_CF_SIZE 8 +#define SRSLTE_SIMD_I_SIZE 8 + #define SRSLTE_SIMD_S_SIZE 16 #define SRSLTE_SIMD_C16_SIZE 16 @@ -106,6 +110,8 @@ #define SRSLTE_SIMD_F_SIZE 4 #define SRSLTE_SIMD_CF_SIZE 4 +#define SRSLTE_SIMD_I_SIZE 4 + #define SRSLTE_SIMD_S_SIZE 8 #define SRSLTE_SIMD_C16_SIZE 8 @@ -114,6 +120,8 @@ #define SRSLTE_SIMD_F_SIZE 0 #define SRSLTE_SIMD_CF_SIZE 0 +#define SRSLTE_SIMD_I_SIZE 0 + #define SRSLTE_SIMD_S_SIZE 0 #define SRSLTE_SIMD_C16_SIZE 0 @@ -223,6 +231,20 @@ static inline simd_f_t srslte_simd_f_mul(simd_f_t a, simd_f_t b) { #endif /* LV_HAVE_AVX512 */ } +static inline simd_f_t srslte_simd_f_rcp(simd_f_t a) { +#ifdef LV_HAVE_AVX512 + return _mm512_rcp_ps(a); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_rcp_ps(a); +#else /* LV_HAVE_AVX2 */ + #ifdef LV_HAVE_SSE + return _mm_rcp_ps(a); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + static inline simd_f_t srslte_simd_f_addsub(simd_f_t a, simd_f_t b) { #ifdef LV_HAVE_AVX512 __m512 r = _mm512_add_ps(a, b); @@ -600,6 +622,61 @@ static inline simd_cf_t srslte_simd_cf_add (simd_cf_t a, simd_cf_t b) { return ret; } +static inline simd_cf_t srslte_simd_cf_mul (simd_cf_t a, simd_f_t b) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + b = _mm512_permutexvar_ps(b, _mm512_setr_epi32(0,4,1,5,2,6,3,7,8,12,9,13,10,14,11,15)); + ret.re = _mm512_mul_ps(a.re, b); + ret.im = _mm512_mul_ps(a.im, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + b = _mm256_permutevar8x32_ps(b, _mm256_setr_epi32(0,4,1,5,2,6,3,7)); + ret.re = _mm256_mul_ps(a.re, b); + ret.im = _mm256_mul_ps(a.im, b); +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE + ret.re = _mm_mul_ps(a.re, b); + ret.im = _mm_mul_ps(a.im, b); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + +static inline simd_cf_t srslte_simd_cf_rcp (simd_cf_t a) { + simd_cf_t ret; +#ifdef LV_HAVE_AVX512 + simd_f_t a2re = _mm512_mul_ps(a.re, a.re); + simd_f_t a2im = _mm512_mul_ps(a.im, a.im); + simd_f_t mod2 = _mm512_add_ps(a2re, a2im); + simd_f_t rcp = _mm512_rcp_ps(mod2); + simd_f_t neg_a_im = _mm512_xor_ps(_mm512_set1_ps(-0.0f), a.im); + ret.re = _mm512_mul_ps(a.re, rcp); + ret.im = _mm512_mul_ps(neg_a_im, rcp); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + simd_f_t a2re = _mm256_mul_ps(a.re, a.re); + simd_f_t a2im = _mm256_mul_ps(a.im, a.im); + simd_f_t mod2 = _mm256_add_ps(a2re, a2im); + simd_f_t rcp = _mm256_rcp_ps(mod2); + simd_f_t neg_a_im = _mm256_xor_ps(_mm256_set1_ps(-0.0f), a.im); + ret.re = _mm256_mul_ps(a.re, rcp); + ret.im = _mm256_mul_ps(neg_a_im, rcp); +#else /* LV_HAVE_AVX2 */ + #ifdef LV_HAVE_SSE + simd_f_t a2re = _mm_mul_ps(a.re, a.re); + simd_f_t a2im = _mm_mul_ps(a.im, a.im); + simd_f_t mod2 = _mm_add_ps(a2re, a2im); + simd_f_t rcp = _mm_rcp_ps(mod2); + simd_f_t neg_a_im = _mm_xor_ps(_mm_set1_ps(-0.0f), a.im); + ret.re = _mm_mul_ps(a.re, rcp); + ret.im = _mm_mul_ps(neg_a_im, rcp); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + return ret; +} + static inline simd_cf_t srslte_simd_cf_zero (void) { simd_cf_t ret; #ifdef LV_HAVE_AVX512 @@ -621,6 +698,106 @@ static inline simd_cf_t srslte_simd_cf_zero (void) { #endif /* SRSLTE_SIMD_CF_SIZE */ +#if SRSLTE_SIMD_I_SIZE + +#ifdef LV_HAVE_AVX512 +typedef __m512i simd_i_t; +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 +typedef __m256i simd_i_t; +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE +typedef __m128i simd_i_t; +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ + +static inline simd_i_t srslte_simd_i_load(int *x) { +#ifdef LV_HAVE_AVX512 + return _mm512_load_epi32((__m512i*)x); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_load_si256((__m256i*)x); +#else + #ifdef LV_HAVE_SSE + return _mm_load_si128((__m128i*)x); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline void srslte_simd_i_store(int *x, simd_i_t reg) { +#ifdef LV_HAVE_AVX512 + _mm512_store_epi32((__m512i*)x, reg); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + _mm256_store_si256((__m256i*)x, reg); +#else +#ifdef LV_HAVE_SSE + _mm_store_si128((__m128i*)x, reg); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_i_t srslte_simd_i_set1(int x) { +#ifdef LV_HAVE_AVX512 + return _mm512_set1_epi32(x); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_set1_epi32(x); +#else + #ifdef LV_HAVE_SSE + return _mm_set1_epi32(x); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_i_t srslte_simd_i_add(simd_i_t a, simd_i_t b) { +#ifdef LV_HAVE_AVX512 + return _mm512_add_epi32(a, b); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return _mm256_add_epi32(a, b); +#else +#ifdef LV_HAVE_SSE + return _mm_add_epi32(a, b); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_i_t srslte_simd_f_max(simd_f_t a, simd_f_t b) { +#ifdef LV_HAVE_AVX512 + return (simd_i_t) _mm512_cmp_ps_mask(a, b, _CMP_GT_OS); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return (simd_i_t) _mm256_cmp_ps(a, b, _CMP_GT_OS); +#else /* LV_HAVE_AVX2 */ + #ifdef LV_HAVE_SSE + return (simd_i_t) _mm_cmpgt_ps(a, b); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +static inline simd_i_t srslte_simd_i_select(simd_i_t a, simd_i_t b, simd_i_t selector) { +#ifdef LV_HAVE_AVX512 + return (__m512i) _mm512_blendv_ps((__m512)a, (__m512) b, (__m512) selector); +#else /* LV_HAVE_AVX512 */ +#ifdef LV_HAVE_AVX2 + return (__m256i) _mm256_blendv_ps((__m256) a,(__m256) b,(__m256) selector); +#else + #ifdef LV_HAVE_SSE + return (__m128i) _mm_blendv_ps((__m128)a, (__m128)b, (__m128)selector); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ +} + +#endif /* SRSLTE_SIMD_I_SIZE*/ + #if SRSLTE_SIMD_S_SIZE @@ -829,6 +1006,20 @@ static inline simd_c16_t srslte_simd_c16_load(int16_t *re, int16_t *im) { return ret; } +static inline simd_c16_t srslte_simd_c16_loadu(int16_t *re, int16_t *im) { + simd_c16_t ret; +#ifdef LV_HAVE_AVX2 + ret.re.m256 = _mm256_loadu_si256((__m256i*)(re)); + ret.im.m256 = _mm256_loadu_si256((__m256i*)(im)); +#else +#ifdef LV_HAVE_SSE + ret.re.m128 = _mm_loadu_si128((__m128i*)(re)); + ret.im.m128 = _mm_loadu_si128((__m128i*)(im)); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ + return ret; +} + static inline void srslte_simd_c16i_store(c16_t *ptr, simd_c16_t simdreg) { #ifdef LV_HAVE_AVX2 __m256i re_sw = _mm256_shufflelo_epi16(_mm256_shufflehi_epi16(simdreg.re.m256, 0b10110001), 0b10110001); @@ -845,6 +1036,22 @@ static inline void srslte_simd_c16i_store(c16_t *ptr, simd_c16_t simdreg) { #endif /* LV_HAVE_AVX2 */ } +static inline void srslte_simd_c16i_storeu(c16_t *ptr, simd_c16_t simdreg) { +#ifdef LV_HAVE_AVX2 + __m256i re_sw = _mm256_shufflelo_epi16(_mm256_shufflehi_epi16(simdreg.re.m256, 0b10110001), 0b10110001); + __m256i im_sw = _mm256_shufflelo_epi16(_mm256_shufflehi_epi16(simdreg.im.m256, 0b10110001), 0b10110001); + _mm256_storeu_si256((__m256i *) (ptr), _mm256_blend_epi16(simdreg.re.m256, im_sw, 0b10101010)); + _mm256_storeu_si256((__m256i *) (ptr + 8), _mm256_blend_epi16(re_sw, simdreg.im.m256, 0b10101010)); +#else +#ifdef LV_HAVE_SSE + __m128i re_sw = _mm_shufflelo_epi16(_mm_shufflehi_epi16(simdreg.re.m128, 0b10110001), 0b10110001); + __m128i im_sw = _mm_shufflelo_epi16(_mm_shufflehi_epi16(simdreg.im.m128, 0b10110001), 0b10110001); + _mm_storeu_si128((__m128i *) (ptr), _mm_blend_epi16(simdreg.re.m128, im_sw, 0b10101010)); + _mm_storeu_si128((__m128i *) (ptr + 8), _mm_blend_epi16(re_sw, simdreg.im.m128, 0b10101010)); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +} + static inline void srslte_simd_c16_store(int16_t *re, int16_t *im, simd_c16_t simdreg) { #ifdef LV_HAVE_AVX2 _mm256_store_si256((__m256i *) re, simdreg.re.m256); @@ -857,6 +1064,18 @@ static inline void srslte_simd_c16_store(int16_t *re, int16_t *im, simd_c16_t si #endif /* LV_HAVE_AVX2 */ } +static inline void srslte_simd_c16_storeu(int16_t *re, int16_t *im, simd_c16_t simdreg) { +#ifdef LV_HAVE_AVX2 + _mm256_storeu_si256((__m256i *) re, simdreg.re.m256); + _mm256_storeu_si256((__m256i *) im, simdreg.im.m256); +#else +#ifdef LV_HAVE_SSE + _mm_storeu_si128((__m128i *) re, simdreg.re.m128); + _mm_storeu_si128((__m128i *) im, simdreg.im.m128); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +} + static inline simd_c16_t srslte_simd_c16_prod (simd_c16_t a, simd_c16_t b) { simd_c16_t ret; #ifdef LV_HAVE_AVX2 @@ -905,6 +1124,24 @@ static inline simd_c16_t srslte_simd_c16_zero (void) { #endif /* SRSLTE_SIMD_C16_SIZE */ +#if SRSLTE_SIMD_F_SIZE && SRSLTE_SIMD_S_SIZE +static inline simd_s_t srslte_simd_convert_2f_s(simd_f_t a, simd_f_t b) { +#ifdef LV_HAVE_AVX2 + __m256 aa = _mm256_permute2f128_ps(a, b, 0x20); + __m256 bb = _mm256_permute2f128_ps(a, b, 0x31); + __m256i ai = _mm256_cvttps_epi32(aa); + __m256i bi = _mm256_cvttps_epi32(bb); + return _mm256_packs_epi32(ai, bi); +#else +#ifdef LV_HAVE_SSE + __m128i ai = _mm_cvttps_epi32(a); + __m128i bi = _mm_cvttps_epi32(b); + return _mm_packs_epi32(ai, bi); +#endif /* LV_HAVE_SSE */ +#endif /* LV_HAVE_AVX2 */ +} + +#endif /* SRSLTE_SIMD_F_SIZE && SRSLTE_SIMD_C16_SIZE */ #endif //SRSLTE_SIMD_H_H diff --git a/lib/include/srslte/phy/utils/vector.h b/lib/include/srslte/phy/utils/vector.h index 0fadfb334..9b99c6fff 100644 --- a/lib/include/srslte/phy/utils/vector.h +++ b/lib/include/srslte/phy/utils/vector.h @@ -123,6 +123,7 @@ SRSLTE_API void srslte_vec_interleave_cf(float *real, float *imag, cf_t *x, uint /* vector product (element-wise) */ SRSLTE_API void srslte_vec_prod_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len); +SRSLTE_API void srslte_vec_prod_ccc_split(float *x_re, float *x_im, float *y_re, float *y_im, float *z_re, float *z_im, uint32_t len); /* vector product (element-wise) */ SRSLTE_API void srslte_vec_prod_cfc(cf_t *x, float *y, cf_t *z, uint32_t len); @@ -142,8 +143,8 @@ SRSLTE_API float srslte_vec_dot_prod_fff(float *x, float *y, uint32_t len); SRSLTE_API int32_t srslte_vec_dot_prod_sss(int16_t *x, int16_t *y, uint32_t len); /* z=x/y vector division (element-wise) */ -SRSLTE_API void srslte_vec_div_ccc(cf_t *x, cf_t *y, float *y_mod, cf_t *z, float *z_real, float *z_imag, uint32_t len); -void srslte_vec_div_cfc(cf_t *x, float *y, cf_t *z, float *z_real, float *z_imag, uint32_t len); +SRSLTE_API void srslte_vec_div_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len); +SRSLTE_API void srslte_vec_div_cfc(cf_t *x, float *y, cf_t *z, uint32_t len); SRSLTE_API void srslte_vec_div_fff(float *x, float *y, float *z, uint32_t len); /* conjugate */ diff --git a/lib/include/srslte/phy/utils/vector_simd.h b/lib/include/srslte/phy/utils/vector_simd.h index 4ee839fab..294cff50f 100644 --- a/lib/include/srslte/phy/utils/vector_simd.h +++ b/lib/include/srslte/phy/utils/vector_simd.h @@ -36,26 +36,29 @@ extern "C" { #include "srslte/config.h" #ifdef LV_HAVE_AVX512 +#define SRSLTE_SIMD_BIT_ALIGN 512 #define SRSLTE_IS_ALIGNED(PTR) (((size_t)(PTR) & 0x3F) == 0) #else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX +#define SRSLTE_SIMD_BIT_ALIGN 256 #define SRSLTE_IS_ALIGNED(PTR) (((size_t)(PTR) & 0x1F) == 0) #else /* LV_HAVE_AVX */ #ifdef LV_HAVE_SSE +#define SRSLTE_SIMD_BIT_ALIGN 128 #define SRSLTE_IS_ALIGNED(PTR) (((size_t)(PTR) & 0x0F) == 0) #else /* LV_HAVE_SSE */ +#define SRSLTE_SIMD_BIT_ALIGN 64 #define SRSLTE_IS_ALIGNED(PTR) (1) #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX */ #endif /* LV_HAVE_AVX512 */ -SRSLTE_API int srslte_vec_dot_prod_sss_simd(int16_t *x, int16_t *y, int len); - +/* SIMD Basic vector math */ SRSLTE_API void srslte_vec_sum_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len); SRSLTE_API void srslte_vec_sub_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len); -SRSLTE_API void srslte_vec_sub_sss_avx2(short *x, short *y, short *z, uint32_t len); +SRSLTE_API float srslte_vec_acc_ff_simd(float *x, int len); SRSLTE_API cf_t srslte_vec_acc_cc_simd(cf_t *x, int len); @@ -63,59 +66,63 @@ SRSLTE_API void srslte_vec_add_fff_simd(float *x, float *y, float *z, int len); SRSLTE_API void srslte_vec_sub_fff_simd(float *x, float *y, float *z, int len); +/* SIMD Vector Scalar Product */ +SRSLTE_API void srslte_vec_sc_prod_cfc_simd(const cf_t *x,const float h,cf_t *y,const int len); + SRSLTE_API void srslte_vec_sc_prod_fff_simd(float *x, float h, float *z, int len); SRSLTE_API void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, int len); +/* SIMD Vector Product */ +SRSLTE_API void srslte_vec_prod_ccc_split_simd(float *a_re, float *a_im, float *b_re, float *b_im, float *r_re, float *r_im, int len); + +SRSLTE_API void srslte_vec_prod_ccc_c16_simd(int16_t *a_re, int16_t *a_im, int16_t *b_re, int16_t *b_im, int16_t *r_re, + int16_t *r_im, int len); + +SRSLTE_API void srslte_vec_prod_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len); + +SRSLTE_API void srslte_vec_prod_cfc_simd(cf_t *x, float *y, cf_t *z, int len); + SRSLTE_API void srslte_vec_prod_fff_simd(float *x, float *y, float *z, int len); SRSLTE_API void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, int len); SRSLTE_API void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, int len); -SRSLTE_API void srslte_vec_prod_ccc_cf_simd(float *a_re, float *a_im, float *b_re, float *b_im, float *r_re, float *r_im, int len); +/* SIMD Division */ +SRSLTE_API void srslte_vec_div_ccc_simd(cf_t *x,cf_t *y, cf_t *z, int len); -SRSLTE_API void srslte_vec_prod_ccc_c16_simd(int16_t *a_re, int16_t *a_im, int16_t *b_re, int16_t *b_im, int16_t *r_re, - int16_t *r_im, int len); +SRSLTE_API void srslte_vec_div_cfc_simd(cf_t *x, float *y, cf_t *z, int len); -SRSLTE_API void srslte_vec_prod_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len); +SRSLTE_API void srslte_vec_div_fff_simd(float *x, float *y, float *z, int len); +/* SIMD Dot product */ SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, int len); SRSLTE_API cf_t srslte_vec_dot_prod_ccc_simd(cf_t *x, cf_t *y, int len); -SRSLTE_API cf_t srslte_vec_dot_prod_ccc_sse(cf_t *x, cf_t *y, uint32_t len); - SRSLTE_API c16_t srslte_vec_dot_prod_ccc_c16i_simd(c16_t *x, c16_t *y, int len); -SRSLTE_API void srslte_vec_sc_div2_sss_avx2(short *x, int k, short *z, uint32_t len); +SRSLTE_API int srslte_vec_dot_prod_sss_simd(int16_t *x, int16_t *y, int len); +/* SIMD Modulus functions */ SRSLTE_API void srslte_vec_abs_cf_simd(cf_t *x, float *z, int len); SRSLTE_API void srslte_vec_abs_square_cf_simd(cf_t *x, float *z, int len); -SRSLTE_API void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len); +/* Other Functions */ +SRSLTE_API void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, int len); -SRSLTE_API void srslte_vec_prod_sss_avx(short *x, short *y, short *z, uint32_t len); - -SRSLTE_API void srslte_vec_sc_div2_sss_sse(short *x, int n_rightshift, short *z, uint32_t len); - -SRSLTE_API void srslte_vec_sc_div2_sss_avx(short *x, int k, short *z, uint32_t len); - -SRSLTE_API void srslte_vec_lut_sss_sse(short *x, unsigned short *lut, short *y, uint32_t len); - -SRSLTE_API void srslte_vec_convert_fi_sse(float *x, int16_t *z, float scale, uint32_t len); - -SRSLTE_API void srslte_vec_mult_scalar_cf_f_avx( cf_t *z,const cf_t *x,const float h,const uint32_t len); - -SRSLTE_API void srslte_vec_lut_sss_sse(short *x, unsigned short *lut, short *y, uint32_t len); - -SRSLTE_API void srslte_vec_convert_fi_sse(float *x, int16_t *z, float scale, uint32_t len); - -SRSLTE_API void srslte_vec_sc_prod_cfc_simd(const cf_t *x,const float h,cf_t *y,const int len); +SRSLTE_API void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, int len); SRSLTE_API void srslte_vec_cp_simd(cf_t *src, cf_t *dst, int len); + +/* SIMD Find Max functions */ +SRSLTE_API uint32_t srslte_vec_max_fi_simd(float *x, int len); + +SRSLTE_API uint32_t srslte_vec_max_ci_simd(cf_t *x, int len); + #ifdef __cplusplus } #endif diff --git a/lib/src/phy/mimo/precoding.c b/lib/src/phy/mimo/precoding.c index f1aab3b5d..8f8bd7737 100644 --- a/lib/src/phy/mimo/precoding.c +++ b/lib/src/phy/mimo/precoding.c @@ -36,17 +36,16 @@ #ifdef LV_HAVE_SSE #include -#include "srslte/phy/utils/mat.h" int srslte_predecoding_single_sse(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS], cf_t *x, int nof_rxant, int nof_symbols, float noise_estimate); int srslte_predecoding_diversity2_sse(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS], cf_t *x[SRSLTE_MAX_LAYERS], int nof_rxant, int nof_symbols); #endif #ifdef LV_HAVE_AVX #include -#include "srslte/phy/utils/mat.h" int srslte_predecoding_single_avx(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS], cf_t *x, int nof_rxant, int nof_symbols, float noise_estimate); #endif +#include "srslte/phy/utils/mat.h" static srslte_mimo_decoder_t mimo_decoder = SRSLTE_MIMO_DECODER_MMSE; diff --git a/lib/src/phy/sync/find_sss.c b/lib/src/phy/sync/find_sss.c index 2afeced42..082aee52a 100644 --- a/lib/src/phy/sync/find_sss.c +++ b/lib/src/phy/sync/find_sss.c @@ -70,14 +70,12 @@ static void corr_all_sz_partial(cf_t z[SRSLTE_SSS_N], float s[SRSLTE_SSS_N][SRSL static void extract_pair_sss(srslte_sss_synch_t *q, cf_t *input, cf_t *ce, cf_t y[2][SRSLTE_SSS_N]) { cf_t input_fft[SRSLTE_SYMBOL_SZ_MAX]; - float ce_mod[2*SRSLTE_SSS_N], z_real[2*SRSLTE_SSS_N], z_imag[2*SRSLTE_SSS_N]; - + srslte_dft_run_c(&q->dftp_input, input, input_fft); if (ce) { - srslte_vec_div_ccc(&input_fft[q->fft_size/2-SRSLTE_SSS_N], ce, ce_mod, - &input_fft[q->fft_size/2-SRSLTE_SSS_N], z_real, z_imag, - 2*SRSLTE_SSS_N); + srslte_vec_div_ccc(&input_fft[q->fft_size/2-SRSLTE_SSS_N], ce, + &input_fft[q->fft_size/2-SRSLTE_SSS_N], 2*SRSLTE_SSS_N); } for (int i = 0; i < SRSLTE_SSS_N; i++) { diff --git a/lib/src/phy/utils/test/CMakeLists.txt b/lib/src/phy/utils/test/CMakeLists.txt index 76df7ac59..1f5c66827 100644 --- a/lib/src/phy/utils/test/CMakeLists.txt +++ b/lib/src/phy/utils/test/CMakeLists.txt @@ -44,4 +44,5 @@ add_test(algebra_2x2_zf_solver_test algebra_test -z) add_test(algebra_2x2_mmse_solver_test algebra_test -m) add_executable(vector_test vector_test.c) -target_link_libraries(vector_test srslte_phy) \ No newline at end of file +target_link_libraries(vector_test srslte_phy) +add_test(vector_test vector_test) diff --git a/lib/src/phy/utils/test/vector_test.c b/lib/src/phy/utils/test/vector_test.c index 05dce1d35..cf0f38926 100644 --- a/lib/src/phy/utils/test/vector_test.c +++ b/lib/src/phy/utils/test/vector_test.c @@ -89,6 +89,26 @@ float squared_error (cf_t a, cf_t b) { return diff_re*diff_re + diff_im*diff_im; } +TEST(srslte_vec_acc_ff, + MALLOC(float, x); + float z; + + cf_t gold = 0.0f; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_F(); + } + + TEST_CALL(z = srslte_vec_acc_ff(x, block_size)) + + for (int i = 0; i < block_size; i++) { + gold += x[i]; + } + + mse += fabs(gold - z) / gold; + + free(x); +) + TEST(srslte_vec_dot_prod_sss, MALLOC(int16_t, x); MALLOC(int16_t, y); @@ -314,6 +334,37 @@ TEST(srslte_vec_prod_ccc, free(z); ) +TEST(srslte_vec_prod_ccc_split, + MALLOC(float, x_re); + MALLOC(float, x_im); + MALLOC(float, y_re); + MALLOC(float, y_im); + MALLOC(float, z_re); + MALLOC(float, z_im); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x_re[i] = RANDOM_F(); + x_im[i] = RANDOM_F(); + y_re[i] = RANDOM_F(); + y_im[i] = RANDOM_F(); + } + + TEST_CALL(srslte_vec_prod_ccc_split(x_re, x_im, y_re, y_im, z_re, z_im, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = (x_re[i] + I * x_im[i]) * (y_re[i] + I * y_im[i]); + mse += cabsf(gold - (z_re[i] + I*z_im[i])); + } + + free(x_re); + free(x_im); + free(y_re); + free(y_im); + free(z_re); + free(z_im); +) + TEST(srslte_vec_prod_conj_ccc, MALLOC(cf_t, x); MALLOC(cf_t, y); @@ -357,6 +408,27 @@ TEST(srslte_vec_sc_prod_ccc, free(z); ) +TEST(srslte_vec_convert_fi, + MALLOC(float, x); + MALLOC(short, z); + float scale = 1000.0f; + + short gold; + for (int i = 0; i < block_size; i++) { + x[i] = (float) RANDOM_F(); + } + + TEST_CALL(srslte_vec_convert_fi(x, z, scale, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = (short) ((x[i] * scale)); + mse += cabsf((float)gold - (float) z[i]); + } + + free(x); + free(z); +) + TEST(srslte_vec_prod_fff, MALLOC(float, x); MALLOC(float, y); @@ -376,6 +448,30 @@ TEST(srslte_vec_prod_fff, } free(x); + free(y); + free(z); +) + +TEST(srslte_vec_prod_cfc, + MALLOC(cf_t, x); + MALLOC(float, y); + MALLOC(cf_t, z); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_F(); + } + + TEST_CALL(srslte_vec_prod_cfc(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] * y[i]; + mse += cabsf(gold - z[i]); + } + + free(x); + free(y); free(z); ) @@ -461,66 +557,216 @@ TEST(srslte_vec_sc_prod_cfc, free(z); ) +TEST(srslte_vec_div_ccc, + MALLOC(cf_t, x); + MALLOC(cf_t, y); + MALLOC(cf_t, z); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_CF(); + } + + TEST_CALL(srslte_vec_div_ccc(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] / y[i]; + mse += cabsf(gold - z[i]); + } + mse /= block_size; + + free(x); + free(y); + free(z); +) + + +TEST(srslte_vec_div_cfc, + MALLOC(cf_t, x); + MALLOC(float, y); + MALLOC(cf_t, z); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + y[i] = RANDOM_F(); + } + + TEST_CALL(srslte_vec_div_cfc(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] / y[i]; + mse += cabsf(gold - z[i])/cabsf(gold); + } + mse /= block_size; + + free(x); + free(y); + free(z); +) + + +TEST(srslte_vec_div_fff, + MALLOC(float, x); + MALLOC(float, y); + MALLOC(float, z); + + cf_t gold; + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_F(); + y[i] = RANDOM_F(); + } + + TEST_CALL(srslte_vec_div_fff(x, y, z, block_size)) + + for (int i = 0; i < block_size; i++) { + gold = x[i] / y[i]; + mse += cabsf(gold - z[i]); + } + mse /= block_size; + + free(x); + free(y); + free(z); +) + +TEST(srslte_vec_max_fi, + MALLOC(float, x); + + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_F(); + } + + uint32_t max_index = 0; + TEST_CALL(max_index = srslte_vec_max_fi(x, block_size);) + + float gold_value = -INFINITY; + uint32_t gold_index = 0; + for (int i = 0; i < block_size; i++) { + if (gold_value < x[i]) { + gold_value = x[i]; + gold_index = i; + } + } + mse = (gold_index != max_index) ? 1:0; + + free(x); +) + +TEST(srslte_vec_max_abs_ci, + MALLOC(cf_t, x); + + for (int i = 0; i < block_size; i++) { + x[i] = RANDOM_CF(); + } + + uint32_t max_index = 0; + TEST_CALL(max_index = srslte_vec_max_abs_ci(x, block_size);) + + float gold_value = -INFINITY; + uint32_t gold_index = 0; + for (int i = 0; i < block_size; i++) { + cf_t a = x[i]; + float abs2 = __real__ a * __real__ a + __imag__ a * __imag__ a; + if (abs2 > gold_value) { + gold_value = abs2; + gold_index = (uint32_t)i; + } + } + mse = (gold_index != max_index) ? 1:0; + + free(x); +) + int main(int argc, char **argv) { char func_names[MAX_FUNCTIONS][32]; double timmings[MAX_FUNCTIONS][MAX_BLOCKS]; uint32_t sizes[32]; uint32_t size_count = 0; uint32_t func_count = 0; - bool passed = true; + bool passed[MAX_FUNCTIONS][MAX_BLOCKS]; + bool all_passed = true; for (uint32_t block_size = 1; block_size <= 1024*8; block_size *= 2) { func_count = 0; - passed &= test_srslte_vec_dot_prod_sss(func_names[func_count], &timmings[func_count][size_count], block_size); + passed[func_count][size_count] = test_srslte_vec_acc_ff(func_names[func_count], &timmings[func_count][size_count], block_size); func_count++; - passed &= test_srslte_vec_sum_sss(func_names[func_count], &timmings[func_count][size_count], block_size); + passed[func_count][size_count] = test_srslte_vec_dot_prod_sss(func_names[func_count], &timmings[func_count][size_count], block_size); func_count++; - passed &= test_srslte_vec_sub_sss(func_names[func_count], &timmings[func_count][size_count], block_size); + passed[func_count][size_count] = test_srslte_vec_sum_sss(func_names[func_count], &timmings[func_count][size_count], block_size); func_count++; - passed &= test_srslte_vec_prod_sss(func_names[func_count], &timmings[func_count][size_count], block_size); + passed[func_count][size_count] = test_srslte_vec_sub_sss(func_names[func_count], &timmings[func_count][size_count], block_size); func_count++; - passed &= test_srslte_vec_acc_cc(func_names[func_count], &timmings[func_count][size_count], block_size); + passed[func_count][size_count] = test_srslte_vec_prod_sss(func_names[func_count], &timmings[func_count][size_count], block_size); func_count++; - passed &= test_srslte_vec_sum_fff(func_names[func_count], &timmings[func_count][size_count], block_size); + passed[func_count][size_count] = test_srslte_vec_acc_cc(func_names[func_count], &timmings[func_count][size_count], block_size); func_count++; - passed &= test_srslte_vec_sub_fff(func_names[func_count], &timmings[func_count][size_count], block_size); + passed[func_count][size_count] = test_srslte_vec_sum_fff(func_names[func_count], &timmings[func_count][size_count], block_size); func_count++; - passed &= test_srslte_vec_dot_prod_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + passed[func_count][size_count] = test_srslte_vec_sub_fff(func_names[func_count], &timmings[func_count][size_count], block_size); func_count++; - passed &= test_srslte_vec_dot_prod_conj_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + passed[func_count][size_count] = test_srslte_vec_dot_prod_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); func_count++; - passed &= test_srslte_vec_prod_fff(func_names[func_count], &timmings[func_count][size_count], block_size); + passed[func_count][size_count] = test_srslte_vec_dot_prod_conj_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); func_count++; - passed &= test_srslte_vec_prod_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + passed[func_count][size_count] = test_srslte_vec_convert_fi(func_names[func_count], &timmings[func_count][size_count], block_size); func_count++; - passed &= test_srslte_vec_prod_conj_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + passed[func_count][size_count] = test_srslte_vec_prod_fff(func_names[func_count], &timmings[func_count][size_count], block_size); func_count++; - passed &= test_srslte_vec_sc_prod_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + passed[func_count][size_count] = test_srslte_vec_prod_cfc(func_names[func_count], &timmings[func_count][size_count], block_size); func_count++; - passed &= test_srslte_vec_sc_prod_fff(func_names[func_count], &timmings[func_count][size_count], block_size); + passed[func_count][size_count] = test_srslte_vec_prod_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); func_count++; - passed &= test_srslte_vec_abs_cf(func_names[func_count], &timmings[func_count][size_count], block_size); + passed[func_count][size_count] = test_srslte_vec_prod_ccc_split(func_names[func_count], &timmings[func_count][size_count], block_size); func_count++; - passed &= test_srslte_vec_abs_square_cf(func_names[func_count], &timmings[func_count][size_count], block_size); + passed[func_count][size_count] = test_srslte_vec_prod_conj_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); func_count++; - passed &= test_srslte_vec_sc_prod_cfc(func_names[func_count], &timmings[func_count][size_count], block_size); + passed[func_count][size_count] = test_srslte_vec_sc_prod_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_sc_prod_fff(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_abs_cf(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_abs_square_cf(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_sc_prod_cfc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_div_ccc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_div_cfc(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_div_fff(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_max_fi(func_names[func_count], &timmings[func_count][size_count], block_size); + func_count++; + + passed[func_count][size_count] = test_srslte_vec_max_abs_ci(func_names[func_count], &timmings[func_count][size_count], block_size); func_count++; sizes[size_count] = block_size; @@ -546,10 +792,11 @@ int main(int argc, char **argv) { for (int i = 0; i < func_count; i++) { printf("%32s | ", func_names[i]); for (int j = 0; j < size_count; j++) { - printf(" %7.1f", (double) NOF_REPETITIONS*(double)sizes[j]/timmings[i][j]); + printf(" %s%7.1f\x1b[0m", (passed[i][j])?"":"\x1B[31m", (double) NOF_REPETITIONS*(double)sizes[j]/timmings[i][j]); + all_passed &= passed[i][j]; } printf(" |\n"); } - return (passed)?SRSLTE_SUCCESS:SRSLTE_ERROR; + return (all_passed)?SRSLTE_SUCCESS:SRSLTE_ERROR; } diff --git a/lib/src/phy/utils/vector.c b/lib/src/phy/utils/vector.c index cb21f24f1..f85dbca0a 100644 --- a/lib/src/phy/utils/vector.c +++ b/lib/src/phy/utils/vector.c @@ -48,18 +48,7 @@ int srslte_vec_acc_ii(int *x, uint32_t len) { // Used in PRACH detector, AGC and chest_dl for noise averaging float srslte_vec_acc_ff(float *x, uint32_t len) { -#ifdef HAVE_VOLK_ACC_FUNCTION - float result; - volk_32f_accumulator_s32f(&result,x,len); - return result; -#else - int i; - float z=0; - for (i=0;im) { - m=x[i]; - p=i; - } - } - return p; -#endif -#endif + return srslte_vec_max_fi_simd(x, len); } int16_t srslte_vec_max_star_si(int16_t *x, uint32_t len) { @@ -616,30 +541,7 @@ void srslte_vec_max_fff(float *x, float *y, float *z, uint32_t len) { // CP autocorr uint32_t srslte_vec_max_abs_ci(cf_t *x, uint32_t len) { -#ifdef HAVE_VOLK_MAX_ABS_FUNCTION_32 - uint32_t target=0; - volk_32fc_index_max_32u(&target,x,len); - return target; -#else -#ifdef HAVE_VOLK_MAX_ABS_FUNCTION_16 - uint32_t target=0; - volk_32fc_index_max_16u(&target,x,len); - return target; -#else - uint32_t i; - float m=-FLT_MAX; - uint32_t p=0; - float tmp; - for (i=0;im) { - m=tmp; - p=i; - } - } - return p; -#endif -#endif + return srslte_vec_max_ci_simd(x, len); } void srslte_vec_quant_fuc(float *in, uint8_t *out, float gain, float offset, float clip, uint32_t len) { diff --git a/lib/src/phy/utils/vector_simd.c b/lib/src/phy/utils/vector_simd.c index 2eb0428b7..2dd354548 100644 --- a/lib/src/phy/utils/vector_simd.c +++ b/lib/src/phy/utils/vector_simd.c @@ -232,143 +232,113 @@ void srslte_vec_sc_div2_sss_avx2(short *x, int k, short *z, uint32_t len) /* No improvement with AVX */ -void srslte_vec_lut_sss_sse(short *x, unsigned short *lut, short *y, uint32_t len) -{ -#ifdef DEBUG_MODE - for (int i=0;i max_value) { + max_value = values_buffer[k]; + max_index = (uint32_t) indexes_buffer[k]; + } + } +#endif /* SRSLTE_SIMD_I_SIZE */ + + for (; i < len; i++) { + if (x[i] > max_value) { + max_value = x[i]; + max_index = (uint32_t)i; + } + } + + return max_index; +} + +uint32_t srslte_vec_max_ci_simd(cf_t *x, int len) { + int i = 0; + + float max_value = -INFINITY; + uint32_t max_index = 0; + +#if SRSLTE_SIMD_I_SIZE + __attribute__ ((aligned (SRSLTE_SIMD_I_SIZE*sizeof(int)))) int indexes_buffer[SRSLTE_SIMD_I_SIZE] = {0}; + __attribute__ ((aligned (SRSLTE_SIMD_I_SIZE*sizeof(float)))) float values_buffer[SRSLTE_SIMD_I_SIZE] = {0}; + + for (int k = 0; k < SRSLTE_SIMD_I_SIZE; k++) indexes_buffer[k] = k; + simd_i_t simd_inc = srslte_simd_i_set1(SRSLTE_SIMD_I_SIZE); + simd_i_t simd_indexes = srslte_simd_i_load(indexes_buffer); + simd_i_t simd_max_indexes = srslte_simd_i_set1(0); + + simd_f_t simd_max_values = srslte_simd_f_set1(-INFINITY); + + if (SRSLTE_IS_ALIGNED(x)) { + for (; i < len - SRSLTE_SIMD_I_SIZE + 1; i += SRSLTE_SIMD_I_SIZE) { + simd_f_t x1 = srslte_simd_f_load((float *) &x[i]); + simd_f_t x2 = srslte_simd_f_load((float *) &x[i + SRSLTE_SIMD_F_SIZE / 2]); + + simd_f_t mul1 = srslte_simd_f_mul(x1, x1); + simd_f_t mul2 = srslte_simd_f_mul(x2, x2); + + simd_f_t z1 = srslte_simd_f_hadd(mul1, mul2); + + simd_i_t res = srslte_simd_f_max(z1, simd_max_values); + + simd_max_indexes = srslte_simd_i_select(simd_max_indexes, simd_indexes, res); + simd_max_values = (simd_f_t) srslte_simd_i_select((simd_i_t) simd_max_values, (simd_i_t) z1, res); + simd_indexes = srslte_simd_i_add(simd_indexes, simd_inc); + } + } else { + for (; i < len - SRSLTE_SIMD_I_SIZE + 1; i += SRSLTE_SIMD_I_SIZE) { + simd_f_t x1 = srslte_simd_f_loadu((float *) &x[i]); + simd_f_t x2 = srslte_simd_f_loadu((float *) &x[i + SRSLTE_SIMD_F_SIZE / 2]); + + simd_f_t mul1 = srslte_simd_f_mul(x1, x1); + simd_f_t mul2 = srslte_simd_f_mul(x2, x2); + + simd_f_t z1 = srslte_simd_f_hadd(mul1, mul2); + + simd_i_t res = srslte_simd_f_max(z1, simd_max_values); + + simd_max_indexes = srslte_simd_i_select(simd_max_indexes, simd_indexes, res); + simd_max_values = (simd_f_t) srslte_simd_i_select((simd_i_t) simd_max_values, (simd_i_t) z1, res); + simd_indexes = srslte_simd_i_add(simd_indexes, simd_inc); + } + } + + srslte_simd_i_store(indexes_buffer, simd_max_indexes); + srslte_simd_f_store(values_buffer, simd_max_values); + + for (int k = 0; k < SRSLTE_SIMD_I_SIZE; k++) { + if (values_buffer[k] > max_value) { + max_value = values_buffer[k]; + max_index = (uint32_t) indexes_buffer[k]; + } + } +#endif /* SRSLTE_SIMD_I_SIZE */ + + for (; i < len; i++) { + cf_t a = x[i]; + float abs2 = __real__ a * __real__ a + __imag__ a * __imag__ a; + if (abs2 > max_value) { + max_value = abs2; + max_index = (uint32_t)i; + } + } + + return max_index; +} From 94a06867a3c3efbb8c6eb36e1ac2fa5f1aa2dc07 Mon Sep 17 00:00:00 2001 From: Xavier Arteaga Date: Fri, 29 Sep 2017 16:42:46 +0200 Subject: [PATCH 23/55] Optimized SIMD includes and solved AVX512 bugs --- CMakeLists.txt | 4 +- lib/include/srslte/phy/utils/mat.h | 3 - lib/include/srslte/phy/utils/simd.h | 97 ++++++++++++++++++++-------- lib/src/phy/mimo/precoding.c | 5 +- lib/src/phy/utils/test/mat_test.c | 1 - lib/src/phy/utils/test/vector_test.c | 2 - lib/src/phy/utils/vector_simd.c | 10 +-- 7 files changed, 79 insertions(+), 43 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 28afbc0d8..efaa1973a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -283,8 +283,8 @@ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang") endif (HAVE_AVX2) if (HAVE_AVX512) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -DLV_HAVE_AVX512") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -DLV_HAVE_AVX512") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512cd -DLV_HAVE_AVX512") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512cd -DLV_HAVE_AVX512") endif(HAVE_AVX512) if(NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug") diff --git a/lib/include/srslte/phy/utils/mat.h b/lib/include/srslte/phy/utils/mat.h index 942559955..339cfea23 100644 --- a/lib/include/srslte/phy/utils/mat.h +++ b/lib/include/srslte/phy/utils/mat.h @@ -60,7 +60,6 @@ SRSLTE_API float srslte_mat_2x2_cn(cf_t h00, #ifdef LV_HAVE_SSE -#include /* SSE implementation for complex reciprocal */ SRSLTE_API __m128 srslte_mat_cf_recip_sse(__m128 a); @@ -84,8 +83,6 @@ SRSLTE_API void srslte_mat_2x2_mmse_sse(__m128 y0, __m128 y1, #ifdef LV_HAVE_AVX -#include - /* AVX implementation for complex reciprocal */ SRSLTE_API __m256 srslte_mat_cf_recip_avx(__m256 a); diff --git a/lib/include/srslte/phy/utils/simd.h b/lib/include/srslte/phy/utils/simd.h index 9a5f15dbb..2590794f2 100644 --- a/lib/include/srslte/phy/utils/simd.h +++ b/lib/include/srslte/phy/utils/simd.h @@ -27,7 +27,12 @@ #ifndef SRSLTE_SIMD_H_H #define SRSLTE_SIMD_H_H +#ifdef LV_HAVE_SSE /* AVX, AVX2, FMA, AVX512 are in this group */ +#ifndef __OPTIMIZE__ +#define __OPTIMIZE__ +#endif #include +#endif /* LV_HAVE_SSE */ /* * SSE Macros @@ -233,7 +238,7 @@ static inline simd_f_t srslte_simd_f_mul(simd_f_t a, simd_f_t b) { static inline simd_f_t srslte_simd_f_rcp(simd_f_t a) { #ifdef LV_HAVE_AVX512 - return _mm512_rcp_ps(a); + return _mm512_rcp14_ps(a); #else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX2 return _mm256_rcp_ps(a); @@ -372,10 +377,16 @@ typedef struct { static inline simd_cf_t srslte_simd_cfi_load(cf_t *ptr) { simd_cf_t ret; #ifdef LV_HAVE_AVX512 - __m512 in1 = _mm512_permute_ps(_mm512_load_ps((float*)(ptr)), 0b11011000); - __m512 in2 = _mm512_permute_ps(_mm512_load_ps((float*)(ptr + 8)), 0b11011000); - ret.re = _mm512_unpacklo_ps(in1, in2); - ret.im = _mm512_unpackhi_ps(in1, in2); + __m512 in1 = _mm512_load_ps((float*)(ptr)); + __m512 in2 = _mm512_load_ps((float*)(ptr + SRSLTE_SIMD_CF_SIZE/2)); + ret.re = _mm512_permutex2var_ps(in1, _mm512_setr_epi32(0x00, 0x02, 0x04, 0x06, + 0x08, 0x0A, 0x0C, 0x0E, + 0x10, 0x12, 0x14, 0x16, + 0x18, 0x1A, 0x1C, 0x1E), in2); + ret.im = _mm512_permutex2var_ps(in1, _mm512_setr_epi32(0x01, 0x03, 0x05, 0x07, + 0x09, 0x0B, 0x0D, 0x0F, + 0x11, 0x13, 0x15, 0x17, + 0x19, 0x1B, 0x1D, 0x1F), in2); #else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX2 __m256 in1 = _mm256_permute_ps(_mm256_load_ps((float*)(ptr)), 0b11011000); @@ -398,10 +409,16 @@ static inline simd_cf_t srslte_simd_cfi_load(cf_t *ptr) { static inline simd_cf_t srslte_simd_cfi_loadu(cf_t *ptr) { simd_cf_t ret; #ifdef LV_HAVE_AVX512 - __m512 in1 = _mm512_permute_ps(_mm512_loadu_ps((float*)(ptr)), 0b11011000); - __m512 in2 = _mm512_permute_ps(_mm512_loadu_ps((float*)(ptr + 8)), 0b11011000); - ret.re = _mm512_unpacklo_ps(in1, in2); - ret.im = _mm512_unpackhi_ps(in1, in2); + __m512 in1 = _mm512_loadu_ps((float*)(ptr)); + __m512 in2 = _mm512_loadu_ps((float*)(ptr + SRSLTE_SIMD_CF_SIZE/2)); + ret.re = _mm512_permutex2var_ps(in1, _mm512_setr_epi32(0x00, 0x02, 0x04, 0x06, + 0x08, 0x0A, 0x0C, 0x0E, + 0x10, 0x12, 0x14, 0x16, + 0x18, 0x1A, 0x1C, 0x1E), in2); + ret.im = _mm512_permutex2var_ps(in1, _mm512_setr_epi32(0x01, 0x03, 0x05, 0x07, + 0x09, 0x0B, 0x0D, 0x0F, + 0x11, 0x13, 0x15, 0x17, + 0x19, 0x1B, 0x1D, 0x1F), in2); #else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX2 __m256 in1 = _mm256_permute_ps(_mm256_loadu_ps((float*)(ptr)), 0b11011000); @@ -460,10 +477,16 @@ static inline simd_cf_t srslte_simd_cf_loadu(float *re, float *im) { static inline void srslte_simd_cfi_store(cf_t *ptr, simd_cf_t simdreg) { #ifdef LV_HAVE_AVX512 - __m512 out1 = _mm512_permute_ps(simdreg.re, 0b11011000); - __m512 out2 = _mm512_permute_ps(simdreg.im, 0b11011000); - _mm512_store_ps((float*)(ptr), _mm512_unpacklo_ps(out1, out2)); - _mm512_store_ps((float*)(ptr + 8), _mm512_unpackhi_ps(out1, out2)); + __m512 s1 = _mm512_permutex2var_ps(simdreg.re, _mm512_setr_epi32(0x00, 0x10, 0x01, 0x11, + 0x02, 0x12, 0x03, 0x13, + 0x04, 0x14, 0x05, 0x15, + 0x06, 0x16, 0x07, 0x17), simdreg.im); + __m512 s2 = _mm512_permutex2var_ps(simdreg.re, _mm512_setr_epi32(0x08, 0x18, 0x09, 0x19, + 0x0A, 0x1A, 0x0B, 0x1B, + 0x0C, 0x1C, 0x0D, 0x1D, + 0x0E, 0x1E, 0x0F, 0x1F), simdreg.im); + _mm512_store_ps((float*)(ptr), s1); + _mm512_store_ps((float*)(ptr + 8), s2); #else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX2 __m256 out1 = _mm256_permute_ps(simdreg.re, 0b11011000); @@ -481,10 +504,16 @@ static inline void srslte_simd_cfi_store(cf_t *ptr, simd_cf_t simdreg) { static inline void srslte_simd_cfi_storeu(cf_t *ptr, simd_cf_t simdreg) { #ifdef LV_HAVE_AVX512 - __m512 out1 = _mm512_permute_ps(simdreg.re, 0b11011000); - __m512 out2 = _mm512_permute_ps(simdreg.im, 0b11011000); - _mm512_storeu_ps((float*)(ptr), _mm512_unpacklo_ps(out1, out2)); - _mm512_storeu_ps((float*)(ptr + 8), _mm512_unpackhi_ps(out1, out2)); + __m512 s1 = _mm512_permutex2var_ps(simdreg.re, _mm512_setr_epi32(0x00, 0x10, 0x01, 0x11, + 0x02, 0x12, 0x03, 0x13, + 0x04, 0x14, 0x05, 0x15, + 0x06, 0x16, 0x07, 0x17), simdreg.im); + __m512 s2 = _mm512_permutex2var_ps(simdreg.re, _mm512_setr_epi32(0x08, 0x18, 0x09, 0x19, + 0x0A, 0x1A, 0x0B, 0x1B, + 0x0C, 0x1C, 0x0D, 0x1D, + 0x0E, 0x1E, 0x0F, 0x1F), simdreg.im); + _mm512_storeu_ps((float*)(ptr), s1); + _mm512_storeu_ps((float*)(ptr + 8), s2); #else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX2 __m256 out1 = _mm256_permute_ps(simdreg.re, 0b11011000); @@ -625,7 +654,6 @@ static inline simd_cf_t srslte_simd_cf_add (simd_cf_t a, simd_cf_t b) { static inline simd_cf_t srslte_simd_cf_mul (simd_cf_t a, simd_f_t b) { simd_cf_t ret; #ifdef LV_HAVE_AVX512 - b = _mm512_permutexvar_ps(b, _mm512_setr_epi32(0,4,1,5,2,6,3,7,8,12,9,13,10,14,11,15)); ret.re = _mm512_mul_ps(a.re, b); ret.im = _mm512_mul_ps(a.im, b); #else /* LV_HAVE_AVX512 */ @@ -649,7 +677,7 @@ static inline simd_cf_t srslte_simd_cf_rcp (simd_cf_t a) { simd_f_t a2re = _mm512_mul_ps(a.re, a.re); simd_f_t a2im = _mm512_mul_ps(a.im, a.im); simd_f_t mod2 = _mm512_add_ps(a2re, a2im); - simd_f_t rcp = _mm512_rcp_ps(mod2); + simd_f_t rcp = _mm512_rcp14_ps(mod2); simd_f_t neg_a_im = _mm512_xor_ps(_mm512_set1_ps(-0.0f), a.im); ret.re = _mm512_mul_ps(a.re, rcp); ret.im = _mm512_mul_ps(neg_a_im, rcp); @@ -702,12 +730,15 @@ static inline simd_cf_t srslte_simd_cf_zero (void) { #ifdef LV_HAVE_AVX512 typedef __m512i simd_i_t; +typedef __mmask16 simd_sel_t; #else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX2 typedef __m256i simd_i_t; +typedef __m256 simd_sel_t; #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE typedef __m128i simd_i_t; +typedef __m128i simd_sel_t; #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -768,12 +799,12 @@ static inline simd_i_t srslte_simd_i_add(simd_i_t a, simd_i_t b) { #endif /* LV_HAVE_AVX512 */ } -static inline simd_i_t srslte_simd_f_max(simd_f_t a, simd_f_t b) { +static inline simd_sel_t srslte_simd_f_max(simd_f_t a, simd_f_t b) { #ifdef LV_HAVE_AVX512 - return (simd_i_t) _mm512_cmp_ps_mask(a, b, _CMP_GT_OS); + return _mm512_cmp_ps_mask(a, b, _CMP_GT_OS); #else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX2 - return (simd_i_t) _mm256_cmp_ps(a, b, _CMP_GT_OS); + return _mm256_cmp_ps(a, b, _CMP_GT_OS); #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE return (simd_i_t) _mm_cmpgt_ps(a, b); @@ -782,15 +813,15 @@ static inline simd_i_t srslte_simd_f_max(simd_f_t a, simd_f_t b) { #endif /* LV_HAVE_AVX512 */ } -static inline simd_i_t srslte_simd_i_select(simd_i_t a, simd_i_t b, simd_i_t selector) { +static inline simd_i_t srslte_simd_i_select(simd_i_t a, simd_i_t b, simd_sel_t selector) { #ifdef LV_HAVE_AVX512 - return (__m512i) _mm512_blendv_ps((__m512)a, (__m512) b, (__m512) selector); + return (__m512i) _mm512_mask_blend_ps( selector, (__m512)a, (__m512) b); #else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX2 - return (__m256i) _mm256_blendv_ps((__m256) a,(__m256) b,(__m256) selector); + return (__m256i) _mm256_blendv_ps((__m256) a,(__m256) b, selector); #else #ifdef LV_HAVE_SSE - return (__m128i) _mm_blendv_ps((__m128)a, (__m128)b, (__m128)selector); + return (__m128i) _mm_blendv_ps((__m128)a, (__m128)b, selector); #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -1127,6 +1158,19 @@ static inline simd_c16_t srslte_simd_c16_zero (void) { #if SRSLTE_SIMD_F_SIZE && SRSLTE_SIMD_S_SIZE static inline simd_s_t srslte_simd_convert_2f_s(simd_f_t a, simd_f_t b) { +#ifdef LV_HAVE_AVX512 + __m512 aa = _mm512_permutex2var_ps(a, _mm512_setr_epi32(0x00, 0x01, 0x02, 0x03, + 0x08, 0x09, 0x0A, 0x0B, + 0x10, 0x11, 0x12, 0x13, + 0x18, 0x19, 0x1A, 0x1B), b); + __m512 bb = _mm512_permutex2var_ps(a, _mm512_setr_epi32(0x04, 0x05, 0x06, 0x07, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x14, 0x15, 0x16, 0x17, + 0x1C, 0x1D, 0x1E, 0x1F), b); + __m512i ai = _mm512_cvttps_epi32(aa); + __m512i bi = _mm512_cvttps_epi32(bb); + return _mm512_packs_epi32(ai, bi); +#else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX2 __m256 aa = _mm256_permute2f128_ps(a, b, 0x20); __m256 bb = _mm256_permute2f128_ps(a, b, 0x31); @@ -1140,6 +1184,7 @@ static inline simd_s_t srslte_simd_convert_2f_s(simd_f_t a, simd_f_t b) { return _mm_packs_epi32(ai, bi); #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ +#endif /* LV_HAVE_AVX512 */ } #endif /* SRSLTE_SIMD_F_SIZE && SRSLTE_SIMD_C16_SIZE */ diff --git a/lib/src/phy/mimo/precoding.c b/lib/src/phy/mimo/precoding.c index 8f8bd7737..21350524b 100644 --- a/lib/src/phy/mimo/precoding.c +++ b/lib/src/phy/mimo/precoding.c @@ -33,20 +33,17 @@ #include "srslte/phy/mimo/precoding.h" #include "srslte/phy/utils/vector.h" #include "srslte/phy/utils/debug.h" +#include "srslte/phy/utils/mat.h" #ifdef LV_HAVE_SSE -#include int srslte_predecoding_single_sse(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS], cf_t *x, int nof_rxant, int nof_symbols, float noise_estimate); int srslte_predecoding_diversity2_sse(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS], cf_t *x[SRSLTE_MAX_LAYERS], int nof_rxant, int nof_symbols); #endif #ifdef LV_HAVE_AVX -#include int srslte_predecoding_single_avx(cf_t *y[SRSLTE_MAX_PORTS], cf_t *h[SRSLTE_MAX_PORTS], cf_t *x, int nof_rxant, int nof_symbols, float noise_estimate); #endif -#include "srslte/phy/utils/mat.h" - static srslte_mimo_decoder_t mimo_decoder = SRSLTE_MIMO_DECODER_MMSE; /************************************************ diff --git a/lib/src/phy/utils/test/mat_test.c b/lib/src/phy/utils/test/mat_test.c index 46081da98..0bfb482a9 100644 --- a/lib/src/phy/utils/test/mat_test.c +++ b/lib/src/phy/utils/test/mat_test.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include "srslte/phy/utils/mat.h" diff --git a/lib/src/phy/utils/test/vector_test.c b/lib/src/phy/utils/test/vector_test.c index cf0f38926..8d5b9f2d6 100644 --- a/lib/src/phy/utils/test/vector_test.c +++ b/lib/src/phy/utils/test/vector_test.c @@ -29,9 +29,7 @@ #include #include #include -#include #include -#include #include #include diff --git a/lib/src/phy/utils/vector_simd.c b/lib/src/phy/utils/vector_simd.c index 2dd354548..109c99717 100644 --- a/lib/src/phy/utils/vector_simd.c +++ b/lib/src/phy/utils/vector_simd.c @@ -556,7 +556,7 @@ void srslte_vec_prod_cfc_simd(cf_t *x, float *y, cf_t *z, int len) { for (; i < len - SRSLTE_SIMD_F_SIZE + 1; i += SRSLTE_SIMD_F_SIZE) { simd_f_t s = srslte_simd_f_loadu(&y[i]); - simd_cf_t a = srslte_simd_cfi_load(&x[i]); + simd_cf_t a = srslte_simd_cfi_loadu(&x[i]); simd_cf_t r = srslte_simd_cf_mul(a, s); srslte_simd_cfi_storeu(&z[i], r); } @@ -1036,7 +1036,7 @@ uint32_t srslte_vec_max_fi_simd(float *x, int len) { for (; i < len - SRSLTE_SIMD_I_SIZE + 1; i += SRSLTE_SIMD_I_SIZE) { simd_f_t a = srslte_simd_f_load(&x[i]); - simd_i_t res = srslte_simd_f_max(a, simd_max_values); + simd_sel_t res = srslte_simd_f_max(a, simd_max_values); simd_max_indexes = srslte_simd_i_select(simd_max_indexes, simd_indexes, res); simd_max_values = (simd_f_t) srslte_simd_i_select((simd_i_t) simd_max_values, (simd_i_t) a, res); @@ -1046,7 +1046,7 @@ uint32_t srslte_vec_max_fi_simd(float *x, int len) { for (; i < len - SRSLTE_SIMD_I_SIZE + 1; i += SRSLTE_SIMD_I_SIZE) { simd_f_t a = srslte_simd_f_loadu(&x[i]); - simd_i_t res = srslte_simd_f_max(a, simd_max_values); + simd_sel_t res = srslte_simd_f_max(a, simd_max_values); simd_max_indexes = srslte_simd_i_select(simd_max_indexes, simd_indexes, res); simd_max_values = (simd_f_t) srslte_simd_i_select((simd_i_t) simd_max_values, (simd_i_t) a, res); @@ -1102,7 +1102,7 @@ uint32_t srslte_vec_max_ci_simd(cf_t *x, int len) { simd_f_t z1 = srslte_simd_f_hadd(mul1, mul2); - simd_i_t res = srslte_simd_f_max(z1, simd_max_values); + simd_sel_t res = srslte_simd_f_max(z1, simd_max_values); simd_max_indexes = srslte_simd_i_select(simd_max_indexes, simd_indexes, res); simd_max_values = (simd_f_t) srslte_simd_i_select((simd_i_t) simd_max_values, (simd_i_t) z1, res); @@ -1118,7 +1118,7 @@ uint32_t srslte_vec_max_ci_simd(cf_t *x, int len) { simd_f_t z1 = srslte_simd_f_hadd(mul1, mul2); - simd_i_t res = srslte_simd_f_max(z1, simd_max_values); + simd_sel_t res = srslte_simd_f_max(z1, simd_max_values); simd_max_indexes = srslte_simd_i_select(simd_max_indexes, simd_indexes, res); simd_max_values = (simd_f_t) srslte_simd_i_select((simd_i_t) simd_max_values, (simd_i_t) z1, res); From d6bdabfdc09ddd315e5285ae46e335f59557d9fd Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Fri, 29 Sep 2017 20:38:12 +0200 Subject: [PATCH 24/55] Changed all harq delays to variables --- lib/include/srslte/common/common.h | 6 +++++ srsenb/hdr/mac/scheduler_ue.h | 2 +- srsenb/hdr/mac/ue.h | 2 +- srsenb/hdr/phy/phch_common.h | 8 +++---- srsenb/src/mac/mac.cc | 6 ++--- srsenb/src/mac/scheduler.cc | 8 +++---- srsenb/src/mac/scheduler_harq.cc | 2 +- srsenb/src/mac/scheduler_ue.cc | 4 ++-- srsenb/src/phy/phch_common.cc | 4 ++-- srsenb/src/phy/phch_worker.cc | 35 +++++++++++++++--------------- srsue/hdr/mac/mac.h | 2 +- srsue/hdr/phy/phch_common.h | 2 +- srsue/src/mac/proc_bsr.cc | 2 +- srsue/src/phy/phch_common.cc | 14 ++++++------ srsue/src/phy/phch_recv.cc | 4 ++-- srsue/src/phy/phch_worker.cc | 26 +++++++++++----------- 16 files changed, 66 insertions(+), 61 deletions(-) diff --git a/lib/include/srslte/common/common.h b/lib/include/srslte/common/common.h index 7156fbfc9..ea89cc8b7 100644 --- a/lib/include/srslte/common/common.h +++ b/lib/include/srslte/common/common.h @@ -44,6 +44,12 @@ #define SRSLTE_N_DRB 8 #define SRSLTE_N_RADIO_BEARERS 11 +#define HARQ_SFMOD 10 +#define HARQ_DELAY_MS 4 +#define MSG3_DELAY_MS 6 +#define HARQ_TX(tti) ((tti+HARQ_DELAY_MS)%10240) +#define HARQ_RX(tti) ((tti+(2*HARQ_DELAY_MS))%10240) + // Cat 3 UE - Max number of DL-SCH transport block bits received within a TTI // 3GPP 36.306 Table 4.1.1 #define SRSLTE_MAX_BUFFER_SIZE_BITS 102048 diff --git a/srsenb/hdr/mac/scheduler_ue.h b/srsenb/hdr/mac/scheduler_ue.h index b59461140..c454f1edf 100644 --- a/srsenb/hdr/mac/scheduler_ue.h +++ b/srsenb/hdr/mac/scheduler_ue.h @@ -173,7 +173,7 @@ private: // Allowed DCI locations per CFI and per subframe sched_dci_cce_t dci_locations[3][10]; - const static int SCHED_MAX_HARQ_PROC = 8; + const static int SCHED_MAX_HARQ_PROC = 2*HARQ_DELAY_MS; dl_harq_proc dl_harq[SCHED_MAX_HARQ_PROC]; ul_harq_proc ul_harq[SCHED_MAX_HARQ_PROC]; diff --git a/srsenb/hdr/mac/ue.h b/srsenb/hdr/mac/ue.h index b879d040b..0f95a1144 100644 --- a/srsenb/hdr/mac/ue.h +++ b/srsenb/hdr/mac/ue.h @@ -120,7 +120,7 @@ private: uint32_t nof_failures; - const static int NOF_HARQ_PROCESSES = 8; + const static int NOF_HARQ_PROCESSES = 2*HARQ_DELAY_MS; srslte_softbuffer_tx_t softbuffer_tx[NOF_HARQ_PROCESSES]; srslte_softbuffer_rx_t softbuffer_rx[NOF_HARQ_PROCESSES]; diff --git a/srsenb/hdr/phy/phch_common.h b/srsenb/hdr/phy/phch_common.h index 00a59d969..b6c4ceb08 100644 --- a/srsenb/hdr/phy/phch_common.h +++ b/srsenb/hdr/phy/phch_common.h @@ -78,13 +78,13 @@ public: mac_interface_phy *mac; // Common objects for schedulign grants - mac_interface_phy::ul_sched_t ul_grants[10]; - mac_interface_phy::dl_sched_t dl_grants[10]; + mac_interface_phy::ul_sched_t ul_grants[HARQ_SFMOD]; + mac_interface_phy::dl_sched_t dl_grants[HARQ_SFMOD]; // Map of pending ACKs for each user typedef struct { - bool is_pending[10]; - uint16_t n_pdcch[10]; + bool is_pending[HARQ_SFMOD]; + uint16_t n_pdcch[HARQ_SFMOD]; } pending_ack_t; std::map pending_ack; diff --git a/srsenb/src/mac/mac.cc b/srsenb/src/mac/mac.cc index 7cb30cc65..0528852be 100644 --- a/srsenb/src/mac/mac.cc +++ b/srsenb/src/mac/mac.cc @@ -406,7 +406,7 @@ int mac::get_dl_sched(uint32_t tti, dl_sched_t *dl_sched_res) log_step_dl(tti); if (!started) { - return 0; + return 0; } if (!dl_sched_res) { @@ -604,7 +604,7 @@ int mac::get_ul_sched(uint32_t tti, ul_sched_t *ul_sched_res) void mac::log_step_ul(uint32_t tti) { - int tti_ul = tti-8; + int tti_ul = tti-(2*HARQ_DELAY_MS); if (tti_ul < 0) { tti_ul += 10240; } @@ -613,7 +613,7 @@ void mac::log_step_ul(uint32_t tti) void mac::log_step_dl(uint32_t tti) { - int tti_dl = tti-4; + int tti_dl = tti-HARQ_DELAY_MS; if (tti_dl < 0) { tti_dl += 10240; } diff --git a/srsenb/src/mac/scheduler.cc b/srsenb/src/mac/scheduler.cc index 79cf3f476..5de780470 100644 --- a/srsenb/src/mac/scheduler.cc +++ b/srsenb/src/mac/scheduler.cc @@ -541,7 +541,7 @@ int sched::dl_sched_rar(dl_sched_rar_t rar[MAX_RAR_LIST]) pending_rar[j].rar_tti = 0; // Save UL resources - uint32_t pending_tti=(current_tti+6)%10; + uint32_t pending_tti=(current_tti+MSG3_DELAY_MS)%10; pending_msg3[pending_tti].enabled = true; pending_msg3[pending_tti].rnti = pending_rar[j].rnti; pending_msg3[pending_tti].L = L_prb; @@ -677,7 +677,7 @@ int sched::ul_sched(uint32_t tti, srsenb::sched_interface::ul_sched_res_t* sched pthread_mutex_lock(&mutex); /* If dl_sched() not yet called this tti (this tti is +4ms advanced), reset CCE state */ - if ((current_tti+4)%10240 != tti) { + if (HARQ_TX(current_tti) != tti) { bzero(used_cce, MAX_CCE*sizeof(bool)); } @@ -685,9 +685,9 @@ int sched::ul_sched(uint32_t tti, srsenb::sched_interface::ul_sched_res_t* sched current_tti = tti; sfn = tti/10; if (tti > 4) { - sf_idx = (tti-4)%10; + sf_idx = (tti-HARQ_DELAY_MS)%10; } else { - sf_idx = (tti+10240-4)%10; + sf_idx = (tti+10240-HARQ_DELAY_MS)%10; } int nof_dci_elems = 0; int nof_phich_elems = 0; diff --git a/srsenb/src/mac/scheduler_harq.cc b/srsenb/src/mac/scheduler_harq.cc index a6ae70d19..f5209b374 100644 --- a/srsenb/src/mac/scheduler_harq.cc +++ b/srsenb/src/mac/scheduler_harq.cc @@ -177,7 +177,7 @@ void dl_harq_proc::set_rbgmask(uint32_t new_mask) bool dl_harq_proc::has_pending_retx(uint32_t current_tti) { - return srslte_tti_interval(current_tti, tti) >= 8 && has_pending_retx_common(); + return srslte_tti_interval(current_tti, tti) >= (2*HARQ_DELAY_MS) && has_pending_retx_common(); } int dl_harq_proc::get_tbs() diff --git a/srsenb/src/mac/scheduler_ue.cc b/srsenb/src/mac/scheduler_ue.cc index 1534d12ef..567711cbe 100644 --- a/srsenb/src/mac/scheduler_ue.cc +++ b/srsenb/src/mac/scheduler_ue.cc @@ -247,7 +247,7 @@ bool sched_ue::get_pucch_sched(uint32_t current_tti, uint32_t prb_idx[2], uint32 // First check if it has pending ACKs for (int i=0;iul_grants[i].nof_grants;j++) { if (phy->ul_grants[i].sched_grants[j].rnti == rnti) { phy->ul_grants[i].sched_grants[j].rnti = 0; @@ -266,7 +266,7 @@ void phch_worker::rem_rnti(uint16_t rnti) void phch_worker::work_imp() { uint32_t sf_ack; - + if (!running) { return; } @@ -326,12 +326,12 @@ void phch_worker::work_imp() encode_phich(ul_grants[sf_sched_ul].phich, ul_grants[sf_sched_ul].nof_phich, sf_tx); // Prepare for receive ACK for DL grants in sf_tx+4 - sf_ack = (sf_tx+4)%10; + sf_ack = HARQ_RX(sf_tx)%HARQ_SFMOD; phy->ack_clear(sf_ack); for (uint32_t i=0;i= SRSLTE_CRNTI_START && dl_grants[sf_tx].sched_grants[i].rnti <= SRSLTE_CRNTI_END) { - phy->ack_set_pending(sf_ack, dl_grants[sf_tx].sched_grants[i].rnti, dl_grants[sf_tx].sched_grants[i].location.ncce); + phy->ack_set_pending(sf_ack, dl_grants[sf_tx].sched_grants[i].rnti, dl_grants[sf_tx].sched_grants[i].location.ncce); } } @@ -504,8 +504,7 @@ int phch_worker::decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch, int phch_worker::decode_pucch(uint32_t tti_rx) { - uint32_t sf_rx = tti_rx%10; - srslte_uci_data_t uci_data; + srslte_uci_data_t uci_data; for(std::map::iterator iter=ue_db.begin(); iter!=ue_db.end(); ++iter) { uint16_t rnti = (uint16_t) iter->first; @@ -523,7 +522,7 @@ int phch_worker::decode_pucch(uint32_t tti_rx) uci_data.scheduling_request = true; } } - if (phy->ack_is_pending(sf_rx, rnti, &last_n_pdcch)) { + if (phy->ack_is_pending(tti_rx%HARQ_SFMOD, rnti, &last_n_pdcch)) { needs_pucch = true; needs_ack = true; uci_data.uci_ack_len = 1; @@ -589,7 +588,7 @@ int phch_worker::encode_phich(srslte_enb_dl_phich_t *acks, uint32_t nof_acks, ui srslte_enb_dl_put_phich(&enb_dl, acks[i].ack, ue_db[rnti].phich_info.n_prb_lowest, ue_db[rnti].phich_info.n_dmrs, - sf_idx); + sf_idx%10); Info("PHICH: rnti=0x%x, hi=%d, I_lowest=%d, n_dmrs=%d, tti_tx=%d\n", rnti, acks[i].ack, @@ -606,13 +605,13 @@ int phch_worker::encode_pdcch_ul(srslte_enb_ul_pusch_t *grants, uint32_t nof_gra for (uint32_t i=0;iinfo_hex(ptr, len, - "PDSCH: rnti=0x%x, l_crb=%2d, %s, harq=%d, tbs=%d, mcs=%d, rv=%d, tti_tx=%d\n", + "PDSCH: rnti=0x%x, l_crb=%2d, %s, harq=%d, tbs=%d, mcs=%d, rv=%d, tti_tx=%d\n", rnti, phy_grant.nof_prb, grant_str, grants[i].grant.harq_process, phy_grant.mcs[0].tbs/8, phy_grant.mcs[0].idx, grants[i].grant.rv_idx, tti_tx); } diff --git a/srsue/hdr/mac/mac.h b/srsue/hdr/mac/mac.h index a306af187..d19f668bf 100644 --- a/srsue/hdr/mac/mac.h +++ b/srsue/hdr/mac/mac.h @@ -109,7 +109,7 @@ private: static const int MAC_MAIN_THREAD_PRIO = 5; static const int MAC_PDU_THREAD_PRIO = 6; - static const int MAC_NOF_HARQ_PROC = 8; + static const int MAC_NOF_HARQ_PROC = 2*HARQ_DELAY_MS; // Interaction with PHY srslte::tti_sync_cv ttisync; diff --git a/srsue/hdr/phy/phch_common.h b/srsue/hdr/phy/phch_common.h index aa64fe9ea..fb077e733 100644 --- a/srsue/hdr/phy/phch_common.h +++ b/srsue/hdr/phy/phch_common.h @@ -138,7 +138,7 @@ namespace srsue { uint32_t I_lowest; uint32_t n_dmrs; } pending_ack_t; - pending_ack_t pending_ack[10]; + pending_ack_t pending_ack[HARQ_SFMOD]; bool is_first_tx; diff --git a/srsue/src/mac/proc_bsr.cc b/srsue/src/mac/proc_bsr.cc index 898943ab9..43694c1bc 100644 --- a/srsue/src/mac/proc_bsr.cc +++ b/srsue/src/mac/proc_bsr.cc @@ -368,7 +368,7 @@ bool bsr_proc::need_to_reset_sr() { bool bsr_proc::need_to_send_sr(uint32_t tti) { if (!sr_is_sent && triggered_bsr_type == REGULAR) { - if (srslte_tti_interval(tti,next_tx_tti)>0 && srslte_tti_interval(tti,next_tx_tti) < 10240-4) { + if (srslte_tti_interval(tti,next_tx_tti)>0 && srslte_tti_interval(tti,next_tx_tti) < 10240-HARQ_DELAY_MS) { reset_sr = false; sr_is_sent = true; Debug("BSR: Need to send sr: sr_is_sent=true, reset_sr=false, tti=%d, next_tx_tti=%d\n", tti, next_tx_tti); diff --git a/srsue/src/phy/phch_common.cc b/srsue/src/phy/phch_common.cc index d49b1ced2..cce1857f1 100644 --- a/srsue/src/phy/phch_common.cc +++ b/srsue/src/phy/phch_common.cc @@ -195,13 +195,13 @@ void phch_common::set_dl_rnti(srslte_rnti_type_t type, uint16_t rnti_value, int } void phch_common::reset_pending_ack(uint32_t tti) { - pending_ack[tti%10].enabled = false; + pending_ack[tti%HARQ_SFMOD].enabled = false; } void phch_common::set_pending_ack(uint32_t tti, uint32_t I_lowest, uint32_t n_dmrs) { - pending_ack[tti%10].enabled = true; - pending_ack[tti%10].I_lowest = I_lowest; - pending_ack[tti%10].n_dmrs = n_dmrs; + pending_ack[tti%HARQ_SFMOD].enabled = true; + pending_ack[tti%HARQ_SFMOD].I_lowest = I_lowest; + pending_ack[tti%HARQ_SFMOD].n_dmrs = n_dmrs; Debug("Set pending ACK for tti=%d I_lowest=%d, n_dmrs=%d\n", tti, I_lowest, n_dmrs); } @@ -211,12 +211,12 @@ bool phch_common::get_pending_ack(uint32_t tti) { bool phch_common::get_pending_ack(uint32_t tti, uint32_t *I_lowest, uint32_t *n_dmrs) { if (I_lowest) { - *I_lowest = pending_ack[tti%10].I_lowest; + *I_lowest = pending_ack[tti%HARQ_SFMOD].I_lowest; } if (n_dmrs) { - *n_dmrs = pending_ack[tti%10].n_dmrs; + *n_dmrs = pending_ack[tti%HARQ_SFMOD].n_dmrs; } - return pending_ack[tti%10].enabled; + return pending_ack[tti%HARQ_SFMOD].enabled; } /* The transmisison of UL subframes must be in sequence. Each worker uses this function to indicate diff --git a/srsue/src/phy/phch_recv.cc b/srsue/src/phy/phch_recv.cc index dad2c82b8..647226384 100644 --- a/srsue/src/phy/phch_recv.cc +++ b/srsue/src/phy/phch_recv.cc @@ -715,11 +715,11 @@ void phch_recv::run_thread() { worker->set_sample_offset(srslte_ue_sync_get_sfo(&ue_sync)/1000); - /* Compute TX time: Any transmission happens in TTI4 thus advance 4 ms the reception time */ + /* Compute TX time: Any transmission happens in TTI+4 thus advance 4 ms the reception time */ srslte_timestamp_t rx_time, tx_time, tx_time_prach; srslte_ue_sync_get_last_timestamp(&ue_sync, &rx_time); srslte_timestamp_copy(&tx_time, &rx_time); - srslte_timestamp_add(&tx_time, 0, 4e-3 - time_adv_sec); + srslte_timestamp_add(&tx_time, 0, HARQ_DELAY_MS*1e-3 - time_adv_sec); worker->set_tx_time(tx_time, next_offset); next_offset = 0; diff --git a/srsue/src/phy/phch_worker.cc b/srsue/src/phy/phch_worker.cc index 1c9abc1ca..1c52568af 100644 --- a/srsue/src/phy/phch_worker.cc +++ b/srsue/src/phy/phch_worker.cc @@ -335,7 +335,7 @@ void phch_worker::work_imp() &ul_action.softbuffers[0], ul_action.rv[0], ul_action.rnti, ul_mac_grant.is_from_rar); signal_ready = true; if (ul_action.expect_ack) { - phy->set_pending_ack(tti + 8, ue_ul.pusch_cfg.grant.n_prb_tilde[0], ul_action.phy_grant.ul.ncs_dmrs); + phy->set_pending_ack(tti + 2*HARQ_DELAY_MS, ue_ul.pusch_cfg.grant.n_prb_tilde[0], ul_action.phy_grant.ul.ncs_dmrs); } } else if (dl_action.generate_ack || uci_data.scheduling_request || uci_data.uci_cqi_len > 0) { @@ -663,7 +663,7 @@ bool phch_worker::decode_pdcch_ul(mac_interface_phy::mac_grant_t* grant) char timestr[64]; timestr[0]='\0'; - phy->reset_pending_ack(tti + 8); + phy->reset_pending_ack(tti + 2*HARQ_DELAY_MS); srslte_dci_msg_t dci_msg; srslte_ra_ul_dci_t dci_unpacked; @@ -776,7 +776,7 @@ void phch_worker::set_uci_sr() { uci_data.scheduling_request = false; if (phy->sr_enabled) { - uint32_t sr_tx_tti = (tti+4)%10240; + uint32_t sr_tx_tti = HARQ_TX(tti); // Get I_sr parameter if (srslte_ue_ul_sr_send_tti(I_sr, sr_tx_tti)) { Info("PUCCH: SR transmission at TTI=%d, I_sr=%d\n", sr_tx_tti, I_sr); @@ -793,7 +793,7 @@ void phch_worker::set_uci_periodic_cqi() int cqi_max = phy->args->cqi_max; if (period_cqi.configured && rnti_is_set) { - if (period_cqi.ri_idx_present && srslte_ri_send(period_cqi.pmi_idx, period_cqi.ri_idx, (tti+4)%10240)) { + if (period_cqi.ri_idx_present && srslte_ri_send(period_cqi.pmi_idx, period_cqi.ri_idx, HARQ_TX(tti))) { if (uci_data.uci_ri_len) { uci_data.uci_cqi[0] = uci_data.uci_ri; uci_data.uci_cqi_len = uci_data.uci_ri_len; @@ -802,7 +802,7 @@ void phch_worker::set_uci_periodic_cqi() uci_data.uci_pmi_len = 0; Info("PUCCH: Periodic RI=%d\n", uci_data.uci_cqi[0]); } - } else if (srslte_cqi_send(period_cqi.pmi_idx, (tti+4)%10240)) { + } else if (srslte_cqi_send(period_cqi.pmi_idx, HARQ_TX(tti))) { srslte_cqi_value_t cqi_report; if (period_cqi.format_is_subband) { // TODO: Implement subband periodic reports @@ -868,8 +868,8 @@ void phch_worker::set_uci_aperiodic_cqi() bool phch_worker::srs_is_ready_to_send() { if (srs_cfg.configured) { - if (srslte_refsignal_srs_send_cs(srs_cfg.subframe_config, (tti+4)%10) == 1 && - srslte_refsignal_srs_send_ue(srs_cfg.I_srs, (tti+4)%10240) == 1) + if (srslte_refsignal_srs_send_cs(srs_cfg.subframe_config, HARQ_RX(tti)%10) == 1 && + srslte_refsignal_srs_send_ue(srs_cfg.I_srs, HARQ_TX(tti)) == 1) { return true; } @@ -889,7 +889,7 @@ void phch_worker::encode_pusch(srslte_ra_ul_grant_t *grant, uint8_t *payload, ui char timestr[64]; timestr[0]='\0'; - if (srslte_ue_ul_cfg_grant(&ue_ul, grant, (tti+4)%10240, rv, current_tx_nb)) { + if (srslte_ue_ul_cfg_grant(&ue_ul, grant, HARQ_TX(tti), rv, current_tx_nb)) { Error("Configuring UL grant\n"); } @@ -919,7 +919,7 @@ void phch_worker::encode_pusch(srslte_ra_ul_grant_t *grant, uint8_t *payload, ui #endif Info("PUSCH: tti_tx=%d, n_prb=%d, rb_start=%d, tbs=%d, mod=%d, mcs=%d, rv_idx=%d, ack=%s, ri=%s, cfo=%.1f Hz%s\n", - (tti+4)%10240, + HARQ_TX(tti), grant->L_prb, grant->n_prb[0], grant->mcs.tbs/8, grant->mcs.mod, grant->mcs.idx, rv, uci_data.uci_ack_len>0?(uci_data.uci_ack?"1":"0"):"no", @@ -950,7 +950,7 @@ void phch_worker::encode_pucch() gettimeofday(&t[1], NULL); #endif - if (srslte_ue_ul_pucch_encode(&ue_ul, uci_data, last_dl_pdcch_ncce, (tti+4)%10240, signal_buffer[0])) { + if (srslte_ue_ul_pucch_encode(&ue_ul, uci_data, last_dl_pdcch_ncce, HARQ_TX(tti), signal_buffer[0])) { Error("Encoding PUCCH\n"); } @@ -966,7 +966,7 @@ void phch_worker::encode_pucch() float gain = set_power(tx_power); Info("PUCCH: tti_tx=%d, n_cce=%3d, n_pucch=%d, n_prb=%d, ack=%s%s, ri=%s, pmi=%s%s, sr=%s, cfo=%.1f Hz%s\n", - (tti+4)%10240, + HARQ_TX(tti), last_dl_pdcch_ncce, ue_ul.pucch.last_n_pucch, ue_ul.pucch.last_n_prb, uci_data.uci_ack_len>0?(uci_data.uci_ack?"1":"0"):"no", uci_data.uci_ack_len>1?(uci_data.uci_ack_2?"1":"0"):"", @@ -987,7 +987,7 @@ void phch_worker::encode_srs() char timestr[64]; timestr[0]='\0'; - if (srslte_ue_ul_srs_encode(&ue_ul, (tti+4)%10240, signal_buffer[0])) + if (srslte_ue_ul_srs_encode(&ue_ul, HARQ_TX(tti), signal_buffer[0])) { Error("Encoding SRS\n"); } @@ -1002,7 +1002,7 @@ void phch_worker::encode_srs() float gain = set_power(tx_power); uint32_t fi = srslte_vec_max_fi((float*) signal_buffer, SRSLTE_SF_LEN_PRB(cell.nof_prb)); float *f = (float*) signal_buffer; - Info("SRS: power=%.2f dBm, tti_tx=%d%s\n", tx_power, (tti+4)%10240, timestr); + Info("SRS: power=%.2f dBm, tti_tx=%d%s\n", tx_power, HARQ_TX(tti), timestr); } From dbae016b003c69f651e2c0fe885281ca61b9cb7c Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Mon, 2 Oct 2017 18:16:03 +0100 Subject: [PATCH 25/55] Removed unused vector functions --- lib/include/srslte/phy/utils/vector.h | 35 +---- lib/src/phy/utils/vector.c | 182 +------------------------- lib/src/phy/utils/vector_simd.c | 69 ---------- 3 files changed, 2 insertions(+), 284 deletions(-) diff --git a/lib/include/srslte/phy/utils/vector.h b/lib/include/srslte/phy/utils/vector.h index 9b99c6fff..4a5daefb3 100644 --- a/lib/include/srslte/phy/utils/vector.h +++ b/lib/include/srslte/phy/utils/vector.h @@ -54,7 +54,6 @@ extern "C" { #define SRSLTE_VEC_EMA(data, average, alpha) ((alpha)*(data)+(1-alpha)*(average)) /** Return the sum of all the elements */ -SRSLTE_API int srslte_vec_acc_ii(int *x, uint32_t len); SRSLTE_API float srslte_vec_acc_ff(float *x, uint32_t len); SRSLTE_API cf_t srslte_vec_acc_cc(cf_t *x, uint32_t len); @@ -77,7 +76,6 @@ SRSLTE_API void srslte_vec_save_file(char *filename, void *buffer, uint32_t len) SRSLTE_API void srslte_vec_load_file(char *filename, void *buffer, uint32_t len); /* sum two vectors */ -SRSLTE_API void srslte_vec_sum_ch(uint8_t *x, uint8_t *y, char *z, uint32_t len); SRSLTE_API void srslte_vec_sum_fff(float *x, float *y, float *z, uint32_t len); SRSLTE_API void srslte_vec_sum_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len); SRSLTE_API void srslte_vec_sub_sss(int16_t *x, int16_t *y, int16_t *z, uint32_t len); @@ -87,39 +85,16 @@ SRSLTE_API void srslte_vec_sum_sss(int16_t *x, int16_t *y, int16_t *z, uint32_t SRSLTE_API void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len); SRSLTE_API void srslte_vec_sub_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len); -/* EMA filter: output=coeff*new_data + (1-coeff)*average */ -SRSLTE_API void srslte_vec_ema_filter(cf_t *new_data, cf_t *average, cf_t *output, float coeff, uint32_t len); - -/* Square distance */ -//SRSLTE_API void srslte_vec_square_dist(cf_t symbol, cf_t *points, float *distance, uint32_t npoints); - -/* scalar addition */ -SRSLTE_API void srslte_vec_sc_add_fff(float *x, float h, float *z, uint32_t len); -SRSLTE_API void srslte_vec_sc_add_cfc(cf_t *x, float h, cf_t *z, uint32_t len); -SRSLTE_API void srslte_vec_sc_add_ccc(cf_t *x, cf_t h, cf_t *z, uint32_t len); -SRSLTE_API void srslte_vec_sc_add_sss(int16_t *x, int16_t h, int16_t *z, uint32_t len); - /* scalar product */ SRSLTE_API void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len); SRSLTE_API void srslte_vec_sc_prod_ccc(cf_t *x, cf_t h, cf_t *z, uint32_t len); SRSLTE_API void srslte_vec_sc_prod_fff(float *x, float h, float *z, uint32_t len); -SRSLTE_API void srslte_vec_sc_prod_sfs(short *x, float h, short *z, uint32_t len); -SRSLTE_API void srslte_vec_sc_div2_sss(short *x, int pow2_div, short *z, uint32_t len); -/* Normalization */ -SRSLTE_API void srslte_vec_norm_cfc(cf_t *x, float amplitude, cf_t *y, uint32_t len); SRSLTE_API void srslte_vec_convert_fi(float *x, int16_t *z, float scale, uint32_t len); SRSLTE_API void srslte_vec_convert_if(int16_t *x, float *z, float scale, uint32_t len); -SRSLTE_API void srslte_vec_convert_ci(int8_t *x, int16_t *z, uint32_t len); -SRSLTE_API void srslte_vec_lut_fuf(float *x, uint32_t *lut, float *y, uint32_t len); -SRSLTE_API void srslte_vec_lut_sss(short *x, unsigned short *lut, short *y, uint32_t len); - -SRSLTE_API void srslte_vec_deinterleave_cf(cf_t *x, float *real, float *imag, uint32_t len); -SRSLTE_API void srslte_vec_deinterleave_real_cf(cf_t *x, float *real, uint32_t len); - -SRSLTE_API void srslte_vec_interleave_cf(float *real, float *imag, cf_t *x, uint32_t len); +SRSLTE_API void srslte_vec_lut_sss(short *x, unsigned short *lut, short *y, uint32_t len); /* vector product (element-wise) */ SRSLTE_API void srslte_vec_prod_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len); @@ -159,11 +134,6 @@ SRSLTE_API float srslte_vec_corr_ccc(cf_t *x, cf_t *y, uint32_t len); /* return the index of the maximum value in the vector */ SRSLTE_API uint32_t srslte_vec_max_fi(float *x, uint32_t len); SRSLTE_API uint32_t srslte_vec_max_abs_ci(cf_t *x, uint32_t len); -SRSLTE_API int16_t srslte_vec_max_star_si(int16_t *x, uint32_t len); -SRSLTE_API int16_t srslte_vec_max_abs_star_si(int16_t *x, uint32_t len); - -/* maximum between two vectors */ -SRSLTE_API void srslte_vec_max_fff(float *x, float *y, float *z, uint32_t len); /* quantify vector of floats or int16 and convert to uint8_t */ SRSLTE_API void srslte_vec_quant_fuc(float *in, uint8_t *out, float gain, float offset, float clip, uint32_t len); @@ -173,9 +143,6 @@ SRSLTE_API void srslte_vec_quant_suc(int16_t *in, uint8_t *out, float gain, int1 SRSLTE_API void srslte_vec_abs_cf(cf_t *x, float *abs, uint32_t len); SRSLTE_API void srslte_vec_abs_square_cf(cf_t *x, float *abs_square, uint32_t len); -/* argument of each vector element */ -SRSLTE_API void srslte_vec_arg_cf(cf_t *x, float *arg, uint32_t len); - /* Copy 256 bit aligned vector */ SRSLTE_API void srs_vec_cf_cpy(cf_t *src, cf_t *dst, int len); diff --git a/lib/src/phy/utils/vector.c b/lib/src/phy/utils/vector.c index f85dbca0a..3bb7fb08f 100644 --- a/lib/src/phy/utils/vector.c +++ b/lib/src/phy/utils/vector.c @@ -37,15 +37,6 @@ -int srslte_vec_acc_ii(int *x, uint32_t len) { - int i; - int z=0; - for (i=0;im) { - m=x[i]; - } - } - return m; -} - -int16_t srslte_vec_max_abs_star_si(int16_t *x, uint32_t len) { - uint32_t i; - int16_t m=-INT16_MIN; - for (i=0;im) { - m=abs(x[i]); - } - } - return m; -} - -void srslte_vec_max_fff(float *x, float *y, float *z, uint32_t len) { - uint32_t i; - for (i=0;i y[i]) { - z[i] = x[i]; - } else { - z[i] = y[i]; - } - } -} - - // CP autocorr uint32_t srslte_vec_max_abs_ci(cf_t *x, uint32_t len) { return srslte_vec_max_ci_simd(x, len); diff --git a/lib/src/phy/utils/vector_simd.c b/lib/src/phy/utils/vector_simd.c index 109c99717..0294bd1af 100644 --- a/lib/src/phy/utils/vector_simd.c +++ b/lib/src/phy/utils/vector_simd.c @@ -162,75 +162,6 @@ void srslte_vec_prod_sss_simd(int16_t *x, int16_t *y, int16_t *z, int len) { } } - - - -#warning remove function if it is not used -/* -void srslte_vec_sc_div2_sss_sse(short *x, int k, short *z, uint32_t len) -{ -#ifdef LV_HAVE_SSE - unsigned int number = 0; - const unsigned int points = len / 8; - - const __m128i* xPtr = (const __m128i*) x; - __m128i* zPtr = (__m128i*) z; - - __m128i xVal, zVal; - for(;number < points; number++){ - - xVal = _mm_load_si128(xPtr); - - zVal = _mm_srai_epi16(xVal, k); - - _mm_store_si128(zPtr, zVal); - - xPtr ++; - zPtr ++; - } - - number = points * 8; - short divn = (1< Date: Thu, 5 Oct 2017 16:52:02 +0200 Subject: [PATCH 26/55] Now working with variable HARQ scheduling --- lib/include/srslte/common/common.h | 14 +- srsenb/hdr/phy/phch_worker.h | 4 +- srsenb/src/mac/scheduler.cc | 4 +- srsenb/src/mac/scheduler_metric.cc | 18 +- srsenb/src/mac/scheduler_ue.cc | 12 +- srsenb/src/phy/phch_common.cc | 10 +- srsenb/src/phy/phch_worker.cc | 367 +++++++++++++++-------------- srsenb/src/phy/txrx.cc | 2 +- srsue/hdr/mac/mux.h | 3 +- srsue/src/phy/phch_common.cc | 12 +- srsue/src/phy/phch_worker.cc | 41 ++-- 11 files changed, 260 insertions(+), 227 deletions(-) diff --git a/lib/include/srslte/common/common.h b/lib/include/srslte/common/common.h index c6ad06402..9d3d56569 100644 --- a/lib/include/srslte/common/common.h +++ b/lib/include/srslte/common/common.h @@ -44,13 +44,15 @@ #define SRSLTE_N_DRB 8 #define SRSLTE_N_RADIO_BEARERS 11 -#define HARQ_DELAY_MS 4 -#define MSG3_DELAY_MS 6 -#define HARQ_TX(tti) ((tti+HARQ_DELAY_MS)%10240) -#define HARQ_RX(tti) ((tti+(2*HARQ_DELAY_MS))%10240) +#define HARQ_DELAY_MS 6 +#define MSG3_DELAY_MS 6 +#define TTI_TX(tti) ((tti+HARQ_DELAY_MS)%10240) +#define TTI_RX_ACK(tti) ((tti+(2*HARQ_DELAY_MS))%10240) -#define TTIMOD_SZ 10 -#define TTIMOD(tti) (tti%TTIMOD_SZ) +#define TTIMOD_SZ 20 +#define TTIMOD(tti) (tti%TTIMOD_SZ) + +#define ASYNC_DL_SCHED (HARQ_DELAY_MS <= 4) // Cat 3 UE - Max number of DL-SCH transport block bits received within a TTI // 3GPP 36.306 Table 4.1.1 diff --git a/srsenb/hdr/phy/phch_worker.h b/srsenb/hdr/phy/phch_worker.h index a84045920..3194751e8 100644 --- a/srsenb/hdr/phy/phch_worker.h +++ b/srsenb/hdr/phy/phch_worker.h @@ -89,9 +89,9 @@ private: cf_t *signal_buffer_rx; cf_t *signal_buffer_tx; - uint32_t tti_rx, tti_tx, tti_sched_ul; + uint32_t tti_rx, tti_tx_dl, tti_tx_ul; uint32_t sf_rx, sf_tx, tx_mutex_cnt; - uint32_t t_rx, t_tx, t_sched_ul; + uint32_t t_rx, t_tx_dl, t_tx_ul; srslte_enb_dl_t enb_dl; srslte_enb_ul_t enb_ul; diff --git a/srsenb/src/mac/scheduler.cc b/srsenb/src/mac/scheduler.cc index 73f121424..ab499efb0 100644 --- a/srsenb/src/mac/scheduler.cc +++ b/srsenb/src/mac/scheduler.cc @@ -677,14 +677,14 @@ int sched::ul_sched(uint32_t tti, srsenb::sched_interface::ul_sched_res_t* sched pthread_mutex_lock(&mutex); /* If dl_sched() not yet called this tti (this tti is +4ms advanced), reset CCE state */ - if (HARQ_TX(current_tti) != tti) { + if (TTI_TX(current_tti) != tti) { bzero(used_cce, MAX_CCE*sizeof(bool)); } /* Initialize variables */ current_tti = tti; sfn = tti/10; - if (tti > 4) { + if (tti > HARQ_DELAY_MS) { sf_idx = (tti-HARQ_DELAY_MS)%10; } else { sf_idx = (tti+10240-HARQ_DELAY_MS)%10; diff --git a/srsenb/src/mac/scheduler_metric.cc b/srsenb/src/mac/scheduler_metric.cc index 708ab2dd8..6c50009f7 100644 --- a/srsenb/src/mac/scheduler_metric.cc +++ b/srsenb/src/mac/scheduler_metric.cc @@ -142,8 +142,12 @@ dl_harq_proc* dl_metric_rr::get_user_allocation(sched_ue *user) dl_harq_proc *h = user->get_pending_dl_harq(current_tti); // Time-domain RR scheduling +#if ASYNC_DL_SCHED if (pending_data || h) { - if (nof_users_with_data) { +#else + if (pending_data || (h && !h->is_empty())) { +#endif + if (nof_users_with_data) { if (nof_users_with_data == 2) { } if ((current_tti%nof_users_with_data) != user->ue_idx) { @@ -153,7 +157,11 @@ dl_harq_proc* dl_metric_rr::get_user_allocation(sched_ue *user) } // Schedule retx if we have space +#if ASYNC_DL_SCHED if (h) { +#else + if (h && !h->is_empty()) { +#endif uint32_t retx_mask = h->get_rbgmask(); // If can schedule the same mask, do it if (!allocation_is_valid(retx_mask)) { @@ -170,10 +178,14 @@ dl_harq_proc* dl_metric_rr::get_user_allocation(sched_ue *user) } } } - // If could not schedule the reTx, or there wasn't any pending retx, find an empty PID + // If could not schedule the reTx, or there wasn't any pending retx, find an empty PID +#if ASYNC_DL_SCHED h = user->get_empty_dl_harq(); if (h) { - // Allocate resources based on pending data +#else + if (h && h->is_empty()) { +#endif + // Allocate resources based on pending data if (pending_data) { uint32_t pending_rb = user->get_required_prb_dl(pending_data, nof_ctrl_symbols); uint32_t newtx_mask = 0; diff --git a/srsenb/src/mac/scheduler_ue.cc b/srsenb/src/mac/scheduler_ue.cc index 525494ad2..7f4042849 100644 --- a/srsenb/src/mac/scheduler_ue.cc +++ b/srsenb/src/mac/scheduler_ue.cc @@ -248,7 +248,7 @@ bool sched_ue::get_pucch_sched(uint32_t current_tti, uint32_t prb_idx[2]) // First check if it has pending ACKs for (int i=0;iN_pucch_1 = phy->pucch_cfg.n1_pucch_an; srslte_enb_ul_cfg_ue(&enb_ul, rnti, uci_cfg, pucch_sched, srs_cfg); - - ue_db[rnti].I_sr = I_sr; + + ue_db[rnti].I_sr = I_sr; ue_db[rnti].I_sr_en = true; if (pucch_cqi) { - ue_db[rnti].pmi_idx = pmi_idx; - ue_db[rnti].cqi_en = true; - ue_db[rnti].pucch_cqi_ack = pucch_cqi_ack; + ue_db[rnti].pmi_idx = pmi_idx; + ue_db[rnti].cqi_en = true; + ue_db[rnti].pucch_cqi_ack = pucch_cqi_ack; } else { - ue_db[rnti].pmi_idx = 0; - ue_db[rnti].cqi_en = false; + ue_db[rnti].pmi_idx = 0; + ue_db[rnti].cqi_en = false; } - + } else { Error("Setting config dedicated: rnti=0x%x does not exist\n"); } - pthread_mutex_unlock(&mutex); + pthread_mutex_unlock(&mutex); } void phch_worker::rem_rnti(uint16_t rnti) { - pthread_mutex_lock(&mutex); + pthread_mutex_lock(&mutex); if (ue_db.count(rnti)) { ue_db.erase(rnti); - - srslte_enb_dl_rem_rnti(&enb_dl, rnti); + + srslte_enb_dl_rem_rnti(&enb_dl, rnti); srslte_enb_ul_rem_rnti(&enb_ul, rnti); - - // remove any pending grant for each subframe + + // remove any pending grant for each subframe for (uint32_t i=0;iul_grants[i].nof_grants;j++) { if (phy->ul_grants[i].sched_grants[j].rnti == rnti) { - phy->ul_grants[i].sched_grants[j].rnti = 0; + phy->ul_grants[i].sched_grants[j].rnti = 0; } } for (uint32_t j=0;jdl_grants[i].nof_grants;j++) { if (phy->dl_grants[i].sched_grants[j].rnti == rnti) { - phy->dl_grants[i].sched_grants[j].rnti = 0; + phy->dl_grants[i].sched_grants[j].rnti = 0; } } } } else { Error("Removing user: rnti=0x%x does not exist\n", rnti); } - pthread_mutex_unlock(&mutex); + pthread_mutex_unlock(&mutex); } void phch_worker::work_imp() @@ -275,18 +275,18 @@ void phch_worker::work_imp() } pthread_mutex_lock(&mutex); - + mac_interface_phy::ul_sched_t *ul_grants = phy->ul_grants; - mac_interface_phy::dl_sched_t *dl_grants = phy->dl_grants; - mac_interface_phy *mac = phy->mac; - + mac_interface_phy::dl_sched_t *dl_grants = phy->dl_grants; + mac_interface_phy *mac = phy->mac; + log_h->step(tti_rx); - + Debug("Worker %d running\n", get_id()); - + for(std::map::iterator iter=ue_db.begin(); iter!=ue_db.end(); ++iter) { uint16_t rnti = (uint16_t) iter->first; - ue_db[rnti].has_grant_tti = -1; + ue_db[rnti].has_grant_tti = -1; } // Process UL signal @@ -294,51 +294,51 @@ void phch_worker::work_imp() // Decode pending UL grants for the tti they were scheduled decode_pusch(ul_grants[t_rx].sched_grants, ul_grants[t_rx].nof_grants); - + // Decode remaining PUCCH ACKs not associated with PUSCH transmission and SR signals decode_pucch(); - + // Get DL scheduling for the TX TTI from MAC - if (mac->get_dl_sched(tti_tx, &dl_grants[t_tx]) < 0) { + if (mac->get_dl_sched(tti_tx_dl, &dl_grants[t_tx_dl]) < 0) { Error("Getting DL scheduling from MAC\n"); goto unlock; - } - - if (dl_grants[t_tx].cfi < 1 || dl_grants[t_tx].cfi > 3) { - Error("Invalid CFI=%d\n", dl_grants[t_tx].cfi); + } + + if (dl_grants[t_tx_dl].cfi < 1 || dl_grants[t_tx_dl].cfi > 3) { + Error("Invalid CFI=%d\n", dl_grants[t_tx_dl].cfi); goto unlock; } - + // Get UL scheduling for the TX TTI from MAC - if (mac->get_ul_sched(tti_sched_ul, &ul_grants[t_sched_ul]) < 0) { + if (mac->get_ul_sched(tti_tx_ul, &ul_grants[t_tx_ul]) < 0) { Error("Getting UL scheduling from MAC\n"); goto unlock; - } - + } + // Put base signals (references, PBCH, PCFICH and PSS/SSS) into the resource grid srslte_enb_dl_clear_sf(&enb_dl); - srslte_enb_dl_set_cfi(&enb_dl, dl_grants[t_tx].cfi); - srslte_enb_dl_put_base(&enb_dl, tti_tx); + srslte_enb_dl_set_cfi(&enb_dl, dl_grants[t_tx_dl].cfi); + srslte_enb_dl_put_base(&enb_dl, tti_tx_dl); + + // Put UL/DL grants to resource grid. PDSCH data will be encoded as well. + encode_pdcch_dl(dl_grants[t_tx_dl].sched_grants, dl_grants[t_tx_dl].nof_grants); + encode_pdcch_ul(ul_grants[t_tx_ul].sched_grants, ul_grants[t_tx_ul].nof_grants); + encode_pdsch(dl_grants[t_tx_dl].sched_grants, dl_grants[t_tx_dl].nof_grants); - // Put UL/DL grants to resource grid. PDSCH data will be encoded as well. - encode_pdcch_dl(dl_grants[t_tx].sched_grants, dl_grants[t_tx].nof_grants); - encode_pdcch_ul(ul_grants[t_sched_ul].sched_grants, ul_grants[t_sched_ul].nof_grants); - encode_pdsch(dl_grants[t_tx].sched_grants, dl_grants[t_tx].nof_grants); - // Put pending PHICH HARQ ACK/NACK indications into subframe - encode_phich(ul_grants[t_sched_ul].phich, ul_grants[t_sched_ul].nof_phich); - - // Prepare for receive ACK for DL grants in t_tx+4 - phy->ack_clear(TTIMOD(HARQ_TX(sf_tx))); - for (uint32_t i=0;iack_clear(TTIMOD(TTI_TX(t_tx_dl))); + for (uint32_t i=0;i= SRSLTE_CRNTI_START && dl_grants[t_tx].sched_grants[i].rnti <= SRSLTE_CRNTI_END) { - phy->ack_set_pending(TTIMOD(HARQ_TX(sf_tx)), dl_grants[t_tx].sched_grants[i].rnti, dl_grants[t_tx].sched_grants[i].location.ncce); + if (dl_grants[t_tx_dl].sched_grants[i].rnti >= SRSLTE_CRNTI_START && dl_grants[t_tx_dl].sched_grants[i].rnti <= SRSLTE_CRNTI_END) { + phy->ack_set_pending(TTIMOD(TTI_TX(t_tx_dl)), dl_grants[t_tx_dl].sched_grants[i].rnti, dl_grants[t_tx_dl].sched_grants[i].location.ncce); } } - + // Generate signal and transmit - srslte_enb_dl_gen_signal(&enb_dl, signal_buffer_tx); + srslte_enb_dl_gen_signal(&enb_dl, signal_buffer_tx); Debug("Sending to radio\n"); phy->worker_end(tx_mutex_cnt, signal_buffer_tx, SRSLTE_SF_LEN_PRB(phy->cell.nof_prb), tx_time); @@ -347,35 +347,35 @@ void phch_worker::work_imp() #endif #ifdef DEBUG_WRITE_FILE - if (tti_tx == 10) { + if (tti_tx_dl == 10) { fclose(f); exit(-1); } -#endif - +#endif + /* Tell the plotting thread to draw the plots */ #ifdef ENABLE_GUI if ((int) get_id() == plot_worker_id) { - sem_post(&plot_sem); + sem_post(&plot_sem); } #endif unlock: - pthread_mutex_unlock(&mutex); + pthread_mutex_unlock(&mutex); } int phch_worker::decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch) { - srslte_uci_data_t uci_data; + srslte_uci_data_t uci_data; bzero(&uci_data, sizeof(srslte_uci_data_t)); - - uint32_t wideband_cqi_value = 0; - - uint32_t n_rb_ho = 0; + + uint32_t wideband_cqi_value = 0; + + uint32_t n_rb_ho = 0; for (uint32_t i=0;iack_is_pending(t_rx, rnti)) { - uci_data.uci_ack_len = 1; + uci_data.uci_ack_len = 1; } - // Configure PUSCH CQI channel + // Configure PUSCH CQI channel srslte_cqi_value_t cqi_value; - bool cqi_enabled = false; + bool cqi_enabled = false; if (ue_db[rnti].cqi_en && srslte_cqi_send(ue_db[rnti].pmi_idx, tti_rx)) { cqi_value.type = SRSLTE_CQI_TYPE_WIDEBAND; - cqi_enabled = true; + cqi_enabled = true; } else if (grants[i].grant.cqi_request) { cqi_value.type = SRSLTE_CQI_TYPE_SUBBAND_HL; cqi_value.subband_hl.N = (phy->cell.nof_prb > 7) ? srslte_cqi_hl_get_no_subbands(phy->cell.nof_prb) : 0; - cqi_enabled = true; + cqi_enabled = true; } if (cqi_enabled) { uci_data.uci_cqi_len = srslte_cqi_size(&cqi_value); } - - // mark this tti as having an ul grant to avoid pucch - ue_db[rnti].has_grant_tti = tti_rx; - - srslte_ra_ul_grant_t phy_grant; + + // mark this tti as having an ul grant to avoid pucch + ue_db[rnti].has_grant_tti = tti_rx; + + srslte_ra_ul_grant_t phy_grant; int res = -1; if (!srslte_ra_ul_dci_to_grant(&grants[i].grant, enb_ul.cell.nof_prb, n_rb_ho, &phy_grant, tti_rx%8)) { if (phy_grant.mcs.mod == SRSLTE_MOD_64QAM) { @@ -414,27 +414,27 @@ int phch_worker::decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch) } phy_grant.Qm = SRSLTE_MIN(phy_grant.Qm, 4); res = srslte_enb_ul_get_pusch(&enb_ul, &phy_grant, grants[i].softbuffer, - rnti, grants[i].rv_idx, - grants[i].current_tx_nb, - grants[i].data, - &uci_data, + rnti, grants[i].rv_idx, + grants[i].current_tx_nb, + grants[i].data, + &uci_data, sf_rx); } else { Error("Computing PUSCH grant\n"); - return SRSLTE_ERROR; + return SRSLTE_ERROR; } - + #ifdef LOG_EXECTIME gettimeofday(&t[2], NULL); get_time_interval(t); snprintf(timestr, 64, ", dec_time=%4d us", (int) t[0].tv_usec); #endif - - bool crc_res = (res == 0); - + + bool crc_res = (res == 0); + // Save PHICH scheduling for this user. Each user can have just 1 PUSCH grant per TTI - ue_db[rnti].phich_info.n_prb_lowest = enb_ul.pusch_cfg.grant.n_prb_tilde[0]; - ue_db[rnti].phich_info.n_dmrs = phy_grant.ncs_dmrs; + ue_db[rnti].phich_info.n_prb_lowest = enb_ul.pusch_cfg.grant.n_prb_tilde[0]; + ue_db[rnti].phich_info.n_dmrs = phy_grant.ncs_dmrs; char cqi_str[64]; if (cqi_enabled) { @@ -446,8 +446,8 @@ int phch_worker::decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch) } snprintf(cqi_str, 64, ", cqi=%d", wideband_cqi_value); } - - float snr_db = 10*log10(srslte_chest_ul_get_snr(&enb_ul.chest)); + + float snr_db = 10*log10(srslte_chest_ul_get_snr(&enb_ul.chest)); /* if (!crc_res && enb_ul.pusch_cfg.grant.L_prb == 1 && enb_ul.pusch_cfg.grant.n_prb[0] == 0 && snr_db > 5) { @@ -456,8 +456,8 @@ int phch_worker::decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch) srslte_vec_save_file("d", enb_ul.pusch.d, sizeof(cf_t)*enb_ul.pusch_cfg.nbits.nof_re); srslte_vec_save_file("ce2", enb_ul.pusch.ce, sizeof(cf_t)*enb_ul.pusch_cfg.nbits.nof_re); srslte_vec_save_file("z", enb_ul.pusch.z, sizeof(cf_t)*enb_ul.pusch_cfg.nbits.nof_re); - printf("saved sf_idx=%d, mcs=%d, tbs=%d, rnti=%d, rv=%d, snr=%.1f\n", tti%10, - grants[i].grant.mcs_idx, enb_ul.pusch_cfg.cb_segm.tbs, rnti, grants[i].rv_idx, snr_db); + printf("saved sf_idx=%d, mcs=%d, tbs=%d, rnti=%d, rv=%d, snr=%.1f\n", tti%10, + grants[i].grant.mcs_idx, enb_ul.pusch_cfg.cb_segm.tbs, rnti, grants[i].rv_idx, snr_db); exit(-1); } */ @@ -465,120 +465,121 @@ int phch_worker::decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch) "PUSCH: rnti=0x%x, prb=(%d,%d), tbs=%d, mcs=%d, rv=%d, snr=%.1f dB, n_iter=%d, crc=%s%s%s%s\n", rnti, phy_grant.n_prb[0], phy_grant.n_prb[0]+phy_grant.L_prb, phy_grant.mcs.tbs/8, phy_grant.mcs.idx, grants[i].grant.rv_idx, - snr_db, + snr_db, srslte_pusch_last_noi(&enb_ul.pusch), crc_res?"OK":"KO", uci_data.uci_ack_len>0?(uci_data.uci_ack?", ack=1":", ack=0"):"", - uci_data.uci_cqi_len>0?cqi_str:"", - timestr); - - // Notify MAC of RL status + uci_data.uci_cqi_len>0?cqi_str:"", + timestr); + + // Notify MAC of RL status if (grants[i].grant.rv_idx == 0) { if (res && snr_db < PUSCH_RL_SNR_DB_TH) { Debug("PUSCH: Radio-Link failure snr=%.1f dB\n", snr_db); phy->mac->rl_failure(rnti); } else { phy->mac->rl_ok(rnti); - } + } } - + // Notify MAC new received data and HARQ Indication value - phy->mac->crc_info(tti_rx, rnti, phy_grant.mcs.tbs/8, crc_res); + phy->mac->crc_info(tti_rx, rnti, phy_grant.mcs.tbs/8, crc_res); if (uci_data.uci_ack_len) { phy->mac->ack_info(tti_rx, rnti, uci_data.uci_ack && (crc_res || snr_db > PUSCH_RL_SNR_DB_TH)); } - - // Notify MAC of UL SNR and DL CQI + + // Notify MAC of UL SNR and DL CQI if (snr_db >= PUSCH_RL_SNR_DB_TH) { phy->mac->snr_info(tti_rx, rnti, snr_db); } if (uci_data.uci_cqi_len>0 && crc_res) { phy->mac->cqi_info(tti_rx, rnti, wideband_cqi_value); } - - // Save metrics stats + + // Save metrics stats ue_db[rnti].metrics_ul(phy_grant.mcs.idx, 0, snr_db, srslte_pusch_last_noi(&enb_ul.pusch)); - } + } } - return SRSLTE_SUCCESS; + return SRSLTE_SUCCESS; } int phch_worker::decode_pucch() { srslte_uci_data_t uci_data; - + for(std::map::iterator iter=ue_db.begin(); iter!=ue_db.end(); ++iter) { uint16_t rnti = (uint16_t) iter->first; if (rnti >= SRSLTE_CRNTI_START && rnti <= SRSLTE_CRNTI_END && ue_db[rnti].has_grant_tti != (int) tti_rx) { - // Check if user needs to receive PUCCH - bool needs_pucch = false, needs_ack=false, needs_sr=false, needs_cqi=false; + // Check if user needs to receive PUCCH + bool needs_pucch = false, needs_ack=false, needs_sr=false, needs_cqi=false; uint32_t last_n_pdcch = 0; bzero(&uci_data, sizeof(srslte_uci_data_t)); - + if (ue_db[rnti].I_sr_en) { if (srslte_ue_ul_sr_send_tti(ue_db[rnti].I_sr, tti_rx)) { - needs_pucch = true; - needs_sr = true; - uci_data.scheduling_request = true; + needs_pucch = true; + needs_sr = true; + uci_data.scheduling_request = true; } - } + } + if (phy->ack_is_pending(t_rx, rnti, &last_n_pdcch)) { - needs_pucch = true; - needs_ack = true; - uci_data.uci_ack_len = 1; + needs_pucch = true; + needs_ack = true; + uci_data.uci_ack_len = 1; } srslte_cqi_value_t cqi_value; if (ue_db[rnti].cqi_en && (ue_db[rnti].pucch_cqi_ack || !needs_ack)) { if (srslte_cqi_send(ue_db[rnti].pmi_idx, tti_rx)) { - needs_pucch = true; - needs_cqi = true; - cqi_value.type = SRSLTE_CQI_TYPE_WIDEBAND; + needs_pucch = true; + needs_cqi = true; + cqi_value.type = SRSLTE_CQI_TYPE_WIDEBAND; uci_data.uci_cqi_len = srslte_cqi_size(&cqi_value); } } - + if (needs_pucch) { - if (srslte_enb_ul_get_pucch(&enb_ul, rnti, last_n_pdcch, t_rx, &uci_data)) { + if (srslte_enb_ul_get_pucch(&enb_ul, rnti, last_n_pdcch, sf_rx, &uci_data)) { fprintf(stderr, "Error getting PUCCH\n"); - return SRSLTE_ERROR; + return SRSLTE_ERROR; } if (uci_data.uci_ack_len > 0) { - phy->mac->ack_info(tti_rx, rnti, uci_data.uci_ack && (srslte_pucch_get_last_corr(&enb_ul.pucch) >= PUCCH_RL_CORR_TH)); + phy->mac->ack_info(tti_rx, rnti, uci_data.uci_ack && (srslte_pucch_get_last_corr(&enb_ul.pucch) >= PUCCH_RL_CORR_TH)); } if (uci_data.scheduling_request) { - phy->mac->sr_detected(tti_rx, rnti); + phy->mac->sr_detected(tti_rx, rnti); } - + char cqi_str[64]; if (uci_data.uci_cqi_len) { srslte_cqi_value_unpack(uci_data.uci_cqi, &cqi_value); phy->mac->cqi_info(tti_rx, rnti, cqi_value.wideband.wideband_cqi); sprintf(cqi_str, ", cqi=%d", cqi_value.wideband.wideband_cqi); } - log_h->info("PUCCH: rnti=0x%x, corr=%.2f, n_pucch=%d, n_prb=%d%s%s%s\n", - rnti, + log_h->info("PUCCH: rnti=0x%x, corr=%.2f, n_pucch=%d, n_prb=%d%s%s%s\n", + rnti, srslte_pucch_get_last_corr(&enb_ul.pucch), enb_ul.pucch.last_n_pucch, enb_ul.pucch.last_n_prb, - needs_ack?(uci_data.uci_ack?", ack=1":", ack=0"):"", - needs_sr?(uci_data.scheduling_request?", sr=yes":", sr=no"):"", - needs_cqi?cqi_str:""); + needs_ack?(uci_data.uci_ack?", ack=1":", ack=0"):"", + needs_sr?(uci_data.scheduling_request?", sr=yes":", sr=no"):"", + needs_cqi?cqi_str:""); - // Notify MAC of RL status + // Notify MAC of RL status if (!needs_sr) { if (srslte_pucch_get_last_corr(&enb_ul.pucch) < PUCCH_RL_CORR_TH) { Debug("PUCCH: Radio-Link failure corr=%.1f\n", srslte_pucch_get_last_corr(&enb_ul.pucch)); phy->mac->rl_failure(rnti); } else { phy->mac->rl_ok(rnti); - } - } + } + } } } - } - return 0; + } + return 0; } @@ -587,15 +588,15 @@ int phch_worker::encode_phich(srslte_enb_dl_phich_t *acks, uint32_t nof_acks) for (uint32_t i=0;iinfo_hex(ptr, len, - "PDSCH: rnti=0x%x, l_crb=%2d, %s, harq=%d, tbs=%d, mcs=%d, rv=%d, tti_tx=%d\n", - rnti, phy_grant.nof_prb, grant_str, grants[i].grant.harq_process, - phy_grant.mcs[0].tbs/8, phy_grant.mcs[0].idx, grants[i].grant.rv_idx, tti_tx); + "PDSCH: rnti=0x%x, l_crb=%2d, %s, harq=%d, tbs=%d, mcs=%d, rv=%d, tti_tx_dl=%d\n", + rnti, phy_grant.nof_prb, grant_str, grants[i].grant.harq_process, + phy_grant.mcs[0].tbs/8, phy_grant.mcs[0].idx, grants[i].grant.rv_idx, tti_tx_dl); } srslte_softbuffer_tx_t *sb[SRSLTE_MAX_CODEWORDS] = {grants[i].softbuffer, NULL}; diff --git a/srsenb/src/phy/txrx.cc b/srsenb/src/phy/txrx.cc index 9427e3459..fa14b0b82 100644 --- a/srsenb/src/phy/txrx.cc +++ b/srsenb/src/phy/txrx.cc @@ -115,7 +115,7 @@ void txrx::run_thread() /* Compute TX time: Any transmission happens in TTI+4 thus advance 4 ms the reception time */ srslte_timestamp_copy(&tx_time, &rx_time); - srslte_timestamp_add(&tx_time, 0, 4e-3); + srslte_timestamp_add(&tx_time, 0, HARQ_DELAY_MS*1e-3); Debug("Settting TTI=%d, tx_mutex=%d, tx_time=%d:%f to worker %d\n", tti, tx_mutex_cnt, diff --git a/srsue/hdr/mac/mux.h b/srsue/hdr/mac/mux.h index 1167af752..ab081070c 100644 --- a/srsue/hdr/mac/mux.h +++ b/srsue/hdr/mac/mux.h @@ -82,8 +82,7 @@ private: const static int MIN_RLC_SDU_LEN = 0; const static int MAX_NOF_SUBHEADERS = 20; - const static int MAX_HARQ_PROC = 8; - + std::vector lch; // Keep track of the PIDs that transmitted BSR reports diff --git a/srsue/src/phy/phch_common.cc b/srsue/src/phy/phch_common.cc index 489d27197..549783fd9 100644 --- a/srsue/src/phy/phch_common.cc +++ b/srsue/src/phy/phch_common.cc @@ -136,12 +136,16 @@ srslte::radio* phch_common::get_radio() void phch_common::set_rar_grant(uint32_t tti, uint8_t grant_payload[SRSLTE_RAR_GRANT_LEN]) { srslte_dci_rar_grant_unpack(&rar_grant, grant_payload); - rar_grant_pending = true; - // PUSCH is at n+6 or n+7 and phch_worker assumes default delay of 4 ttis + rar_grant_pending = true; + int delay = MSG3_DELAY_MS-HARQ_DELAY_MS; + if (delay < 0) { + fprintf(stderr, "Error MSG3_DELAY_MS can't be lower than HARQ_DELAY_MS\n"); + delay = 0; + } if (rar_grant.ul_delay) { - rar_grant_tti = (tti + 3) % 10240; + rar_grant_tti = (tti + delay + 1) % 10240; } else { - rar_grant_tti = (tti + 2) % 10240; + rar_grant_tti = (tti + delay) % 10240; } } diff --git a/srsue/src/phy/phch_worker.cc b/srsue/src/phy/phch_worker.cc index e6c0050e2..adeb41603 100644 --- a/srsue/src/phy/phch_worker.cc +++ b/srsue/src/phy/phch_worker.cc @@ -292,6 +292,13 @@ void phch_worker::work_imp() } } } + + // Process RAR before UL to enable zero-delay Msg3 + bool rar_delivered = false; + if (HARQ_DELAY_MS == MSG3_DELAY_MS && dl_mac_grant.rnti_type == SRSLTE_RNTI_RAR) { + rar_delivered = true; + phy->mac->tb_decoded(dl_ack[0], 0, dl_mac_grant.rnti_type, dl_mac_grant.pid); + } // Decode PHICH bool ul_ack = false; @@ -313,8 +320,8 @@ void phch_worker::work_imp() set_uci_periodic_cqi(); } - /* TTI offset for UL is always 4 for LTE */ - ul_action.tti_offset = 4; + /* TTI offset for UL */ + ul_action.tti_offset = HARQ_DELAY_MS; /* Send UL grant or HARQ information (from PHICH) to MAC */ if (ul_grant_available && ul_ack_available) { @@ -335,7 +342,7 @@ void phch_worker::work_imp() &ul_action.softbuffers[0], ul_action.rv[0], ul_action.rnti, ul_mac_grant.is_from_rar); signal_ready = true; if (ul_action.expect_ack) { - phy->set_pending_ack(HARQ_RX(tti), ue_ul.pusch_cfg.grant.n_prb_tilde[0], ul_action.phy_grant.ul.ncs_dmrs); + phy->set_pending_ack(TTI_RX_ACK(tti), ue_ul.pusch_cfg.grant.n_prb_tilde[0], ul_action.phy_grant.ul.ncs_dmrs); } } else if (dl_action.generate_ack || uci_data.scheduling_request || uci_data.uci_cqi_len > 0) { @@ -357,7 +364,7 @@ void phch_worker::work_imp() if (!dl_action.generate_ack_callback) { if (dl_mac_grant.rnti_type == SRSLTE_RNTI_PCH && dl_action.decode_enabled[0]) { phy->mac->pch_decoded_ok(dl_mac_grant.n_bytes[0]); - } else { + } else if (!rar_delivered) { for (uint32_t tb = 0; tb < SRSLTE_MAX_TB; tb++) { if (dl_action.decode_enabled[tb]) { phy->mac->tb_decoded(dl_ack[tb], tb, dl_mac_grant.rnti_type, dl_mac_grant.pid); @@ -475,7 +482,7 @@ bool phch_worker::decode_pdcch_dl(srsue::mac_interface_phy::mac_grant_t* grant) /* Fill MAC grant structure */ grant->ndi[0] = dci_unpacked.ndi; grant->ndi[1] = dci_unpacked.ndi_1; - grant->pid = dci_unpacked.harq_process; + grant->pid = ASYNC_DL_SCHED?dci_unpacked.harq_process:(tti%(2*HARQ_DELAY_MS)); grant->n_bytes[0] = grant->phy_grant.dl.mcs[0].tbs / (uint32_t) 8; grant->n_bytes[1] = grant->phy_grant.dl.mcs[1].tbs / (uint32_t) 8; grant->tti = tti; @@ -663,7 +670,7 @@ bool phch_worker::decode_pdcch_ul(mac_interface_phy::mac_grant_t* grant) char timestr[64]; timestr[0]='\0'; - phy->reset_pending_ack(HARQ_RX(tti)); + phy->reset_pending_ack(TTI_RX_ACK(tti)); srslte_dci_msg_t dci_msg; srslte_ra_ul_dci_t dci_unpacked; @@ -776,7 +783,7 @@ void phch_worker::set_uci_sr() { uci_data.scheduling_request = false; if (phy->sr_enabled) { - uint32_t sr_tx_tti = HARQ_TX(tti); + uint32_t sr_tx_tti = TTI_TX(tti); // Get I_sr parameter if (srslte_ue_ul_sr_send_tti(I_sr, sr_tx_tti)) { Info("PUCCH: SR transmission at TTI=%d, I_sr=%d\n", sr_tx_tti, I_sr); @@ -793,7 +800,7 @@ void phch_worker::set_uci_periodic_cqi() int cqi_max = phy->args->cqi_max; if (period_cqi.configured && rnti_is_set) { - if (period_cqi.ri_idx_present && srslte_ri_send(period_cqi.pmi_idx, period_cqi.ri_idx, HARQ_TX(tti))) { + if (period_cqi.ri_idx_present && srslte_ri_send(period_cqi.pmi_idx, period_cqi.ri_idx, TTI_TX(tti))) { if (uci_data.uci_ri_len) { uci_data.uci_cqi[0] = uci_data.uci_ri; uci_data.uci_cqi_len = uci_data.uci_ri_len; @@ -802,7 +809,7 @@ void phch_worker::set_uci_periodic_cqi() uci_data.uci_pmi_len = 0; Info("PUCCH: Periodic RI=%d\n", uci_data.uci_cqi[0]); } - } else if (srslte_cqi_send(period_cqi.pmi_idx, HARQ_TX(tti))) { + } else if (srslte_cqi_send(period_cqi.pmi_idx, TTI_TX(tti))) { srslte_cqi_value_t cqi_report; if (period_cqi.format_is_subband) { // TODO: Implement subband periodic reports @@ -868,8 +875,8 @@ void phch_worker::set_uci_aperiodic_cqi() bool phch_worker::srs_is_ready_to_send() { if (srs_cfg.configured) { - if (srslte_refsignal_srs_send_cs(srs_cfg.subframe_config, HARQ_RX(tti)%10) == 1 && - srslte_refsignal_srs_send_ue(srs_cfg.I_srs, HARQ_TX(tti)) == 1) + if (srslte_refsignal_srs_send_cs(srs_cfg.subframe_config, TTI_TX(tti)%10) == 1 && + srslte_refsignal_srs_send_ue(srs_cfg.I_srs, TTI_TX(tti)) == 1) { return true; } @@ -889,7 +896,7 @@ void phch_worker::encode_pusch(srslte_ra_ul_grant_t *grant, uint8_t *payload, ui char timestr[64]; timestr[0]='\0'; - if (srslte_ue_ul_cfg_grant(&ue_ul, grant, HARQ_TX(tti), rv, current_tx_nb)) { + if (srslte_ue_ul_cfg_grant(&ue_ul, grant, TTI_TX(tti), rv, current_tx_nb)) { Error("Configuring UL grant\n"); } @@ -919,7 +926,7 @@ void phch_worker::encode_pusch(srslte_ra_ul_grant_t *grant, uint8_t *payload, ui #endif Info("PUSCH: tti_tx=%d, n_prb=%d, rb_start=%d, tbs=%d, mod=%d, mcs=%d, rv_idx=%d, ack=%s, ri=%s, cfo=%.1f Hz%s\n", - HARQ_TX(tti), + TTI_TX(tti), grant->L_prb, grant->n_prb[0], grant->mcs.tbs/8, grant->mcs.mod, grant->mcs.idx, rv, uci_data.uci_ack_len>0?(uci_data.uci_ack?"1":"0"):"no", @@ -950,7 +957,7 @@ void phch_worker::encode_pucch() gettimeofday(&t[1], NULL); #endif - if (srslte_ue_ul_pucch_encode(&ue_ul, uci_data, last_dl_pdcch_ncce, HARQ_TX(tti), signal_buffer[0])) { + if (srslte_ue_ul_pucch_encode(&ue_ul, uci_data, last_dl_pdcch_ncce, TTI_TX(tti), signal_buffer[0])) { Error("Encoding PUCCH\n"); } @@ -966,7 +973,7 @@ void phch_worker::encode_pucch() float gain = set_power(tx_power); Info("PUCCH: tti_tx=%d, n_cce=%3d, n_pucch=%d, n_prb=%d, ack=%s%s, ri=%s, pmi=%s%s, sr=%s, cfo=%.1f Hz%s\n", - HARQ_TX(tti), + TTI_TX(tti), last_dl_pdcch_ncce, ue_ul.pucch.last_n_pucch, ue_ul.pucch.last_n_prb, uci_data.uci_ack_len>0?(uci_data.uci_ack?"1":"0"):"no", uci_data.uci_ack_len>1?(uci_data.uci_ack_2?"1":"0"):"", @@ -987,7 +994,7 @@ void phch_worker::encode_srs() char timestr[64]; timestr[0]='\0'; - if (srslte_ue_ul_srs_encode(&ue_ul, HARQ_TX(tti), signal_buffer[0])) + if (srslte_ue_ul_srs_encode(&ue_ul, TTI_TX(tti), signal_buffer[0])) { Error("Encoding SRS\n"); } @@ -1002,7 +1009,7 @@ void phch_worker::encode_srs() float gain = set_power(tx_power); uint32_t fi = srslte_vec_max_fi((float*) signal_buffer, SRSLTE_SF_LEN_PRB(cell.nof_prb)); float *f = (float*) signal_buffer; - Info("SRS: power=%.2f dBm, tti_tx=%d%s\n", tx_power, HARQ_TX(tti), timestr); + Info("SRS: power=%.2f dBm, tti_tx=%d%s\n", tx_power, TTI_TX(tti), timestr); } From 5208c4c1602b63b814aee04b058f7e2119cf7d8f Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Sun, 8 Oct 2017 00:41:39 +0200 Subject: [PATCH 27/55] Removed unused code --- srsenb/src/phy/phch_common.cc | 4 ---- 1 file changed, 4 deletions(-) diff --git a/srsenb/src/phy/phch_common.cc b/srsenb/src/phy/phch_common.cc index c4b94153c..061743274 100644 --- a/srsenb/src/phy/phch_common.cc +++ b/srsenb/src/phy/phch_common.cc @@ -128,10 +128,6 @@ bool phch_common::ack_is_pending(uint32_t sf_idx, uint16_t rnti, uint32_t *last_ bool ret = pending_ack[rnti].is_pending[sf_idx]; pending_ack[rnti].is_pending[sf_idx] = false; - if (ret) { - - } - if (ret && last_n_pdcch) { *last_n_pdcch = pending_ack[rnti].n_pdcch[sf_idx]; } From 1a5cf45ddacb16271bf70df002ff7ad881a84690 Mon Sep 17 00:00:00 2001 From: Xavier Arteaga Date: Mon, 9 Oct 2017 16:30:32 +0200 Subject: [PATCH 28/55] Solved compilation error for SSE (Tested in Atom) --- lib/include/srslte/phy/utils/simd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/include/srslte/phy/utils/simd.h b/lib/include/srslte/phy/utils/simd.h index 2590794f2..08eed115f 100644 --- a/lib/include/srslte/phy/utils/simd.h +++ b/lib/include/srslte/phy/utils/simd.h @@ -738,7 +738,7 @@ typedef __m256 simd_sel_t; #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE typedef __m128i simd_i_t; -typedef __m128i simd_sel_t; +typedef __m128 simd_sel_t; #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -807,7 +807,7 @@ static inline simd_sel_t srslte_simd_f_max(simd_f_t a, simd_f_t b) { return _mm256_cmp_ps(a, b, _CMP_GT_OS); #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE - return (simd_i_t) _mm_cmpgt_ps(a, b); + return (simd_sel_t) _mm_cmpgt_ps(a, b); #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ From a180b5ebacfdfa8f90add5f764608ee57be3a8fd Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Tue, 10 Oct 2017 12:06:24 +0200 Subject: [PATCH 29/55] Msg3 delay is added to harq delay --- lib/include/srslte/common/common.h | 2 +- srsenb/src/mac/scheduler.cc | 2 +- srsue/src/phy/phch_common.cc | 10 ++++------ 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/lib/include/srslte/common/common.h b/lib/include/srslte/common/common.h index fd785fbca..6372af73e 100644 --- a/lib/include/srslte/common/common.h +++ b/lib/include/srslte/common/common.h @@ -45,7 +45,7 @@ #define SRSLTE_N_RADIO_BEARERS 11 #define HARQ_DELAY_MS 4 -#define MSG3_DELAY_MS 6 +#define MSG3_DELAY_MS 2 // Delay added to HARQ_DELAY_MS #define TTI_TX(tti) ((tti+HARQ_DELAY_MS)%10240) #define TTI_RX_ACK(tti) ((tti+(2*HARQ_DELAY_MS))%10240) diff --git a/srsenb/src/mac/scheduler.cc b/srsenb/src/mac/scheduler.cc index 21d60652d..a7e3d12d6 100644 --- a/srsenb/src/mac/scheduler.cc +++ b/srsenb/src/mac/scheduler.cc @@ -541,7 +541,7 @@ int sched::dl_sched_rar(dl_sched_rar_t rar[MAX_RAR_LIST]) pending_rar[j].rar_tti = 0; // Save UL resources - uint32_t pending_tti=(current_tti+MSG3_DELAY_MS)%10; + uint32_t pending_tti=(current_tti+MSG3_DELAY_MS+HARQ_DELAY_MS)%10; pending_msg3[pending_tti].enabled = true; pending_msg3[pending_tti].rnti = pending_rar[j].rnti; pending_msg3[pending_tti].L = L_prb; diff --git a/srsue/src/phy/phch_common.cc b/srsue/src/phy/phch_common.cc index 549783fd9..7d2948c1a 100644 --- a/srsue/src/phy/phch_common.cc +++ b/srsue/src/phy/phch_common.cc @@ -137,15 +137,13 @@ void phch_common::set_rar_grant(uint32_t tti, uint8_t grant_payload[SRSLTE_RAR_G { srslte_dci_rar_grant_unpack(&rar_grant, grant_payload); rar_grant_pending = true; - int delay = MSG3_DELAY_MS-HARQ_DELAY_MS; - if (delay < 0) { - fprintf(stderr, "Error MSG3_DELAY_MS can't be lower than HARQ_DELAY_MS\n"); - delay = 0; + if (MSG3_DELAY_MS < 0) { + fprintf(stderr, "Error MSG3_DELAY_MS can't be negative\n"); } if (rar_grant.ul_delay) { - rar_grant_tti = (tti + delay + 1) % 10240; + rar_grant_tti = (tti + MSG3_DELAY_MS + 1) % 10240; } else { - rar_grant_tti = (tti + delay) % 10240; + rar_grant_tti = (tti + MSG3_DELAY_MS) % 10240; } } From fda886407b7bebe2537ee608687f74f5dae9cf81 Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Tue, 10 Oct 2017 12:33:10 +0200 Subject: [PATCH 30/55] Added option to force the DL/UL frequency at the UE --- srsenb/enb.conf.example | 4 +++- srsue/hdr/phy/phch_recv.h | 5 +++++ srsue/hdr/phy/phy.h | 3 ++- srsue/src/main.cc | 2 ++ srsue/src/phy/phch_recv.cc | 29 ++++++++++++++++++++++------- srsue/src/phy/phy.cc | 5 +++++ srsue/src/ue.cc | 4 ++++ srsue/ue.conf.example | 2 ++ 8 files changed, 45 insertions(+), 9 deletions(-) diff --git a/srsenb/enb.conf.example b/srsenb/enb.conf.example index 44e546116..7f023d0c6 100644 --- a/srsenb/enb.conf.example +++ b/srsenb/enb.conf.example @@ -45,7 +45,9 @@ drb_config = drb.conf # tx_gain: Transmit gain (dB). # rx_gain: Optional receive gain (dB). If disabled, AGC if enabled # -# Optional parameters: +# Optional parameters: +# dl_freq: Override DL frequency corresponding to dl_earfcn +# ul_freq: Override UL frequency corresponding to dl_earfcn (must be set if dl_freq is set) # device_name: Device driver family. Supported options: "auto" (uses first found), "UHD" or "bladeRF" # device_args: Arguments for the device driver. Options are "auto" or any string. # Default for UHD: "recv_frame_size=9232,send_frame_size=9232" diff --git a/srsue/hdr/phy/phch_recv.h b/srsue/hdr/phy/phch_recv.h index 044960760..c51622e53 100644 --- a/srsue/hdr/phy/phch_recv.h +++ b/srsue/hdr/phy/phch_recv.h @@ -53,6 +53,7 @@ public: void set_agc_enable(bool enable); void set_earfcn(std::vector earfcn); + void force_freq(float dl_freq, float ul_freq); void reset_sync(); void cell_search_start(); @@ -171,6 +172,10 @@ private: int cell_meas_rsrp(); int cell_search(int force_N_id_2 = -1); bool set_cell(); + + float dl_freq; + float ul_freq; + }; } // namespace srsue diff --git a/srsue/hdr/phy/phy.h b/srsue/hdr/phy/phy.h index 0c77360b2..07a2713fa 100644 --- a/srsue/hdr/phy/phy.h +++ b/srsue/hdr/phy/phy.h @@ -76,6 +76,7 @@ public: void write_trace(std::string filename); void set_earfcn(std::vector earfcns); + void force_freq(float dl_freq, float ul_freq); /********** RRC INTERFACE ********************/ void reset(); @@ -167,7 +168,7 @@ private: /* Current time advance */ uint32_t n_ta; - + bool init_(srslte::radio *radio_handler, mac_interface_phy *mac, srslte::log *log_h, bool do_agc, uint32_t nof_workers); void set_default_args(phy_args_t *args); bool check_args(phy_args_t *args); diff --git a/srsue/src/main.cc b/srsue/src/main.cc index 569083998..47d5f807e 100644 --- a/srsue/src/main.cc +++ b/srsue/src/main.cc @@ -65,6 +65,8 @@ void parse_args(all_args_t *args, int argc, char *argv[]) { common.add_options() ("rf.dl_earfcn", bpo::value(&args->rf.dl_earfcn)->default_value(3400), "Downlink EARFCN") ("rf.freq_offset", bpo::value(&args->rf.freq_offset)->default_value(0), "(optional) Frequency offset") + ("rf.dl_freq", bpo::value(&args->rf.dl_freq)->default_value(-1), "Downlink Frequency (if positive overrides EARFCN)") + ("rf.ul_freq", bpo::value(&args->rf.ul_freq)->default_value(-1), "Uplink Frequency (if positive overrides EARFCN)") ("rf.rx_gain", bpo::value(&args->rf.rx_gain)->default_value(-1), "Front-end receiver gain") ("rf.tx_gain", bpo::value(&args->rf.tx_gain)->default_value(-1), "Front-end transmitter gain") ("rf.nof_rx_ant", bpo::value(&args->rf.nof_rx_ant)->default_value(1), "Number of RX antennas") diff --git a/srsue/src/phy/phch_recv.cc b/srsue/src/phy/phch_recv.cc index e10c013fe..8642460aa 100644 --- a/srsue/src/phy/phch_recv.cc +++ b/srsue/src/phy/phch_recv.cc @@ -63,6 +63,8 @@ double callback_set_rx_gain(void *h, double gain) { phch_recv::phch_recv() { + dl_freq = -1; + ul_freq = -1; bzero(&cell, sizeof(srslte_cell_t)); running = false; } @@ -445,6 +447,11 @@ void phch_recv::set_earfcn(std::vector earfcn) { this->earfcn = earfcn; } +void phch_recv::force_freq(float dl_freq, float ul_freq) { + this->dl_freq = dl_freq; + this->ul_freq = ul_freq; +} + bool phch_recv::stop_sync() { wait_radio_reset(); @@ -568,17 +575,25 @@ bool phch_recv::cell_select(uint32_t earfcn, srslte_cell_t cell) { bool phch_recv::set_frequency() { - double dl_freq = 1e6*srslte_band_fd(current_earfcn); - double ul_freq = 1e6*srslte_band_fu(srslte_band_ul_earfcn(current_earfcn)); - if (dl_freq > 0 && ul_freq > 0) { + double set_dl_freq = 0; + double set_ul_freq = 0; + + if (this->dl_freq > 0 && this->ul_freq > 0) { + set_dl_freq = this->dl_freq; + set_ul_freq = this->ul_freq; + } else { + set_dl_freq = 1e6*srslte_band_fd(current_earfcn); + set_ul_freq = 1e6*srslte_band_fu(srslte_band_ul_earfcn(current_earfcn)); + } + if (set_dl_freq > 0 && set_ul_freq > 0) { log_h->info("SYNC: Set DL EARFCN=%d, f_dl=%.1f MHz, f_ul=%.1f MHz\n", - current_earfcn, dl_freq / 1e6, ul_freq / 1e6); + current_earfcn, set_dl_freq / 1e6, set_ul_freq / 1e6); log_h->console("Searching cell in DL EARFCN=%d, f_dl=%.1f MHz, f_ul=%.1f MHz\n", - current_earfcn, dl_freq / 1e6, ul_freq / 1e6); + current_earfcn, set_dl_freq / 1e6, set_ul_freq / 1e6); - radio_h->set_rx_freq(dl_freq); - radio_h->set_tx_freq(ul_freq); + radio_h->set_rx_freq(set_dl_freq); + radio_h->set_tx_freq(set_ul_freq); ul_dl_factor = radio_h->get_tx_freq()/radio_h->get_rx_freq(); srslte_ue_sync_reset(&ue_sync); diff --git a/srsue/src/phy/phy.cc b/srsue/src/phy/phy.cc index e74107661..df29726d5 100644 --- a/srsue/src/phy/phy.cc +++ b/srsue/src/phy/phy.cc @@ -332,6 +332,11 @@ void phy::set_earfcn(vector< uint32_t > earfcns) sf_recv.set_earfcn(earfcns); } +void phy::force_freq(float dl_freq, float ul_freq) +{ + sf_recv.force_freq(dl_freq, ul_freq); +} + bool phy::sync_status() { return sf_recv.status_is_sync(); diff --git a/srsue/src/ue.cc b/srsue/src/ue.cc index 92adaf8dd..3560a78fe 100644 --- a/srsue/src/ue.cc +++ b/srsue/src/ue.cc @@ -193,6 +193,10 @@ bool ue::init(all_args_t *args_) earfcn_list.push_back(args->rf.dl_earfcn); phy.set_earfcn(earfcn_list); + if (args->rf.dl_freq > 0 && args->rf.ul_freq > 0) { + phy.force_freq(args->rf.dl_freq, args->rf.ul_freq); + } + printf("Waiting PHY to initialize...\n"); phy.wait_initialize(); phy.configure_ul_params(); diff --git a/srsue/ue.conf.example b/srsue/ue.conf.example index 30c05069a..5d14d3c3c 100644 --- a/srsue/ue.conf.example +++ b/srsue/ue.conf.example @@ -9,6 +9,8 @@ # rx_gain: Optional receive gain (dB). If disabled, AGC if enabled # # Optional parameters: +# dl_freq: Override DL frequency corresponding to dl_earfcn +# ul_freq: Override UL frequency corresponding to dl_earfcn # nof_rx_ant: Number of RX antennas (Default 1, supported 1 or 2) # device_name: Device driver family. Supported options: "auto" (uses first found), "UHD" or "bladeRF" # device_args: Arguments for the device driver. Options are "auto" or any string. From 2019ca31eff3b7190cb6b2f3cf53734f95be9486 Mon Sep 17 00:00:00 2001 From: yagoda Date: Fri, 13 Oct 2017 15:35:48 +0100 Subject: [PATCH 31/55] adding neon support for new kernel structure --- lib/include/srslte/phy/utils/simd.h | 319 +++++++++++++++++++++++++--- 1 file changed, 292 insertions(+), 27 deletions(-) diff --git a/lib/include/srslte/phy/utils/simd.h b/lib/include/srslte/phy/utils/simd.h index 08eed115f..6e4185788 100644 --- a/lib/include/srslte/phy/utils/simd.h +++ b/lib/include/srslte/phy/utils/simd.h @@ -121,7 +121,17 @@ #define SRSLTE_SIMD_C16_SIZE 8 #else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON +#define SRSLTE_SIMD_F_SIZE 4 +#define SRSLTE_SIMD_CF_SIZE 4 + +#define SRSLTE_SIMD_I_SIZE 4 + +#define SRSLTE_SIMD_S_SIZE 8 +#define SRSLTE_SIMD_C16_SIZE 8 + +#else /* LV_HAVE_NEON */ #define SRSLTE_SIMD_F_SIZE 0 #define SRSLTE_SIMD_CF_SIZE 0 @@ -147,6 +157,10 @@ typedef __m256 simd_f_t; #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE typedef __m128 simd_f_t; +#else /* HAVE_NEON */ +#ifdef HAVE_NEON +typedef float32x4 simd_f_t; +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -161,6 +175,10 @@ static inline simd_f_t srslte_simd_f_load(float *ptr) { #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE return _mm_load_ps(ptr); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vld1q_f32(ptr); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -175,12 +193,16 @@ static inline simd_f_t srslte_simd_f_loadu(float *ptr) { #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE return _mm_loadu_ps(ptr); +#else /* LV_HAVE_SSE */ + #ifdef HAVE_NEON + return vld1q_f32(ptr); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ } -static inline void srslte_simd_f_store(float *ptr, simd_f_t simdreg) { +static inline void srslte_simd_f_store(float *ptr, simd_f_t simdreg) {vst1q_f32 #ifdef LV_HAVE_AVX512 _mm512_store_ps(ptr, simdreg); #else /* LV_HAVE_AVX512 */ @@ -189,6 +211,10 @@ static inline void srslte_simd_f_store(float *ptr, simd_f_t simdreg) { #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE _mm_store_ps(ptr, simdreg); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + vst1q_f32(ptr, simdreg); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -203,6 +229,10 @@ static inline void srslte_simd_f_storeu(float *ptr, simd_f_t simdreg) { #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE _mm_storeu_ps(ptr, simdreg); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + vst1q_f32(ptr, simdreg); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -217,6 +247,10 @@ static inline simd_f_t srslte_simd_f_set1(float x) { #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE return _mm_set1_ps(x); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vdupq_n_f32(x); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -231,6 +265,10 @@ static inline simd_f_t srslte_simd_f_mul(simd_f_t a, simd_f_t b) { #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE return _mm_mul_ps(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vmulq_f32(a,b); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -245,6 +283,10 @@ static inline simd_f_t srslte_simd_f_rcp(simd_f_t a) { #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE return _mm_rcp_ps(a); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vrecpeq_f32(a); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -265,6 +307,9 @@ static inline simd_f_t srslte_simd_f_addsub(simd_f_t a, simd_f_t b) { #endif /* LV_HAVE_AVX512 */ } + + + static inline simd_f_t srslte_simd_f_sub(simd_f_t a, simd_f_t b) { #ifdef LV_HAVE_AVX512 return _mm512_sub_ps(a, b); @@ -274,6 +319,10 @@ static inline simd_f_t srslte_simd_f_sub(simd_f_t a, simd_f_t b) { #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE return _mm_sub_ps(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vsubq_f32(a, b); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -286,8 +335,12 @@ static inline simd_f_t srslte_simd_f_add(simd_f_t a, simd_f_t b) { #ifdef LV_HAVE_AVX2 return _mm256_add_ps(a, b); #else /* LV_HAVE_AVX2 */ -#ifdef LV_HAVE_SSE +#ifdef LV_HAVE_SSE return _mm_add_ps(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vaddq_f32(a, b); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -300,8 +353,12 @@ static inline simd_f_t srslte_simd_f_zero (void) { #ifdef LV_HAVE_AVX2 return _mm256_setzero_ps(); #else /* LV_HAVE_AVX2 */ -#ifdef LV_HAVE_SSE +#ifdef LV_HAVE_SSE return _mm_setzero_ps(); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vdupq_n_f32(0); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -344,6 +401,10 @@ static inline simd_f_t srslte_simd_f_hadd(simd_f_t a, simd_f_t b) { #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE return _mm_hadd_ps(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vcombine_f32( vpadd_f32( vget_low_f32(a), vget_high_f32(a) ), vpadd_f32( vget_low_f32(b), vget_high_f32(b) ) ); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -358,6 +419,10 @@ static inline simd_f_t srslte_simd_f_sqrt(simd_f_t a) { #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE return _mm_sqrt_ps(a); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vrecpeq_f32(vrsqrteq_f32(a)); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -368,10 +433,15 @@ static inline simd_f_t srslte_simd_f_sqrt(simd_f_t a) { #if SRSLTE_SIMD_CF_SIZE +#ifdef HAVE_NEON + typedef float32x4x2_t simd_cf_t; +#else typedef struct { simd_f_t re; simd_f_t im; + } simd_cf_t; +#endif /* Complex Single precission Floating point functions */ static inline simd_cf_t srslte_simd_cfi_load(cf_t *ptr) { @@ -399,6 +469,10 @@ static inline simd_cf_t srslte_simd_cfi_load(cf_t *ptr) { __m128 i2 = _mm_load_ps((float*)(ptr + 2)); ret.re = _mm_shuffle_ps(i1, i2, _MM_SHUFFLE(2,0,2,0)); ret.im = _mm_shuffle_ps(i1, i2, _MM_SHUFFLE(3,1,3,1)); +#else +#ifdef HAVE_NEON + ret = vld2q_f32((float*)(ptr)); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -431,6 +505,10 @@ static inline simd_cf_t srslte_simd_cfi_loadu(cf_t *ptr) { __m128 i2 = _mm_loadu_ps((float*)(ptr + 2)); ret.re = _mm_shuffle_ps(i1, i2, _MM_SHUFFLE(2,0,2,0)); ret.im = _mm_shuffle_ps(i1, i2, _MM_SHUFFLE(3,1,3,1)); +#else +#ifdef HAVE_NEON + ret = vld2q_f32((float*)(ptr)); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -450,6 +528,11 @@ static inline simd_cf_t srslte_simd_cf_load(float *re, float *im) { #ifdef LV_HAVE_SSE ret.re = _mm_load_ps(re); ret.im = _mm_load_ps(im); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + ret.val[0] = vld1q_f32(ptr); + ret.val[1] = vld1q_f32(ptr); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -469,6 +552,11 @@ static inline simd_cf_t srslte_simd_cf_loadu(float *re, float *im) { #ifdef LV_HAVE_SSE ret.re = _mm_loadu_ps(re); ret.im = _mm_loadu_ps(im); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + ret.val[0] = vld1q_f32(ptr); + ret.val[1] = vld1q_f32(ptr); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -497,6 +585,10 @@ static inline void srslte_simd_cfi_store(cf_t *ptr, simd_cf_t simdreg) { #ifdef LV_HAVE_SSE _mm_store_ps((float*)(ptr), _mm_unpacklo_ps(simdreg.re, simdreg.im)); _mm_store_ps((float*)(ptr + 2), _mm_unpackhi_ps(simdreg.re, simdreg.im)); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + vst2q_f32((float*)(ptr), simdreg); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -524,6 +616,10 @@ static inline void srslte_simd_cfi_storeu(cf_t *ptr, simd_cf_t simdreg) { #ifdef LV_HAVE_SSE _mm_storeu_ps((float*)(ptr), _mm_unpacklo_ps(simdreg.re, simdreg.im)); _mm_storeu_ps((float*)(ptr + 2), _mm_unpackhi_ps(simdreg.re, simdreg.im)); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + vst2q_f32((float*)(ptr), simdreg); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -541,6 +637,11 @@ static inline void srslte_simd_cf_store(float *re, float *im, simd_cf_t simdreg) #ifdef LV_HAVE_SSE _mm_store_ps((float *) re, simdreg.re); _mm_store_ps((float *) im, simdreg.im); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + vst1q_f32((float *) re, simdreg.val[0]); + vst1q_f32((float *) im, simdreg.val[1]); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -558,6 +659,11 @@ static inline void srslte_simd_cf_storeu(float *re, float *im, simd_cf_t simdreg #ifdef LV_HAVE_SSE _mm_storeu_ps((float *) re, simdreg.re); _mm_storeu_ps((float *) im, simdreg.im); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + vst1q_f32((float *) re, simdreg.val[0]); + vst1q_f32((float *) im, simdreg.val[1]); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -576,6 +682,11 @@ static inline simd_cf_t srslte_simd_cf_set1 (cf_t x) { #ifdef LV_HAVE_SSE ret.re = _mm_set1_ps(__real__ x); ret.im = _mm_set1_ps(__imag__ x); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + re.val[0] = vdupq_n_f32(__real__ x); + im.val[1] = vdupq_n_f32(__imag__ x); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -601,6 +712,13 @@ static inline simd_cf_t srslte_simd_cf_prod (simd_cf_t a, simd_cf_t b) { _mm_mul_ps(a.im, b.im)); ret.im = _mm_add_ps(_mm_mul_ps(a.re, b.im), _mm_mul_ps(a.im, b.re)); +#else +#ifdef HAVE_NEON + ret.val[0] = vsubq_f32(vmulq_f32(a.val[0],b.val[0]), + vmulq_f32(a.val[1],b.val[1])); + ret.val[1] = vaddq_f32(vmulq_f32(a.val[0],b.val[1]), + vmulq_f32(a.val[1],b.val[0])); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -626,6 +744,13 @@ static inline simd_cf_t srslte_simd_cf_conjprod (simd_cf_t a, simd_cf_t b) { _mm_mul_ps(a.im, b.im)); ret.im = _mm_sub_ps(_mm_mul_ps(a.im, b.re), _mm_mul_ps(a.re, b.im)); + #else +#ifdef HAVE_NEON + ret.val[0] = vaddq_f32(vmulq_f32(a.val[0],b.val[0]), + vmulq_f32(a.val[1],b.val[1])); + ret.val[1] = vsubq_f32(vmulq_f32(a.val[1],b.val[0]), + vmulq_f32(a.val[0],b.val[1])); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -645,6 +770,11 @@ static inline simd_cf_t srslte_simd_cf_add (simd_cf_t a, simd_cf_t b) { #ifdef LV_HAVE_SSE ret.re = _mm_add_ps(a.re, b.re); ret.im = _mm_add_ps(a.im, b.im); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + ret.val[0] = vaddq_f32(a.val[0],a.val[0]); + ret.val[1] = vaddq_f32(a.val[1],a.val[1]); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -665,6 +795,11 @@ static inline simd_cf_t srslte_simd_cf_mul (simd_cf_t a, simd_f_t b) { #ifdef LV_HAVE_SSE ret.re = _mm_mul_ps(a.re, b); ret.im = _mm_mul_ps(a.im, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + ret.val[0] = vmulq_f32(a.val[0],b); + ret.val[1] = vmulq_f32(a.val[1],b); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -699,6 +834,16 @@ static inline simd_cf_t srslte_simd_cf_rcp (simd_cf_t a) { simd_f_t neg_a_im = _mm_xor_ps(_mm_set1_ps(-0.0f), a.im); ret.re = _mm_mul_ps(a.re, rcp); ret.im = _mm_mul_ps(neg_a_im, rcp); + #else /* LV_HAVE_SSE */ + #ifdef HAVE_NEON + simd_f_t a2re = vmulq_f32(a.val[0], a.val[0]); + simd_f_t a2im = vmulq_f32(a.val[1], a.val[1]); + simd_f_t mod2 = vaddq_f32(a2re, a2im); + simd_f_t rcp = vrecpeq_f32(mod2); + simd_f_t neg_a_im = vnegq_f32(vdupq_n_f32(-0.0f), a.val[1]); + ret.val[0] = vmulq_f32(a.val[0], rcp); + ret.val[1] = vmulq_f32(neg_a_im, rcp); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -718,7 +863,11 @@ static inline simd_cf_t srslte_simd_cf_zero (void) { #ifdef LV_HAVE_SSE ret.re = _mm_setzero_ps(); ret.im = _mm_setzero_ps(); -#endif /* LV_HAVE_SSE */ +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + ret.val[0] = vdupq_n_f32(0); + ret.val[1] = vdupq_n_f32(0); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ return ret; @@ -739,6 +888,11 @@ typedef __m256 simd_sel_t; #ifdef LV_HAVE_SSE typedef __m128i simd_i_t; typedef __m128 simd_sel_t; +#else /* LV_HAVE_AVX2 */ +#ifdef LV_HAVE_SSE +typedef int32x4_t simd_i_t; +typedef __m128 simd_sel_t; +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -752,6 +906,10 @@ static inline simd_i_t srslte_simd_i_load(int *x) { #else #ifdef LV_HAVE_SSE return _mm_load_si128((__m128i*)x); +#else + #ifdef HAVE_NEON + return vld1_s32((int32x4_t*)x); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -780,6 +938,10 @@ static inline simd_i_t srslte_simd_i_set1(int x) { #else #ifdef LV_HAVE_SSE return _mm_set1_epi32(x); +#else + #ifdef HAVE_NEON + return vdupq_n_s32(x); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -794,10 +956,14 @@ static inline simd_i_t srslte_simd_i_add(simd_i_t a, simd_i_t b) { #else #ifdef LV_HAVE_SSE return _mm_add_epi32(a, b); +#else +#ifdef HAVE_NEON + return vaddq_s32(a, b); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ -} +}vcgtq_f32 static inline simd_sel_t srslte_simd_f_max(simd_f_t a, simd_f_t b) { #ifdef LV_HAVE_AVX512 @@ -808,6 +974,10 @@ static inline simd_sel_t srslte_simd_f_max(simd_f_t a, simd_f_t b) { #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE return (simd_sel_t) _mm_cmpgt_ps(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return (simd_sel_t) vcgtq_f32(a, b); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -841,6 +1011,10 @@ typedef __m256i simd_s_t; #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE typedef __m128i simd_s_t; +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON +typedef int16x8_t simd_s_t; +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -854,6 +1028,10 @@ static inline simd_s_t srslte_simd_s_load(int16_t *ptr) { #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE return _mm_load_si128((__m128i*) ptr); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vld1q_s16((int16x8_t*) ptr); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -867,7 +1045,11 @@ static inline simd_s_t srslte_simd_s_loadu(int16_t *ptr) { return _mm256_loadu_si256((__m256i*) ptr); #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE - return _mm_loadu_si128((__m128i*) ptr); + return _mm_loadu_si128((__m128i*) ptr) +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vld1q_s16((int16x8_t*) ptr); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -882,6 +1064,10 @@ static inline void srslte_simd_s_store(int16_t *ptr, simd_s_t simdreg) { #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE _mm_store_si128((__m128i*) ptr, simdreg); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + vst1q_s16((int16x8_t*) ptr, simdreg); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -896,11 +1082,15 @@ static inline void srslte_simd_s_storeu(int16_t *ptr, simd_s_t simdreg) { #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE _mm_storeu_si128((__m128i*) ptr, simdreg); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + vst1q_s16((int16x8_t*) ptr, simdreg); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ } - +vdupq_n_s16 static inline simd_s_t srslte_simd_s_zero(void) { #ifdef LV_HAVE_AVX512 return _mm512_setzero_si512(); @@ -910,10 +1100,14 @@ static inline simd_s_t srslte_simd_s_zero(void) { #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE return _mm_setzero_si128(); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vdupq_n_s16(0); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ -} +}vmulq_s16 static inline simd_s_t srslte_simd_s_mul(simd_s_t a, simd_s_t b) { #ifdef LV_HAVE_AVX512 @@ -924,6 +1118,10 @@ static inline simd_s_t srslte_simd_s_mul(simd_s_t a, simd_s_t b) { #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE return _mm_mullo_epi16(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vmulq_s16(a, b); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -938,6 +1136,10 @@ static inline simd_s_t srslte_simd_s_add(simd_s_t a, simd_s_t b) { #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE return _mm_add_epi16(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vaddq_s16(a, b); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -952,6 +1154,10 @@ static inline simd_s_t srslte_simd_s_sub(simd_s_t a, simd_s_t b) { #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE return _mm_sub_epi16(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vsubq_s16(a, b); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -962,8 +1168,9 @@ static inline simd_s_t srslte_simd_s_sub(simd_s_t a, simd_s_t b) { #if SRSLTE_SIMD_C16_SIZE -typedef struct { +typedef #ifdef LV_HAVE_AVX512 + struct { union { __m512i m512; int16_t i16[32]; @@ -974,24 +1181,32 @@ typedef struct { } im; #else /* LV_HAVE_AVX512 */ #ifdef LV_HAVE_AVX2 - union { - __m256i m256; - int16_t i16[16]; - } re; - union { - __m256i m256; - int16_t i16[16]; - } im; + struct { + union { + __m256i m256; + int16_t i16[16]; + } re; + union { + __m256i m256; + int16_t i16[16]; + } im; #else #ifdef LV_HAVE_SSE - union { - __m128i m128; - int16_t i16[8]; - } re; - union { - __m128i m128; - int16_t i16[8]; - } im; + struct { + union { + __m128i m128; + int16_t i16[8]; + } re; + union { + __m128i m128; + int16_t i16[8]; + } im; +#else +#ifdef HAVE_NEON + union { + int16x8x2_t m128; + int16_t i16[16]; +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -1017,6 +1232,10 @@ static inline simd_c16_t srslte_simd_c16i_load(c16_t *ptr) { __m128i in2 = _mm_load_si128((__m128i*)(ptr + 8)); ret.re.m128 = _mm_blend_epi16(in1,_mm_shufflelo_epi16(_mm_shufflehi_epi16(in2, 0b10100000), 0b10100000), 0b10101010); ret.im.m128 = _mm_blend_epi16(_mm_shufflelo_epi16(_mm_shufflehi_epi16(in1, 0b11110101), 0b11110101),in2, 0b10101010); +#else /* LV_HAVE_SSE*/ +#ifdef HAVE_NEON + ret.m128 = vld2q_s16((int16_t*)(ptr)); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -1032,6 +1251,11 @@ static inline simd_c16_t srslte_simd_c16_load(int16_t *re, int16_t *im) { #ifdef LV_HAVE_SSE ret.re.m128 = _mm_load_si128((__m128i*)(re)); ret.im.m128 = _mm_load_si128((__m128i*)(im)); +#else /* LV_HAVE_SSE*/ +#ifdef HAVE_NEON + ret.m128.val[0] = vld1q_s16((int16_t*)(re)); + ret.m128.val[1] = vld1q_s16((int16_t*)(im)); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ return ret; @@ -1046,6 +1270,11 @@ static inline simd_c16_t srslte_simd_c16_loadu(int16_t *re, int16_t *im) { #ifdef LV_HAVE_SSE ret.re.m128 = _mm_loadu_si128((__m128i*)(re)); ret.im.m128 = _mm_loadu_si128((__m128i*)(im)); +#else /* LV_HAVE_SSE*/ +#ifdef HAVE_NEON + ret.m128.val[0] = vld1q_s16((int16_t*)(re)); + ret.m128.val[1] = vld1q_s16((int16_t*)(im)); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ return ret; @@ -1063,6 +1292,10 @@ static inline void srslte_simd_c16i_store(c16_t *ptr, simd_c16_t simdreg) { __m128i im_sw = _mm_shufflelo_epi16(_mm_shufflehi_epi16(simdreg.im.m128, 0b10110001), 0b10110001); _mm_store_si128((__m128i *) (ptr), _mm_blend_epi16(simdreg.re.m128, im_sw, 0b10101010)); _mm_store_si128((__m128i *) (ptr + 8), _mm_blend_epi16(re_sw, simdreg.im.m128, 0b10101010)); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + vst2q_f32((float*)(ptr) ,simdreg); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ } @@ -1079,6 +1312,10 @@ static inline void srslte_simd_c16i_storeu(c16_t *ptr, simd_c16_t simdreg) { __m128i im_sw = _mm_shufflelo_epi16(_mm_shufflehi_epi16(simdreg.im.m128, 0b10110001), 0b10110001); _mm_storeu_si128((__m128i *) (ptr), _mm_blend_epi16(simdreg.re.m128, im_sw, 0b10101010)); _mm_storeu_si128((__m128i *) (ptr + 8), _mm_blend_epi16(re_sw, simdreg.im.m128, 0b10101010)); +#else /*HAVE_NEON*/ +#ifdef HAVE_NEON + vst2q_f32((float*)(ptr) ,simdreg); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ } @@ -1091,6 +1328,11 @@ static inline void srslte_simd_c16_store(int16_t *re, int16_t *im, simd_c16_t si #ifdef LV_HAVE_SSE _mm_store_si128((__m128i *) re, simdreg.re.m128); _mm_store_si128((__m128i *) im, simdreg.im.m128); +#else +#ifdef HAVE_NEON + vst1q_f32((int16_t *) re, simdreg.m128.val[0]); + vst1q_f32((int16_t *) im, simdreg.m128.val[1]); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ } @@ -1103,10 +1345,17 @@ static inline void srslte_simd_c16_storeu(int16_t *re, int16_t *im, simd_c16_t s #ifdef LV_HAVE_SSE _mm_storeu_si128((__m128i *) re, simdreg.re.m128); _mm_storeu_si128((__m128i *) im, simdreg.im.m128); +#else +#ifdef HAVE_NEON + vst1q_f32((int16_t *) re, simdreg.m128.val[0]); + vst1q_f32((int16_t *) im, simdreg.m128.val[1]); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ } + +//TODO static inline simd_c16_t srslte_simd_c16_prod (simd_c16_t a, simd_c16_t b) { simd_c16_t ret; #ifdef LV_HAVE_AVX2 @@ -1134,11 +1383,16 @@ static inline simd_c16_t srslte_simd_c16_add (simd_c16_t a, simd_c16_t b) { #ifdef LV_HAVE_SSE ret.re.m128 = _mm_add_epi16(a.re.m128, b.re.m128); ret.im.m128 = _mm_add_epi16(a.im.m128, b.im.m128); +#else +#ifdef HAVE_NEON + ret.m128.val[0] = vaddq_s32(a.m127.val[0],a.m127.val[0]); + ret.m128.val[1] = vaddq_s32(a.m127.val[1],a.m127.val[1]); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ return ret; } - +vdupq_n_s16 static inline simd_c16_t srslte_simd_c16_zero (void) { simd_c16_t ret; #ifdef LV_HAVE_AVX2 @@ -1148,7 +1402,12 @@ static inline simd_c16_t srslte_simd_c16_zero (void) { #ifdef LV_HAVE_SSE ret.re.m128 = _mm_setzero_si128(); ret.im.m128 = _mm_setzero_si128(); -#endif /* LV_HAVE_SSE */ +#else +#ifdef HAVE_NEON + ret.m128.val[0] = vdupq_n_s16(0); + ret.m128.val[1] = vdupq_n_s16(0); +#endif /* HAVE_NEON */ +#endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ return ret; } @@ -1182,6 +1441,12 @@ static inline simd_s_t srslte_simd_convert_2f_s(simd_f_t a, simd_f_t b) { __m128i ai = _mm_cvttps_epi32(a); __m128i bi = _mm_cvttps_epi32(b); return _mm_packs_epi32(ai, bi); + #else +#ifdef HAVE_NEON + int32x4_t ai = vcvtq_s32_f32(a); + int32x4_t bi = vcvtq_s32_f32(b); + return (simd_s_t)vcombine_s16(vqmovn_s32(ai), vqmovn_s32(bi)); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ From f4b9e7311a1daa4b306c9e8bebd3c956db3e6cad Mon Sep 17 00:00:00 2001 From: yagoda Date: Tue, 17 Oct 2017 15:51:27 +0000 Subject: [PATCH 32/55] adding neon support to new vector structure --- lib/include/srslte/phy/utils/simd.h | 123 +++++++++++++++++++--------- lib/src/phy/utils/vector_simd.c | 37 +++++++-- 2 files changed, 116 insertions(+), 44 deletions(-) diff --git a/lib/include/srslte/phy/utils/simd.h b/lib/include/srslte/phy/utils/simd.h index 6e4185788..e7820c307 100644 --- a/lib/include/srslte/phy/utils/simd.h +++ b/lib/include/srslte/phy/utils/simd.h @@ -33,6 +33,11 @@ #endif #include #endif /* LV_HAVE_SSE */ +#include + +#ifdef HAVE_NEON +#include +#endif /* * SSE Macros @@ -140,6 +145,7 @@ #define SRSLTE_SIMD_S_SIZE 0 #define SRSLTE_SIMD_C16_SIZE 0 +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -159,7 +165,7 @@ typedef __m256 simd_f_t; typedef __m128 simd_f_t; #else /* HAVE_NEON */ #ifdef HAVE_NEON -typedef float32x4 simd_f_t; +typedef float32x4_t simd_f_t; #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ @@ -188,7 +194,7 @@ static inline simd_f_t srslte_simd_f_loadu(float *ptr) { #ifdef LV_HAVE_AVX512 return _mm512_loadu_ps(ptr); #else /* LV_HAVE_AVX512 */ - #ifdef LV_HAVE_AVX2 + #ifdef LV_HAVE_AVX2 return _mm256_loadu_ps(ptr); #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE @@ -202,7 +208,7 @@ static inline simd_f_t srslte_simd_f_loadu(float *ptr) { #endif /* LV_HAVE_AVX512 */ } -static inline void srslte_simd_f_store(float *ptr, simd_f_t simdreg) {vst1q_f32 +static inline void srslte_simd_f_store(float *ptr, simd_f_t simdreg) { #ifdef LV_HAVE_AVX512 _mm512_store_ps(ptr, simdreg); #else /* LV_HAVE_AVX512 */ @@ -281,11 +287,11 @@ static inline simd_f_t srslte_simd_f_rcp(simd_f_t a) { #ifdef LV_HAVE_AVX2 return _mm256_rcp_ps(a); #else /* LV_HAVE_AVX2 */ - #ifdef LV_HAVE_SSE +#ifdef LV_HAVE_SSE return _mm_rcp_ps(a); #else /* LV_HAVE_SSE */ #ifdef HAVE_NEON - return vrecpeq_f32(a); + return vmulq_f32(vrecpeq_f32(a), vrecpsq_f32(vrecpeq_f32(a), a)); #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ @@ -302,6 +308,22 @@ static inline simd_f_t srslte_simd_f_addsub(simd_f_t a, simd_f_t b) { #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE return _mm_addsub_ps(a, b); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON // CURRENTLY USES GENERIC IMPLEMENTATION FOR NEON + float* a_ptr = &a; + float* b_ptr = &b; + simd_f_t ret; + float* c_ptr = &ret; + for(int i = 0; i<4;i++){ + if(i%2==0){ + c_ptr[i] = a_ptr[i] - b_ptr[i]; + }else{ + c_ptr[i] = a_ptr[i] + b_ptr[i]; + } + } + + return ret; +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -373,6 +395,10 @@ static inline simd_f_t srslte_simd_f_swap(simd_f_t a) { #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE return _mm_shuffle_ps(a, a, 0b10110001); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON + return vcombine_f32(vrev64_f32(vget_low_f32(a)), vrev64_f32(vget_high_f32(a))); +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -421,7 +447,9 @@ static inline simd_f_t srslte_simd_f_sqrt(simd_f_t a) { return _mm_sqrt_ps(a); #else /* LV_HAVE_SSE */ #ifdef HAVE_NEON - return vrecpeq_f32(vrsqrteq_f32(a)); + float32x4_t sqrt_reciprocal = vrsqrteq_f32(a); + sqrt_reciprocal = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a,sqrt_reciprocal), sqrt_reciprocal),sqrt_reciprocal); + return vmulq_f32(a,sqrt_reciprocal); #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ @@ -530,8 +558,8 @@ static inline simd_cf_t srslte_simd_cf_load(float *re, float *im) { ret.im = _mm_load_ps(im); #else /*HAVE_NEON*/ #ifdef HAVE_NEON - ret.val[0] = vld1q_f32(ptr); - ret.val[1] = vld1q_f32(ptr); + ret.val[0] = vld1q_f32(re); + ret.val[1] = vld1q_f32(im); #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ @@ -554,8 +582,8 @@ static inline simd_cf_t srslte_simd_cf_loadu(float *re, float *im) { ret.im = _mm_loadu_ps(im); #else /*HAVE_NEON*/ #ifdef HAVE_NEON - ret.val[0] = vld1q_f32(ptr); - ret.val[1] = vld1q_f32(ptr); + ret.val[0] = vld1q_f32(re); + ret.val[1] = vld1q_f32(im); #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ @@ -684,8 +712,8 @@ static inline simd_cf_t srslte_simd_cf_set1 (cf_t x) { ret.im = _mm_set1_ps(__imag__ x); #else /*HAVE_NEON*/ #ifdef HAVE_NEON - re.val[0] = vdupq_n_f32(__real__ x); - im.val[1] = vdupq_n_f32(__imag__ x); + ret.val[0] = vdupq_n_f32(__real__ x); + ret.val[1] = vdupq_n_f32(__imag__ x); #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ @@ -772,8 +800,8 @@ static inline simd_cf_t srslte_simd_cf_add (simd_cf_t a, simd_cf_t b) { ret.im = _mm_add_ps(a.im, b.im); #else /* LV_HAVE_SSE */ #ifdef HAVE_NEON - ret.val[0] = vaddq_f32(a.val[0],a.val[0]); - ret.val[1] = vaddq_f32(a.val[1],a.val[1]); + ret.val[0] = vaddq_f32(a.val[0],b.val[0]); + ret.val[1] = vaddq_f32(a.val[1],b.val[1]); #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ @@ -839,8 +867,8 @@ static inline simd_cf_t srslte_simd_cf_rcp (simd_cf_t a) { simd_f_t a2re = vmulq_f32(a.val[0], a.val[0]); simd_f_t a2im = vmulq_f32(a.val[1], a.val[1]); simd_f_t mod2 = vaddq_f32(a2re, a2im); - simd_f_t rcp = vrecpeq_f32(mod2); - simd_f_t neg_a_im = vnegq_f32(vdupq_n_f32(-0.0f), a.val[1]); + simd_f_t rcp = vmulq_f32(vrecpeq_f32(mod2), vrecpsq_f32(vrecpeq_f32(mod2), mod2)); + simd_f_t neg_a_im = vnegq_f32(a.val[1]); ret.val[0] = vmulq_f32(a.val[0], rcp); ret.val[1] = vmulq_f32(neg_a_im, rcp); #endif /* HAVE_NEON */ @@ -868,6 +896,7 @@ static inline simd_cf_t srslte_simd_cf_zero (void) { ret.val[0] = vdupq_n_f32(0); ret.val[1] = vdupq_n_f32(0); #endif /* HAVE_NEON */ +#endif /* HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ return ret; @@ -889,9 +918,9 @@ typedef __m256 simd_sel_t; typedef __m128i simd_i_t; typedef __m128 simd_sel_t; #else /* LV_HAVE_AVX2 */ -#ifdef LV_HAVE_SSE +#ifdef HAVE_NEON typedef int32x4_t simd_i_t; -typedef __m128 simd_sel_t; +typedef int32x4_t simd_sel_t; #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ @@ -908,7 +937,7 @@ static inline simd_i_t srslte_simd_i_load(int *x) { return _mm_load_si128((__m128i*)x); #else #ifdef HAVE_NEON - return vld1_s32((int32x4_t*)x); + return vld1q_s32((int*)x); #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ @@ -924,6 +953,10 @@ static inline void srslte_simd_i_store(int *x, simd_i_t reg) { #else #ifdef LV_HAVE_SSE _mm_store_si128((__m128i*)x, reg); +#else +#ifdef HAVE_NEON + vst1q_s32((int*)x, reg); +#endif /*HAVE_NEON*/ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -963,7 +996,7 @@ static inline simd_i_t srslte_simd_i_add(simd_i_t a, simd_i_t b) { #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ -}vcgtq_f32 +} static inline simd_sel_t srslte_simd_f_max(simd_f_t a, simd_f_t b) { #ifdef LV_HAVE_AVX512 @@ -992,6 +1025,25 @@ static inline simd_i_t srslte_simd_i_select(simd_i_t a, simd_i_t b, simd_sel_t s #else #ifdef LV_HAVE_SSE return (__m128i) _mm_blendv_ps((__m128)a, (__m128)b, selector); +#else /* LV_HAVE_SSE */ +#ifdef HAVE_NEON // CURRENTLY USES GENERIC IMPLEMENTATION FOR NEON + + int* a_ptr = &a; + int* b_ptr = &b; + simd_i_t ret; + int* sel = &selector; + + int* c_ptr = &ret; + for(int i = 0;i<4;i++) + { + if(sel[i] == -1){ + c_ptr[i] = b_ptr[i]; + }else{ + c_ptr[i] = a_ptr[i]; + } + } + return ret; +#endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ @@ -1030,7 +1082,7 @@ static inline simd_s_t srslte_simd_s_load(int16_t *ptr) { return _mm_load_si128((__m128i*) ptr); #else /* LV_HAVE_SSE */ #ifdef HAVE_NEON - return vld1q_s16((int16x8_t*) ptr); + return vld1q_s16(ptr); #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ @@ -1048,7 +1100,7 @@ static inline simd_s_t srslte_simd_s_loadu(int16_t *ptr) { return _mm_loadu_si128((__m128i*) ptr) #else /* LV_HAVE_SSE */ #ifdef HAVE_NEON - return vld1q_s16((int16x8_t*) ptr); + return vld1q_s16(ptr); #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ @@ -1066,7 +1118,7 @@ static inline void srslte_simd_s_store(int16_t *ptr, simd_s_t simdreg) { _mm_store_si128((__m128i*) ptr, simdreg); #else /* LV_HAVE_SSE */ #ifdef HAVE_NEON - vst1q_s16((int16x8_t*) ptr, simdreg); + vst1q_s16( ptr, simdreg); #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ @@ -1084,13 +1136,12 @@ static inline void srslte_simd_s_storeu(int16_t *ptr, simd_s_t simdreg) { _mm_storeu_si128((__m128i*) ptr, simdreg); #else /* LV_HAVE_SSE */ #ifdef HAVE_NEON - vst1q_s16((int16x8_t*) ptr, simdreg); + vst1q_s16(ptr, simdreg); #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ } -vdupq_n_s16 static inline simd_s_t srslte_simd_s_zero(void) { #ifdef LV_HAVE_AVX512 return _mm512_setzero_si512(); @@ -1107,7 +1158,7 @@ static inline simd_s_t srslte_simd_s_zero(void) { #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ #endif /* LV_HAVE_AVX512 */ -}vmulq_s16 +} static inline simd_s_t srslte_simd_s_mul(simd_s_t a, simd_s_t b) { #ifdef LV_HAVE_AVX512 @@ -1294,7 +1345,7 @@ static inline void srslte_simd_c16i_store(c16_t *ptr, simd_c16_t simdreg) { _mm_store_si128((__m128i *) (ptr + 8), _mm_blend_epi16(re_sw, simdreg.im.m128, 0b10101010)); #else /*HAVE_NEON*/ #ifdef HAVE_NEON - vst2q_f32((float*)(ptr) ,simdreg); + vst2q_s16((int16_t*)(ptr) ,simdreg.m128); #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ @@ -1314,7 +1365,7 @@ static inline void srslte_simd_c16i_storeu(c16_t *ptr, simd_c16_t simdreg) { _mm_storeu_si128((__m128i *) (ptr + 8), _mm_blend_epi16(re_sw, simdreg.im.m128, 0b10101010)); #else /*HAVE_NEON*/ #ifdef HAVE_NEON - vst2q_f32((float*)(ptr) ,simdreg); + vst2q_s16((int16_t*)(ptr) ,simdreg.m128); #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ @@ -1330,8 +1381,8 @@ static inline void srslte_simd_c16_store(int16_t *re, int16_t *im, simd_c16_t si _mm_store_si128((__m128i *) im, simdreg.im.m128); #else #ifdef HAVE_NEON - vst1q_f32((int16_t *) re, simdreg.m128.val[0]); - vst1q_f32((int16_t *) im, simdreg.m128.val[1]); + vst1q_s16((int16_t *) re, simdreg.m128.val[0]); + vst1q_s16((int16_t *) im, simdreg.m128.val[1]); #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ @@ -1347,15 +1398,13 @@ static inline void srslte_simd_c16_storeu(int16_t *re, int16_t *im, simd_c16_t s _mm_storeu_si128((__m128i *) im, simdreg.im.m128); #else #ifdef HAVE_NEON - vst1q_f32((int16_t *) re, simdreg.m128.val[0]); - vst1q_f32((int16_t *) im, simdreg.m128.val[1]); + vst1q_s16((int16_t *) re, simdreg.m128.val[0]); + vst1q_s16((int16_t *) im, simdreg.m128.val[1]); #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ } - -//TODO static inline simd_c16_t srslte_simd_c16_prod (simd_c16_t a, simd_c16_t b) { simd_c16_t ret; #ifdef LV_HAVE_AVX2 @@ -1385,14 +1434,14 @@ static inline simd_c16_t srslte_simd_c16_add (simd_c16_t a, simd_c16_t b) { ret.im.m128 = _mm_add_epi16(a.im.m128, b.im.m128); #else #ifdef HAVE_NEON - ret.m128.val[0] = vaddq_s32(a.m127.val[0],a.m127.val[0]); - ret.m128.val[1] = vaddq_s32(a.m127.val[1],a.m127.val[1]); + ret.m128.val[0] = vaddq_s16(a.m128.val[0],a.m128.val[0]); + ret.m128.val[1] = vaddq_s16(a.m128.val[1],a.m128.val[1]); #endif /* HAVE_NEON */ #endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_AVX2 */ return ret; } -vdupq_n_s16 + static inline simd_c16_t srslte_simd_c16_zero (void) { simd_c16_t ret; #ifdef LV_HAVE_AVX2 diff --git a/lib/src/phy/utils/vector_simd.c b/lib/src/phy/utils/vector_simd.c index 0294bd1af..ab281a653 100644 --- a/lib/src/phy/utils/vector_simd.c +++ b/lib/src/phy/utils/vector_simd.c @@ -751,10 +751,37 @@ void srslte_vec_div_fff_simd(float *x, float *y, float *z, int len) { } } + + +int srslte_vec_sc_prod_ccc_simd2(cf_t *x, cf_t h, cf_t *z, int len) +{ + int i = 0; + const unsigned int loops = len / 4; +#ifdef HAVE_NEON + simd_cf_t h_vec; + h_vec.val[0] = srslte_simd_f_set1(__real__ h); + h_vec.val[1] = srslte_simd_f_set1(__imag__ h); + for (; i < loops; i++) { + + simd_cf_t in = srslte_simd_cfi_load(&x[i*4]); + simd_cf_t temp = srslte_simd_cf_prod(in, h_vec); + srslte_simd_cfi_store(&z[i*4], temp); + } + +#endif + i = loops * 4; +return i; +} + void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, int len) { int i = 0; #if SRSLTE_SIMD_F_SIZE + + +#ifdef HAVE_NEON + i = srslte_vec_sc_prod_ccc_simd2(x, h, z, len); +#else const simd_f_t hre = srslte_simd_f_set1(__real__ h); const simd_f_t him = srslte_simd_f_set1(__imag__ h); @@ -766,8 +793,8 @@ void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, int len) { simd_f_t sw = srslte_simd_f_swap(temp); simd_f_t m2 = srslte_simd_f_mul(him, sw); simd_f_t r = srslte_simd_f_addsub(m1, m2); - srslte_simd_f_store((float *) &z[i], r); + } } else { for (; i < len - SRSLTE_SIMD_F_SIZE / 2 + 1; i += SRSLTE_SIMD_F_SIZE / 2) { @@ -782,10 +809,11 @@ void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, int len) { } } #endif - +#endif for (; i < len; i++) { z[i] = x[i] * h; } + } void srslte_vec_sc_prod_fff_simd(float *x, float h, float *z, int len) { @@ -831,7 +859,6 @@ void srslte_vec_abs_cf_simd(cf_t *x, float *z, int len) { simd_f_t z1 = srslte_simd_f_hadd(mul1, mul2); z1 = srslte_simd_f_sqrt(z1); - srslte_simd_f_store(&z[i], z1); } } else { @@ -966,9 +993,7 @@ uint32_t srslte_vec_max_fi_simd(float *x, int len) { if (SRSLTE_IS_ALIGNED(x)) { for (; i < len - SRSLTE_SIMD_I_SIZE + 1; i += SRSLTE_SIMD_I_SIZE) { simd_f_t a = srslte_simd_f_load(&x[i]); - simd_sel_t res = srslte_simd_f_max(a, simd_max_values); - simd_max_indexes = srslte_simd_i_select(simd_max_indexes, simd_indexes, res); simd_max_values = (simd_f_t) srslte_simd_i_select((simd_i_t) simd_max_values, (simd_i_t) a, res); simd_indexes = srslte_simd_i_add(simd_indexes, simd_inc); @@ -976,9 +1001,7 @@ uint32_t srslte_vec_max_fi_simd(float *x, int len) { } else { for (; i < len - SRSLTE_SIMD_I_SIZE + 1; i += SRSLTE_SIMD_I_SIZE) { simd_f_t a = srslte_simd_f_loadu(&x[i]); - simd_sel_t res = srslte_simd_f_max(a, simd_max_values); - simd_max_indexes = srslte_simd_i_select(simd_max_indexes, simd_indexes, res); simd_max_values = (simd_f_t) srslte_simd_i_select((simd_i_t) simd_max_values, (simd_i_t) a, res); simd_indexes = srslte_simd_i_add(simd_indexes, simd_inc); From 0504e7a51b42288cd9e7f9b113b7d8bc4cc2f30d Mon Sep 17 00:00:00 2001 From: Xavier Arteaga Date: Wed, 18 Oct 2017 12:49:43 +0200 Subject: [PATCH 33/55] Fixed test for abs value. Solved compilation Neon warnings and SSE errors --- lib/include/srslte/phy/utils/simd.h | 16 +++++++-------- lib/src/phy/utils/test/vector_test.c | 30 ++++++++++++++++++++++++---- 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/lib/include/srslte/phy/utils/simd.h b/lib/include/srslte/phy/utils/simd.h index e7820c307..0c378591e 100644 --- a/lib/include/srslte/phy/utils/simd.h +++ b/lib/include/srslte/phy/utils/simd.h @@ -310,10 +310,10 @@ static inline simd_f_t srslte_simd_f_addsub(simd_f_t a, simd_f_t b) { return _mm_addsub_ps(a, b); #else /* LV_HAVE_SSE */ #ifdef HAVE_NEON // CURRENTLY USES GENERIC IMPLEMENTATION FOR NEON - float* a_ptr = &a; - float* b_ptr = &b; + float* a_ptr = (float*) &a; + float* b_ptr = (float*) &b; simd_f_t ret; - float* c_ptr = &ret; + float* c_ptr = (float*) &ret; for(int i = 0; i<4;i++){ if(i%2==0){ c_ptr[i] = a_ptr[i] - b_ptr[i]; @@ -1028,12 +1028,12 @@ static inline simd_i_t srslte_simd_i_select(simd_i_t a, simd_i_t b, simd_sel_t s #else /* LV_HAVE_SSE */ #ifdef HAVE_NEON // CURRENTLY USES GENERIC IMPLEMENTATION FOR NEON - int* a_ptr = &a; - int* b_ptr = &b; + int* a_ptr = (int*) &a; + int* b_ptr = (int*) &b; simd_i_t ret; - int* sel = &selector; + int* sel = (int*) &selector; - int* c_ptr = &ret; + int* c_ptr = (int*) &ret; for(int i = 0;i<4;i++) { if(sel[i] == -1){ @@ -1097,7 +1097,7 @@ static inline simd_s_t srslte_simd_s_loadu(int16_t *ptr) { return _mm256_loadu_si256((__m256i*) ptr); #else /* LV_HAVE_AVX2 */ #ifdef LV_HAVE_SSE - return _mm_loadu_si128((__m128i*) ptr) + return _mm_loadu_si128((__m128i*) ptr); #else /* LV_HAVE_SSE */ #ifdef HAVE_NEON return vld1q_s16(ptr); diff --git a/lib/src/phy/utils/test/vector_test.c b/lib/src/phy/utils/test/vector_test.c index 8d5b9f2d6..4ebed9862 100644 --- a/lib/src/phy/utils/test/vector_test.c +++ b/lib/src/phy/utils/test/vector_test.c @@ -63,12 +63,12 @@ bool verbose = false; strncpy(func_name, #X, 32);\ CODE;\ passed = (mse < MAX_MSE);\ - printf("%32s (%5d) ... %7.1f MSamp/s ... %3s Passed\n", func_name, block_size, \ - (double) block_size*NOF_REPETITIONS/ *timing, passed?"":"Not");\ + printf("%32s (%5d) ... %7.1f MSamp/s ... %3s Passed (%.6f)\n", func_name, block_size, \ + (double) block_size*NOF_REPETITIONS/ *timing, passed?"":"Not", mse);\ return passed;\ } -#define MALLOC(TYPE, NAME) TYPE *NAME = malloc(sizeof(TYPE)*block_size) +#define MALLOC(TYPE, NAME) TYPE *NAME = srslte_vec_malloc(sizeof(TYPE)*block_size) static double elapsed_us(struct timeval *ts_start, struct timeval *ts_end) { @@ -507,7 +507,7 @@ TEST(srslte_vec_abs_cf, for (int i = 0; i < block_size; i++) { gold = sqrtf(crealf(x[i]) * crealf(x[i]) + cimagf(x[i])*cimagf(x[i])); - mse += cabsf(gold - z[i]); + mse += cabsf(gold - z[i])/block_size; } free(x); @@ -771,12 +771,27 @@ int main(int argc, char **argv) { size_count++; } + char fname[68]; + FILE *f = NULL; + void * p = popen("(date +%g%m%d && hostname) | tr '\\r\\n' '__'", "r"); + if (p) { + fgets(fname, 64, p); + strncpy(fname + strnlen(fname, 64) - 1, ".tsv", 4); + f = fopen(fname, "w"); + if (f) printf("Saving benchmark results in '%s'\n", fname); + } + pclose(p); + + printf("\n"); printf("%32s |", "Subroutine/MSps"); + if (f) fprintf(f, "Subroutine/MSps Vs Vector size\t"); for (int i = 0; i < size_count; i++) { printf(" %7d", sizes[i]); + if (f) fprintf(f, "%d\t", sizes[i]); } printf(" |\n"); + if (f) fprintf(f, "\n"); for (int j = 0; j < 32; j++) { printf("-"); @@ -789,12 +804,19 @@ int main(int argc, char **argv) { for (int i = 0; i < func_count; i++) { printf("%32s | ", func_names[i]); + if (f) fprintf(f, "%s\t", func_names[i]); + for (int j = 0; j < size_count; j++) { printf(" %s%7.1f\x1b[0m", (passed[i][j])?"":"\x1B[31m", (double) NOF_REPETITIONS*(double)sizes[j]/timmings[i][j]); + if (f) fprintf(f, "%.1f\t", (double) NOF_REPETITIONS*(double)sizes[j]/timmings[i][j]); + all_passed &= passed[i][j]; } printf(" |\n"); + if (f) fprintf(f, "\n"); } + if (f) fclose(f); + return (all_passed)?SRSLTE_SUCCESS:SRSLTE_ERROR; } From 12877fe2960a64fb371f6bd8060bd94475d9abdf Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Wed, 18 Oct 2017 16:33:19 -0400 Subject: [PATCH 34/55] Missing ip_netmask configuration file --- srsue/src/main.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/srsue/src/main.cc b/srsue/src/main.cc index 47d5f807e..3b0cad2d0 100644 --- a/srsue/src/main.cc +++ b/srsue/src/main.cc @@ -122,6 +122,10 @@ void parse_args(all_args_t *args, int argc, char *argv[]) { /* Expert section */ + ("expert.ip_netmask", + bpo::value(&args->expert.ip_netmask)->default_value("255.255.255.0"), + "Netmask of the tun_srsue device") + ("expert.phy.worker_cpu_mask", bpo::value(&args->expert.phy.worker_cpu_mask)->default_value(-1), "cpu bit mask (eg 255 = 1111 1111)") From 739b8fc457f3588f0d9d580c43de978d31329390 Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Wed, 18 Oct 2017 16:34:18 -0400 Subject: [PATCH 35/55] Restored TA commands --- srsue/src/phy/phy.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/srsue/src/phy/phy.cc b/srsue/src/phy/phy.cc index df29726d5..9760ab374 100644 --- a/srsue/src/phy/phy.cc +++ b/srsue/src/phy/phy.cc @@ -200,8 +200,8 @@ void phy::set_timeadv_rar(uint32_t ta_cmd) { void phy::set_timeadv(uint32_t ta_cmd) { n_ta = srslte_N_ta_new(n_ta, ta_cmd); - //sf_recv.set_time_adv_sec(((float) n_ta)*SRSLTE_LTE_TS); - Warning("Not supported: Set TA: ta_cmd: %d, n_ta: %d, ta_usec: %.1f\n", ta_cmd, n_ta, ((float) n_ta)*SRSLTE_LTE_TS*1e6); + sf_recv.set_time_adv_sec(((float) n_ta)*SRSLTE_LTE_TS); + //Warning("Not supported: Set TA: ta_cmd: %d, n_ta: %d, ta_usec: %.1f\n", ta_cmd, n_ta, ((float) n_ta)*SRSLTE_LTE_TS*1e6); } void phy::configure_prach_params() @@ -308,7 +308,8 @@ void phy::reset() pdcch_dl_search_reset(); for(uint32_t i=0;i Date: Wed, 18 Oct 2017 16:49:36 -0400 Subject: [PATCH 36/55] Fix for async dl scheduling --- lib/include/srslte/common/common.h | 1 + srsue/hdr/mac/mac.h | 2 +- srsue/src/phy/phch_worker.cc | 11 +++++++---- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/include/srslte/common/common.h b/lib/include/srslte/common/common.h index 6372af73e..fbc703486 100644 --- a/lib/include/srslte/common/common.h +++ b/lib/include/srslte/common/common.h @@ -52,6 +52,7 @@ #define TTIMOD_SZ (((2*HARQ_DELAY_MS) < 10)?10:20) #define TTIMOD(tti) (tti%TTIMOD_SZ) +#define MOD_N_PROC (2*HARQ_DELAY_MS-8) #define ASYNC_DL_SCHED (HARQ_DELAY_MS <= 4) // Cat 3 UE - Max number of DL-SCH transport block bits received within a TTI diff --git a/srsue/hdr/mac/mac.h b/srsue/hdr/mac/mac.h index d19f668bf..87bb764e1 100644 --- a/srsue/hdr/mac/mac.h +++ b/srsue/hdr/mac/mac.h @@ -135,7 +135,7 @@ private: demux demux_unit; /* DL/UL HARQ */ - dl_harq_entity dl_harq; + dl_harq_entity dl_harq; ul_harq_entity ul_harq; /* MAC Uplink-related Procedures */ diff --git a/srsue/src/phy/phch_worker.cc b/srsue/src/phy/phch_worker.cc index f89696015..a32bb7537 100644 --- a/srsue/src/phy/phch_worker.cc +++ b/srsue/src/phy/phch_worker.cc @@ -65,8 +65,7 @@ phch_worker::phch_worker() : tr_exec(10240) cell_initiated = false; pregen_enabled = false; trace_enabled = false; - - reset(); + reset(); } @@ -97,7 +96,7 @@ void phch_worker::reset() bzero(&period_cqi, sizeof(srslte_cqi_periodic_cfg_t)); I_sr = 0; rnti_is_set = false; - rar_cqi_request = false; + rar_cqi_request = false; cfi = 0; } @@ -482,7 +481,11 @@ bool phch_worker::decode_pdcch_dl(srsue::mac_interface_phy::mac_grant_t* grant) /* Fill MAC grant structure */ grant->ndi[0] = dci_unpacked.ndi; grant->ndi[1] = dci_unpacked.ndi_1; - grant->pid = ASYNC_DL_SCHED?dci_unpacked.harq_process:(tti%(2*HARQ_DELAY_MS)); + if (tti < MOD_N_PROC) { + grant->pid = ASYNC_DL_SCHED?dci_unpacked.harq_process:tti+(2*HARQ_DELAY_MS); + } else { + grant->pid = ASYNC_DL_SCHED?dci_unpacked.harq_process:(tti%(2*HARQ_DELAY_MS)); + } grant->n_bytes[0] = grant->phy_grant.dl.mcs[0].tbs / (uint32_t) 8; grant->n_bytes[1] = grant->phy_grant.dl.mcs[1].tbs / (uint32_t) 8; grant->tti = tti; From 177f36fc8c1fa7c31667cd3bc8baa3d2c5548af9 Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Wed, 18 Oct 2017 16:50:22 -0400 Subject: [PATCH 37/55] Improved cell reestablishment procedure timers (still not working the authentication) --- srsue/hdr/phy/phch_recv.h | 4 ++-- srsue/hdr/upper/rrc.h | 5 ++++- srsue/src/mac/mac.cc | 3 ++- srsue/src/phy/phch_common.cc | 1 + srsue/src/upper/rrc.cc | 28 ++++++++++++++++------------ 5 files changed, 25 insertions(+), 16 deletions(-) diff --git a/srsue/hdr/phy/phch_recv.h b/srsue/hdr/phy/phch_recv.h index c51622e53..01a296094 100644 --- a/srsue/hdr/phy/phch_recv.h +++ b/srsue/hdr/phy/phch_recv.h @@ -158,7 +158,7 @@ private: uint32_t current_earfcn; uint32_t sync_sfn_cnt; - const static uint32_t SYNC_SFN_TIMEOUT = 200; + const static uint32_t SYNC_SFN_TIMEOUT = 1000; float ul_dl_factor; int cur_earfcn_index; bool cell_search_in_progress; @@ -166,7 +166,7 @@ private: float measure_rsrp; srslte_ue_dl_t ue_dl_measure; - const static int RSRP_MEASURE_NOF_FRAMES = 5; + const static int RSRP_MEASURE_NOF_FRAMES = 10; int cell_sync_sfn(); int cell_meas_rsrp(); diff --git a/srsue/hdr/upper/rrc.h b/srsue/hdr/upper/rrc.h index 3643f76c3..3e2fb70dd 100644 --- a/srsue/hdr/upper/rrc.h +++ b/srsue/hdr/upper/rrc.h @@ -98,6 +98,8 @@ private: uint8_t transaction_id; bool drb_up; + bool reestablishment_in_progress; + // timeouts in ms uint32_t connecting_timeout; @@ -244,7 +246,8 @@ private: // Helpers void rrc_connection_release(); - void radio_link_failure(); + void con_restablish_cell_reselected(); + void radio_link_failure(); static void* start_sib_thread(void *rrc_); void sib_search(); void apply_sib2_configs(LIBLTE_RRC_SYS_INFO_BLOCK_TYPE_2_STRUCT *sib2); diff --git a/srsue/src/mac/mac.cc b/srsue/src/mac/mac.cc index 1a2c909ae..f8d10bb34 100644 --- a/srsue/src/mac/mac.cc +++ b/srsue/src/mac/mac.cc @@ -117,7 +117,8 @@ void mac::reset() Info("Resetting MAC\n"); - timers.stop_all(); + timers.get(timer_alignment)->stop(); + timers.get(contention_resolution_timer)->stop(); ul_harq.reset_ndi(); diff --git a/srsue/src/phy/phch_common.cc b/srsue/src/phy/phch_common.cc index 7d2948c1a..d956ddd15 100644 --- a/srsue/src/phy/phch_common.cc +++ b/srsue/src/phy/phch_common.cc @@ -336,6 +336,7 @@ void phch_common::reset_ul() pthread_mutex_trylock(&tx_mutex[i]); pthread_mutex_unlock(&tx_mutex[i]); } + radio_h->tx_end(); } } diff --git a/srsue/src/upper/rrc.cc b/srsue/src/upper/rrc.cc index 94c2e449a..4ed20deeb 100644 --- a/srsue/src/upper/rrc.cc +++ b/srsue/src/upper/rrc.cc @@ -35,8 +35,6 @@ #include "srslte/common/security.h" #include "srslte/common/bcd_helpers.h" -#define TIMEOUT_RESYNC_REESTABLISH 100 - using namespace srslte; namespace srsue { @@ -92,6 +90,8 @@ void rrc::init(phy_interface_rrc *phy_, pthread_mutex_init(&mutex, NULL); + reestablishment_in_progress = false; + ue_category = SRSLTE_UE_CATEGORY; t301 = mac_timers->timer_get_unique_id(); t310 = mac_timers->timer_get_unique_id(); @@ -207,7 +207,11 @@ void rrc::run_thread() { break; case RRC_STATE_CELL_SELECTED: rrc_log->info("RRC Cell Selected: Sending connection request...\n"); - send_con_request(); + if (reestablishment_in_progress) { + con_restablish_cell_reselected(); + } else { + send_con_request(); + } state = RRC_STATE_CONNECTING; connecting_timeout = 0; break; @@ -226,6 +230,7 @@ void rrc::run_thread() { usleep(60000); rrc_log->info("Leaving RRC_CONNECTED state\n"); drb_up = false; + reestablishment_in_progress = false; pdcp->reset(); rlc->reset(); phy->reset(); @@ -663,6 +668,8 @@ void rrc::send_con_restablish_request() { ul_ccch_msg.msg.rrc_con_reest_req.cause = LIBLTE_RRC_CON_REEST_REQ_CAUSE_OTHER_FAILURE; liblte_rrc_pack_ul_ccch_msg(&ul_ccch_msg, (LIBLTE_BIT_MSG_STRUCT *) &bit_buf); + reestablishment_in_progress = true; + rrc_log->info("Initiating RRC Connection Reestablishment Procedure\n"); rrc_log->console("RRC Connection Reestablishment\n"); mac_timers->timer_get(t310)->stop(); @@ -673,19 +680,16 @@ void rrc::send_con_restablish_request() { set_phy_default(); mac->reset(); set_mac_default(); +} - // FIXME: Cell selection should be different?? - - // Wait for cell re-synchronization - uint32_t timeout_cnt = 0; - while (!phy->sync_status() && timeout_cnt < TIMEOUT_RESYNC_REESTABLISH) { - usleep(10000); - timeout_cnt++; - } +// Actions following cell reselection 5.3.7.3 +void rrc::con_restablish_cell_reselected() +{ + reestablishment_in_progress = false; + rrc_log->info("Cell Selection finished. Initiating transmission of RRC Connection Reestablishment Request\n"); mac_timers->timer_get(t301)->reset(); mac_timers->timer_get(t301)->run(); mac_timers->timer_get(t311)->stop(); - rrc_log->info("Cell Selection finished. Initiating transmission of RRC Connection Reestablishment Request\n"); // Byte align and pack the message bits for PDCP if ((bit_buf.N_bits % 8) != 0) { From b8d5b5b6a92807ebee34ebb698c2f190a79b3efb Mon Sep 17 00:00:00 2001 From: Paul Sutton Date: Wed, 18 Oct 2017 21:54:52 +0100 Subject: [PATCH 38/55] count_dl fix --- srsue/src/upper/nas.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/srsue/src/upper/nas.cc b/srsue/src/upper/nas.cc index f0fd8cf54..110c8c78e 100644 --- a/srsue/src/upper/nas.cc +++ b/srsue/src/upper/nas.cc @@ -281,7 +281,6 @@ void nas::parse_attach_accept(uint32_t lcid, byte_buffer_t *pdu) { LIBLTE_MME_ACTIVATE_DEFAULT_EPS_BEARER_CONTEXT_ACCEPT_MSG_STRUCT act_def_eps_bearer_context_accept; nas_log->info("Received Attach Accept\n"); - count_dl++; liblte_mme_unpack_attach_accept_msg((LIBLTE_BYTE_MSG_STRUCT *) pdu, &attach_accept); @@ -359,6 +358,8 @@ void nas::parse_attach_accept(uint32_t lcid, byte_buffer_t *pdu) { state = EMM_STATE_REGISTERED; current_plmn = selecting_plmn; + count_dl++; + // Send EPS bearer context accept and attach complete count_ul++; act_def_eps_bearer_context_accept.eps_bearer_id = eps_bearer_id; @@ -437,6 +438,9 @@ void nas::parse_authentication_request(uint32_t lcid, byte_buffer_t *pdu) { nas_log->console("Warning: Network authentication failure\n"); pool->deallocate(pdu); } + + // Reset DL counter (as per 24.301 5.4.3.2) + count_dl = 0; } void nas::parse_authentication_reject(uint32_t lcid, byte_buffer_t *pdu) { @@ -539,6 +543,8 @@ void nas::parse_security_mode_command(uint32_t lcid, byte_buffer_t *pdu) { } } + count_dl++; + if (!success) { // Reuse pdu for response pdu->reset(); From f1ec1b2f3c07bdf3bb0c09bb86e7996204742637 Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Wed, 18 Oct 2017 19:33:43 -0400 Subject: [PATCH 39/55] disable attempt to resync after N310 (too soon if N310 is low) --- srsue/src/upper/rrc.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/srsue/src/upper/rrc.cc b/srsue/src/upper/rrc.cc index 4ed20deeb..10373dcf2 100644 --- a/srsue/src/upper/rrc.cc +++ b/srsue/src/upper/rrc.cc @@ -489,12 +489,12 @@ void rrc::earfcn_end() { // Detection of physical layer problems (5.3.11.1) void rrc::out_of_sync() { - current_cell->in_sync = false; + current_cell->in_sync = false; if (!mac_timers->timer_get(t311)->is_running() && !mac_timers->timer_get(t310)->is_running()) { n310_cnt++; if (n310_cnt == N310) { // attempt resync - phy->sync_reset(); + //phy->sync_reset(); mac_timers->timer_get(t310)->reset(); mac_timers->timer_get(t310)->run(); From 3292f9c2694ce5b5a75501427b5a2fd09fc38189 Mon Sep 17 00:00:00 2001 From: yagoda Date: Thu, 19 Oct 2017 16:38:58 +0000 Subject: [PATCH 40/55] simd.h tidy up & small fix for eMBMS --- lib/include/srslte/phy/utils/simd.h | 2 -- lib/src/phy/ue/ue_dl.c | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/include/srslte/phy/utils/simd.h b/lib/include/srslte/phy/utils/simd.h index 0c378591e..09e9cff8e 100644 --- a/lib/include/srslte/phy/utils/simd.h +++ b/lib/include/srslte/phy/utils/simd.h @@ -1027,12 +1027,10 @@ static inline simd_i_t srslte_simd_i_select(simd_i_t a, simd_i_t b, simd_sel_t s return (__m128i) _mm_blendv_ps((__m128)a, (__m128)b, selector); #else /* LV_HAVE_SSE */ #ifdef HAVE_NEON // CURRENTLY USES GENERIC IMPLEMENTATION FOR NEON - int* a_ptr = (int*) &a; int* b_ptr = (int*) &b; simd_i_t ret; int* sel = (int*) &selector; - int* c_ptr = (int*) &ret; for(int i = 0;i<4;i++) { diff --git a/lib/src/phy/ue/ue_dl.c b/lib/src/phy/ue/ue_dl.c index c4e2d3f6c..c11a29b8b 100644 --- a/lib/src/phy/ue/ue_dl.c +++ b/lib/src/phy/ue/ue_dl.c @@ -603,7 +603,8 @@ int srslte_ue_dl_decode_mbsfn(srslte_ue_dl_t * q, grant.sf_type = SRSLTE_SF_MBSFN; grant.nof_tb = 1; grant.mcs[0].idx = 2; - + grant.tb_en[0] = true; + grant.tb_en[1] = false; grant.nof_prb = q->pmch.cell.nof_prb; srslte_dl_fill_ra_mcs(&grant.mcs[0], grant.nof_prb); srslte_softbuffer_rx_reset_tbs(q->softbuffers[0], (uint32_t) grant.mcs[0].tbs); From 494802ba9b0299a5ddb02000240ea47236836ece Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Thu, 19 Oct 2017 14:33:05 -0400 Subject: [PATCH 41/55] Fixed bug in RLC UM when TX large number of segments in a PDU --- lib/src/upper/rlc_um.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/src/upper/rlc_um.cc b/lib/src/upper/rlc_um.cc index b2697178c..045fb0b7e 100644 --- a/lib/src/upper/rlc_um.cc +++ b/lib/src/upper/rlc_um.cc @@ -309,7 +309,7 @@ int rlc_um::build_data_pdu(uint8_t *payload, uint32_t nof_bytes) } // Pull SDUs from queue - while(pdu_space > head_len && tx_sdu_queue.size() > 0) + while(pdu_space > head_len + 1 && tx_sdu_queue.size() > 0) { log->debug("pdu_space=%d, head_len=%d\n", pdu_space, head_len); if(last_li > 0) From 399f1cdbd1352aad215f2baaf8386f335334e70b Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Thu, 19 Oct 2017 14:47:14 -0400 Subject: [PATCH 42/55] Force retx for mcs>29 and new tb if rv=0 && mcs<29 --- lib/include/srslte/common/common.h | 1 - srsue/hdr/mac/dl_harq.h | 9 +++++---- srsue/hdr/mac/mac.h | 2 +- srsue/src/phy/phch_worker.cc | 6 +----- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/lib/include/srslte/common/common.h b/lib/include/srslte/common/common.h index fbc703486..6372af73e 100644 --- a/lib/include/srslte/common/common.h +++ b/lib/include/srslte/common/common.h @@ -52,7 +52,6 @@ #define TTIMOD_SZ (((2*HARQ_DELAY_MS) < 10)?10:20) #define TTIMOD(tti) (tti%TTIMOD_SZ) -#define MOD_N_PROC (2*HARQ_DELAY_MS-8) #define ASYNC_DL_SCHED (HARQ_DELAY_MS <= 4) // Cat 3 UE - Max number of DL-SCH transport block bits received within a TTI diff --git a/srsue/hdr/mac/dl_harq.h b/srsue/hdr/mac/dl_harq.h index 278dbc6dc..9abcf0922 100644 --- a/srsue/hdr/mac/dl_harq.h +++ b/srsue/hdr/mac/dl_harq.h @@ -259,7 +259,7 @@ private: memcpy(&cur_grant, &grant, sizeof(Tgrant)); // If data has not yet been successfully decoded - if (!ack) { + if (!ack || (grant.rv[tid]==0 && grant.phy_grant.dl.mcs[tid].idx < 29)) { // Instruct the PHY To combine the received data and attempt to decode it if (pid == HARQ_BCCH_PID) { @@ -347,9 +347,10 @@ private: // Determine if it's a new transmission 5.3.2.2 bool calc_is_new_transmission(Tgrant grant) { - if ((grant.ndi[tid] != cur_grant.ndi[tid]) || // 1st condition (NDI has changed) - (pid == HARQ_BCCH_PID && grant.rv[tid] == 0) || // 2nd condition (Broadcast and 1st transmission) - is_first_tb) // 3rd condition (first TB) + if (grant.phy_grant.dl.mcs[tid].idx < 28 && // mcs 29,30,31 always retx regardless of rest + ((grant.ndi[tid] != cur_grant.ndi[tid]) || // 1st condition (NDI has changed) + (pid == HARQ_BCCH_PID && grant.rv[tid] == 0) || // 2nd condition (Broadcast and 1st transmission) + is_first_tb)) { is_first_tb = false; is_new_transmission = true; diff --git a/srsue/hdr/mac/mac.h b/srsue/hdr/mac/mac.h index 87bb764e1..d19f668bf 100644 --- a/srsue/hdr/mac/mac.h +++ b/srsue/hdr/mac/mac.h @@ -135,7 +135,7 @@ private: demux demux_unit; /* DL/UL HARQ */ - dl_harq_entity dl_harq; + dl_harq_entity dl_harq; ul_harq_entity ul_harq; /* MAC Uplink-related Procedures */ diff --git a/srsue/src/phy/phch_worker.cc b/srsue/src/phy/phch_worker.cc index a32bb7537..1d86461cd 100644 --- a/srsue/src/phy/phch_worker.cc +++ b/srsue/src/phy/phch_worker.cc @@ -481,11 +481,7 @@ bool phch_worker::decode_pdcch_dl(srsue::mac_interface_phy::mac_grant_t* grant) /* Fill MAC grant structure */ grant->ndi[0] = dci_unpacked.ndi; grant->ndi[1] = dci_unpacked.ndi_1; - if (tti < MOD_N_PROC) { - grant->pid = ASYNC_DL_SCHED?dci_unpacked.harq_process:tti+(2*HARQ_DELAY_MS); - } else { - grant->pid = ASYNC_DL_SCHED?dci_unpacked.harq_process:(tti%(2*HARQ_DELAY_MS)); - } + grant->pid = ASYNC_DL_SCHED?dci_unpacked.harq_process:(tti%(2*HARQ_DELAY_MS)); grant->n_bytes[0] = grant->phy_grant.dl.mcs[0].tbs / (uint32_t) 8; grant->n_bytes[1] = grant->phy_grant.dl.mcs[1].tbs / (uint32_t) 8; grant->tti = tti; From 2f44e2bf3a4082211a4f57f3d99e778af103e8ec Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Thu, 19 Oct 2017 16:10:27 -0400 Subject: [PATCH 43/55] Refactored cases mcs>29 for ul/dl --- lib/include/srslte/phy/common/phy_common.h | 3 +- lib/include/srslte/phy/phch/ra.h | 3 +- lib/src/phy/modem/demod_hard.c | 1 + lib/src/phy/modem/modem_table.c | 1 + lib/src/phy/phch/dci.c | 4 +- lib/src/phy/phch/ra.c | 182 ++++++++------------- lib/src/phy/phch/test/pusch_test.c | 2 +- srsenb/src/phy/phch_worker.cc | 2 +- srsue/hdr/phy/phch_worker.h | 7 +- srsue/src/phy/phch_worker.cc | 47 ++++-- 10 files changed, 123 insertions(+), 129 deletions(-) diff --git a/lib/include/srslte/phy/common/phy_common.h b/lib/include/srslte/phy/common/phy_common.h index 148a12974..0b71ad82e 100644 --- a/lib/include/srslte/phy/common/phy_common.h +++ b/lib/include/srslte/phy/common/phy_common.h @@ -197,7 +197,8 @@ typedef enum SRSLTE_API { SRSLTE_MOD_BPSK = 0, SRSLTE_MOD_QPSK, SRSLTE_MOD_16QAM, - SRSLTE_MOD_64QAM + SRSLTE_MOD_64QAM, + SRSLTE_MOD_LAST } srslte_mod_t; typedef struct SRSLTE_API { diff --git a/lib/include/srslte/phy/phch/ra.h b/lib/include/srslte/phy/phch/ra.h index 3039455ed..680d80dd6 100644 --- a/lib/include/srslte/phy/phch/ra.h +++ b/lib/include/srslte/phy/phch/ra.h @@ -226,8 +226,7 @@ SRSLTE_API uint32_t srslte_ra_dl_grant_nof_re(srslte_ra_dl_grant_t *grant, SRSLTE_API int srslte_ra_ul_dci_to_grant(srslte_ra_ul_dci_t *dci, uint32_t nof_prb, uint32_t n_rb_ho, - srslte_ra_ul_grant_t *grant, - uint32_t harq_pid); + srslte_ra_ul_grant_t *grant); SRSLTE_API void srslte_ra_ul_grant_to_nbits(srslte_ra_ul_grant_t *grant, srslte_cp_t cp, diff --git a/lib/src/phy/modem/demod_hard.c b/lib/src/phy/modem/demod_hard.c index 76f54236d..899559ecc 100644 --- a/lib/src/phy/modem/demod_hard.c +++ b/lib/src/phy/modem/demod_hard.c @@ -44,6 +44,7 @@ int srslte_demod_hard_demodulate(srslte_demod_hard_t* q, cf_t* symbols, uint8_t int nbits=-1; switch(q->mod) { + case SRSLTE_MOD_LAST: case SRSLTE_MOD_BPSK: hard_bpsk_demod(symbols,bits,nsymbols); nbits=nsymbols; diff --git a/lib/src/phy/modem/modem_table.c b/lib/src/phy/modem/modem_table.c index c19e52e77..3c4ad2417 100644 --- a/lib/src/phy/modem/modem_table.c +++ b/lib/src/phy/modem/modem_table.c @@ -82,6 +82,7 @@ int srslte_modem_table_set(srslte_modem_table_t* q, cf_t* table, uint32_t nsymbo int srslte_modem_table_lte(srslte_modem_table_t* q, srslte_mod_t modulation) { srslte_modem_table_init(q); switch(modulation) { + case SRSLTE_MOD_LAST: case SRSLTE_MOD_BPSK: q->nbits_x_symbol = 1; q->nsymbols = 2; diff --git a/lib/src/phy/phch/dci.c b/lib/src/phy/phch/dci.c index 2daa7e10d..471429145 100644 --- a/lib/src/phy/phch/dci.c +++ b/lib/src/phy/phch/dci.c @@ -111,7 +111,7 @@ int srslte_dci_rar_to_ul_grant(srslte_dci_rar_grant_t *rar, uint32_t nof_prb, srslte_ra_type2_from_riv(riv, &ul_dci->type2_alloc.L_crb, &ul_dci->type2_alloc.RB_start, nof_prb, nof_prb); - if (srslte_ra_ul_dci_to_grant(ul_dci, nof_prb, n_rb_ho, grant, 0)) { + if (srslte_ra_ul_dci_to_grant(ul_dci, nof_prb, n_rb_ho, grant)) { return SRSLTE_ERROR; } @@ -177,7 +177,7 @@ int srslte_dci_msg_to_ul_grant(srslte_dci_msg_t *msg, uint32_t nof_prb, return ret; } - if (srslte_ra_ul_dci_to_grant(ul_dci, nof_prb, n_rb_ho, grant, harq_pid)) { + if (srslte_ra_ul_dci_to_grant(ul_dci, nof_prb, n_rb_ho, grant)) { return ret; } diff --git a/lib/src/phy/phch/ra.c b/lib/src/phy/phch/ra.c index be10c304c..8cae9fe65 100644 --- a/lib/src/phy/phch/ra.c +++ b/lib/src/phy/phch/ra.c @@ -185,108 +185,87 @@ int srslte_ra_ul_dci_to_grant_prb_allocation(srslte_ra_ul_dci_t *dci, srslte_ra_ } } -srslte_mod_t last_mod[8]; -uint32_t last_ul_tbs_idx[8]; -uint32_t last_dl_tbs[8]; -uint32_t last_dl_tbs2[8]; - -static int ul_dci_to_grant_mcs(srslte_ra_ul_dci_t *dci, srslte_ra_ul_grant_t *grant, uint32_t harq_pid) { - int tbs = -1; +static void ul_dci_to_grant_mcs(srslte_ra_ul_dci_t *dci, srslte_ra_ul_grant_t *grant) { // 8.6.2 First paragraph if (dci->mcs_idx <= 28) { /* Table 8.6.1-1 on 36.213 */ if (dci->mcs_idx < 11) { grant->mcs.mod = SRSLTE_MOD_QPSK; - tbs = srslte_ra_tbs_from_idx(dci->mcs_idx, grant->L_prb); - last_ul_tbs_idx[harq_pid%8] = dci->mcs_idx; + grant->mcs.tbs = srslte_ra_tbs_from_idx(dci->mcs_idx, grant->L_prb); } else if (dci->mcs_idx < 21) { grant->mcs.mod = SRSLTE_MOD_16QAM; - tbs = srslte_ra_tbs_from_idx(dci->mcs_idx-1, grant->L_prb); - last_ul_tbs_idx[harq_pid%8] = dci->mcs_idx-1; + grant->mcs.tbs = srslte_ra_tbs_from_idx(dci->mcs_idx-1, grant->L_prb); } else if (dci->mcs_idx < 29) { grant->mcs.mod = SRSLTE_MOD_64QAM; - tbs = srslte_ra_tbs_from_idx(dci->mcs_idx-2, grant->L_prb); - last_ul_tbs_idx[harq_pid%8] = dci->mcs_idx-2; + grant->mcs.tbs = srslte_ra_tbs_from_idx(dci->mcs_idx-2, grant->L_prb); } else { fprintf(stderr, "Invalid MCS index %d\n", dci->mcs_idx); } - last_mod[harq_pid%8] = grant->mcs.mod; } else if (dci->mcs_idx == 29 && dci->cqi_request && grant->L_prb <= 4) { // 8.6.1 and 8.6.2 36.213 second paragraph grant->mcs.mod = SRSLTE_MOD_QPSK; - tbs = srslte_ra_tbs_from_idx(last_ul_tbs_idx[harq_pid%8], grant->L_prb); - dci->rv_idx = 1; + grant->mcs.tbs = 0; + dci->rv_idx = 1; } else if (dci->mcs_idx >= 29) { - // Else use last TBS/Modulation and use mcs to obtain rv_idx - tbs = srslte_ra_tbs_from_idx(last_ul_tbs_idx[harq_pid%8], grant->L_prb); - grant->mcs.mod = last_mod[harq_pid%8]; + // Else use last TBS/Modulation and use mcs to obtain rv_idx + grant->mcs.tbs = -1; + grant->mcs.mod = SRSLTE_MOD_LAST; dci->rv_idx = dci->mcs_idx - 28; - DEBUG("TTI=%d, harq_pid=%d, mcs_idx=%d, tbs=%d, mod=%d, rv=%d\n", - harq_pid, harq_pid%8, dci->mcs_idx, tbs/8, grant->mcs.mod, dci->rv_idx); - } - if (tbs < 0) { - fprintf(stderr, "Error computing TBS\n"); - return SRSLTE_ERROR; - } else { - grant->mcs.tbs = (uint32_t) tbs; - return SRSLTE_SUCCESS; + DEBUG("mcs_idx=%d, tbs=%d, mod=%d, rv=%d\n", + dci->mcs_idx, grant->mcs.tbs/8, grant->mcs.mod, dci->rv_idx); } } -void srslte_ra_ul_grant_to_nbits(srslte_ra_ul_grant_t *grant, srslte_cp_t cp, uint32_t N_srs, srslte_ra_nbits_t *nbits) +void srslte_ra_ul_grant_to_nbits(srslte_ra_ul_grant_t *grant, srslte_cp_t cp, uint32_t N_srs, srslte_ra_nbits_t *nbits) { - nbits->nof_symb = 2*(SRSLTE_CP_NSYMB(cp)-1) - N_srs; + nbits->nof_symb = 2*(SRSLTE_CP_NSYMB(cp)-1) - N_srs; nbits->nof_re = nbits->nof_symb*grant->M_sc; nbits->nof_bits = nbits->nof_re * grant->Qm; } /** Compute PRB allocation for Uplink as defined in 8.1 and 8.4 of 36.213 */ -int srslte_ra_ul_dci_to_grant(srslte_ra_ul_dci_t *dci, uint32_t nof_prb, uint32_t n_rb_ho, srslte_ra_ul_grant_t *grant, - uint32_t harq_pid) +int srslte_ra_ul_dci_to_grant(srslte_ra_ul_dci_t *dci, uint32_t nof_prb, uint32_t n_rb_ho, srslte_ra_ul_grant_t *grant) { - - // Compute PRB allocation + + // Compute PRB allocation if (!srslte_ra_ul_dci_to_grant_prb_allocation(dci, grant, n_rb_ho, nof_prb)) { - - // Compute MCS - if (!ul_dci_to_grant_mcs(dci, grant, harq_pid)) { - - // Fill rest of grant structure - grant->mcs.idx = dci->mcs_idx; - grant->M_sc = grant->L_prb*SRSLTE_NRE; - grant->M_sc_init = grant->M_sc; // FIXME: What should M_sc_init be? - grant->Qm = srslte_mod_bits_x_symbol(grant->mcs.mod); - } else { - fprintf(stderr, "Error computing MCS\n"); - return SRSLTE_ERROR; - } + + // Compute MCS + ul_dci_to_grant_mcs(dci, grant); + + // Fill rest of grant structure + grant->mcs.idx = dci->mcs_idx; + grant->M_sc = grant->L_prb*SRSLTE_NRE; + grant->M_sc_init = grant->M_sc; // FIXME: What should M_sc_init be? + grant->Qm = srslte_mod_bits_x_symbol(grant->mcs.mod); + } else { printf("Error computing UL PRB allocation\n"); - return SRSLTE_ERROR; + return SRSLTE_ERROR; } return SRSLTE_SUCCESS; } -uint32_t srslte_ra_dl_approx_nof_re(srslte_cell_t cell, uint32_t nof_prb, uint32_t nof_ctrl_symbols) +uint32_t srslte_ra_dl_approx_nof_re(srslte_cell_t cell, uint32_t nof_prb, uint32_t nof_ctrl_symbols) { - uint32_t nof_refs = 0; + uint32_t nof_refs = 0; uint32_t nof_symb = 2*SRSLTE_CP_NSYMB(cell.cp)-nof_ctrl_symbols; switch(cell.nof_ports) { - case 1: - nof_refs = 2*3; - break; - case 2: - nof_refs = 4*3; - break; - case 4: - nof_refs = 4*4; - break; + case 1: + nof_refs = 2*3; + break; + case 2: + nof_refs = 4*3; + break; + case 4: + nof_refs = 4*4; + break; } return nof_prb * (nof_symb*SRSLTE_NRE-nof_refs); } /* Computes the number of RE for each PRB in the prb_dist structure */ -uint32_t srslte_ra_dl_grant_nof_re(srslte_ra_dl_grant_t *grant, srslte_cell_t cell, +uint32_t srslte_ra_dl_grant_nof_re(srslte_ra_dl_grant_t *grant, srslte_cell_t cell, uint32_t sf_idx, uint32_t nof_ctrl_symbols) { uint32_t j, s; @@ -300,7 +279,7 @@ uint32_t srslte_ra_dl_grant_nof_re(srslte_ra_dl_grant_t *grant, srslte_cell_t ce } } } - return nof_re; + return nof_re; } @@ -315,7 +294,7 @@ int srslte_ra_dl_dci_to_grant_prb_allocation(srslte_ra_dl_dci_t *dci, srslte_ra_ uint32_t bitmask; uint32_t P = srslte_ra_type0_P(nof_prb); uint32_t n_rb_rbg_subset, n_rb_type1; - + bzero(grant, sizeof(srslte_ra_dl_grant_t)); switch (dci->alloc_type) { case SRSLTE_RA_ALLOC_TYPE0: @@ -352,14 +331,14 @@ int srslte_ra_dl_dci_to_grant_prb_allocation(srslte_ra_dl_dci_t *dci, srslte_ra_ * P * P + dci->type1_alloc.rbg_subset * P + (i + shift) % P] = true; grant->nof_prb++; } else { - return SRSLTE_ERROR; + return SRSLTE_ERROR; } } } memcpy(&grant->prb_idx[1], &grant->prb_idx[0], SRSLTE_MAX_PRB*sizeof(bool)); break; case SRSLTE_RA_ALLOC_TYPE2: - if (dci->type2_alloc.mode == SRSLTE_RA_TYPE2_LOC) { + if (dci->type2_alloc.mode == SRSLTE_RA_TYPE2_LOC) { for (i = 0; i < dci->type2_alloc.L_crb; i++) { grant->prb_idx[0][i + dci->type2_alloc.RB_start] = true; grant->nof_prb++; @@ -408,13 +387,13 @@ int srslte_ra_dl_dci_to_grant_prb_allocation(srslte_ra_dl_dci_t *dci, srslte_ra_ if (n_tilde_prb_odd < nof_prb) { grant->prb_idx[0][n_tilde_prb_odd] = true; } else { - return SRSLTE_ERROR; + return SRSLTE_ERROR; } } else { if (n_tilde_prb_odd + N_gap - N_tilde_vrb / 2 < nof_prb) { grant->prb_idx[0][n_tilde_prb_odd + N_gap - N_tilde_vrb / 2] = true; } else { - return SRSLTE_ERROR; + return SRSLTE_ERROR; } } grant->nof_prb++; @@ -422,13 +401,13 @@ int srslte_ra_dl_dci_to_grant_prb_allocation(srslte_ra_dl_dci_t *dci, srslte_ra_ if(n_tilde_prb_even < nof_prb) { grant->prb_idx[1][n_tilde_prb_even] = true; } else { - return SRSLTE_ERROR; + return SRSLTE_ERROR; } } else { if (n_tilde_prb_even + N_gap - N_tilde_vrb / 2 < nof_prb) { grant->prb_idx[1][n_tilde_prb_even + N_gap - N_tilde_vrb / 2] = true; } else { - return SRSLTE_ERROR; + return SRSLTE_ERROR; } } } @@ -442,8 +421,7 @@ int srslte_ra_dl_dci_to_grant_prb_allocation(srslte_ra_dl_dci_t *dci, srslte_ra_ } int srslte_dl_fill_ra_mcs(srslte_ra_mcs_t *mcs, uint32_t nprb) { - uint32_t i_tbs = 0; - int tbs = -1; + int i_tbs = 0; if (mcs->idx < 10) { mcs->mod = SRSLTE_MOD_QPSK; i_tbs = mcs->idx; @@ -455,30 +433,26 @@ int srslte_dl_fill_ra_mcs(srslte_ra_mcs_t *mcs, uint32_t nprb) { i_tbs = mcs->idx-2; } else if (mcs->idx == 29) { mcs->mod = SRSLTE_MOD_QPSK; - tbs = 0; - i_tbs = 0; + i_tbs = -1; } else if (mcs->idx == 30) { mcs->mod = SRSLTE_MOD_16QAM; - tbs = 0; - i_tbs = 0; + i_tbs = -1; } else if (mcs->idx == 31) { mcs->mod = SRSLTE_MOD_64QAM; - tbs = 0; - i_tbs = 0; + i_tbs = -1; } - - if (tbs == -1) { + + int tbs = -1; + if (i_tbs >= 0) { tbs = srslte_ra_tbs_from_idx(i_tbs, nprb); - if (tbs >= 0) { - mcs->tbs = tbs; - } - } - return tbs; + mcs->tbs = tbs; + } + return tbs; } int srslte_dl_fill_ra_mcs_pmch(srslte_ra_mcs_t *mcs, uint32_t nprb) { - uint32_t i_tbs = 0; - int tbs = -1; + uint32_t i_tbs = 0; + int tbs = -1; if (mcs->idx < 5) { mcs->mod = SRSLTE_MOD_QPSK; i_tbs = mcs->idx*2; @@ -492,7 +466,7 @@ int srslte_dl_fill_ra_mcs_pmch(srslte_ra_mcs_t *mcs, uint32_t nprb) { mcs->mod = SRSLTE_MOD_64QAM; i_tbs = mcs->idx + 5; }else if (mcs->idx < 28) { - //mcs->mod = SRSLTE_MOD_256QAM; + //mcs->mod = SRSLTE_MOD_256QAM; i_tbs = mcs->idx + 5; }else if (mcs->idx == 28) { mcs->mod = SRSLTE_MOD_QPSK; @@ -511,15 +485,15 @@ int srslte_dl_fill_ra_mcs_pmch(srslte_ra_mcs_t *mcs, uint32_t nprb) { tbs = 0; i_tbs = 0; } - - + + if (tbs == -1) { tbs = srslte_ra_tbs_from_idx(i_tbs, nprb); if (tbs >= 0) { - mcs->tbs = tbs; + mcs->tbs = tbs; } - } - return tbs; + } + return tbs; } /* Modulation order and transport block size determination 7.1.7 in 36.213 @@ -530,9 +504,9 @@ int srslte_dl_fill_ra_mcs_pmch(srslte_ra_mcs_t *mcs, uint32_t nprb) { * */ static int dl_dci_to_grant_mcs(srslte_ra_dl_dci_t *dci, srslte_ra_dl_grant_t *grant, bool crc_is_crnti) { uint32_t n_prb=0; - int tbs = -1; - uint32_t i_tbs = 0; - + int tbs = -1; + uint32_t i_tbs = 0; + if (!crc_is_crnti) { if (dci->dci_is_1a) { n_prb = dci->type2_alloc.n_prb1a == SRSLTE_RA_TYPE2_NPRB1A_2 ? 2 : 3; @@ -546,35 +520,23 @@ static int dl_dci_to_grant_mcs(srslte_ra_dl_dci_t *dci, srslte_ra_dl_grant_t *gr } } else { fprintf(stderr, "Error decoding DCI: P/SI/RA-RNTI supports Format1A/1C only\n"); - return SRSLTE_ERROR; + return SRSLTE_ERROR; } grant->mcs[0].mod = SRSLTE_MOD_QPSK; grant->mcs[0].tbs = (uint32_t) tbs; } else { n_prb = grant->nof_prb; - grant->nof_tb = 0; + grant->nof_tb = 0; if (dci->tb_en[0]) { grant->mcs[0].idx = dci->mcs_idx; - tbs = srslte_dl_fill_ra_mcs(&grant->mcs[0], n_prb); - if (tbs) { - last_dl_tbs[dci->harq_process%8] = tbs; - } else { - // For mcs>=29, set last TBS received for this PID - grant->mcs[0].tbs = last_dl_tbs[dci->harq_process%8]; - } + grant->mcs[0].tbs = srslte_dl_fill_ra_mcs(&grant->mcs[0], n_prb); grant->nof_tb++; } else { grant->mcs[0].tbs = 0; } if (dci->tb_en[1]) { grant->mcs[1].idx = dci->mcs_idx_1; - tbs = srslte_dl_fill_ra_mcs(&grant->mcs[1], n_prb); - if (tbs) { - last_dl_tbs2[dci->harq_process%8] = tbs; - } else { - // For mcs>=29, set last TBS received for this PID - grant->mcs[1].tbs = last_dl_tbs2[dci->harq_process%8]; - } + grant->mcs[1].tbs = srslte_dl_fill_ra_mcs(&grant->mcs[1], n_prb); } else { grant->mcs[1].tbs = 0; } diff --git a/lib/src/phy/phch/test/pusch_test.c b/lib/src/phy/phch/test/pusch_test.c index 9d048e680..cf0be75c3 100644 --- a/lib/src/phy/phch/test/pusch_test.c +++ b/lib/src/phy/phch/test/pusch_test.c @@ -136,7 +136,7 @@ int main(int argc, char **argv) { dci.mcs_idx = mcs_idx; srslte_ra_ul_grant_t grant; - if (srslte_ra_ul_dci_to_grant(&dci, cell.nof_prb, 0, &grant, 0)) { + if (srslte_ra_ul_dci_to_grant(&dci, cell.nof_prb, 0, &grant)) { fprintf(stderr, "Error computing resource allocation\n"); return ret; } diff --git a/srsenb/src/phy/phch_worker.cc b/srsenb/src/phy/phch_worker.cc index 52b617fd5..b80467f4d 100644 --- a/srsenb/src/phy/phch_worker.cc +++ b/srsenb/src/phy/phch_worker.cc @@ -408,7 +408,7 @@ int phch_worker::decode_pusch(srslte_enb_ul_pusch_t *grants, uint32_t nof_pusch) srslte_ra_ul_grant_t phy_grant; int res = -1; - if (!srslte_ra_ul_dci_to_grant(&grants[i].grant, enb_ul.cell.nof_prb, n_rb_ho, &phy_grant, tti_rx%8)) { + if (!srslte_ra_ul_dci_to_grant(&grants[i].grant, enb_ul.cell.nof_prb, n_rb_ho, &phy_grant)) { if (phy_grant.mcs.mod == SRSLTE_MOD_64QAM) { phy_grant.mcs.mod = SRSLTE_MOD_16QAM; } diff --git a/srsue/hdr/phy/phch_worker.h b/srsue/hdr/phy/phch_worker.h index 0811723e0..c26966c17 100644 --- a/srsue/hdr/phy/phch_worker.h +++ b/srsue/hdr/phy/phch_worker.h @@ -149,7 +149,12 @@ private: uint32_t I_sr; float cfo; bool rar_cqi_request; - + + // Save last TBS for mcs>28 cases + int last_dl_tbs[2*HARQ_DELAY_MS][SRSLTE_MAX_CODEWORDS]; + int last_ul_tbs[2*HARQ_DELAY_MS]; + srslte_mod_t last_ul_mod[2*HARQ_DELAY_MS]; + // Metrics dl_metrics_t dl_metrics; ul_metrics_t ul_metrics; diff --git a/srsue/src/phy/phch_worker.cc b/srsue/src/phy/phch_worker.cc index 1d86461cd..9fa3ff669 100644 --- a/srsue/src/phy/phch_worker.cc +++ b/srsue/src/phy/phch_worker.cc @@ -478,10 +478,20 @@ bool phch_worker::decode_pdcch_dl(srsue::mac_interface_phy::mac_grant_t* grant) return false; } + grant->pid = ASYNC_DL_SCHED?dci_unpacked.harq_process:(tti%(2*HARQ_DELAY_MS)); + + // Set last TBS for this TB (pid) in case of mcs>29 (7.1.7.2 of 36.213) + for (int i=0;iphy_grant.dl.mcs[i].tbs < 0) { + grant->phy_grant.dl.mcs[i].tbs = last_dl_tbs[grant->pid%(2*HARQ_DELAY_MS)][i]; + } + // save it + last_dl_tbs[grant->pid%(2*HARQ_DELAY_MS)][i] = grant->phy_grant.dl.mcs[i].tbs; + } + /* Fill MAC grant structure */ grant->ndi[0] = dci_unpacked.ndi; grant->ndi[1] = dci_unpacked.ndi_1; - grant->pid = ASYNC_DL_SCHED?dci_unpacked.harq_process:(tti%(2*HARQ_DELAY_MS)); grant->n_bytes[0] = grant->phy_grant.dl.mcs[0].tbs / (uint32_t) 8; grant->n_bytes[1] = grant->phy_grant.dl.mcs[1].tbs / (uint32_t) 8; grant->tti = tti; @@ -718,14 +728,29 @@ bool phch_worker::decode_pdcch_ul(mac_interface_phy::mac_grant_t* grant) ue_dl.last_location_ul.ncce, (1<phy_grant.ul.mcs.tbs==0) { - srslte_vec_fprint_hex(stdout, dci_msg.data, dci_msg.nof_bits); + Info("Received PUSCH grant with empty data\n"); } } } - + + if (ret) { + + // Use last TBS for this TB in case of mcs>28 + if (grant->phy_grant.ul.mcs.tbs < 0) { + grant->phy_grant.ul.mcs.tbs = last_ul_tbs[tti%(2*HARQ_DELAY_MS)]; + } + last_ul_tbs[tti%(2*HARQ_DELAY_MS)] = grant->phy_grant.ul.mcs.tbs; + + if (grant->phy_grant.ul.mcs.mod == SRSLTE_MOD_LAST) { + grant->phy_grant.ul.mcs.mod = last_ul_mod[tti%(2*HARQ_DELAY_MS)]; + grant->phy_grant.ul.Qm = srslte_mod_bits_x_symbol(grant->phy_grant.ul.mcs.mod); + } + last_ul_mod[tti%(2*HARQ_DELAY_MS)] = grant->phy_grant.ul.mcs.mod; + } + /* Limit UL modulation if not supported by the UE or disabled by higher layers */ if (!phy->config->enable_64qam) { - if (grant->phy_grant.ul.mcs.mod == SRSLTE_MOD_64QAM) { + if (grant->phy_grant.ul.mcs.mod >= SRSLTE_MOD_64QAM) { grant->phy_grant.ul.mcs.mod = SRSLTE_MOD_16QAM; grant->phy_grant.ul.Qm = 4; } @@ -898,7 +923,7 @@ void phch_worker::encode_pusch(srslte_ra_ul_grant_t *grant, uint8_t *payload, ui if (srslte_ue_ul_cfg_grant(&ue_ul, grant, TTI_TX(tti), rv, current_tx_nb)) { Error("Configuring UL grant\n"); } - + if (srslte_ue_ul_pusch_encode_rnti_softbuffer(&ue_ul, payload, uci_data, softbuffer, @@ -925,12 +950,12 @@ void phch_worker::encode_pusch(srslte_ra_ul_grant_t *grant, uint8_t *payload, ui #endif Info("PUSCH: tti_tx=%d, alloc=(%d,%d), tbs=%d, mcs=%d, rv=%d, ack=%s, ri=%s, cfo=%.1f KHz%s\n", - (tti+4)%10240, - grant->n_prb[0], grant->n_prb[0]+grant->L_prb, - grant->mcs.tbs/8, grant->mcs.idx, rv, - uci_data.uci_ack_len>0?(uci_data.uci_ack?"1":"0"):"no", - uci_data.uci_ri_len>0?(uci_data.uci_ri?"1":"0"):"no", - cfo*15, timestr); + (tti+HARQ_DELAY_MS)%10240, + grant->n_prb[0], grant->n_prb[0]+grant->L_prb, + grant->mcs.tbs/8, grant->mcs.idx, rv, + uci_data.uci_ack_len>0?(uci_data.uci_ack?"1":"0"):"no", + uci_data.uci_ri_len>0?(uci_data.uci_ri?"1":"0"):"no", + cfo*15, timestr); // Store metrics ul_metrics.mcs = grant->mcs.idx; From 9dbbe9731a472d54e2a2a7f47186a04f5f02bb5a Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Thu, 19 Oct 2017 16:18:40 -0400 Subject: [PATCH 44/55] missing netmaks string after merge --- srsue/hdr/ue_base.h | 1 + 1 file changed, 1 insertion(+) diff --git a/srsue/hdr/ue_base.h b/srsue/hdr/ue_base.h index 411896f70..634219083 100644 --- a/srsue/hdr/ue_base.h +++ b/srsue/hdr/ue_base.h @@ -109,6 +109,7 @@ typedef struct { std::string ue_cateogry; bool metrics_csv_enable; std::string metrics_csv_filename; + std::string ip_netmask; }expert_args_t; typedef struct { From 05d6a1c82927859f183230243dbf1fd8f6c245ad Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Thu, 19 Oct 2017 16:26:00 -0400 Subject: [PATCH 45/55] added option to configure netmask (rules out previous commit) --- srsue/hdr/ue_base.h | 2 +- srsue/hdr/upper/gw.h | 5 +++++ srsue/src/ue.cc | 2 ++ srsue/src/upper/gw.cc | 13 ++++++++++++- srsue/ue.conf.example | 4 +++- 5 files changed, 23 insertions(+), 3 deletions(-) diff --git a/srsue/hdr/ue_base.h b/srsue/hdr/ue_base.h index 634219083..b547945a9 100644 --- a/srsue/hdr/ue_base.h +++ b/srsue/hdr/ue_base.h @@ -103,13 +103,13 @@ typedef struct { }gui_args_t; typedef struct { + std::string ip_netmask; phy_args_t phy; float metrics_period_secs; bool pregenerate_signals; std::string ue_cateogry; bool metrics_csv_enable; std::string metrics_csv_filename; - std::string ip_netmask; }expert_args_t; typedef struct { diff --git a/srsue/hdr/upper/gw.h b/srsue/hdr/upper/gw.h index 800b31624..b97ceb6c5 100644 --- a/srsue/hdr/upper/gw.h +++ b/srsue/hdr/upper/gw.h @@ -57,8 +57,13 @@ public: // NAS interface srslte::error_t setup_if_addr(uint32_t ip_addr, char *err_str); + void set_netmask(std::string netmask); + private: + bool default_netmask; + std::string netmask; + static const int GW_THREAD_PRIO = 7; pdcp_interface_gw *pdcp; diff --git a/srsue/src/ue.cc b/srsue/src/ue.cc index 3560a78fe..a99281e6f 100644 --- a/srsue/src/ue.cc +++ b/srsue/src/ue.cc @@ -185,6 +185,8 @@ bool ue::init(all_args_t *args_) nas.init(&usim, &rrc, &gw, &nas_log, 1 /* RB_ID_SRB1 */); gw.init(&pdcp, &nas, &gw_log, 3 /* RB_ID_DRB1 */); + gw.set_netmask(args->expert.ip_netmask); + rrc.init(&phy, &mac, &rlc, &pdcp, &nas, &usim, &mac, &rrc_log); rrc.set_ue_category(atoi(args->expert.ue_cateogry.c_str())); diff --git a/srsue/src/upper/gw.cc b/srsue/src/upper/gw.cc index 07ac36989..1e3e81999 100644 --- a/srsue/src/upper/gw.cc +++ b/srsue/src/upper/gw.cc @@ -44,6 +44,7 @@ gw::gw() :if_up(false) { current_ip_addr = 0; + default_netmask = true; } void gw::init(pdcp_interface_gw *pdcp_, nas_interface_gw *nas_, srslte::log *gw_log_, uint32_t lcid_) @@ -104,6 +105,12 @@ void gw::get_metrics(gw_metrics_t &m) ul_tput_bytes = 0; } +void gw::set_netmask(std::string netmask) { + default_netmask = false; + this->netmask = netmask; +} + + /******************************************************************************* PDCP interface *******************************************************************************/ @@ -152,7 +159,11 @@ srslte::error_t gw::setup_if_addr(uint32_t ip_addr, char *err_str) return(srslte::ERROR_CANT_START); } ifr.ifr_netmask.sa_family = AF_INET; - ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr = inet_addr("255.255.255.0"); + const char *mask = "255.255.255.0"; + if (!default_netmask) { + mask = netmask.c_str(); + } + ((struct sockaddr_in *)&ifr.ifr_netmask)->sin_addr.s_addr = inet_addr(mask); if(0 > ioctl(sock, SIOCSIFNETMASK, &ifr)) { err_str = strerror(errno); diff --git a/srsue/ue.conf.example b/srsue/ue.conf.example index 5d14d3c3c..d0dc67952 100644 --- a/srsue/ue.conf.example +++ b/srsue/ue.conf.example @@ -98,7 +98,8 @@ enable = false ##################################################################### # Expert configuration options # -# ue_category: Sets UE category (range 1-5). Default: 4 +# ip_netmask: Netmask of the tun_srsue device. Default: 255.255.255.0 +# ue_category: Sets UE category (range 1-5). Default: 4 # # prach_gain: PRACH gain (dB). If defined, forces a gain for the tranmsission of PRACH only., # Default is to use tx_gain in [rf] section. @@ -138,6 +139,7 @@ enable = false # ##################################################################### [expert] +#ip_netmask = 255.255.255.0 #ue_category = 4 #prach_gain = 30 #cqi_max = 15 From 49a105baed23e14fcac95e23a005837477b6b41c Mon Sep 17 00:00:00 2001 From: Paul Sutton Date: Thu, 19 Oct 2017 22:21:18 +0100 Subject: [PATCH 46/55] Fix for tests, minor fix for RLC UM --- lib/src/upper/rlc_um.cc | 2 +- lib/test/upper/rlc_um_test.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/src/upper/rlc_um.cc b/lib/src/upper/rlc_um.cc index 045fb0b7e..d95e186e7 100644 --- a/lib/src/upper/rlc_um.cc +++ b/lib/src/upper/rlc_um.cc @@ -277,7 +277,7 @@ int rlc_um::build_data_pdu(uint8_t *payload, uint32_t nof_bytes) int head_len = rlc_um_packed_length(&header); int pdu_space = nof_bytes; - if(pdu_space <= head_len) + if(pdu_space <= head_len + 1) { log->warning("%s Cannot build a PDU - %d bytes available, %d bytes required for header\n", rrc->get_rb_name(lcid).c_str(), nof_bytes, head_len); diff --git a/lib/test/upper/rlc_um_test.cc b/lib/test/upper/rlc_um_test.cc index 0894a2a8b..8abcfae3c 100644 --- a/lib/test/upper/rlc_um_test.cc +++ b/lib/test/upper/rlc_um_test.cc @@ -123,7 +123,7 @@ void basic_test() byte_buffer_t pdu_bufs[NBUFS]; for(int i=0;i Date: Thu, 19 Oct 2017 18:10:17 -0400 Subject: [PATCH 47/55] coverty uninitialized member --- srsue/src/mac/demux.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/srsue/src/mac/demux.cc b/srsue/src/mac/demux.cc index 14c524165..171e7bf45 100644 --- a/srsue/src/mac/demux.cc +++ b/srsue/src/mac/demux.cc @@ -36,7 +36,7 @@ namespace srsue { -demux::demux() : mac_msg(20), pending_mac_msg(20) +demux::demux() : mac_msg(20), pending_mac_msg(20), rlc(NULL) { } From c14393b24f874b4e12d3bbfb883133fedda4f6f1 Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Thu, 19 Oct 2017 19:39:54 -0400 Subject: [PATCH 48/55] Disable RSSI sensor by default --- srsue/src/main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/srsue/src/main.cc b/srsue/src/main.cc index 3b0cad2d0..1b146a9f3 100644 --- a/srsue/src/main.cc +++ b/srsue/src/main.cc @@ -155,7 +155,7 @@ void parse_args(all_args_t *args, int argc, char *argv[]) { "Pregenerate uplink signals after attach. Improves CPU performance.") ("expert.rssi_sensor_enabled", - bpo::value(&args->expert.phy.rssi_sensor_enabled)->default_value(true), + bpo::value(&args->expert.phy.rssi_sensor_enabled)->default_value(false), "Enable or disable RF frontend RSSI sensor. In some USRP devices can cause segmentation fault") ("expert.prach_gain", From a570e63c5b8977a8f864cde6d7103a2233ae8c83 Mon Sep 17 00:00:00 2001 From: Xavier Arteaga Date: Fri, 20 Oct 2017 15:05:04 +0200 Subject: [PATCH 49/55] Corrected bug in RA --- lib/include/srslte/phy/phch/ra.h | 2 -- lib/src/phy/phch/ra.c | 14 ++++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/include/srslte/phy/phch/ra.h b/lib/include/srslte/phy/phch/ra.h index 680d80dd6..aef8f4751 100644 --- a/lib/include/srslte/phy/phch/ra.h +++ b/lib/include/srslte/phy/phch/ra.h @@ -103,9 +103,7 @@ typedef struct SRSLTE_API { bool prb_idx[2][SRSLTE_MAX_PRB]; uint32_t nof_prb; uint32_t Qm[SRSLTE_MAX_CODEWORDS]; - uint32_t Qm2[SRSLTE_MAX_CODEWORDS]; srslte_ra_mcs_t mcs[SRSLTE_MAX_CODEWORDS]; - srslte_ra_mcs_t mcs2[SRSLTE_MAX_CODEWORDS]; uint32_t nof_tb; srslte_sf_t sf_type; bool tb_en[SRSLTE_MAX_CODEWORDS]; diff --git a/lib/src/phy/phch/ra.c b/lib/src/phy/phch/ra.c index 8cae9fe65..913bd9548 100644 --- a/lib/src/phy/phch/ra.c +++ b/lib/src/phy/phch/ra.c @@ -549,7 +549,7 @@ static int dl_dci_to_grant_mcs(srslte_ra_dl_dci_t *dci, srslte_ra_dl_grant_t *gr } grant->pinfo = dci->pinfo; - if (tbs < 0) { + if (grant->mcs[0].tbs < 0 || grant->mcs[1].tbs < 0) { return SRSLTE_ERROR; } else { return SRSLTE_SUCCESS; @@ -584,10 +584,12 @@ int srslte_ra_dl_dci_to_grant(srslte_ra_dl_dci_t *dci, if (msg_rnti >= SRSLTE_CRNTI_START && msg_rnti <= SRSLTE_CRNTI_END) { crc_is_crnti = true; } - // Compute PRB allocation - if (!srslte_ra_dl_dci_to_grant_prb_allocation(dci, grant, nof_prb)) { - // Compute MCS - if (!dl_dci_to_grant_mcs(dci, grant, crc_is_crnti)) { + // Compute PRB allocation + int ret =srslte_ra_dl_dci_to_grant_prb_allocation(dci, grant, nof_prb); + if (!ret) { + // Compute MCS + ret = dl_dci_to_grant_mcs(dci, grant, crc_is_crnti); + if (ret == SRSLTE_SUCCESS) { // Apply Section 7.1.7.3. If RA-RNTI and Format1C rv_idx=0 if (msg_rnti >= SRSLTE_RARNTI_START && msg_rnti <= SRSLTE_RARNTI_END && dci->dci_is_1c) @@ -869,4 +871,4 @@ void srslte_ra_prb_fprint(FILE *f, srslte_ra_dl_grant_t *grant) { } } -} \ No newline at end of file +} From 3b4649b9f730786d010962b33b924693137d8925 Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Fri, 20 Oct 2017 09:29:56 -0400 Subject: [PATCH 50/55] Fixed bug in calc_new_transmission --- srsue/hdr/mac/dl_harq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/srsue/hdr/mac/dl_harq.h b/srsue/hdr/mac/dl_harq.h index 9abcf0922..521018d73 100644 --- a/srsue/hdr/mac/dl_harq.h +++ b/srsue/hdr/mac/dl_harq.h @@ -347,7 +347,7 @@ private: // Determine if it's a new transmission 5.3.2.2 bool calc_is_new_transmission(Tgrant grant) { - if (grant.phy_grant.dl.mcs[tid].idx < 28 && // mcs 29,30,31 always retx regardless of rest + if (grant.phy_grant.dl.mcs[tid].idx <= 28 && // mcs 29,30,31 always retx regardless of rest ((grant.ndi[tid] != cur_grant.ndi[tid]) || // 1st condition (NDI has changed) (pid == HARQ_BCCH_PID && grant.rv[tid] == 0) || // 2nd condition (Broadcast and 1st transmission) is_first_tb)) From c8bba2f4d07ada34376ff19c19c10a9cf50f7a17 Mon Sep 17 00:00:00 2001 From: Xavier Arteaga Date: Fri, 20 Oct 2017 16:09:27 +0200 Subject: [PATCH 51/55] DFT optimization. memcpy removal (#76) * Solved PHICH Segmentation fault for MIMO * Initial Guru FFT optimitzation * Guru (i)FFT implemented. All test passed! * Integrated new DFT into pdsch_enodeb and pdsch_ue. Solved more DFT bugs. * Solved Merge Errors and bugs * Solved UL Guru bug (DC missing). Updated Init and OFDM calls for enb and ue (cell measurement too). --- lib/examples/cell_measurement.c | 28 +- lib/examples/pdsch_enodeb.c | 26 +- lib/examples/pdsch_ue.c | 13 +- lib/include/srslte/phy/common/phy_common.h | 4 +- lib/include/srslte/phy/dft/dft.h | 24 ++ lib/include/srslte/phy/dft/ofdm.h | 40 +-- lib/include/srslte/phy/enb/enb_dl.h | 8 +- lib/include/srslte/phy/enb/enb_ul.h | 1 + lib/include/srslte/phy/ue/ue_dl.h | 3 +- lib/include/srslte/phy/ue/ue_mib.h | 2 +- lib/include/srslte/phy/ue/ue_ul.h | 1 + lib/src/phy/dft/dft_fftw.c | 62 +++- lib/src/phy/dft/ofdm.c | 271 +++++++++++++++--- lib/src/phy/dft/test/ofdm_test.c | 63 +++- lib/src/phy/enb/enb_dl.c | 57 ++-- lib/src/phy/enb/enb_ul.c | 31 +- lib/src/phy/phch/test/pbch_file_test.c | 4 +- lib/src/phy/phch/test/pcfich_file_test.c | 10 +- lib/src/phy/phch/test/pdcch_file_test.c | 6 +- lib/src/phy/phch/test/pdsch_pdcch_file_test.c | 4 +- lib/src/phy/phch/test/phich_file_test.c | 6 +- lib/src/phy/phch/test/pmch_file_test.c | 2 +- lib/src/phy/phch/test/pmch_test.c | 122 ++++---- lib/src/phy/sync/test/sync_test.c | 12 +- lib/src/phy/ue/ue_dl.c | 77 ++--- lib/src/phy/ue/ue_mib.c | 13 +- lib/src/phy/ue/ue_ul.c | 22 +- srsenb/hdr/phy/phch_worker.h | 2 +- srsenb/src/phy/phch_worker.cc | 19 +- srsue/src/phy/phch_recv.cc | 8 +- srsue/src/phy/phch_worker.cc | 4 +- 31 files changed, 647 insertions(+), 298 deletions(-) diff --git a/lib/examples/cell_measurement.c b/lib/examples/cell_measurement.c index 37796682f..bfb8194df 100644 --- a/lib/examples/cell_measurement.c +++ b/lib/examples/cell_measurement.c @@ -249,7 +249,7 @@ int main(int argc, char **argv) { fprintf(stderr, "Error initiating ue_sync\n"); return -1; } - if (srslte_ue_dl_init(&ue_dl, cell.nof_prb, 1)) { + if (srslte_ue_dl_init(&ue_dl, sf_buffer, cell.nof_prb, 1)) { fprintf(stderr, "Error initiating UE downlink processing module\n"); return -1; } @@ -257,7 +257,7 @@ int main(int argc, char **argv) { fprintf(stderr, "Error initiating UE downlink processing module\n"); return -1; } - if (srslte_ue_mib_init(&ue_mib, cell.nof_prb)) { + if (srslte_ue_mib_init(&ue_mib, sf_buffer, cell.nof_prb)) { fprintf(stderr, "Error initaiting UE MIB decoder\n"); return -1; } @@ -271,8 +271,16 @@ int main(int argc, char **argv) { /* Initialize subframe counter */ sf_cnt = 0; - - if (srslte_ofdm_rx_init(&fft, cell.cp, cell.nof_prb)) { + + int sf_re = SRSLTE_SF_LEN_RE(cell.nof_prb, cell.cp); + + cf_t *sf_symbols = srslte_vec_malloc(sf_re * sizeof(cf_t)); + + for (int i=0;iout = fftwf_malloc(size_out*len); } +int srslte_dft_replan_guru_c(srslte_dft_plan_t *plan, const int new_dft_points, cf_t *in_buffer, + cf_t *out_buffer, int istride, int ostride, int how_many, + int idist, int odist) { + int sign = (plan->forward) ? FFTW_FORWARD : FFTW_BACKWARD; + + const fftwf_iodim iodim = {new_dft_points, istride, ostride}; + const fftwf_iodim howmany_dims = {how_many, idist, odist}; + + /* Destroy current plan */ + fftwf_destroy_plan(plan->p); + + plan->p = fftwf_plan_guru_dft(1, &iodim, 1, &howmany_dims, in_buffer, out_buffer, sign, FFTW_TYPE); + if (!plan->p) { + return -1; + } + plan->size = new_dft_points; + plan->init_size = plan->size; + + return 0; +} + int srslte_dft_replan_c(srslte_dft_plan_t *plan, const int new_dft_points) { int sign = (plan->dir == SRSLTE_DFT_FORWARD) ? FFTW_FORWARD : FFTW_BACKWARD; if (plan->p) { @@ -107,6 +128,32 @@ int srslte_dft_replan_c(srslte_dft_plan_t *plan, const int new_dft_points) { return 0; } +int srslte_dft_plan_guru_c(srslte_dft_plan_t *plan, const int dft_points, srslte_dft_dir_t dir, cf_t *in_buffer, + cf_t *out_buffer, int istride, int ostride, int how_many, + int idist, int odist) { + int sign = (dir == SRSLTE_DFT_FORWARD) ? FFTW_FORWARD : FFTW_BACKWARD; + + const fftwf_iodim iodim = {dft_points, istride, ostride}; + const fftwf_iodim howmany_dims = {how_many, idist, odist}; + + plan->p = fftwf_plan_guru_dft(1, &iodim, 1, &howmany_dims, in_buffer, out_buffer, sign, FFTW_TYPE); + if (!plan->p) { + return -1; + } + plan->size = dft_points; + plan->init_size = plan->size; + plan->mode = SRSLTE_DFT_COMPLEX; + plan->dir = dir; + plan->forward = (dir==SRSLTE_DFT_FORWARD)?true:false; + plan->mirror = false; + plan->db = false; + plan->norm = false; + plan->dc = false; + plan->is_guru = true; + + return 0; +} + int srslte_dft_plan_c(srslte_dft_plan_t *plan, const int dft_points, srslte_dft_dir_t dir) { allocate(plan,sizeof(fftwf_complex),sizeof(fftwf_complex), dft_points); int sign = (dir == SRSLTE_DFT_FORWARD) ? FFTW_FORWARD : FFTW_BACKWARD; @@ -123,6 +170,7 @@ int srslte_dft_plan_c(srslte_dft_plan_t *plan, const int dft_points, srslte_dft_ plan->db = false; plan->norm = false; plan->dc = false; + plan->is_guru = false; return 0; } @@ -232,6 +280,14 @@ void srslte_dft_run_c(srslte_dft_plan_t *plan, cf_t *in, cf_t *out) { plan->forward, plan->mirror, plan->dc); } +void srslte_dft_run_guru_c(srslte_dft_plan_t *plan) { + if (plan->is_guru == true) { + fftwf_execute(plan->p); + } else { + fprintf(stderr, "srslte_dft_run_guru_c: the selected plan is not guru!\n"); + } +} + void srslte_dft_run_r(srslte_dft_plan_t *plan, float *in, float *out) { float norm; int i; @@ -255,8 +311,10 @@ void srslte_dft_run_r(srslte_dft_plan_t *plan, float *in, float *out) { void srslte_dft_plan_free(srslte_dft_plan_t *plan) { if (!plan) return; if (!plan->size) return; - if (plan->in) fftwf_free(plan->in); - if (plan->out) fftwf_free(plan->out); + if (!plan->is_guru) { + if (plan->in) fftwf_free(plan->in); + if (plan->out) fftwf_free(plan->out); + } if (plan->p) fftwf_destroy_plan(plan->p); bzero(plan, sizeof(srslte_dft_plan_t)); } diff --git a/lib/src/phy/dft/ofdm.c b/lib/src/phy/dft/ofdm.c index db5939274..8ea690bb5 100644 --- a/lib/src/phy/dft/ofdm.c +++ b/lib/src/phy/dft/ofdm.c @@ -37,23 +37,79 @@ #include "srslte/phy/utils/debug.h" #include "srslte/phy/utils/vector.h" +/* Uncomment next line for avoiding Guru DFT call */ +//#define AVOID_GURU -int srslte_ofdm_init_(srslte_ofdm_t *q, srslte_cp_t cp, int symbol_sz, int nof_prb, srslte_dft_dir_t dir) { - return srslte_ofdm_init_mbsfn_(q, cp, symbol_sz, nof_prb, dir, SRSLTE_SF_NORM); +int srslte_ofdm_init_(srslte_ofdm_t *q, srslte_cp_t cp, cf_t *in_buffer, cf_t *out_buffer, int symbol_sz, int nof_prb, srslte_dft_dir_t dir) { + return srslte_ofdm_init_mbsfn_(q, cp, in_buffer, out_buffer, symbol_sz, nof_prb, dir, SRSLTE_SF_NORM); } +int srslte_ofdm_init_mbsfn_(srslte_ofdm_t *q, srslte_cp_t cp, cf_t *in_buffer, cf_t *out_buffer, int symbol_sz, int nof_prb, srslte_dft_dir_t dir, srslte_sf_t sf_type) { -int srslte_ofdm_init_mbsfn_(srslte_ofdm_t *q, srslte_cp_t cp, int symbol_sz, int nof_prb, srslte_dft_dir_t dir, srslte_sf_t sf_type) { + /* Set OFDM object attributes */ + q->symbol_sz = (uint32_t) symbol_sz; + q->nof_symbols = SRSLTE_CP_NSYMB(cp); + q->nof_symbols_mbsfn = SRSLTE_CP_NSYMB(SRSLTE_CP_EXT); + q->cp = cp; + q->freq_shift = false; + q->nof_re = (uint32_t) nof_prb * SRSLTE_NRE; + q->nof_guards = ((symbol_sz - q->nof_re) / 2); + q->slot_sz = (uint32_t) SRSLTE_SLOT_LEN(symbol_sz); + q->sf_sz = (uint32_t) SRSLTE_SF_LEN(symbol_sz); + q->in_buffer = in_buffer; + q->out_buffer= out_buffer; if (srslte_dft_plan_c(&q->fft_plan, symbol_sz, dir)) { fprintf(stderr, "Error: Creating DFT plan\n"); return -1; } + +#ifdef AVOID_GURU q->tmp = srslte_vec_malloc((uint32_t) symbol_sz * sizeof(cf_t)); if (!q->tmp) { perror("malloc"); return -1; } + bzero(q->tmp, sizeof(cf_t) * symbol_sz); +#else + int cp1 = SRSLTE_CP_ISNORM(cp)?SRSLTE_CP_LEN_NORM(0, symbol_sz):SRSLTE_CP_LEN_EXT(symbol_sz); + int cp2 = SRSLTE_CP_ISNORM(cp)?SRSLTE_CP_LEN_NORM(1, symbol_sz):SRSLTE_CP_LEN_EXT(symbol_sz); + + q->tmp = srslte_vec_malloc(sizeof(cf_t) * q->sf_sz); + if (!q->tmp) { + perror("malloc"); + return -1; + } + bzero(q->tmp, sizeof(cf_t) * q->sf_sz); + + if (dir == SRSLTE_DFT_BACKWARD) { + bzero(in_buffer, sizeof(cf_t) * SRSLTE_SF_LEN_RE(nof_prb, cp)); + }else { + bzero(in_buffer, sizeof(cf_t) * q->sf_sz); + } + + for (int slot = 0; slot < 2; slot++) { + //bzero(&q->fft_plan_sf[slot], sizeof(srslte_dft_plan_t)); + //bzero(q->tmp + SRSLTE_CP_NSYMB(cp)*symbol_sz*slot, sizeof(cf_t) * (cp1 + (SRSLTE_CP_NSYMB(cp) - 1)*cp2 + SRSLTE_CP_NSYMB(cp)*symbol_sz)); + if (dir == SRSLTE_DFT_FORWARD) { + if (srslte_dft_plan_guru_c(&q->fft_plan_sf[slot], symbol_sz, dir, + in_buffer + cp1 + q->slot_sz * slot, + q->tmp + q->nof_symbols * q->symbol_sz * slot, + 1, 1, SRSLTE_CP_NSYMB(cp), symbol_sz + cp2, symbol_sz)) { + fprintf(stderr, "Error: Creating DFT plan (1)\n"); + return -1; + } + } else { + if (srslte_dft_plan_guru_c(&q->fft_plan_sf[slot], symbol_sz, dir, + q->tmp + q->nof_symbols * q->symbol_sz * slot, + out_buffer + cp1 + q->slot_sz * slot, + 1, 1, SRSLTE_CP_NSYMB(cp), symbol_sz, symbol_sz + cp2)) { + fprintf(stderr, "Error: Creating DFT plan (1)\n"); + return -1; + } + } + } +#endif q->shift_buffer = srslte_vec_malloc(sizeof(cf_t) * SRSLTE_SF_LEN(symbol_sz)); if (!q->shift_buffer) { @@ -64,15 +120,6 @@ int srslte_ofdm_init_mbsfn_(srslte_ofdm_t *q, srslte_cp_t cp, int symbol_sz, int srslte_dft_plan_set_mirror(&q->fft_plan, true); srslte_dft_plan_set_dc(&q->fft_plan, true); - q->symbol_sz = (uint32_t) symbol_sz; - q->nof_symbols = SRSLTE_CP_NSYMB(cp); - q->nof_symbols_mbsfn = SRSLTE_CP_NSYMB(SRSLTE_CP_EXT); - q->cp = cp; - q->freq_shift = false; - q->nof_re = nof_prb * SRSLTE_NRE; - q->nof_guards = ((symbol_sz - q->nof_re) / 2); - q->slot_sz = SRSLTE_SLOT_LEN(symbol_sz); - DEBUG("Init %s symbol_sz=%d, nof_symbols=%d, cp=%s, nof_re=%d, nof_guards=%d\n", dir==SRSLTE_DFT_FORWARD?"FFT":"iFFT", q->symbol_sz, q->nof_symbols, q->cp==SRSLTE_CP_NORM?"Normal":"Extended", q->nof_re, q->nof_guards); @@ -101,9 +148,60 @@ int srslte_ofdm_replan_(srslte_ofdm_t *q, srslte_cp_t cp, int symbol_sz, int nof q->symbol_sz = (uint32_t) symbol_sz; q->nof_symbols = SRSLTE_CP_NSYMB(cp); q->cp = cp; - q->nof_re = nof_prb * SRSLTE_NRE; + q->nof_re = (uint32_t) nof_prb * SRSLTE_NRE; q->nof_guards = ((symbol_sz - q->nof_re) / 2); - q->slot_sz = SRSLTE_SLOT_LEN(symbol_sz); + q->slot_sz = (uint32_t) SRSLTE_SLOT_LEN(symbol_sz); + q->sf_sz = (uint32_t) SRSLTE_SF_LEN(symbol_sz); + +#ifndef AVOID_GURU + cf_t *in_buffer = q->in_buffer; + cf_t *out_buffer = q->out_buffer; + + int cp1 = SRSLTE_CP_ISNORM(cp)?SRSLTE_CP_LEN_NORM(0, symbol_sz):SRSLTE_CP_LEN_EXT(symbol_sz); + int cp2 = SRSLTE_CP_ISNORM(cp)?SRSLTE_CP_LEN_NORM(1, symbol_sz):SRSLTE_CP_LEN_EXT(symbol_sz); + + srslte_dft_dir_t dir = q->fft_plan_sf[0].dir; + + if (q->tmp) { + free(q->tmp); + } + + q->tmp = srslte_vec_malloc(sizeof(cf_t) * q->sf_sz); + if (!q->tmp) { + perror("malloc"); + return -1; + } + bzero(q->tmp, sizeof(cf_t) * q->sf_sz); + + if (dir == SRSLTE_DFT_BACKWARD) { + bzero(in_buffer, sizeof(cf_t) * SRSLTE_SF_LEN_RE(nof_prb, cp)); + }else { + bzero(in_buffer, sizeof(cf_t) * q->sf_sz); + } + + for (int slot = 0; slot < 2; slot++) { + srslte_dft_plan_free(&q->fft_plan_sf[slot]); + + if (dir == SRSLTE_DFT_FORWARD) { + if (srslte_dft_plan_guru_c(&q->fft_plan_sf[slot], symbol_sz, dir, + in_buffer + cp1 + q->slot_sz * slot, + q->tmp + q->nof_symbols * q->symbol_sz * slot, + 1, 1, SRSLTE_CP_NSYMB(cp), symbol_sz + cp2, symbol_sz)) { + fprintf(stderr, "Error: Creating DFT plan (1)\n"); + return -1; + } + } else { + if (srslte_dft_plan_guru_c(&q->fft_plan_sf[slot], symbol_sz, dir, + q->tmp + q->nof_symbols * q->symbol_sz * slot, + out_buffer + cp1 + q->slot_sz * slot, + 1, 1, SRSLTE_CP_NSYMB(cp), symbol_sz, symbol_sz + cp2)) { + fprintf(stderr, "Error: Creating DFT plan (1)\n"); + return -1; + } + } + } +#endif /* AVOID_GURU */ + if (q->freq_shift) { srslte_ofdm_set_freq_shift(q, q->freq_shift_f); @@ -118,6 +216,15 @@ int srslte_ofdm_replan_(srslte_ofdm_t *q, srslte_cp_t cp, int symbol_sz, int nof void srslte_ofdm_free_(srslte_ofdm_t *q) { srslte_dft_plan_free(&q->fft_plan); + +#ifndef AVOID_GURU + for (int slot = 0; slot < 2; slot++) { + if (q->fft_plan_sf[slot].init_size) { + srslte_dft_plan_free(&q->fft_plan_sf[slot]); + } + } +#endif + if (q->tmp) { free(q->tmp); } @@ -127,28 +234,28 @@ void srslte_ofdm_free_(srslte_ofdm_t *q) { bzero(q, sizeof(srslte_ofdm_t)); } -int srslte_ofdm_rx_init(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t max_prb) { +int srslte_ofdm_rx_init(srslte_ofdm_t *q, srslte_cp_t cp, cf_t *in_buffer, cf_t *out_buffer, uint32_t max_prb) { int symbol_sz = srslte_symbol_sz(max_prb); if (symbol_sz < 0) { fprintf(stderr, "Error: Invalid nof_prb=%d\n", max_prb); return -1; } q->max_prb = max_prb; - return srslte_ofdm_init_(q, cp, symbol_sz, max_prb, SRSLTE_DFT_FORWARD); + return srslte_ofdm_init_(q, cp, in_buffer, out_buffer, symbol_sz, max_prb, SRSLTE_DFT_FORWARD); } -int srslte_ofdm_rx_init_mbsfn(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t nof_prb) +int srslte_ofdm_rx_init_mbsfn(srslte_ofdm_t *q, srslte_cp_t cp, cf_t *in_buffer, cf_t *out_buffer, uint32_t nof_prb) { int symbol_sz = srslte_symbol_sz(nof_prb); if (symbol_sz < 0) { fprintf(stderr, "Error: Invalid nof_prb=%d\n", nof_prb); return -1; } - return srslte_ofdm_init_mbsfn_(q, cp, symbol_sz, nof_prb, SRSLTE_DFT_FORWARD, SRSLTE_SF_MBSFN); + return srslte_ofdm_init_mbsfn_(q, cp, in_buffer, out_buffer, symbol_sz, nof_prb, SRSLTE_DFT_FORWARD, SRSLTE_SF_MBSFN); } -int srslte_ofdm_tx_init(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t max_prb) { +int srslte_ofdm_tx_init(srslte_ofdm_t *q, srslte_cp_t cp, cf_t *in_buffer, cf_t *out_buffer, uint32_t max_prb) { uint32_t i; int ret; @@ -158,7 +265,7 @@ int srslte_ofdm_tx_init(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t max_prb) { return -1; } q->max_prb = max_prb; - ret = srslte_ofdm_init_(q, cp, symbol_sz, max_prb, SRSLTE_DFT_BACKWARD); + ret = srslte_ofdm_init_(q, cp, in_buffer, out_buffer, symbol_sz, max_prb, SRSLTE_DFT_BACKWARD); if (ret == SRSLTE_SUCCESS) { @@ -173,7 +280,7 @@ int srslte_ofdm_tx_init(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t max_prb) { return ret; } -int srslte_ofdm_tx_init_mbsfn(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t nof_prb) +int srslte_ofdm_tx_init_mbsfn(srslte_ofdm_t *q, srslte_cp_t cp, cf_t *in_buffer, cf_t *out_buffer, uint32_t nof_prb) { uint32_t i; int ret; @@ -184,7 +291,7 @@ int srslte_ofdm_tx_init_mbsfn(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t nof_prb return -1; } - ret = srslte_ofdm_init_mbsfn_(q, cp, symbol_sz, nof_prb, SRSLTE_DFT_BACKWARD, SRSLTE_SF_MBSFN); + ret = srslte_ofdm_init_mbsfn_(q, cp, in_buffer, out_buffer, symbol_sz, nof_prb, SRSLTE_DFT_BACKWARD, SRSLTE_SF_MBSFN); if (ret == SRSLTE_SUCCESS) { srslte_dft_plan_set_norm(&q->fft_plan, false); @@ -207,7 +314,8 @@ int srslte_ofdm_rx_set_prb(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t nof_prb) { } return srslte_ofdm_replan_(q, cp, symbol_sz, nof_prb); } else { - fprintf(stderr, "OFDM: Error calling set_prb: nof_prb must be equal or lower initialized max_prb\n"); + fprintf(stderr, "OFDM (Rx): Error calling set_prb: nof_prb (%d) must be equal or lower initialized max_prb (%d)\n", + nof_prb, q->max_prb); return -1; } } @@ -234,7 +342,8 @@ int srslte_ofdm_tx_set_prb(srslte_ofdm_t *q, srslte_cp_t cp, uint32_t nof_prb) { } return ret; } else { - fprintf(stderr, "OFDM: Error calling set_prb: nof_prb must be equal or lower initialized max_prb\n"); + fprintf(stderr, "OFDM (Tx): Error calling set_prb: nof_prb (%d) must be equal or lower initialized max_prb (%d)\n", + nof_prb, q->max_prb); return -1; } } @@ -274,8 +383,12 @@ void srslte_ofdm_tx_free(srslte_ofdm_t *q) { /* Transforms input samples into output OFDM symbols. * Performs FFT on a each symbol and removes CP. */ -void srslte_ofdm_rx_slot(srslte_ofdm_t *q, cf_t *input, cf_t *output) { +void srslte_ofdm_rx_slot(srslte_ofdm_t *q, int slot_in_sf) { + cf_t *output = q->out_buffer + slot_in_sf * q->nof_re * q->nof_symbols; + +#ifdef AVOID_GURU uint32_t i; + cf_t *input = q->in_buffer + slot_in_sf * q->slot_sz; for (i=0;inof_symbols;i++) { input += SRSLTE_CP_ISNORM(q->cp)?SRSLTE_CP_LEN_NORM(i, q->symbol_sz):SRSLTE_CP_LEN_EXT(q->symbol_sz); srslte_dft_run_c(&q->fft_plan, input, q->tmp); @@ -283,6 +396,25 @@ void srslte_ofdm_rx_slot(srslte_ofdm_t *q, cf_t *input, cf_t *output) { input += q->symbol_sz; output += q->nof_re; } +#else + float norm = 1.0f/sqrtf(q->fft_plan.size); + cf_t *tmp = q->tmp + slot_in_sf * q->symbol_sz * q->nof_symbols; + uint32_t dc = (q->fft_plan.dc) ? 1:0; + + srslte_dft_run_guru_c(&q->fft_plan_sf[slot_in_sf]); + + for (int i = 0; i < q->nof_symbols; i++) { + memcpy(output, tmp + q->symbol_sz - q->nof_re / 2, sizeof(cf_t) * q->nof_re / 2); + memcpy(output + q->nof_re / 2, &tmp[dc], sizeof(cf_t) * q->nof_re / 2); + + if (q->fft_plan.norm) { + srslte_vec_sc_prod_cfc(output, norm, output, q->nof_re); + } + + tmp += q->symbol_sz; + output += q->nof_re; + } +#endif } void srslte_ofdm_rx_slot_mbsfn(srslte_ofdm_t *q, cf_t *input, cf_t *output) @@ -314,29 +446,32 @@ void srslte_ofdm_rx_slot_zerocopy(srslte_ofdm_t *q, cf_t *input, cf_t *output) { } } -void srslte_ofdm_rx_sf(srslte_ofdm_t *q, cf_t *input, cf_t *output) { - uint32_t n; +void srslte_ofdm_rx_sf(srslte_ofdm_t *q) { + uint32_t n; if (q->freq_shift) { - srslte_vec_prod_ccc(input, q->shift_buffer, input, 2*q->slot_sz); + srslte_vec_prod_ccc(q->in_buffer, q->shift_buffer, q->in_buffer, 2*q->slot_sz); } if(!q->mbsfn_subframe){ for (n=0;n<2;n++) { - srslte_ofdm_rx_slot(q, &input[n*q->slot_sz], &output[n*q->nof_re*q->nof_symbols]); + srslte_ofdm_rx_slot(q, n); } } else{ - srslte_ofdm_rx_slot_mbsfn(q, &input[0*q->slot_sz], &output[0*q->nof_re*q->nof_symbols]); - srslte_ofdm_rx_slot(q, &input[1*q->slot_sz], &output[1*q->nof_re*q->nof_symbols]); + srslte_ofdm_rx_slot_mbsfn(q, &q->in_buffer[0*q->slot_sz], &q->out_buffer[0*q->nof_re*q->nof_symbols]); + srslte_ofdm_rx_slot(q, 1); } } /* Transforms input OFDM symbols into output samples. * Performs FFT on a each symbol and adds CP. */ -void srslte_ofdm_tx_slot(srslte_ofdm_t *q, cf_t *input, cf_t *output) { - uint32_t i, cp_len; - for (i=0;inof_symbols;i++) { - cp_len = SRSLTE_CP_ISNORM(q->cp)?SRSLTE_CP_LEN_NORM(i, q->symbol_sz):SRSLTE_CP_LEN_EXT(q->symbol_sz); +void srslte_ofdm_tx_slot(srslte_ofdm_t *q, int slot_in_sf) { + cf_t *input = q->in_buffer + slot_in_sf * q->nof_re * q->nof_symbols; + cf_t *output = q->out_buffer + slot_in_sf * q->slot_sz; + +#ifdef AVOID_GURU + for (int i=0;inof_symbols;i++) { + int cp_len = SRSLTE_CP_ISNORM(q->cp)?SRSLTE_CP_LEN_NORM(i, q->symbol_sz):SRSLTE_CP_LEN_EXT(q->symbol_sz); memcpy(&q->tmp[q->nof_guards], input, q->nof_re * sizeof(cf_t)); srslte_dft_run_c(&q->fft_plan, q->tmp, &output[cp_len]); input += q->nof_re; @@ -344,6 +479,60 @@ void srslte_ofdm_tx_slot(srslte_ofdm_t *q, cf_t *input, cf_t *output) { memcpy(output, &output[q->symbol_sz], cp_len * sizeof(cf_t)); output += q->symbol_sz + cp_len; } +#else + float norm = 1.0f/sqrtf(q->symbol_sz); + cf_t *tmp = q->tmp + slot_in_sf * q->symbol_sz * q->nof_symbols; + + bzero(tmp, q->slot_sz); + uint32_t dc = (q->fft_plan.dc) ? 1:0; + + for (int i = 0; i < q->nof_symbols; i++) { + memcpy(&tmp[dc], &input[q->nof_re / 2], q->nof_re / 2 * sizeof(cf_t)); + memcpy(&tmp[q->symbol_sz - q->nof_re / 2], &input[0], q->nof_re / 2 * sizeof(cf_t)); + + input += q->nof_re; + tmp += q->symbol_sz; + } + + srslte_dft_run_guru_c(&q->fft_plan_sf[slot_in_sf]); + + for (int i=0;inof_symbols;i++) { + int cp_len = SRSLTE_CP_ISNORM(q->cp) ? SRSLTE_CP_LEN_NORM(i, q->symbol_sz) : SRSLTE_CP_LEN_EXT(q->symbol_sz); + + if (q->fft_plan.norm) { + srslte_vec_sc_prod_cfc(&output[cp_len], norm, &output[cp_len], q->symbol_sz); + } + + /* add CP */ + memcpy(output, &output[q->symbol_sz], cp_len * sizeof(cf_t)); + output += q->symbol_sz + cp_len; + } +#endif + + /*input = q->in_buffer + slot_in_sf * q->nof_re * q->nof_symbols; + cf_t *output2 = srslte_vec_malloc(sizeof(cf_t) * q->slot_sz); + cf_t *o2 = output2; + bzero(q->tmp, sizeof(cf_t)*q->symbol_sz); + //bzero(output2, sizeof(cf_t)*q->slot_sz); + for (int i=0;inof_symbols;i++) { + int cp_len = SRSLTE_CP_ISNORM(q->cp)?SRSLTE_CP_LEN_NORM(i, q->symbol_sz):SRSLTE_CP_LEN_EXT(q->symbol_sz); + memcpy(&q->tmp[q->nof_guards], input, q->nof_re * sizeof(cf_t)); + srslte_dft_run_c(&q->fft_plan, q->tmp, &o2[cp_len]); + input += q->nof_re; + memcpy(o2, &o2[q->symbol_sz], cp_len * sizeof(cf_t)); + o2 += q->symbol_sz + cp_len; + } + cf_t *output1 = q->out_buffer + slot_in_sf * q->slot_sz;//srslte_vec_malloc(sizeof(cf_t) * q->slot_sz); + + for (int i = 0; i < q->slot_sz; i++) { + float error = cabsf(output1[i] - output2[i])/cabsf(output2[i]); + cf_t k = output1[i]/output2[i]; + if (error > 0.1) printf("%d/%05d error=%f output=%+f%+fi gold=%+f%+fi k=%+f%+fi\n", slot_in_sf, i, error, + __real__ output1[i], __imag__ output1[i], + __real__ output2[i], __imag__ output2[i], + __real__ k, __imag__ k); + } + free(output2);/**/ } void srslte_ofdm_tx_slot_mbsfn(srslte_ofdm_t *q, cf_t *input, cf_t *output) @@ -369,20 +558,20 @@ void srslte_ofdm_set_normalize(srslte_ofdm_t *q, bool normalize_enable) { srslte_dft_plan_set_norm(&q->fft_plan, normalize_enable); } -void srslte_ofdm_tx_sf(srslte_ofdm_t *q, cf_t *input, cf_t *output) +void srslte_ofdm_tx_sf(srslte_ofdm_t *q) { - uint32_t n; + uint32_t n; if(!q->mbsfn_subframe){ for (n=0;n<2;n++) { - srslte_ofdm_tx_slot(q, &input[n*q->nof_re*q->nof_symbols], &output[n*q->slot_sz]); + srslte_ofdm_tx_slot(q, n); } } else{ - srslte_ofdm_tx_slot_mbsfn(q, &input[0*q->nof_re*q->nof_symbols], &output[0*q->slot_sz]); - srslte_ofdm_tx_slot(q, &input[1*q->nof_re*q->nof_symbols], &output[1*q->slot_sz]); + srslte_ofdm_tx_slot_mbsfn(q, &q->in_buffer[0*q->nof_re*q->nof_symbols], &q->out_buffer[0*q->slot_sz]); + srslte_ofdm_tx_slot(q, 1); } if (q->freq_shift) { - srslte_vec_prod_ccc(output, q->shift_buffer, output, 2*q->slot_sz); + srslte_vec_prod_ccc(q->out_buffer, q->shift_buffer, q->out_buffer, 2*q->slot_sz); } } diff --git a/lib/src/phy/dft/test/ofdm_test.c b/lib/src/phy/dft/test/ofdm_test.c index 11aac7f4e..e77fcd39e 100644 --- a/lib/src/phy/dft/test/ofdm_test.c +++ b/lib/src/phy/dft/test/ofdm_test.c @@ -35,16 +35,28 @@ int nof_prb = -1; srslte_cp_t cp = SRSLTE_CP_NORM; +int nof_repetitions = 128; + +static double elapsed_us(struct timeval *ts_start, struct timeval *ts_end) { + if (ts_end->tv_usec > ts_start->tv_usec) { + return ((double) ts_end->tv_sec - (double) ts_start->tv_sec) * 1000000 + + (double) ts_end->tv_usec - (double) ts_start->tv_usec; + } else { + return ((double) ts_end->tv_sec - (double) ts_start->tv_sec - 1) * 1000000 + + ((double) ts_end->tv_usec + 1000000) - (double) ts_start->tv_usec; + } +} void usage(char *prog) { printf("Usage: %s\n", prog); printf("\t-n nof_prb [Default All]\n"); printf("\t-e extended cyclic prefix [Default Normal]\n"); + printf("\t-r nof_repetitions [Default %d]\n", nof_repetitions); } void parse_args(int argc, char **argv) { int opt; - while ((opt = getopt(argc, argv, "ne")) != -1) { + while ((opt = getopt(argc, argv, "ner")) != -1) { switch (opt) { case 'n': nof_prb = atoi(argv[optind]); @@ -52,6 +64,9 @@ void parse_args(int argc, char **argv) { case 'e': cp = SRSLTE_CP_EXT; break; + case 'r': + nof_repetitions = atoi(argv[optind]); + break; default: usage(argv[0]); exit(-1); @@ -61,6 +76,7 @@ void parse_args(int argc, char **argv) { int main(int argc, char **argv) { + struct timeval start, end; srslte_ofdm_t fft, ifft; cf_t *input, *outfft, *outifft; float mse; @@ -81,48 +97,65 @@ int main(int argc, char **argv) { printf("Running test for %d PRB, %d RE... ", n_prb, n_re);fflush(stdout); - input = malloc(sizeof(cf_t) * n_re); + input = srslte_vec_malloc(sizeof(cf_t) * n_re * 2); if (!input) { perror("malloc"); exit(-1); } - outfft = malloc(sizeof(cf_t) * SRSLTE_SLOT_LEN(srslte_symbol_sz(n_prb))); + outfft = srslte_vec_malloc(sizeof(cf_t) * n_re * 2); if (!outfft) { perror("malloc"); exit(-1); } - outifft = malloc(sizeof(cf_t) * n_re); + outifft = srslte_vec_malloc(sizeof(cf_t) * SRSLTE_SLOT_LEN(srslte_symbol_sz(n_prb)) * 2); if (!outifft) { perror("malloc"); exit(-1); } + bzero(outifft, sizeof(cf_t) * SRSLTE_SLOT_LEN(srslte_symbol_sz(n_prb)) * 2); - if (srslte_ofdm_rx_init(&fft, cp, n_prb)) { + if (srslte_ofdm_rx_init(&fft, cp, outifft, outfft, n_prb)) { fprintf(stderr, "Error initializing FFT\n"); exit(-1); } - srslte_dft_plan_set_norm(&fft.fft_plan, true); + srslte_ofdm_set_normalize(&fft, true); - if (srslte_ofdm_tx_init(&ifft, cp, n_prb)) { + if (srslte_ofdm_tx_init(&ifft, cp, input, outifft, n_prb)) { fprintf(stderr, "Error initializing iFFT\n"); exit(-1); } - srslte_dft_plan_set_norm(&ifft.fft_plan, true); + srslte_ofdm_set_normalize(&ifft, true); for (i=0;i 1.0f) printf("%04d. %+.1f%+.1fi Vs. %+.1f%+.1f %+.1f%+.1f (mse=%f)\n", i, __real__ input[i], __imag__ input[i], __real__ outifft[i], __imag__ outifft[i], __real__ outfft[i], __imag__ outfft[i], mse); } - printf("MSE=%f\n", mse); + /*for (i=0;i= 0.07) { printf("MSE too large\n"); diff --git a/lib/src/phy/enb/enb_dl.c b/lib/src/phy/enb/enb_dl.c index 2ba179399..54a63bc2a 100644 --- a/lib/src/phy/enb/enb_dl.c +++ b/lib/src/phy/enb/enb_dl.c @@ -41,7 +41,7 @@ #define SRSLTE_ENB_RF_AMP 0.1 -int srslte_enb_dl_init(srslte_enb_dl_t *q, uint32_t max_prb) +int srslte_enb_dl_init(srslte_enb_dl_t *q, cf_t *out_buffer[SRSLTE_MAX_PORTS], uint32_t max_prb) { int ret = SRSLTE_ERROR_INVALID_INPUTS; @@ -53,13 +53,26 @@ int srslte_enb_dl_init(srslte_enb_dl_t *q, uint32_t max_prb) q->cfi = 3; q->tx_amp = SRSLTE_ENB_RF_AMP; - - if (srslte_ofdm_tx_init(&q->ifft, SRSLTE_CP_NORM, max_prb)) { - fprintf(stderr, "Error initiating FFT\n"); - goto clean_exit; + + for (int i=0;isf_symbols[i] = srslte_vec_malloc(SRSLTE_SF_LEN_RE(max_prb, SRSLTE_CP_NORM) * sizeof(cf_t)); + if (!q->sf_symbols[i]) { + perror("malloc"); + goto clean_exit; + } + q->slot1_symbols[i] = &q->sf_symbols[i][SRSLTE_SLOT_LEN_RE(max_prb, SRSLTE_CP_NORM)]; } - srslte_ofdm_set_normalize(&q->ifft, true); + for (int i = 0; i < SRSLTE_MAX_PORTS; i++) { + if (srslte_ofdm_tx_init(&q->ifft[i], SRSLTE_CP_NORM, q->sf_symbols[i], out_buffer[i], max_prb)) { + fprintf(stderr, "Error initiating FFT (%d)\n", i); + goto clean_exit; + } + } + + for (int i = 0; i < q->cell.nof_ports; i++) { + srslte_ofdm_set_normalize(&q->ifft[i], true); + } if (srslte_pbch_init(&q->pbch)) { fprintf(stderr, "Error creating PBCH object\n"); @@ -89,15 +102,6 @@ int srslte_enb_dl_init(srslte_enb_dl_t *q, uint32_t max_prb) goto clean_exit; } - for (int i=0;isf_symbols[i] = srslte_vec_malloc(SRSLTE_SF_LEN_RE(max_prb, SRSLTE_CP_NORM) * sizeof(cf_t)); - if (!q->sf_symbols[i]) { - perror("malloc"); - goto clean_exit; - } - q->slot1_symbols[i] = &q->sf_symbols[i][SRSLTE_SLOT_LEN_RE(max_prb, SRSLTE_CP_NORM)]; - } - ret = SRSLTE_SUCCESS; } else { @@ -114,7 +118,9 @@ clean_exit: void srslte_enb_dl_free(srslte_enb_dl_t *q) { if (q) { - srslte_ofdm_tx_free(&q->ifft); + for (int i = 0; i < SRSLTE_MAX_PORTS; i++) { + srslte_ofdm_tx_free(&q->ifft[i]); + } srslte_regs_free(&q->regs); srslte_pbch_free(&q->pbch); srslte_pcfich_free(&q->pcfich); @@ -152,9 +158,11 @@ int srslte_enb_dl_set_cell(srslte_enb_dl_t *q, srslte_cell_t cell) fprintf(stderr, "Error resizing REGs\n"); return SRSLTE_ERROR; } - if (srslte_ofdm_rx_set_prb(&q->ifft, q->cell.cp, q->cell.nof_prb)) { - fprintf(stderr, "Error initiating FFT\n"); - return SRSLTE_ERROR; + for (int i = 0; i < q->cell.nof_ports; i++) { + if (srslte_ofdm_tx_set_prb(&q->ifft[i], q->cell.cp, q->cell.nof_prb)) { + fprintf(stderr, "Error re-planning iFFT (%d)\n", i); + return SRSLTE_ERROR; + } } if (srslte_pbch_set_cell(&q->pbch, q->cell)) { fprintf(stderr, "Error creating PBCH object\n"); @@ -264,14 +272,15 @@ void srslte_enb_dl_put_base(srslte_enb_dl_t *q, uint32_t tti) } -void srslte_enb_dl_gen_signal(srslte_enb_dl_t *q, cf_t *signal_buffer) +void srslte_enb_dl_gen_signal(srslte_enb_dl_t *q) { - - srslte_ofdm_tx_sf(&q->ifft, q->sf_symbols[0], signal_buffer); - // TODO: PAPR control float norm_factor = (float) sqrt(q->cell.nof_prb)/15; - srslte_vec_sc_prod_cfc(signal_buffer, q->tx_amp*norm_factor, signal_buffer, SRSLTE_SF_LEN_PRB(q->cell.nof_prb)); + + for (int i = 0; i < q->cell.nof_ports; i++) { + srslte_ofdm_tx_sf(&q->ifft[i]); + srslte_vec_sc_prod_cfc(q->ifft[i].out_buffer, q->tx_amp*norm_factor, q->ifft[i].out_buffer, (uint32_t) SRSLTE_SF_LEN_PRB(q->cell.nof_prb)); + } } int srslte_enb_dl_add_rnti(srslte_enb_dl_t *q, uint16_t rnti) diff --git a/lib/src/phy/enb/enb_ul.c b/lib/src/phy/enb/enb_ul.c index db05d44ea..f94eb0277 100644 --- a/lib/src/phy/enb/enb_ul.c +++ b/lib/src/phy/enb/enb_ul.c @@ -40,6 +40,7 @@ #define MAX_CANDIDATES 16 int srslte_enb_ul_init(srslte_enb_ul_t *q, + cf_t *in_buffer, uint32_t max_prb) { int ret = SRSLTE_ERROR_INVALID_INPUTS; @@ -55,8 +56,20 @@ int srslte_enb_ul_init(srslte_enb_ul_t *q, perror("malloc"); goto clean_exit; } - - if (srslte_ofdm_rx_init(&q->fft, SRSLTE_CP_NORM, max_prb)) { + + q->sf_symbols = srslte_vec_malloc(SRSLTE_SF_LEN_RE(max_prb, SRSLTE_CP_NORM) * sizeof(cf_t)); + if (!q->sf_symbols) { + perror("malloc"); + goto clean_exit; + } + + q->ce = srslte_vec_malloc(SRSLTE_SF_LEN_RE(max_prb, SRSLTE_CP_NORM) * sizeof(cf_t)); + if (!q->ce) { + perror("malloc"); + goto clean_exit; + } + + if (srslte_ofdm_rx_init(&q->fft, SRSLTE_CP_NORM, in_buffer, q->sf_symbols, max_prb)) { fprintf(stderr, "Error initiating FFT\n"); goto clean_exit; } @@ -80,18 +93,6 @@ int srslte_enb_ul_init(srslte_enb_ul_t *q, goto clean_exit; } - q->sf_symbols = srslte_vec_malloc(SRSLTE_SF_LEN_RE(max_prb, SRSLTE_CP_NORM) * sizeof(cf_t)); - if (!q->sf_symbols) { - perror("malloc"); - goto clean_exit; - } - - q->ce = srslte_vec_malloc(SRSLTE_SF_LEN_RE(max_prb, SRSLTE_CP_NORM) * sizeof(cf_t)); - if (!q->ce) { - perror("malloc"); - goto clean_exit; - } - ret = SRSLTE_SUCCESS; } else { @@ -254,7 +255,7 @@ int srslte_enb_ul_cfg_ue(srslte_enb_ul_t *q, uint16_t rnti, void srslte_enb_ul_fft(srslte_enb_ul_t *q, cf_t *signal_buffer) { - srslte_ofdm_rx_sf(&q->fft, signal_buffer, q->sf_symbols); + srslte_ofdm_rx_sf(&q->fft); } int get_pucch(srslte_enb_ul_t *q, uint16_t rnti, diff --git a/lib/src/phy/phch/test/pbch_file_test.c b/lib/src/phy/phch/test/pbch_file_test.c index 734640d55..2ca12e4c9 100644 --- a/lib/src/phy/phch/test/pbch_file_test.c +++ b/lib/src/phy/phch/test/pbch_file_test.c @@ -140,7 +140,7 @@ int base_init() { return -1; } - if (srslte_ofdm_init_(&fft, cell.cp, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { + if (srslte_ofdm_init_(&fft, cell.cp, input_buffer, fft_buffer, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { fprintf(stderr, "Error initializing FFT\n"); return -1; } @@ -203,7 +203,7 @@ int main(int argc, char **argv) { if (nread > 0) { // process 1st subframe only - srslte_ofdm_rx_sf(&fft, input_buffer, fft_buffer); + srslte_ofdm_rx_sf(&fft); /* Get channel estimates for each port */ srslte_chest_dl_estimate(&chest, fft_buffer, ce, 0); diff --git a/lib/src/phy/phch/test/pcfich_file_test.c b/lib/src/phy/phch/test/pcfich_file_test.c index dfb8d72e3..e92d6c7ba 100644 --- a/lib/src/phy/phch/test/pcfich_file_test.c +++ b/lib/src/phy/phch/test/pcfich_file_test.c @@ -120,15 +120,15 @@ int base_init() { fmatlab = NULL; } - flen = SRSLTE_SF_LEN(srslte_symbol_sz(cell.nof_prb)); + flen = SRSLTE_SF_LEN(srslte_symbol_sz_power2(cell.nof_prb)); - input_buffer = malloc(flen * sizeof(cf_t)); + input_buffer = srslte_vec_malloc(flen * sizeof(cf_t)); if (!input_buffer) { perror("malloc"); exit(-1); } - fft_buffer = malloc(SRSLTE_SF_LEN_RE(cell.nof_prb, cell.cp) * sizeof(cf_t)); + fft_buffer = srslte_vec_malloc(SRSLTE_SF_LEN_RE(cell.nof_prb, cell.cp) * sizeof(cf_t)); if (!fft_buffer) { perror("malloc"); return -1; @@ -151,7 +151,7 @@ int base_init() { return -1; } - if (srslte_ofdm_init_(&fft, cell.cp, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { + if (srslte_ofdm_init_(&fft, cell.cp, input_buffer, fft_buffer, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { fprintf(stderr, "Error initializing FFT\n"); return -1; } @@ -215,7 +215,7 @@ int main(int argc, char **argv) { n = srslte_filesource_read(&fsrc, input_buffer, flen); - srslte_ofdm_rx_sf(&fft, input_buffer, fft_buffer); + srslte_ofdm_rx_sf(&fft); if (fmatlab) { fprintf(fmatlab, "infft="); diff --git a/lib/src/phy/phch/test/pdcch_file_test.c b/lib/src/phy/phch/test/pdcch_file_test.c index d4ceed4b6..5482d9f98 100644 --- a/lib/src/phy/phch/test/pdcch_file_test.c +++ b/lib/src/phy/phch/test/pdcch_file_test.c @@ -126,7 +126,7 @@ int base_init() { exit(-1); } - flen = 2 * (SRSLTE_SLOT_LEN(srslte_symbol_sz(cell.nof_prb))); + flen = 2 * (SRSLTE_SLOT_LEN(srslte_symbol_sz_power2(cell.nof_prb))); input_buffer = malloc(flen * sizeof(cf_t)); if (!input_buffer) { @@ -157,7 +157,7 @@ int base_init() { return -1; } - if (srslte_ofdm_init_(&fft, cell.cp, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { + if (srslte_ofdm_init_(&fft, cell.cp, input_buffer, fft_buffer, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { fprintf(stderr, "Error initializing FFT\n"); return -1; } @@ -231,7 +231,7 @@ int main(int argc, char **argv) { INFO("Reading %d samples sub-frame %d\n", flen, frame_cnt); - srslte_ofdm_rx_sf(&fft, input_buffer, fft_buffer); + srslte_ofdm_rx_sf(&fft); /* Get channel estimates for each port */ srslte_chest_dl_estimate(&chest, fft_buffer, ce, frame_cnt %10); diff --git a/lib/src/phy/phch/test/pdsch_pdcch_file_test.c b/lib/src/phy/phch/test/pdsch_pdcch_file_test.c index 90c0e1c17..0faf7eca1 100644 --- a/lib/src/phy/phch/test/pdsch_pdcch_file_test.c +++ b/lib/src/phy/phch/test/pdsch_pdcch_file_test.c @@ -129,7 +129,7 @@ int base_init() { exit(-1); } - flen = 2 * (SRSLTE_SLOT_LEN(srslte_symbol_sz(cell.nof_prb))); + flen = SRSLTE_SF_LEN(srslte_symbol_sz_power2(cell.nof_prb)); input_buffer[0] = malloc(flen * sizeof(cf_t)); if (!input_buffer[0]) { @@ -137,7 +137,7 @@ int base_init() { exit(-1); } - if (srslte_ue_dl_init(&ue_dl, cell.nof_prb, 1)) { + if (srslte_ue_dl_init(&ue_dl, input_buffer, cell.nof_prb, 1)) { fprintf(stderr, "Error initializing UE DL\n"); return -1; } diff --git a/lib/src/phy/phch/test/phich_file_test.c b/lib/src/phy/phch/test/phich_file_test.c index d7078f933..65f7ce9c0 100644 --- a/lib/src/phy/phch/test/phich_file_test.c +++ b/lib/src/phy/phch/test/phich_file_test.c @@ -144,7 +144,7 @@ int base_init() { fmatlab = NULL; } - flen = SRSLTE_SF_LEN(srslte_symbol_sz(cell.nof_prb)); + flen = SRSLTE_SF_LEN(srslte_symbol_sz_power2(cell.nof_prb)); input_buffer = malloc(flen * sizeof(cf_t)); if (!input_buffer) { @@ -175,7 +175,7 @@ int base_init() { return -1; } - if (srslte_ofdm_init_(&fft, cell.cp, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { + if (srslte_ofdm_init_(&fft, cell.cp, input_buffer, fft_buffer, srslte_symbol_sz_power2(cell.nof_prb), cell.nof_prb, SRSLTE_DFT_FORWARD)) { fprintf(stderr, "Error initializing FFT\n"); return -1; } @@ -242,7 +242,7 @@ int main(int argc, char **argv) { n = srslte_filesource_read(&fsrc, input_buffer, flen); - srslte_ofdm_rx_sf(&fft, input_buffer, fft_buffer); + srslte_ofdm_rx_sf(&fft); if (fmatlab) { fprintf(fmatlab, "infft="); diff --git a/lib/src/phy/phch/test/pmch_file_test.c b/lib/src/phy/phch/test/pmch_file_test.c index ac66072fa..6586b2ee9 100644 --- a/lib/src/phy/phch/test/pmch_file_test.c +++ b/lib/src/phy/phch/test/pmch_file_test.c @@ -140,7 +140,7 @@ int base_init() { exit(-1); } - if (srslte_ue_dl_init(&ue_dl, cell.nof_prb, 1)) { + if (srslte_ue_dl_init(&ue_dl, input_buffer, cell.nof_prb, 1)) { fprintf(stderr, "Error initializing UE DL\n"); return -1; } diff --git a/lib/src/phy/phch/test/pmch_test.c b/lib/src/phy/phch/test/pmch_test.c index a187ca2bb..a9c29ef64 100644 --- a/lib/src/phy/phch/test/pmch_test.c +++ b/lib/src/phy/phch/test/pmch_test.c @@ -139,7 +139,7 @@ cf_t *tx_slot_symbols[SRSLTE_MAX_PORTS]; cf_t *rx_slot_symbols[SRSLTE_MAX_PORTS]; srslte_pmch_t pmch_tx, pmch_rx; srslte_pdsch_cfg_t pmch_cfg; -srslte_ofdm_t ifft_mbsfn, fft_mbsfn; +srslte_ofdm_t ifft_mbsfn[SRSLTE_MAX_PORTS], fft_mbsfn[SRSLTE_MAX_PORTS]; int main(int argc, char **argv) { uint32_t i, j, k; @@ -169,10 +169,10 @@ int main(int argc, char **argv) { grant.tb_en[1] = false; grant.nof_tb = 1; grant.mcs[0].idx = mcs_idx; - + grant.nof_prb = cell.nof_prb; grant.sf_type = SRSLTE_SF_MBSFN; - + srslte_dl_fill_ra_mcs(&grant.mcs[0], cell.nof_prb); grant.Qm[0] = srslte_mod_bits_x_symbol(grant.mcs[0].mod); for(int i = 0; i < 2; i++){ @@ -181,41 +181,6 @@ int main(int argc, char **argv) { } } - - -#ifdef DO_OFDM - - if (srslte_ofdm_tx_init_mbsfn(&ifft_mbsfn, SRSLTE_CP_EXT, cell.nof_prb)) { - fprintf(stderr, "Error creating iFFT object\n"); - exit(-1); - } - if (srslte_ofdm_rx_init_mbsfn(&fft_mbsfn, SRSLTE_CP_EXT, cell.nof_prb)) { - fprintf(stderr, "Error creating iFFT object\n"); - exit(-1); - } - - srslte_ofdm_set_non_mbsfn_region(&ifft_mbsfn, non_mbsfn_region); - srslte_ofdm_set_non_mbsfn_region(&fft_mbsfn, non_mbsfn_region); - srslte_ofdm_set_normalize(&ifft_mbsfn, true); - srslte_ofdm_set_normalize(&fft_mbsfn, true); - - - for (i = 0; i < cell.nof_ports; i++) { - tx_sf_symbols[i] = srslte_vec_malloc(sizeof(cf_t) * SRSLTE_SF_LEN_PRB(cell.nof_prb)); - } - - for (i = 0; i < nof_rx_antennas; i++) { - rx_sf_symbols[i] = srslte_vec_malloc(sizeof(cf_t) * SRSLTE_SF_LEN_PRB(cell.nof_prb)); - } -#endif /* DO_OFDM */ - - /* Configure PDSCH */ - - if (srslte_pmch_cfg(&pmch_cfg, cell, &grant, cfi, subframe)) { - fprintf(stderr, "Error configuring PMCH\n"); - exit(-1); - } - /* init memory */ for (i=0;isample_offset = 0; q->nof_rx_antennas = nof_rx_antennas; - if (srslte_ofdm_rx_init(&q->fft, SRSLTE_CP_NORM, max_prb)) { - fprintf(stderr, "Error initiating FFT\n"); - goto clean_exit; + for (int j = 0; j < SRSLTE_MAX_PORTS; j++) { + q->sf_symbols_m[j] = srslte_vec_malloc(MAX_SFLEN_RE * sizeof(cf_t)); + if (!q->sf_symbols_m[j]) { + perror("malloc"); + goto clean_exit; + } + for (uint32_t i=0;ice_m[i][j] = srslte_vec_malloc(MAX_SFLEN_RE * sizeof(cf_t)); + if (!q->ce_m[i][j]) { + perror("malloc"); + goto clean_exit; + } + bzero(q->ce_m[i][j], MAX_SFLEN_RE * sizeof(cf_t)); + } } - - if (srslte_ofdm_rx_init_mbsfn(&q->fft_mbsfn, SRSLTE_CP_EXT, max_prb)) { + + q->sf_symbols = q->sf_symbols_m[0]; + for (int i=0;ice[i] = q->ce_m[i][0]; + } + + for (int i = 0; i < nof_rx_antennas; i++) { + if (srslte_ofdm_rx_init(&q->fft[i], SRSLTE_CP_NORM, in_buffer[i], q->sf_symbols_m[i], max_prb)) { + fprintf(stderr, "Error initiating FFT\n"); + goto clean_exit; + } + } + + if (srslte_ofdm_rx_init_mbsfn(&q->fft_mbsfn, SRSLTE_CP_EXT, in_buffer[0], q->sf_symbols_m[0], max_prb)) { fprintf(stderr, "Error initiating FFT for MBSFN subframes \n"); goto clean_exit; } @@ -127,28 +151,7 @@ int srslte_ue_dl_init(srslte_ue_dl_t *q, fprintf(stderr, "Error initiating SFO correct\n"); goto clean_exit; } - srslte_cfo_set_tol(&q->sfo_correct, 1e-5f/q->fft.symbol_sz); - - for (int j = 0; j < SRSLTE_MAX_PORTS; j++) { - q->sf_symbols_m[j] = srslte_vec_malloc(MAX_SFLEN_RE * sizeof(cf_t)); - if (!q->sf_symbols_m[j]) { - perror("malloc"); - goto clean_exit; - } - for (uint32_t i=0;ice_m[i][j] = srslte_vec_malloc(MAX_SFLEN_RE * sizeof(cf_t)); - if (!q->ce_m[i][j]) { - perror("malloc"); - goto clean_exit; - } - bzero(q->ce_m[i][j], MAX_SFLEN_RE * sizeof(cf_t)); - } - } - - q->sf_symbols = q->sf_symbols_m[0]; - for (int i=0;ice[i] = q->ce_m[i][0]; - } + srslte_cfo_set_tol(&q->sfo_correct, 1e-5f/q->fft[0].symbol_sz); ret = SRSLTE_SUCCESS; } else { @@ -164,7 +167,9 @@ clean_exit: void srslte_ue_dl_free(srslte_ue_dl_t *q) { if (q) { - srslte_ofdm_rx_free(&q->fft); + for (int port = 0; port < SRSLTE_MAX_PORTS; port++) { + srslte_ofdm_rx_free(&q->fft[port]); + } srslte_ofdm_rx_free(&q->fft_mbsfn); srslte_chest_dl_free(&q->chest); srslte_regs_free(&q->regs); @@ -219,10 +224,12 @@ int srslte_ue_dl_set_cell(srslte_ue_dl_t *q, srslte_cell_t cell) fprintf(stderr, "Error resizing SFO correct\n"); return SRSLTE_ERROR; } - srslte_cfo_set_tol(&q->sfo_correct, 1e-5/q->fft.symbol_sz); - if (srslte_ofdm_rx_set_prb(&q->fft, q->cell.cp, q->cell.nof_prb)) { - fprintf(stderr, "Error resizing FFT\n"); - return SRSLTE_ERROR; + srslte_cfo_set_tol(&q->sfo_correct, 1e-5f/q->fft[0].symbol_sz); + for (int port = 0; port < q->nof_rx_antennas; port++) { + if (srslte_ofdm_rx_set_prb(&q->fft[port], q->cell.cp, q->cell.nof_prb)) { + fprintf(stderr, "Error resizing FFT\n"); + return SRSLTE_ERROR; + } } if (srslte_chest_dl_set_cell(&q->chest, q->cell)) { fprintf(stderr, "Error resizing channel estimator\n"); @@ -339,9 +346,9 @@ int srslte_ue_dl_decode_fft_estimate_mbsfn(srslte_ue_dl_t *q, cf_t *input[SRSLTE /* Run FFT for all subframe data */ for (int j=0;jnof_rx_antennas;j++) { if(sf_type == SRSLTE_SF_MBSFN ) { - srslte_ofdm_rx_sf(&q->fft_mbsfn, input[j], q->sf_symbols_m[j]); + srslte_ofdm_rx_sf(&q->fft_mbsfn); }else{ - srslte_ofdm_rx_sf(&q->fft, input[j], q->sf_symbols_m[j]); + srslte_ofdm_rx_sf(&q->fft[j]); } /* Correct SFO multiplying by complex exponential in the time domain */ @@ -351,7 +358,7 @@ int srslte_ue_dl_decode_fft_estimate_mbsfn(srslte_ue_dl_t *q, cf_t *input[SRSLTE srslte_cfo_correct(&q->sfo_correct, &q->sf_symbols_m[j][i*q->cell.nof_prb*SRSLTE_NRE], &q->sf_symbols_m[j][i*q->cell.nof_prb*SRSLTE_NRE], - q->sample_offset / q->fft.symbol_sz); + q->sample_offset / q->fft[j].symbol_sz); } } } diff --git a/lib/src/phy/ue/ue_mib.c b/lib/src/phy/ue/ue_mib.c index 46a470ab8..c003a2ba3 100644 --- a/lib/src/phy/ue/ue_mib.c +++ b/lib/src/phy/ue/ue_mib.c @@ -35,7 +35,8 @@ #include "srslte/phy/utils/debug.h" #include "srslte/phy/utils/vector.h" -int srslte_ue_mib_init(srslte_ue_mib_t * q, +int srslte_ue_mib_init(srslte_ue_mib_t * q, + cf_t *in_buffer[SRSLTE_MAX_PORTS], uint32_t max_prb) { int ret = SRSLTE_ERROR_INVALID_INPUTS; @@ -65,7 +66,7 @@ int srslte_ue_mib_init(srslte_ue_mib_t * q, } } - if (srslte_ofdm_rx_init(&q->fft, SRSLTE_CP_NORM, max_prb)) { + if (srslte_ofdm_rx_init(&q->fft, SRSLTE_CP_NORM, in_buffer[0], q->sf_symbols, max_prb)) { fprintf(stderr, "Error initializing FFT\n"); goto clean_exit; } @@ -143,14 +144,14 @@ void srslte_ue_mib_reset(srslte_ue_mib_t * q) srslte_pbch_decode_reset(&q->pbch); } -int srslte_ue_mib_decode(srslte_ue_mib_t * q, cf_t *input, +int srslte_ue_mib_decode(srslte_ue_mib_t * q, uint8_t bch_payload[SRSLTE_BCH_PAYLOAD_LEN], uint32_t *nof_tx_ports, int *sfn_offset) { int ret = SRSLTE_SUCCESS; cf_t *ce_slot1[SRSLTE_MAX_PORTS]; /* Run FFT for the slot symbols */ - srslte_ofdm_rx_sf(&q->fft, input, q->sf_symbols); + srslte_ofdm_rx_sf(&q->fft); /* Get channel estimates of sf idx #0 for each port */ ret = srslte_chest_dl_estimate(&q->chest, q->sf_symbols, q->ce, 0); @@ -198,7 +199,7 @@ int srslte_ue_mib_sync_init_multi(srslte_ue_mib_sync_t *q, } q->nof_rx_antennas = nof_rx_antennas; - if (srslte_ue_mib_init(&q->ue_mib, SRSLTE_UE_MIB_NOF_PRB)) { + if (srslte_ue_mib_init(&q->ue_mib, q->sf_buffer, SRSLTE_UE_MIB_NOF_PRB)) { fprintf(stderr, "Error initiating ue_mib\n"); return SRSLTE_ERROR; } @@ -274,7 +275,7 @@ int srslte_ue_mib_sync_decode(srslte_ue_mib_sync_t * q, return -1; } else if (srslte_ue_sync_get_sfidx(&q->ue_sync) == 0) { if (ret == 1) { - mib_ret = srslte_ue_mib_decode(&q->ue_mib, q->sf_buffer[0], bch_payload, nof_tx_ports, sfn_offset); + mib_ret = srslte_ue_mib_decode(&q->ue_mib, bch_payload, nof_tx_ports, sfn_offset); } else { DEBUG("Resetting PBCH decoder after %d frames\n", q->ue_mib.frame_cnt); srslte_ue_mib_reset(&q->ue_mib); diff --git a/lib/src/phy/ue/ue_ul.c b/lib/src/phy/ue/ue_ul.c index 37dfecd93..853937f7c 100644 --- a/lib/src/phy/ue/ue_ul.c +++ b/lib/src/phy/ue/ue_ul.c @@ -41,6 +41,7 @@ #define DEFAULT_CFO_TOL 50.0 // Hz int srslte_ue_ul_init(srslte_ue_ul_t *q, + cf_t *out_buffer, uint32_t max_prb) { int ret = SRSLTE_ERROR_INVALID_INPUTS; @@ -50,8 +51,14 @@ int srslte_ue_ul_init(srslte_ue_ul_t *q, ret = SRSLTE_ERROR; bzero(q, sizeof(srslte_ue_ul_t)); - - if (srslte_ofdm_tx_init(&q->fft, SRSLTE_CP_NORM, max_prb)) { + + q->sf_symbols = srslte_vec_malloc(SRSLTE_SF_LEN_PRB(max_prb) * sizeof(cf_t)); + if (!q->sf_symbols) { + perror("malloc"); + goto clean_exit; + } + + if (srslte_ofdm_tx_init(&q->fft, SRSLTE_CP_NORM, q->sf_symbols, out_buffer, max_prb)) { fprintf(stderr, "Error initiating FFT\n"); goto clean_exit; } @@ -83,11 +90,6 @@ int srslte_ue_ul_init(srslte_ue_ul_t *q, fprintf(stderr, "Error initiating srslte_refsignal_ul\n"); goto clean_exit; } - q->sf_symbols = srslte_vec_malloc(SRSLTE_SF_LEN_PRB(max_prb) * sizeof(cf_t)); - if (!q->sf_symbols) { - perror("malloc"); - goto clean_exit; - } q->refsignal = srslte_vec_malloc(2 * SRSLTE_NRE * max_prb * sizeof(cf_t)); if (!q->refsignal) { perror("malloc"); @@ -347,7 +349,7 @@ int srslte_ue_ul_pucch_encode(srslte_ue_ul_t *q, srslte_uci_data_t uci_data, q->last_pucch_format = format; - srslte_ofdm_tx_sf(&q->fft, q->sf_symbols, output_signal); + srslte_ofdm_tx_sf(&q->fft); if (q->cfo_en) { srslte_cfo_correct(&q->cfo, output_signal, output_signal, q->current_cfo / srslte_symbol_sz(q->cell.nof_prb)); @@ -417,7 +419,7 @@ int srslte_ue_ul_srs_encode(srslte_ue_ul_t *q, uint32_t tti, cf_t *output_signal } } - srslte_ofdm_tx_sf(&q->fft, q->sf_symbols, output_signal); + srslte_ofdm_tx_sf(&q->fft); if (q->cfo_en) { srslte_cfo_correct(&q->cfo, output_signal, output_signal, q->current_cfo / srslte_symbol_sz(q->cell.nof_prb)); @@ -486,7 +488,7 @@ int srslte_ue_ul_pusch_encode_rnti_softbuffer(srslte_ue_ul_t *q, } } - srslte_ofdm_tx_sf(&q->fft, q->sf_symbols, output_signal); + srslte_ofdm_tx_sf(&q->fft); if (q->cfo_en) { srslte_cfo_correct(&q->cfo, output_signal, output_signal, q->current_cfo / srslte_symbol_sz(q->cell.nof_prb)); diff --git a/srsenb/hdr/phy/phch_worker.h b/srsenb/hdr/phy/phch_worker.h index 906e8b9d0..48878a879 100644 --- a/srsenb/hdr/phy/phch_worker.h +++ b/srsenb/hdr/phy/phch_worker.h @@ -88,7 +88,7 @@ private: bool running; cf_t *signal_buffer_rx; - cf_t *signal_buffer_tx; + cf_t *signal_buffer_tx[SRSLTE_MAX_PORTS]; uint32_t tti_rx, tti_tx, tti_sched_ul, sf_rx, sf_tx, sf_sched_ul, tx_mutex_cnt; srslte_enb_dl_t enb_dl; diff --git a/srsenb/src/phy/phch_worker.cc b/srsenb/src/phy/phch_worker.cc index 3a1fb8ca4..8a6173347 100644 --- a/srsenb/src/phy/phch_worker.cc +++ b/srsenb/src/phy/phch_worker.cc @@ -93,12 +93,13 @@ void phch_worker::init(phch_common* phy_, srslte::log *log_h_) fprintf(stderr, "Error allocating memory\n"); return; } - signal_buffer_tx = (cf_t*) srslte_vec_malloc(2*SRSLTE_SF_LEN_PRB(phy->cell.nof_prb)*sizeof(cf_t)); - if (!signal_buffer_tx) { + bzero(&signal_buffer_tx, sizeof(cf_t *) * SRSLTE_MAX_PORTS); + signal_buffer_tx[0] = (cf_t*) srslte_vec_malloc(2*SRSLTE_SF_LEN_PRB(phy->cell.nof_prb)*sizeof(cf_t)); + if (!signal_buffer_tx[0]) { fprintf(stderr, "Error allocating memory\n"); return; } - if (srslte_enb_dl_init(&enb_dl, phy->cell.nof_prb)) { + if (srslte_enb_dl_init(&enb_dl, signal_buffer_tx, phy->cell.nof_prb)) { fprintf(stderr, "Error initiating ENB DL\n"); return; } @@ -106,7 +107,7 @@ void phch_worker::init(phch_common* phy_, srslte::log *log_h_) fprintf(stderr, "Error initiating ENB DL\n"); return; } - if (srslte_enb_ul_init(&enb_ul, phy->cell.nof_prb)) { + if (srslte_enb_ul_init(&enb_ul, signal_buffer_rx, phy->cell.nof_prb)) { fprintf(stderr, "Error initiating ENB UL\n"); return; } @@ -156,8 +157,10 @@ void phch_worker::stop() if (signal_buffer_rx) { free(signal_buffer_rx); } - if (signal_buffer_tx) { - free(signal_buffer_tx); + for (int i = 0; i < SRSLTE_MAX_PORTS; i++) { + if (signal_buffer_tx[i]) { + free(signal_buffer_tx[i]); + } } pthread_mutex_unlock(&mutex); pthread_mutex_destroy(&mutex); @@ -336,9 +339,9 @@ void phch_worker::work_imp() } // Generate signal and transmit - srslte_enb_dl_gen_signal(&enb_dl, signal_buffer_tx); + srslte_enb_dl_gen_signal(&enb_dl); Debug("Sending to radio\n"); - phy->worker_end(tx_mutex_cnt, signal_buffer_tx, SRSLTE_SF_LEN_PRB(phy->cell.nof_prb), tx_time); + phy->worker_end(tx_mutex_cnt, signal_buffer_tx[0], SRSLTE_SF_LEN_PRB(phy->cell.nof_prb), tx_time); #ifdef DEBUG_WRITE_FILE fwrite(signal_buffer_tx, SRSLTE_SF_LEN_PRB(phy->cell.nof_prb)*sizeof(cf_t), 1, f); diff --git a/srsue/src/phy/phch_recv.cc b/srsue/src/phy/phch_recv.cc index c14141257..041418b5a 100644 --- a/srsue/src/phy/phch_recv.cc +++ b/srsue/src/phy/phch_recv.cc @@ -101,13 +101,13 @@ void phch_recv:: init(srslte::radio_multi *_radio_handler, mac_interface_phy *_ if (do_agc) { srslte_ue_sync_start_agc(&cs.ue_sync, callback_set_rx_gain, last_gain); } - - if (srslte_ue_dl_init(&ue_dl_measure, SRSLTE_MAX_PRB, nof_rx_antennas)) { + + if (srslte_ue_dl_init(&ue_dl_measure, sf_buffer, SRSLTE_MAX_PRB, nof_rx_antennas)) { Error("SYNC: Initiating ue_dl_measure\n"); return; } - if (srslte_ue_mib_init(&ue_mib, SRSLTE_MAX_PRB)) { + if (srslte_ue_mib_init(&ue_mib, sf_buffer, SRSLTE_MAX_PRB)) { Error("SYNC: Initiating UE MIB decoder\n"); return; } @@ -374,7 +374,7 @@ int phch_recv::cell_sync_sfn(void) { int sfn_offset = 0; Info("SYNC: Trying to decode MIB... SNR=%.1f dB\n", 10*log10(srslte_chest_dl_get_snr(&ue_mib.chest))); - int n = srslte_ue_mib_decode(&ue_mib, sf_buffer[0], bch_payload, NULL, &sfn_offset); + int n = srslte_ue_mib_decode(&ue_mib, bch_payload, NULL, &sfn_offset); if (n < 0) { Error("SYNC: Error decoding MIB while synchronising SFN"); return -1; diff --git a/srsue/src/phy/phch_worker.cc b/srsue/src/phy/phch_worker.cc index c0fec2ba2..ee1011aea 100644 --- a/srsue/src/phy/phch_worker.cc +++ b/srsue/src/phy/phch_worker.cc @@ -118,12 +118,12 @@ bool phch_worker::init(uint32_t max_prb, srslte::log *log_h) } } - if (srslte_ue_dl_init(&ue_dl, max_prb, phy->args->nof_rx_ant)) { + if (srslte_ue_dl_init(&ue_dl, signal_buffer, max_prb, phy->args->nof_rx_ant)) { Error("Initiating UE DL\n"); return false; } - if (srslte_ue_ul_init(&ue_ul, max_prb)) { + if (srslte_ue_ul_init(&ue_ul, signal_buffer[0], max_prb)) { Error("Initiating UE UL\n"); return false; } From b0157cb4f870b330c6b2daa737c9335a43aaa17e Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Fri, 20 Oct 2017 10:40:59 -0400 Subject: [PATCH 52/55] Removed error when can't save fft wisdom (prints in mkl) --- lib/src/phy/dft/dft_fftw.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/src/phy/dft/dft_fftw.c b/lib/src/phy/dft/dft_fftw.c index a06dd397f..9d6898117 100644 --- a/lib/src/phy/dft/dft_fftw.c +++ b/lib/src/phy/dft/dft_fftw.c @@ -56,9 +56,7 @@ void srslte_dft_load() { void srslte_dft_exit() { #ifdef FFTW_WISDOM_FILE - if (!fftwf_export_wisdom_to_filename(FFTW_WISDOM_FILE)) { - fprintf(stderr, "Error saving FFTW wisdom to file %s\n", FFTW_WISDOM_FILE); - } + fftwf_export_wisdom_to_filename(FFTW_WISDOM_FILE); #endif } From c1b296eb2cb516329b6f66a48549c81cfff02ff0 Mon Sep 17 00:00:00 2001 From: Xavier Arteaga Date: Fri, 20 Oct 2017 18:17:07 +0200 Subject: [PATCH 53/55] SSE optimization for srslte_bit_interleave_w_offset --- lib/src/phy/utils/bit.c | 126 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/lib/src/phy/utils/bit.c b/lib/src/phy/utils/bit.c index 9ef53c35a..b1ae383a6 100644 --- a/lib/src/phy/utils/bit.c +++ b/lib/src/phy/utils/bit.c @@ -31,6 +31,12 @@ #include #include +#ifdef LV_HAVE_SSE + +#include + +#endif /* LV_HAVE_SSE */ + #include "srslte/phy/utils/bit.h" void srslte_bit_interleave(uint8_t *input, uint8_t *output, uint16_t *interleaver, uint32_t nof_bits) { @@ -53,6 +59,125 @@ void srslte_bit_interleave_w_offset(uint8_t *input, uint8_t *output, uint16_t *i } w_offset_p=8-w_offset; } +#ifdef LV_HAVE_SSE + __m64 m64mask = _mm_setr_pi8((uint8_t) 0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1); + __m128i m128mask = _mm_set1_epi64(m64mask); + + union { + uint8_t v[8]; + __m64 m64; + } a, b, c; + + union { + __m128i m128; + uint16_t u16[8]; + uint8_t u8[16]; + struct { + __m64 reg_a; + __m64 reg_b; + } m64; + struct { + uint16_t i0, i1, i2, i3, i4, i5, i6, i7; + } v; + } ipx, epx, ipx2, epx2, b128, a128, c128; + + uint32_t i = st; + for (; i < (nof_bits / 8 - 1); i += 2) { + ipx.m128 = _mm_loadu_si128((__m128i *) (interleaver + (i * 8) - w_offset_p)); + epx.m128 = _mm_shuffle_epi8(ipx.m128, _mm_set_epi8(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E)); + ipx2.m128 = _mm_loadu_si128((__m128i *) (interleaver + ((i + 1) * 8) - w_offset_p)); + epx2.m128 = _mm_shuffle_epi8(ipx2.m128, _mm_set_epi8(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E)); + + epx.m64.reg_b = epx2.m64.reg_a; + + b128.m128 = _mm_and_si128(epx.m128, _mm_set1_epi8(0x7)); + b128.m128 = _mm_shuffle_epi8(m128mask, b128.m128); + + ipx.m128 = _mm_srli_epi16(ipx.m128, 3); + ipx2.m128 = _mm_srli_epi16(ipx2.m128, 3); + + a128.m128 = _mm_set_epi8(input[ipx2.v.i0], + input[ipx2.v.i1], + input[ipx2.v.i2], + input[ipx2.v.i3], + input[ipx2.v.i4], + input[ipx2.v.i5], + input[ipx2.v.i6], + input[ipx2.v.i7], + input[ipx.v.i0], + input[ipx.v.i1], + input[ipx.v.i2], + input[ipx.v.i3], + input[ipx.v.i4], + input[ipx.v.i5], + input[ipx.v.i6], + input[ipx.v.i7]); + + c128.m128 = _mm_cmpeq_epi8(_mm_and_si128(a128.m128, b128.m128), b128.m128); + uint16_t o = (uint16_t) _mm_movemask_epi8(c128.m128); + *((uint16_t *) (output + i)) = o; + } + + for (; i < nof_bits / 8; i++) { + ipx.m128 = _mm_loadu_si128((__m128i *) (interleaver + i * 8 - w_offset_p)); + epx.m128 = _mm_shuffle_epi8(ipx.m128, _mm_set_epi8(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E)); + b.m64 = _mm_and_si64(epx.m64.reg_a, _mm_set1_pi8(0x7)); + b.m64 = _mm_shuffle_pi8(m64mask, b.m64); + + ipx.m128 = _mm_srli_epi16(ipx.m128, 3); + + a.m64 = _mm_set_pi8(input[ipx.v.i0], + input[ipx.v.i1], + input[ipx.v.i2], + input[ipx.v.i3], + input[ipx.v.i4], + input[ipx.v.i5], + input[ipx.v.i6], + input[ipx.v.i7]); + + c.m64 = _mm_cmpeq_pi8(_mm_and_si64(a.m64, b.m64), b.m64); + output[i] = (uint8_t) _mm_movemask_pi8(c.m64); + } + +#if 0 + /* THIS PIECE OF CODE IS FOR CHECKING SIMD BEHAVIOUR. DO NOT ENABLE. */ + uint8_t *output2 = malloc(nof_bits/8); + for (i=st;i Date: Fri, 20 Oct 2017 12:34:34 -0400 Subject: [PATCH 54/55] Added option to ue.conf to configure CFO EMA (set default to 0.4) --- lib/include/srslte/interfaces/ue_interfaces.h | 3 ++- lib/include/srslte/phy/ue/ue_sync.h | 5 ++++- lib/src/phy/ue/ue_sync.c | 5 +++++ srsue/src/main.cc | 5 +++++ srsue/src/phy/phch_recv.cc | 1 + srsue/ue.conf.example | 9 ++++++++- 6 files changed, 25 insertions(+), 3 deletions(-) diff --git a/lib/include/srslte/interfaces/ue_interfaces.h b/lib/include/srslte/interfaces/ue_interfaces.h index 8561ba55c..88dfd285c 100644 --- a/lib/include/srslte/interfaces/ue_interfaces.h +++ b/lib/include/srslte/interfaces/ue_interfaces.h @@ -445,7 +445,8 @@ typedef struct { float snr_ema_coeff; std::string snr_estim_alg; bool cfo_integer_enabled; - float cfo_correct_tol_hz; + float cfo_correct_tol_hz; + float cfo_ema; int time_correct_period; bool sfo_correct_disable; std::string sss_algorithm; diff --git a/lib/include/srslte/phy/ue/ue_sync.h b/lib/include/srslte/phy/ue/ue_sync.h index bd280acd0..e5877dd23 100644 --- a/lib/include/srslte/phy/ue/ue_sync.h +++ b/lib/include/srslte/phy/ue/ue_sync.h @@ -185,7 +185,10 @@ SRSLTE_API void srslte_ue_sync_set_cfo_tol(srslte_ue_sync_t *q, float tol); SRSLTE_API void srslte_ue_sync_set_cfo(srslte_ue_sync_t *q, - float cfo); + float cfo); + +SRSLTE_API void srslte_ue_sync_set_cfo_ema(srslte_ue_sync_t *q, + float ema); SRSLTE_API void srslte_ue_sync_cfo_i_detec_en(srslte_ue_sync_t *q, bool enable); diff --git a/lib/src/phy/ue/ue_sync.c b/lib/src/phy/ue/ue_sync.c index f121c5bac..e9c02ca8b 100644 --- a/lib/src/phy/ue/ue_sync.c +++ b/lib/src/phy/ue/ue_sync.c @@ -359,6 +359,11 @@ uint32_t srslte_ue_sync_peak_idx(srslte_ue_sync_t *q) { return q->peak_idx; } +void srslte_ue_sync_set_cfo_ema(srslte_ue_sync_t *q, float ema) { + srslte_sync_set_cfo_ema_alpha(&q->sfind, ema); + srslte_sync_set_cfo_ema_alpha(&q->strack, ema); +} + srslte_ue_sync_state_t srslte_ue_sync_get_state(srslte_ue_sync_t *q) { return q->state; } diff --git a/srsue/src/main.cc b/srsue/src/main.cc index 1b146a9f3..52d4b13c9 100644 --- a/srsue/src/main.cc +++ b/srsue/src/main.cc @@ -202,6 +202,11 @@ void parse_args(all_args_t *args, int argc, char *argv[]) { bpo::value(&args->expert.phy.cfo_correct_tol_hz)->default_value(50.0), "Tolerance (in Hz) for digial CFO compensation.") + ("expert.cfo_ema", + bpo::value(&args->expert.phy.cfo_ema)->default_value(0.4), + "CFO Exponential Moving Average coefficient. Lower makes it more robust to noise " + "but vulnerable to periodic interruptions due to VCO corrections.") + ("expert.time_correct_period", bpo::value(&args->expert.phy.time_correct_period)->default_value(5), "Period for sampling time offset correction.") diff --git a/srsue/src/phy/phch_recv.cc b/srsue/src/phy/phch_recv.cc index cf8bd6786..8a69a7f76 100644 --- a/srsue/src/phy/phch_recv.cc +++ b/srsue/src/phy/phch_recv.cc @@ -209,6 +209,7 @@ void phch_recv::set_ue_sync_opts(srslte_ue_sync_t *q) { srslte_ue_sync_cfo_i_detec_en(q, true); } + srslte_ue_sync_set_cfo_ema(q, worker_com->args->cfo_ema); srslte_ue_sync_set_cfo_tol(q, worker_com->args->cfo_correct_tol_hz); int time_correct_period = worker_com->args->time_correct_period; diff --git a/srsue/ue.conf.example b/srsue/ue.conf.example index d0dc67952..5b3559ae9 100644 --- a/srsue/ue.conf.example +++ b/srsue/ue.conf.example @@ -98,7 +98,10 @@ enable = false ##################################################################### # Expert configuration options # +# ue_category: Sets UE category (range 1-5). Default: 4 # ip_netmask: Netmask of the tun_srsue device. Default: 255.255.255.0 +# rssi_sensor_enabled: Enable or disable RF frontend RSSI sensor. Required for RSRP metrics but +# can cause UHD instability for long-duration testing. Default true. # ue_category: Sets UE category (range 1-5). Default: 4 # # prach_gain: PRACH gain (dB). If defined, forces a gain for the tranmsission of PRACH only., @@ -116,7 +119,9 @@ enable = false # nof_phy_threads: Selects the number of PHY threads (maximum 4, minimum 1, default 2) # equalizer_mode: Selects equalizer mode. Valid modes are: "mmse", "zf" or any # non-negative real number to indicate a regularized zf coefficient. -# Default is MMSE. +# Default is MMSE. +# cfo_ema: CFO Exponential Moving Average coefficient. Lower makes it more robust to noise +# but vulnerable to periodic interruptions due to VCO corrections. # cfo_integer_enabled: Enables integer CFO estimation and correction. This needs improvement # and may lead to incorrect synchronization. Use with caution. # cfo_correct_tol_hz: Tolerance (in Hz) for digial CFO compensation. Lower tolerance means that @@ -140,6 +145,7 @@ enable = false ##################################################################### [expert] #ip_netmask = 255.255.255.0 +#rssi_sensor_enabled = false #ue_category = 4 #prach_gain = 30 #cqi_max = 15 @@ -150,6 +156,7 @@ enable = false #attach_enable_64qam = false #nof_phy_threads = 2 #equalizer_mode = mmse +#cfo_ema = 0.4 #cfo_integer_enabled = false #cfo_correct_tol_hz = 50 #time_correct_period = 5 From 3cbe526cbc7fbaad9cb1febfcba4fa2c47783671 Mon Sep 17 00:00:00 2001 From: Ismael Gomez Date: Fri, 20 Oct 2017 12:43:04 -0400 Subject: [PATCH 55/55] Revert "Disable RSSI sensor by default" This reverts commit c14393b24f874b4e12d3bbfb883133fedda4f6f1. --- srsue/src/main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/srsue/src/main.cc b/srsue/src/main.cc index 52d4b13c9..b933b66f3 100644 --- a/srsue/src/main.cc +++ b/srsue/src/main.cc @@ -155,7 +155,7 @@ void parse_args(all_args_t *args, int argc, char *argv[]) { "Pregenerate uplink signals after attach. Improves CPU performance.") ("expert.rssi_sensor_enabled", - bpo::value(&args->expert.phy.rssi_sensor_enabled)->default_value(false), + bpo::value(&args->expert.phy.rssi_sensor_enabled)->default_value(true), "Enable or disable RF frontend RSSI sensor. In some USRP devices can cause segmentation fault") ("expert.prach_gain",