Merge branch 'next_novolk' into mobility

This commit is contained in:
Ismael Gomez 2017-10-10 14:37:13 +02:00
commit 77c6322db2
7 changed files with 518 additions and 278 deletions

View File

@ -61,7 +61,7 @@ set(CMAKE_BUILD_TYPE ${CMAKE_BUILD_TYPE} CACHE STRING "")
option(ENABLE_SRSUE "Build srsUE application" ON)
option(ENABLE_SRSENB "Build srsENB application" ON)
option(ENABLE_VOLK "Enable use of VOLK SIMD library" ON)
option(ENABLE_VOLK "Enable use of VOLK SIMD library" OFF)
option(ENABLE_GUI "Enable GUI (using srsGUI)" ON)
option(ENABLE_BLADERF "Enable BladeRF" ON)

View File

@ -4,10 +4,10 @@
include(CheckCSourceRuns)
option(ENABLE_SSE "Enable compile-time SSE4.1 support." ON)
option(ENABLE_AVX "Enable compile-time AVX support." ON)
option(ENABLE_AVX2 "Enable compile-time AVX2 support." ON)
option(ENABLE_FMA "Enable compile-time FMA support." ON)
option(ENABLE_SSE "Enable compile-time SSE4.1 support." ON)
option(ENABLE_AVX "Enable compile-time AVX support." ON)
option(ENABLE_AVX2 "Enable compile-time AVX2 support." ON)
option(ENABLE_FMA "Enable compile-time FMA support." ON)
if (ENABLE_SSE)
#

View File

@ -1,161 +0,0 @@
INCLUDE(FindPkgConfig)
PKG_CHECK_MODULES(PC_VOLK volk QUIET)
FIND_PATH(
VOLK_INCLUDE_DIRS
NAMES volk/volk.h
HINTS $ENV{VOLK_DIR}/include
${CMAKE_INSTALL_PREFIX}/include
${PC_VOLK_INCLUDE_DIR}
PATHS /usr/local/include
/usr/include
)
FIND_LIBRARY(
VOLK_LIBRARIES
NAMES volk
HINTS $ENV{VOLK_DIR}/lib
${CMAKE_INSTALL_PREFIX}/lib
${CMAKE_INSTALL_PREFIX}/lib64
${PC_VOLK_LIBDIR}
PATHS /usr/local/lib
/usr/local/lib64
/usr/lib
/usr/lib64
)
INCLUDE(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(VOLK DEFAULT_MSG VOLK_LIBRARIES VOLK_INCLUDE_DIRS)
MARK_AS_ADVANCED(VOLK_LIBRARIES VOLK_INCLUDE_DIRS VOLK_DEFINITIONS)
IF(VOLK_FOUND)
SET(CMAKE_REQUIRED_LIBRARIES ${VOLK_LIBRARIES} m)
CHECK_FUNCTION_EXISTS_MATH(volk_16i_s32f_convert_32f HAVE_VOLK_CONVERT_IF_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_index_max_16u HAVE_VOLK_MAX_FUNCTION_16)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_index_max_32u HAVE_VOLK_MAX_FUNCTION_32)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_max_32f HAVE_VOLK_MAX_VEC_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_accumulator_s32f HAVE_VOLK_ACC_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_s32fc_multiply_32fc HAVE_VOLK_MULT_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_conjugate_32fc HAVE_VOLK_CONJ_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_multiply_32fc HAVE_VOLK_MULT2_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_multiply_conjugate_32fc HAVE_VOLK_MULT2_CONJ_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_32f_multiply_32fc HAVE_VOLK_MULT_REAL_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_s32f_multiply_32f HAVE_VOLK_MULT_FLOAT_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_magnitude_32f HAVE_VOLK_MAG_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_magnitude_squared_32f HAVE_VOLK_MAG_SQUARE_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_divide_32f HAVE_VOLK_DIVIDE_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_dot_prod_32fc HAVE_VOLK_DOTPROD_FC_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_32f_dot_prod_32fc HAVE_VOLK_DOTPROD_CFC_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_conjugate_dot_prod_32fc HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_dot_prod_32f HAVE_VOLK_DOTPROD_F_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_s32f_atan2_32f HAVE_VOLK_ATAN_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_s32f_convert_16i HAVE_VOLK_CONVERT_FI_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_deinterleave_32f_x2 HAVE_VOLK_DEINTERLEAVE_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_interleave_32fc HAVE_VOLK_INTERLEAVE_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_subtract_32f HAVE_VOLK_SUB_FLOAT_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_add_32f HAVE_VOLK_ADD_FLOAT_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_square_dist_32f HAVE_VOLK_SQUARE_DIST_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_deinterleave_real_32f HAVE_VOLK_DEINTERLEAVE_REAL_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_index_max_16u HAVE_VOLK_MAX_ABS_FUNCTION_16)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_index_max_32u HAVE_VOLK_MAX_ABS_FUNCTION_32)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_multiply_32f HAVE_VOLK_MULT_REAL2_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_16i_max_star_16i HAVE_VOLK_MAX_STAR_S_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_8i_convert_16i HAVE_VOLK_CONVERT_CI_FUNCTION)
SET(VOLK_DEFINITIONS "HAVE_VOLK")
IF(${HAVE_VOLK_CONVERT_IF_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONVERT_IF_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MULT_REAL2_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_REAL2_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_CONVERT_CI_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONVERT_CI_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MAX_STAR_S_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_STAR_S_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MAX_ABS_FUNCTION_16})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_ABS_FUNCTION_16")
ENDIF()
IF(${HAVE_VOLK_MAX_ABS_FUNCTION_32})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_ABS_FUNCTION_32")
ENDIF()
IF(${HAVE_VOLK_MAX_VEC_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_VEC_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MAG_SQUARE_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAG_SQUARE_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_SQUARE_DIST_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_SQUARE_DIST_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_DEINTERLEAVE_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DEINTERLEAVE_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_INTERLEAVE_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_INTERLEAVE_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_SUB_FLOAT_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_SUB_FLOAT_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_ADD_FLOAT_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_ADD_FLOAT_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MULT2_CONJ_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT2_CONJ_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_DEINTERLEAVE_REAL_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DEINTERLEAVE_REAL_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_CONVERT_FI_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONVERT_FI_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MAX_FUNCTION_16})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_FUNCTION_16")
ENDIF()
IF(${HAVE_VOLK_MAX_FUNCTION_32})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_FUNCTION_32")
ENDIF()
IF(${HAVE_VOLK_ACC_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_ACC_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MULT_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_CONJ_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONJ_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MULT2_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT2_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MULT_FLOAT_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_FLOAT_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MULT_REAL_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_REAL_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MAG_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAG_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_DIVIDE_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DIVIDE_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_DOTPROD_FC_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_FC_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_DOTPROD_CFC_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_CFC_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_DOTPROD_F_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_F_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_ATAN_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_ATAN_FUNCTION")
ENDIF()
ENDIF(VOLK_FOUND)

View File

@ -47,20 +47,54 @@ SRSLTE_API void srslte_vec_sub_sss_sse(short *x, short *y, short *z, uint32_t le
SRSLTE_API void srslte_vec_sub_sss_avx2(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sum_fff_sse(float *x, float *y, float *z, uint32_t len);
SRSLTE_API void srslte_vec_sum_fff_avx(float *x, float *y, float *z, uint32_t len);
SRSLTE_API void srslte_vec_sub_fff_sse(float *x, float *y, float *z, uint32_t len);
SRSLTE_API void srslte_vec_sub_fff_avx(float *x, float *y, float *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_prod_fff_sse(float *x, float h, float *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_prod_ccc_sse(cf_t *x, cf_t h, cf_t *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_ccc_sse(cf_t *x,cf_t *y, cf_t *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_sss_avx2(short *x, short *y, short *z, uint32_t len);
SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_sse(cf_t *x, cf_t *y, uint32_t len);
SRSLTE_API void srslte_vec_sc_div2_sss_sse(short *x, int n_rightshift, short *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_conj_ccc_sse(cf_t *x,cf_t *y, cf_t *z, uint32_t len);
SRSLTE_API cf_t srslte_vec_dot_prod_ccc_sse(cf_t *x, cf_t *y, uint32_t len);
SRSLTE_API void srslte_vec_sc_div2_sss_avx2(short *x, int k, short *z, uint32_t len);
SRSLTE_API void srslte_vec_abs_square_cf_sse(cf_t *x, float *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_sss_avx(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_div2_sss_sse(short *x, int n_rightshift, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_div2_sss_avx(short *x, int k, short *z, uint32_t len);
SRSLTE_API void srslte_vec_lut_sss_sse(short *x, unsigned short *lut, short *y, uint32_t len);
SRSLTE_API void srslte_vec_convert_fi_sse(float *x, int16_t *z, float scale, uint32_t len);
SRSLTE_API void srslte_vec_mult_scalar_cf_f_avx( cf_t *z,const cf_t *x,const float h,const uint32_t len);
SRSLTE_API void srslte_vec_lut_sss_sse(short *x, unsigned short *lut, short *y, uint32_t len);
SRSLTE_API void srslte_vec_convert_fi_sse(float *x, int16_t *z, float scale, uint32_t len);
SRSLTE_API void srslte_vec_sc_prod_cfc_avx(const cf_t *x,const float h,cf_t *y,const uint32_t len);
#ifdef __cplusplus
}
#endif

View File

@ -741,4 +741,3 @@ int srslte_rm_turbo_rx(float *w_buff, uint32_t w_buff_len, float *input, uint32_
return 0;
}

View File

@ -65,18 +65,19 @@ int srslte_vec_acc_ii(int *x, uint32_t len) {
return z;
}
// Used in PRACH detector, AGC and chest_dl for noise averaging
float srslte_vec_acc_ff(float *x, uint32_t len) {
#ifdef HAVE_VOLK_ACC_FUNCTION
float result;
volk_32f_accumulator_s32f(&result,x,len);
return result;
#else
int i;
float z=0;
for (i=0;i<len;i++) {
z+=x[i];
}
return z;
int i;
float z=0;
for (i=0;i<len;i++) {
z+=x[i];
}
return z;
#endif
}
@ -96,27 +97,27 @@ cf_t srslte_vec_acc_cc(cf_t *x, uint32_t len) {
}
void srslte_vec_square_dist(cf_t symbol, cf_t *points, float *distance, uint32_t npoints) {
#ifndef HAVE_VOLK_SQUARE_DIST_FUNCTION
uint32_t i;
cf_t diff;
for (i=0;i<npoints;i++) {
diff = symbol - points[i];
distance[i] = crealf(diff) * crealf(diff) + cimagf(diff) * cimagf(diff);
}
#else
volk_32fc_x2_square_dist_32f(distance,&symbol,points,npoints);
#endif
}
void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len) {
#ifndef HAVE_VOLK_SUB_FLOAT_FUNCTION
#ifndef LV_HAVE_SSE
int i;
for (i=0;i<len;i++) {
z[i] = x[i]-y[i];
}
#else
volk_32f_x2_subtract_32f(z,x,y,len);
#endif
#ifdef LV_HAVE_AVX
srslte_vec_sub_fff_avx(x, y, z, len);
#else
srslte_vec_sub_fff_sse(x, y, z, len);
#endif
#endif
}
void srslte_vec_sub_sss(short *x, short *y, short *z, uint32_t len) {
@ -134,18 +135,24 @@ void srslte_vec_sub_sss(short *x, short *y, short *z, uint32_t len) {
#endif
}
// Noise estimation in chest_dl, interpolation
void srslte_vec_sub_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len) {
return srslte_vec_sub_fff((float*) x,(float*) y,(float*) z, 2*len);
}
// Used in PSS/SSS and sum_ccc
void srslte_vec_sum_fff(float *x, float *y, float *z, uint32_t len) {
#ifndef HAVE_VOLK_ADD_FLOAT_FUNCTION
#ifndef LV_HAVE_SSE
int i;
for (i=0;i<len;i++) {
z[i] = x[i]+y[i];
}
#else
volk_32f_x2_add_32f(z,x,y,len);
#ifdef LV_HAVE_AVX
srslte_vec_sum_fff_avx(x, y, z, len);
#else
srslte_vec_sum_fff_sse(x, y, z, len);
#endif
#endif
}
@ -202,15 +209,15 @@ void srslte_vec_sc_add_sss(int16_t *x, int16_t h, int16_t *z, uint32_t len) {
z[i] = x[i]+ h;
}
}
// PSS, PBCH, DEMOD, FFTW, etc.
void srslte_vec_sc_prod_fff(float *x, float h, float *z, uint32_t len) {
#ifndef HAVE_VOLK_MULT_FLOAT_FUNCTION
#ifndef LV_HAVE_SSE
int i;
for (i=0;i<len;i++) {
z[i] = x[i]*h;
}
#else
volk_32f_s32f_multiply_32f(z,x,h,len);
srslte_vec_sc_prod_fff_sse(x, h, z, len);
#endif
}
@ -248,7 +255,8 @@ void srslte_vec_norm_cfc(cf_t *x, float amplitude, cf_t *y, uint32_t len) {
srslte_vec_sc_prod_cfc(x, amplitude/max, y, len);
}
void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len) {
// Used throughout
void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len) {
#ifdef LV_HAVE_AVX
srslte_vec_sc_prod_cfc_avx(x,h,z,len);
#else
@ -256,42 +264,36 @@ void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len) {
for (i=0;i<len;i++) {
z[i] = x[i]*h;
}
#endif
}
// Chest UL
void srslte_vec_sc_prod_ccc(cf_t *x, cf_t h, cf_t *z, uint32_t len) {
#ifndef HAVE_VOLK_MULT_FUNCTION
#ifndef LV_HAVE_SSE
int i;
for (i=0;i<len;i++) {
z[i] = x[i]*h;
}
#else
volk_32fc_s32fc_multiply_32fc(z,x,h,len);
srslte_vec_sc_prod_ccc_sse(x,h,z,len);
#endif
}
// Used in turbo decoder
void srslte_vec_convert_if(int16_t *x, float *z, float scale, uint32_t len) {
#ifndef HAVE_VOLK_CONVERT_IF_FUNCTION
int i;
for (i=0;i<len;i++) {
z[i] = ((float) x[i])/scale;
}
#else
volk_16i_s32f_convert_32f(z,x,scale,len);
#endif
}
void srslte_vec_convert_ci(int8_t *x, int16_t *z, uint32_t len) {
#ifndef HAVE_VOLK_CONVERT_CI_FUNCTION
int i;
for (i=0;i<len;i++) {
z[i] = ((int16_t) x[i]);
}
#else
volk_8i_convert_16i(z,x,len);
#endif
}
void srslte_vec_convert_fi(float *x, int16_t *z, float scale, uint32_t len) {
@ -329,7 +331,7 @@ void srslte_vec_interleave_cf(float *real, float *imag, cf_t *x, uint32_t len) {
for (i=0;i<len;i++) {
x[i] = real[i] + _Complex_I*imag[i];
}
#endif
#endif
}
void srslte_vec_deinterleave_cf(cf_t *x, float *real, float *imag, uint32_t len) {
@ -341,18 +343,14 @@ void srslte_vec_deinterleave_cf(cf_t *x, float *real, float *imag, uint32_t len)
real[i] = __real__ x[i];
imag[i] = __imag__ x[i];
}
#endif
#endif
}
void srslte_vec_deinterleave_real_cf(cf_t *x, float *real, uint32_t len) {
#ifdef HAVE_VOLK_DEINTERLEAVE_REAL_FUNCTION
volk_32fc_deinterleave_real_32f(real, x, len);
#else
int i;
for (i=0;i<len;i++) {
real[i] = __real__ x[i];
}
#endif
}
/* Note: We align memory to 32 bytes (for AVX2 compatibility)
@ -370,7 +368,7 @@ void *srslte_vec_malloc(uint32_t size) {
}
void *srslte_vec_realloc(void *ptr, uint32_t old_size, uint32_t new_size) {
#ifndef HAVE_VOLK
#ifndef LV_HAVE_SSE
return realloc(ptr, new_size);
#else
void *new_ptr;
@ -495,40 +493,31 @@ void srslte_vec_load_file(char *filename, void *buffer, uint32_t len) {
}
}
// Used in PSS
void srslte_vec_conj_cc(cf_t *x, cf_t *y, uint32_t len) {
#ifndef HAVE_VOLK_CONJ_FUNCTION
int i;
for (i=0;i<len;i++) {
y[i] = conjf(x[i]);
}
#else
volk_32fc_conjugate_32fc(y,x,len);
#endif
}
// Used in scrambling complex
void srslte_vec_prod_cfc(cf_t *x, float *y, cf_t *z, uint32_t len) {
#ifndef HAVE_VOLK_MULT_REAL_FUNCTION
int i;
for (i=0;i<len;i++) {
z[i] = x[i]*y[i];
}
#else
volk_32fc_32f_multiply_32fc(z,x,y,len);
#endif
}
// Used in scrambling float
void srslte_vec_prod_fff(float *x, float *y, float *z, uint32_t len) {
#ifndef HAVE_VOLK_MULT_REAL2_FUNCTION
int i;
for (i=0;i<len;i++) {
z[i] = x[i]*y[i];
}
#else
volk_32f_x2_multiply_32f(z,x,y,len);
#endif
}
// Scrambling Short
void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len) {
#ifdef LV_HAVE_AVX2
srslte_vec_prod_sss_avx2(x,y,z,len);
@ -544,31 +533,33 @@ void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len) {
#endif
}
// CFO and OFDM processing
void srslte_vec_prod_ccc(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
#ifndef HAVE_VOLK_MULT2_FUNCTION
#ifndef LV_HAVE_SSE
int i;
for (i=0;i<len;i++) {
z[i] = x[i]*y[i];
}
#else
volk_32fc_x2_multiply_32fc(z,x,y,len);
srslte_vec_prod_ccc_sse(x,y,z,len);
#endif
}
// PRACH, CHEST UL, etc.
void srslte_vec_prod_conj_ccc(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
#ifndef HAVE_VOLK_MULT2_CONJ_FUNCTION
#ifndef LV_HAVE_SSE
int i;
for (i=0;i<len;i++) {
z[i] = x[i]*conjf(y[i]);
}
#else
volk_32fc_x2_multiply_conjugate_32fc(z,x,y,len);
srslte_vec_prod_conj_ccc_sse(x,y,z,len);
#endif
}
#define DIV_USE_VEC
//#define DIV_USE_VEC
// Used in SSS
/* Complex division is conjugate multiplication + real division */
void srslte_vec_div_ccc(cf_t *x, cf_t *y, float *y_mod, cf_t *z, float *z_real, float *z_imag, uint32_t len) {
#ifdef DIV_USE_VEC
@ -599,75 +590,59 @@ void srslte_vec_div_cfc(cf_t *x, float *y, cf_t *z, float *z_real, float *z_imag
}
void srslte_vec_div_fff(float *x, float *y, float *z, uint32_t len) {
#ifdef HAVE_VOLK_DIVIDE_FUNCTION
volk_32f_x2_divide_32f(z, x, y, len);
#else
int i;
for (i=0;i<len;i++) {
z[i] = x[i] / y[i];
}
#endif
}
// PSS. convolution
cf_t srslte_vec_dot_prod_ccc(cf_t *x, cf_t *y, uint32_t len) {
#ifdef HAVE_VOLK_DOTPROD_FC_FUNCTION
cf_t res;
volk_32fc_x2_dot_prod_32fc(&res, x, y, len);
return res;
#else
#ifndef LV_HAVE_SSE
uint32_t i;
cf_t res = 0;
for (i=0;i<len;i++) {
res += x[i]*y[i];
}
return res;
#else
return srslte_vec_dot_prod_ccc_sse(x, y, len);
#endif
}
// Convolution filter and in SSS search
cf_t srslte_vec_dot_prod_cfc(cf_t *x, float *y, uint32_t len) {
#ifdef HAVE_VOLK_DOTPROD_CFC_FUNCTION
cf_t res;
volk_32fc_32f_dot_prod_32fc(&res, x, y, len);
return res;
#else
uint32_t i;
cf_t res = 0;
for (i=0;i<len;i++) {
res += x[i]*y[i];
}
return res;
#endif
}
// SYNC
cf_t srslte_vec_dot_prod_conj_ccc(cf_t *x, cf_t *y, uint32_t len) {
#ifdef HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION
cf_t res;
volk_32fc_x2_conjugate_dot_prod_32fc(&res, x, y, len);
return res;
#else
#ifndef LV_HAVE_SSE
uint32_t i;
cf_t res = 0;
for (i=0;i<len;i++) {
res += x[i]*conjf(y[i]);
}
return res;
#else
return srslte_vec_dot_prod_conj_ccc_sse(x, y, len);
#endif
}
// PHICH
float srslte_vec_dot_prod_fff(float *x, float *y, uint32_t len) {
#ifdef HAVE_VOLK_DOTPROD_F_FUNCTION
float res;
volk_32f_x2_dot_prod_32f(&res, x, y, len);
return res;
#else
uint32_t i;
float res = 0;
for (i=0;i<len;i++) {
res += x[i]*y[i];
}
return res;
#endif
}
int32_t srslte_vec_dot_prod_sss(int16_t *x, int16_t *y, uint32_t len) {
@ -700,39 +675,32 @@ float srslte_vec_corr_ccc(cf_t *x, cf_t *y, uint32_t len) {
return cov/(sqrt(s_x*s_y));
}
// PSS (disabled and using abs_square )
void srslte_vec_abs_cf(cf_t *x, float *abs, uint32_t len) {
#ifndef HAVE_VOLK_MAG_FUNCTION
int i;
for (i=0;i<len;i++) {
abs[i] = cabsf(x[i]);
}
#else
volk_32fc_magnitude_32f(abs,x,len);
#endif
}
// PRACH
void srslte_vec_abs_square_cf(cf_t *x, float *abs_square, uint32_t len) {
#ifndef HAVE_VOLK_MAG_SQUARE_FUNCTION
#ifndef LV_HAVE_SSE
int i;
for (i=0;i<len;i++) {
abs_square[i] = crealf(x[i])*crealf(x[i])+cimagf(x[i])*cimagf(x[i]);
}
#else
volk_32fc_magnitude_squared_32f(abs_square,x,len);
srslte_vec_abs_square_cf_sse(x,abs_square,len);
#endif
}
void srslte_vec_arg_cf(cf_t *x, float *arg, uint32_t len) {
#ifndef HAVE_VOLK_ATAN_FUNCTION
int i;
for (i=0;i<len;i++) {
arg[i] = cargf(x[i]);
}
#else
volk_32fc_s32f_atan2_32f(arg,x,1,len);
#endif
}
uint32_t srslte_vec_max_fi(float *x, uint32_t len) {
@ -763,12 +731,6 @@ uint32_t srslte_vec_max_fi(float *x, uint32_t len) {
}
int16_t srslte_vec_max_star_si(int16_t *x, uint32_t len) {
#ifdef HAVE_VOLK_MAX_STAR_S_FUNCTION
int16_t target=0;
volk_16i_max_star_16i(&target,x,len);
return target;
#else
uint32_t i;
int16_t m=-INT16_MIN;
for (i=0;i<len;i++) {
@ -777,7 +739,6 @@ int16_t srslte_vec_max_star_si(int16_t *x, uint32_t len) {
}
}
return m;
#endif
}
int16_t srslte_vec_max_abs_star_si(int16_t *x, uint32_t len) {
@ -792,9 +753,6 @@ int16_t srslte_vec_max_abs_star_si(int16_t *x, uint32_t len) {
}
void srslte_vec_max_fff(float *x, float *y, float *z, uint32_t len) {
#ifdef HAVE_VOLK_MAX_VEC_FUNCTION
volk_32f_x2_max_32f(z,x,y,len);
#else
uint32_t i;
for (i=0;i<len;i++) {
if (x[i] > y[i]) {
@ -803,10 +761,10 @@ void srslte_vec_max_fff(float *x, float *y, float *z, uint32_t len) {
z[i] = y[i];
}
}
#endif
}
// CP autocorr
uint32_t srslte_vec_max_abs_ci(cf_t *x, uint32_t len) {
#ifdef HAVE_VOLK_MAX_ABS_FUNCTION_32
uint32_t target=0;

View File

@ -211,7 +211,7 @@ void srslte_vec_sub_sss_sse(short *x, short *y, short *z, uint32_t len)
zVal = _mm_sub_epi16(xVal, yVal);
_mm_store_si128(zPtr, zVal);
_mm_store_si128(zPtr, zVal);
xPtr ++;
yPtr ++;
@ -278,7 +278,7 @@ void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len)
zVal = _mm_mullo_epi16(xVal, yVal);
_mm_store_si128(zPtr, zVal);
_mm_store_si128(zPtr, zVal);
xPtr ++;
yPtr ++;
@ -345,7 +345,7 @@ void srslte_vec_sc_div2_sss_sse(short *x, int k, short *z, uint32_t len)
zVal = _mm_srai_epi16(xVal, k);
_mm_store_si128(zPtr, zVal);
_mm_store_si128(zPtr, zVal);
xPtr ++;
zPtr ++;
@ -468,6 +468,416 @@ void srslte_vec_convert_fi_sse(float *x, int16_t *z, float scale, uint32_t len)
#endif
}
// for enb no-volk
void srslte_vec_sum_fff_sse(float *x, float *y, float *z, uint32_t len) {
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int points = len / 4;
const float* xPtr = (const float*) x;
const float* yPtr = (const float*) y;
float* zPtr = (float*) z;
__m128 xVal, yVal, zVal;
for(;number < points; number++){
xVal = _mm_loadu_ps(xPtr);
yVal = _mm_loadu_ps(yPtr);
zVal = _mm_add_ps(xVal, yVal);
_mm_storeu_ps(zPtr, zVal);
xPtr += 4;
yPtr += 4;
zPtr += 4;
}
number = points * 4;
for(;number < len; number++){
z[number] = x[number] + y[number];
}
#endif
}
void srslte_vec_sum_fff_avx(float *x, float *y, float *z, uint32_t len) {
#ifdef LV_HAVE_AVX
unsigned int number = 0;
const unsigned int points = len / 8;
const float* xPtr = (const float*) x;
const float* yPtr = (const float*) y;
float* zPtr = (float*) z;
__m256 xVal, yVal, zVal;
for(;number < points; number++){
xVal = _mm256_loadu_ps(xPtr);
yVal = _mm256_loadu_ps(yPtr);
zVal = _mm256_add_ps(xVal, yVal);
_mm256_storeu_ps(zPtr, zVal);
xPtr += 8;
yPtr += 8;
zPtr += 8;
}
for(number = points * 8;number < len; number++){
z[number] = x[number] + y[number];
}
#endif
}
void srslte_vec_sub_fff_sse(float *x, float *y, float *z, uint32_t len) {
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int points = len / 4;
const float* xPtr = (const float*) x;
const float* yPtr = (const float*) y;
float* zPtr = (float*) z;
__m128 xVal, yVal, zVal;
for(;number < points; number++){
xVal = _mm_loadu_ps(xPtr);
yVal = _mm_loadu_ps(yPtr);
zVal = _mm_sub_ps(xVal, yVal);
_mm_storeu_ps(zPtr, zVal);
xPtr += 4;
yPtr += 4;
zPtr += 4;
}
for(number = points * 4;number < len; number++){
z[number] = x[number] - y[number];
}
#endif
}
void srslte_vec_sub_fff_avx(float *x, float *y, float *z, uint32_t len) {
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int points = len / 8;
const float* xPtr = (const float*) x;
const float* yPtr = (const float*) y;
float* zPtr = (float*) z;
__m256 xVal, yVal, zVal;
for(;number < points; number++){
xVal = _mm256_loadu_ps(xPtr);
yVal = _mm256_loadu_ps(yPtr);
zVal = _mm256_sub_ps(xVal, yVal);
_mm256_storeu_ps(zPtr, zVal);
xPtr += 8;
yPtr += 8;
zPtr += 8;
}
for(number = points * 8;number < len; number++){
z[number] = x[number] - y[number];
}
#endif
}
#ifdef LV_HAVE_SSE
static inline __m128 _mm_complexmul_ps(__m128 x, __m128 y) {
__m128 yl, yh, tmp1, tmp2;
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(x, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
x = _mm_shuffle_ps(x, x, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(x, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
return _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
}
#endif
#ifdef LV_HAVE_SSE
static inline __m128 _mm_complexmulconj_ps(__m128 x, __m128 y) {
const __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
y = _mm_xor_ps(y, conjugator);
return _mm_complexmul_ps(x, y);
}
#endif
cf_t srslte_vec_dot_prod_ccc_sse(cf_t *x, cf_t *y, uint32_t len)
{
cf_t result = 0;
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int points = len / 2;
const float* xPtr = (const float*) x;
const float* yPtr = (const float*) y;
__m128 dotProdVal = _mm_setzero_ps();
__m128 xVal, yVal, zVal;
for(;number < points; number++){
xVal = _mm_loadu_ps(xPtr);
yVal = _mm_loadu_ps(yPtr);
zVal = _mm_complexmul_ps(xVal, yVal);
dotProdVal = _mm_add_ps(dotProdVal, zVal);
xPtr += 4;
yPtr += 4;
}
cf_t dotProdVector[2];
_mm_storeu_ps((float*) dotProdVector, dotProdVal);
for (int i=0;i<2;i++) {
result += dotProdVector[i];
}
number = points * 2;
for(;number < len; number++){
result += (x[number] * y[number]);
}
#endif
return result;
}
cf_t srslte_vec_dot_prod_conj_ccc_sse(cf_t *x, cf_t *y, uint32_t len)
{
cf_t result = 0;
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int points = len / 2;
const float* xPtr = (const float*) x;
const float* yPtr = (const float*) y;
__m128 dotProdVal = _mm_setzero_ps();
__m128 xVal, yVal, zVal;
for(;number < points; number++){
xVal = _mm_loadu_ps(xPtr);
yVal = _mm_loadu_ps(yPtr);
zVal = _mm_complexmulconj_ps(xVal, yVal);
dotProdVal = _mm_add_ps(dotProdVal, zVal);
xPtr += 4;
yPtr += 4;
}
cf_t dotProdVector[2];
_mm_storeu_ps((float*) dotProdVector, dotProdVal);
for (int i=0;i<2;i++) {
result += dotProdVector[i];
}
number = points * 2;
for(;number < len; number++){
result += (x[number] * y[number]);
}
#endif
return result;
}
void srslte_vec_prod_ccc_sse(cf_t *x,cf_t *y, cf_t *z, uint32_t len)
{
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int halfPoints = len / 2;
__m128 xVal, yVal, zVal;
float* zPtr = (float*) z;
const float* xPtr = (const float*) x;
const float* yPtr = (const float*) y;
for(; number < halfPoints; number++){
xVal = _mm_loadu_ps(xPtr);
yVal = _mm_loadu_ps(yPtr);
zVal = _mm_complexmul_ps(xVal, yVal);
_mm_storeu_ps(zPtr, zVal);
xPtr += 4;
yPtr += 4;
zPtr += 4;
}
number = halfPoints * 2;
for(;number < len; number++){
z[number] = x[number] * y[number];
}
#endif
}
void srslte_vec_prod_conj_ccc_sse(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int halfPoints = len / 2;
__m128 xVal, yVal, zVal;
float* zPtr = (float*) z;
const float* xPtr = (const float*) x;
const float* yPtr = (const float*) y;
for(; number < halfPoints; number++){
xVal = _mm_loadu_ps(xPtr);
yVal = _mm_loadu_ps(yPtr);
zVal = _mm_complexmulconj_ps(xVal, yVal);
_mm_storeu_ps(zPtr, zVal);
xPtr += 4;
yPtr += 4;
zPtr += 4;
}
number = halfPoints * 2;
for(;number < len; number++){
z[number] = x[number] * conjf(y[number]);
}
#endif
}
void srslte_vec_sc_prod_ccc_sse(cf_t *x, cf_t h, cf_t *z, uint32_t len) {
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int halfPoints = len / 2;
__m128 xVal, yl, yh, zVal, tmp1, tmp2;
float* zPtr = (float*) z;
const float* xPtr = (const float*) x;
// Set up constant scalar vector
yl = _mm_set_ps1(creal(h));
yh = _mm_set_ps1(cimag(h));
for(;number < halfPoints; number++){
xVal = _mm_loadu_ps(xPtr);
tmp1 = _mm_mul_ps(xVal,yl);
xVal = _mm_shuffle_ps(xVal,xVal,0xB1);
tmp2 = _mm_mul_ps(xVal,yh);
zVal = _mm_addsub_ps(tmp1,tmp2);
_mm_storeu_ps(zPtr,zVal);
xPtr += 4;
zPtr += 4;
}
number = halfPoints * 2;
for(;number < len; number++){
z[number] = x[number] * h;
}
#endif
}
void srslte_vec_sc_prod_cfc_sse(cf_t *x, float h, cf_t *z, uint32_t len) {
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int halfPoints = len / 2;
__m128 xVal, hVal, zVal;
float* zPtr = (float*) z;
const float* xPtr = (const float*) x;
// Set up constant scalar vector
hVal = _mm_set_ps1(h);
for(;number < halfPoints; number++){
xVal = _mm_loadu_ps(xPtr);
zVal = _mm_mul_ps(xVal,hVal);
_mm_storeu_ps(zPtr,zVal);
xPtr += 4;
zPtr += 4;
}
number = halfPoints * 2;
for(;number < len; number++){
z[number] = x[number] * h;
}
#endif
}
void srslte_vec_sc_prod_fff_sse(float *x, float h, float *z, uint32_t len) {
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int quarterPoints = len / 4;
__m128 xVal, hVal, zVal;
float* zPtr = (float*) z;
const float* xPtr = (const float*) x;
// Set up constant scalar vector
hVal = _mm_set_ps1(h);
for(;number < quarterPoints; number++){
xVal = _mm_loadu_ps(xPtr);
zVal = _mm_mul_ps(xVal,hVal);
_mm_storeu_ps(zPtr,zVal);
xPtr += 4;
zPtr += 4;
}
number = quarterPoints * 4;
for(;number < len; number++){
z[number] = x[number] * h;
}
#endif
}
void srslte_vec_abs_square_cf_sse(cf_t *x, float *z, uint32_t len) {
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int quarterPoints = len / 4;
const float* xPtr = (const float*) x;
float* zPtr = z;
__m128 xVal1, xVal2, zVal;
for(; number < quarterPoints; number++){
xVal1 = _mm_loadu_ps(xPtr);
xPtr += 4;
xVal2 = _mm_loadu_ps(xPtr);
xPtr += 4;
xVal1 = _mm_mul_ps(xVal1, xVal1);
xVal2 = _mm_mul_ps(xVal2, xVal2);
zVal = _mm_hadd_ps(xVal1, xVal2);
_mm_storeu_ps(zPtr, zVal);
zPtr += 4;
}
number = quarterPoints * 4;
for(;number < len; number++){
z[number] = creal(x[number]) * creal(x[number]) + cimag(x[number])*cimag(x[number]);
}
#endif
}
//srslte_32fc_s32f_multiply_32fc_avx
void srslte_vec_sc_prod_cfc_avx( const cf_t *x,const float h,cf_t *z,const uint32_t len)
{