removed volk dependency. Checked and working

This commit is contained in:
Ismael Gomez 2017-01-17 11:31:03 +01:00
parent fe867af319
commit 86750b2db7
6 changed files with 222 additions and 305 deletions

View File

@ -1,144 +0,0 @@
INCLUDE(FindPkgConfig)
PKG_CHECK_MODULES(PC_VOLK volk QUIET)
FIND_PATH(
VOLK_INCLUDE_DIRS
NAMES volk/volk.h
HINTS $ENV{VOLK_DIR}/include
${CMAKE_INSTALL_PREFIX}/include
${PC_VOLK_INCLUDE_DIR}
PATHS /usr/local/include
/usr/include
)
FIND_LIBRARY(
VOLK_LIBRARIES
NAMES volk
HINTS $ENV{VOLK_DIR}/lib
${CMAKE_INSTALL_PREFIX}/lib
${CMAKE_INSTALL_PREFIX}/lib64
${PC_VOLK_LIBDIR}
PATHS /usr/local/lib
/usr/local/lib64
/usr/lib
/usr/lib64
)
INCLUDE(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(VOLK DEFAULT_MSG VOLK_LIBRARIES VOLK_INCLUDE_DIRS)
MARK_AS_ADVANCED(VOLK_LIBRARIES VOLK_INCLUDE_DIRS VOLK_DEFINITIONS)
IF(VOLK_FOUND)
SET(CMAKE_REQUIRED_LIBRARIES ${VOLK_LIBRARIES} m)
CHECK_FUNCTION_EXISTS_MATH(volk_16i_s32f_convert_32f HAVE_VOLK_CONVERT_IF_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_index_max_16u HAVE_VOLK_MAX_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_max_32f HAVE_VOLK_MAX_VEC_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_accumulator_s32f HAVE_VOLK_ACC_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_s32fc_multiply_32fc HAVE_VOLK_MULT_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_conjugate_32fc HAVE_VOLK_CONJ_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_multiply_32fc HAVE_VOLK_MULT2_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_multiply_conjugate_32fc HAVE_VOLK_MULT2_CONJ_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_32f_multiply_32fc HAVE_VOLK_MULT_REAL_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_s32f_multiply_32f HAVE_VOLK_MULT_FLOAT_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_magnitude_32f HAVE_VOLK_MAG_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_magnitude_squared_32f HAVE_VOLK_MAG_SQUARE_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_divide_32f HAVE_VOLK_DIVIDE_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_dot_prod_32fc HAVE_VOLK_DOTPROD_FC_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_32f_dot_prod_32fc HAVE_VOLK_DOTPROD_CFC_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_conjugate_dot_prod_32fc HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_dot_prod_32f HAVE_VOLK_DOTPROD_F_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_s32f_atan2_32f HAVE_VOLK_ATAN_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_s32f_convert_16i HAVE_VOLK_CONVERT_FI_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_deinterleave_32f_x2 HAVE_VOLK_DEINTERLEAVE_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_interleave_32fc HAVE_VOLK_INTERLEAVE_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_subtract_32f HAVE_VOLK_SUB_FLOAT_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_add_32f HAVE_VOLK_ADD_FLOAT_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_square_dist_32f HAVE_VOLK_SQUARE_DIST_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_deinterleave_real_32f HAVE_VOLK_DEINTERLEAVE_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_index_max_16u HAVE_VOLK_MAX_ABS_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_multiply_32f HAVE_VOLK_MULT_REAL2_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_16i_max_star_16i HAVE_VOLK_MAX_STAR_S_FUNCTION)
CHECK_FUNCTION_EXISTS_MATH(volk_8i_convert_16i HAVE_VOLK_CONVERT_CI_FUNCTION)
SET(VOLK_DEFINITIONS "HAVE_VOLK")
IF(${HAVE_VOLK_CONVERT_CI_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONVERT_CI_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MAX_STAR_S_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_STAR_S_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MAX_ABS_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_ABS_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MAX_VEC_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_VEC_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MAG_SQUARE_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAG_SQUARE_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_SQUARE_DIST_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_SQUARE_DIST_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_DEINTERLEAVE_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DEINTERLEAVE_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_INTERLEAVE_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_INTERLEAVE_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_SUB_FLOAT_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_SUB_FLOAT_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_ADD_FLOAT_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_ADD_FLOAT_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MULT2_CONJ_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT2_CONJ_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_DEINTERLEAVE_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DEINTERLEAVE_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_CONVERT_FI_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONVERT_FI_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MAX_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAX_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_ACC_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_ACC_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MULT_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_CONJ_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_CONJ_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MULT2_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT2_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MULT_FLOAT_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_FLOAT_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MULT_REAL_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MULT_REAL_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_MAG_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_MAG_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_DIVIDE_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DIVIDE_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_DOTPROD_FC_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_FC_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_DOTPROD_F_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_DOTPROD_F_FUNCTION")
ENDIF()
IF(${HAVE_VOLK_ATAN_FUNCTION})
SET(VOLK_DEFINITIONS "${VOLK_DEFINITIONS}; HAVE_VOLK_ATAN_FUNCTION")
ENDIF()
ENDIF(VOLK_FOUND)

View File

@ -74,25 +74,6 @@ else(BLADERF_FOUND OR UHD_FOUND)
add_definitions(-DDISABLE_RF)
endif(BLADERF_FOUND OR UHD_FOUND)
include(CheckFunctionExistsMath)
if(${DISABLE_VOLK})
if(${DISABLE_VOLK} EQUAL 0)
find_package(Volk)
else(${DISABLE_VOLK} EQUAL 0)
message(STATUS "VOLK library disabled (DISABLE_VOLK=1)")
endif(${DISABLE_VOLK} EQUAL 0)
else(${DISABLE_VOLK})
find_package(Volk)
endif(${DISABLE_VOLK})
if(VOLK_FOUND)
include_directories(${VOLK_INCLUDE_DIRS})
link_directories(${VOLK_LIBRARY_DIRS})
message(STATUS " Compiling with VOLK SIMD library.")
else(VOLK_FOUND)
message(STATUS " VOLK SIMD library NOT found. Using generic implementation.")
endif(VOLK_FOUND)
########################################################################
# Add subdirectories
########################################################################

View File

@ -49,6 +49,26 @@ SRSLTE_API void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y,
SRSLTE_API void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len);
SRSLTE_API void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len);
SRSLTE_API void srslte_vec_abs_square_cf_simd(cf_t *x, float *abs_square, uint32_t len);
SRSLTE_API cf_t srslte_vec_dot_prod_ccc_simd(cf_t *x, cf_t *y, uint32_t len);
SRSLTE_API void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len);
SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, uint32_t len);
SRSLTE_API void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, uint32_t len);
SRSLTE_API float srslte_vec_acc_ff_simd(float *x, uint32_t len);
SRSLTE_API cf_t srslte_vec_dot_prod_cfc_simd(cf_t *x, float *y, uint32_t len);
SRSLTE_API void srslte_vec_convert_if_simd(int16_t *x, float *z, float scale, uint32_t len);
#ifdef __cplusplus
}
#endif

View File

@ -90,13 +90,6 @@ if(RF_FOUND)
endif(BLADERF_FOUND)
endif(RF_FOUND)
if(VOLK_FOUND)
target_link_libraries(srslte ${VOLK_LIBRARIES})
if(NOT DisableMEX)
target_link_libraries(srslte_static ${VOLK_LIBRARIES})
endif(NOT DisableMEX)
endif(VOLK_FOUND)
INSTALL(TARGETS srslte DESTINATION ${LIBRARY_DIR})
SRSLTE_SET_PIC(srslte)

View File

@ -35,10 +35,6 @@
#include "srslte/utils/vector_simd.h"
#include "srslte/utils/bit.h"
#ifdef HAVE_VOLK
#include "volk/volk.h"
#endif
int srslte_vec_acc_ii(int *x, uint32_t len) {
int i;
int z=0;
@ -48,19 +44,14 @@ int srslte_vec_acc_ii(int *x, uint32_t len) {
return z;
}
// Used in PRACH detector
float srslte_vec_acc_ff(float *x, uint32_t len) {
#ifdef HAVE_VOLK_ACC_FUNCTION
float result;
volk_32f_accumulator_s32f(&result,x,len);
return result;
#else
int i;
float z=0;
for (i=0;i<len;i++) {
z+=x[i];
}
return z;
#endif
}
void srslte_vec_ema_filter(cf_t *new_data, cf_t *average, cf_t *output, float coeff, uint32_t len) {
@ -79,27 +70,19 @@ cf_t srslte_vec_acc_cc(cf_t *x, uint32_t len) {
}
void srslte_vec_square_dist(cf_t symbol, cf_t *points, float *distance, uint32_t npoints) {
#ifndef HAVE_VOLK_SQUARE_DIST_FUNCTION
uint32_t i;
cf_t diff;
for (i=0;i<npoints;i++) {
diff = symbol - points[i];
distance[i] = crealf(diff) * crealf(diff) + cimagf(diff) * cimagf(diff);
}
#else
volk_32fc_x2_square_dist_32f(distance,&symbol,points,npoints);
#endif
}
void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len) {
#ifndef HAVE_VOLK_SUB_FLOAT_FUNCTION
int i;
for (i=0;i<len;i++) {
z[i] = x[i]-y[i];
}
#else
volk_32f_x2_subtract_32f(z,x,y,len);
#endif
}
void srslte_vec_sub_sss(short *x, short *y, short *z, uint32_t len) {
@ -117,14 +100,15 @@ void srslte_vec_sub_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len) {
return srslte_vec_sub_fff((float*) x,(float*) y,(float*) z, 2*len);
}
// Used in PSS/SSS and sum_ccc
void srslte_vec_sum_fff(float *x, float *y, float *z, uint32_t len) {
#ifndef HAVE_VOLK_ADD_FLOAT_FUNCTION
#ifndef LV_HAVE_SSE
int i;
for (i=0;i<len;i++) {
z[i] = x[i]+y[i];
}
#else
volk_32f_x2_add_32f(z,x,y,len);
srslte_vec_sum_fff_simd(x, y, z, len);
#endif
}
@ -179,14 +163,10 @@ void srslte_vec_sc_add_sss(int16_t *x, int16_t h, int16_t *z, uint32_t len) {
}
void srslte_vec_sc_prod_fff(float *x, float h, float *z, uint32_t len) {
#ifndef HAVE_VOLK_MULT_FLOAT_FUNCTION
int i;
for (i=0;i<len;i++) {
z[i] = x[i]*h;
}
#else
volk_32f_s32f_multiply_32f(z,x,h,len);
#endif
}
void srslte_vec_sc_prod_sfs(short *x, float h, short *z, uint32_t len) {
@ -219,8 +199,9 @@ void srslte_vec_norm_cfc(cf_t *x, float amplitude, cf_t *y, uint32_t len) {
srslte_vec_sc_prod_cfc(x, amplitude/max, y, len);
}
// Used throughout
void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len) {
#ifndef HAVE_VOLK_MULT_FUNCTION
#ifndef LV_HAVE_SSE
int i;
for (i=0;i<len;i++) {
z[i] = x[i]*h;
@ -229,42 +210,36 @@ void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len) {
cf_t hh;
__real__ hh = h;
__imag__ hh = 0;
volk_32fc_s32fc_multiply_32fc(z,x,hh,len);
srslte_vec_sc_prod_ccc_simd(x,hh,z,len);
#endif
}
// Chest UL
void srslte_vec_sc_prod_ccc(cf_t *x, cf_t h, cf_t *z, uint32_t len) {
#ifndef HAVE_VOLK_MULT_FUNCTION
#ifndef LV_HAVE_SSE
int i;
for (i=0;i<len;i++) {
z[i] = x[i]*h;
}
#else
volk_32fc_s32fc_multiply_32fc(z,x,h,len);
srslte_vec_sc_prod_ccc_simd(x,h,z,len);
#endif
}
// Used in turbo decoder
void srslte_vec_convert_if(int16_t *x, float *z, float scale, uint32_t len) {
#ifndef HAVE_VOLK_CONVERT_IF_FUNCTION
int i;
for (i=0;i<len;i++) {
z[i] = ((float) x[i])/scale;
}
#else
volk_16i_s32f_convert_32f(z,x,scale,len);
#endif
}
void srslte_vec_convert_ci(int8_t *x, int16_t *z, uint32_t len) {
#ifndef HAVE_VOLK_CONVERT_CI_FUNCTION
int i;
for (i=0;i<len;i++) {
z[i] = ((int16_t) x[i]);
}
#else
volk_8i_convert_16i(z,x,len);
#endif
}
void srslte_vec_convert_fi(float *x, int16_t *z, float scale, uint32_t len) {
@ -295,37 +270,25 @@ void srslte_vec_lut_sss(short *x, unsigned short *lut, short *y, uint32_t len) {
}
void srslte_vec_interleave_cf(float *real, float *imag, cf_t *x, uint32_t len) {
#ifdef HAVE_VOLK_INTERLEAVE_FUNCTION
volk_32f_x2_interleave_32fc(x, real, imag, len);
#else
int i;
for (i=0;i<len;i++) {
x[i] = real[i] + _Complex_I*imag[i];
}
#endif
}
void srslte_vec_deinterleave_cf(cf_t *x, float *real, float *imag, uint32_t len) {
#ifdef HAVE_VOLK_DEINTERLEAVE_FUNCTION
volk_32fc_deinterleave_32f_x2(real, imag, x, len);
#else
int i;
for (i=0;i<len;i++) {
real[i] = __real__ x[i];
imag[i] = __imag__ x[i];
}
#endif
}
void srslte_vec_deinterleave_real_cf(cf_t *x, float *real, uint32_t len) {
#ifdef HAVE_VOLK_DEINTERLEAVE_REAL_FUNCTION
volk_32fc_deinterleave_real_32f(real, x, len);
#else
int i;
for (i=0;i<len;i++) {
real[i] = __real__ x[i];
}
#endif
}
/* Note: We align memory to 32 bytes (for AVX compatibility)
@ -335,7 +298,7 @@ void srslte_vec_deinterleave_real_cf(cf_t *x, float *real, uint32_t len) {
*/
void *srslte_vec_malloc(uint32_t size) {
void *ptr;
if (posix_memalign(&ptr,32,size)) {
if (posix_memalign(&ptr,64,size)) {
return NULL;
} else {
return ptr;
@ -343,11 +306,11 @@ void *srslte_vec_malloc(uint32_t size) {
}
void *srslte_vec_realloc(void *ptr, uint32_t old_size, uint32_t new_size) {
#ifndef HAVE_VOLK
#ifndef LV_HAVE_SSE
return realloc(ptr, new_size);
#else
void *new_ptr;
if (posix_memalign(&new_ptr,volk_get_alignment(),new_size)) {
if (posix_memalign(&new_ptr,64,new_size)) {
return NULL;
} else {
memcpy(new_ptr, ptr, old_size);
@ -468,40 +431,31 @@ void srslte_vec_load_file(char *filename, void *buffer, uint32_t len) {
}
}
// Used in PSS
void srslte_vec_conj_cc(cf_t *x, cf_t *y, uint32_t len) {
#ifndef HAVE_VOLK_CONJ_FUNCTION
int i;
for (i=0;i<len;i++) {
y[i] = conjf(x[i]);
}
#else
volk_32fc_conjugate_32fc(y,x,len);
#endif
}
// Used in scrambling complex
void srslte_vec_prod_cfc(cf_t *x, float *y, cf_t *z, uint32_t len) {
#ifndef HAVE_VOLK_MULT_REAL_FUNCTION
int i;
for (i=0;i<len;i++) {
z[i] = x[i]*y[i];
}
#else
volk_32fc_32f_multiply_32fc(z,x,y,len);
#endif
}
// Used in scrambling float
void srslte_vec_prod_fff(float *x, float *y, float *z, uint32_t len) {
#ifndef HAVE_VOLK_MULT_REAL2_FUNCTION
int i;
for (i=0;i<len;i++) {
z[i] = x[i]*y[i];
}
#else
volk_32f_x2_multiply_32f(z,x,y,len);
#endif
}
// Scrambling Short
void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len) {
#ifndef LV_HAVE_SSE
int i;
@ -513,26 +467,27 @@ void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len) {
#endif
}
// CFO and OFDM processing
void srslte_vec_prod_ccc(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
#ifndef HAVE_VOLK_MULT2_FUNCTION
#ifndef LV_HAVE_SSE
int i;
for (i=0;i<len;i++) {
z[i] = x[i]*y[i];
}
#else
volk_32fc_x2_multiply_32fc(z,x,y,len);
srslte_vec_prod_ccc_simd(x,y,z,len);
#endif
}
// PRACH, CHEST UL, etc.
void srslte_vec_prod_conj_ccc(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
#ifndef HAVE_VOLK_MULT2_CONJ_FUNCTION
#ifndef LV_HAVE_SSE
int i;
for (i=0;i<len;i++) {
z[i] = x[i]*conjf(y[i]);
}
#else
volk_32fc_x2_multiply_conjugate_32fc(z,x,y,len);
srslte_vec_prod_conj_ccc_simd(x,y,z,len);
#endif
}
@ -568,75 +523,48 @@ void srslte_vec_div_cfc(cf_t *x, float *y, cf_t *z, float *z_real, float *z_imag
}
void srslte_vec_div_fff(float *x, float *y, float *z, uint32_t len) {
#ifdef HAVE_VOLK_DIVIDE_FUNCTION
volk_32f_x2_divide_32f(z, x, y, len);
#else
int i;
for (i=0;i<len;i++) {
z[i] = x[i] / y[i];
}
#endif
}
cf_t srslte_vec_dot_prod_ccc(cf_t *x, cf_t *y, uint32_t len) {
#ifdef HAVE_VOLK_DOTPROD_FC_FUNCTION
cf_t res;
volk_32fc_x2_dot_prod_32fc(&res, x, y, len);
return res;
#else
uint32_t i;
cf_t res = 0;
for (i=0;i<len;i++) {
res += x[i]*y[i];
}
return res;
#endif
}
// Convolution filter
cf_t srslte_vec_dot_prod_cfc(cf_t *x, float *y, uint32_t len) {
#ifdef HAVE_VOLK_DOTPROD_CFC_FUNCTION
cf_t res;
volk_32fc_32f_dot_prod_32fc(&res, x, y, len);
return res;
#else
uint32_t i;
cf_t res = 0;
for (i=0;i<len;i++) {
res += x[i]*y[i];
}
return res;
#endif
}
cf_t srslte_vec_dot_prod_conj_ccc(cf_t *x, cf_t *y, uint32_t len) {
#ifdef HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION
cf_t res;
volk_32fc_x2_conjugate_dot_prod_32fc(&res, x, y, len);
return res;
#else
uint32_t i;
cf_t res = 0;
for (i=0;i<len;i++) {
res += x[i]*conjf(y[i]);
}
return res;
#endif
}
// PHICH
float srslte_vec_dot_prod_fff(float *x, float *y, uint32_t len) {
#ifdef HAVE_VOLK_DOTPROD_F_FUNCTION
float res;
volk_32f_x2_dot_prod_32f(&res, x, y, len);
return res;
#else
uint32_t i;
float res = 0;
for (i=0;i<len;i++) {
res += x[i]*y[i];
}
return res;
#endif
}
int32_t srslte_vec_dot_prod_sss(int16_t *x, int16_t *y, uint32_t len) {
@ -656,48 +584,35 @@ float srslte_vec_avg_power_cf(cf_t *x, uint32_t len) {
return crealf(srslte_vec_dot_prod_conj_ccc(x,x,len)) / len;
}
// PSS
void srslte_vec_abs_cf(cf_t *x, float *abs, uint32_t len) {
#ifndef HAVE_VOLK_MAG_FUNCTION
int i;
for (i=0;i<len;i++) {
abs[i] = cabsf(x[i]);
}
#else
volk_32fc_magnitude_32f(abs,x,len);
#endif
}
// PRACH
void srslte_vec_abs_square_cf(cf_t *x, float *abs_square, uint32_t len) {
#ifndef HAVE_VOLK_MAG_SQUARE_FUNCTION
#ifndef LV_HAVE_SSE
int i;
for (i=0;i<len;i++) {
abs_square[i] = crealf(x[i])*crealf(x[i])+cimagf(x[i])*cimagf(x[i]);
}
#else
volk_32fc_magnitude_squared_32f(abs_square,x,len);
srslte_vec_abs_square_cf_simd(x,abs_square,len);
#endif
}
void srslte_vec_arg_cf(cf_t *x, float *arg, uint32_t len) {
#ifndef HAVE_VOLK_ATAN_FUNCTION
int i;
for (i=0;i<len;i++) {
arg[i] = cargf(x[i]);
}
#else
volk_32fc_s32f_atan2_32f(arg,x,1,len);
#endif
}
uint32_t srslte_vec_max_fi(float *x, uint32_t len) {
#ifdef HAVE_VOLK_MAX_FUNCTION
uint32_t target=0;
volk_32f_index_max_16u(&target,x,len);
return target;
#else
uint32_t i;
float m=-FLT_MAX;
uint32_t p=0;
@ -708,16 +623,9 @@ uint32_t srslte_vec_max_fi(float *x, uint32_t len) {
}
}
return p;
#endif
}
int16_t srslte_vec_max_star_si(int16_t *x, uint32_t len) {
#ifdef HAVE_VOLK_MAX_STAR_S_FUNCTION
int16_t target=0;
volk_16i_max_star_16i(&target,x,len);
return target;
#else
uint32_t i;
int16_t m=-INT16_MIN;
for (i=0;i<len;i++) {
@ -726,7 +634,6 @@ int16_t srslte_vec_max_star_si(int16_t *x, uint32_t len) {
}
}
return m;
#endif
}
int16_t srslte_vec_max_abs_star_si(int16_t *x, uint32_t len) {
@ -741,9 +648,6 @@ int16_t srslte_vec_max_abs_star_si(int16_t *x, uint32_t len) {
}
void srslte_vec_max_fff(float *x, float *y, float *z, uint32_t len) {
#ifdef HAVE_VOLK_MAX_VEC_FUNCTION
volk_32f_x2_max_32f(z,x,y,len);
#else
uint32_t i;
for (i=0;i<len;i++) {
if (x[i] > y[i]) {
@ -752,17 +656,11 @@ void srslte_vec_max_fff(float *x, float *y, float *z, uint32_t len) {
z[i] = y[i];
}
}
#endif
}
// CP autocorr
uint32_t srslte_vec_max_abs_ci(cf_t *x, uint32_t len) {
#ifdef HAVE_VOLK_MAX_ABS_FUNCTION
uint32_t target=0;
volk_32fc_index_max_16u(&target,x,len);
return target;
#else
uint32_t i;
float m=-FLT_MAX;
uint32_t p=0;
@ -775,7 +673,6 @@ uint32_t srslte_vec_max_abs_ci(cf_t *x, uint32_t len) {
}
}
return p;
#endif
}
void srslte_vec_quant_fuc(float *in, uint8_t *out, float gain, float offset, float clip, uint32_t len) {

View File

@ -280,3 +280,173 @@ void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len)
}
#endif
}
// for enb no-volk
void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len) {
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int points = len / 4;
const float* xPtr = (const float*) x;
const float* yPtr = (const float*) y;
float* zPtr = (float*) z;
__m128 xVal, yVal, zVal;
for(;number < points; number++){
xVal = _mm_load_ps(xPtr);
yVal = _mm_load_ps(yPtr);
zVal = _mm_add_ps(xVal, yVal);
_mm_store_ps(zPtr, zVal);
xPtr += 4;
yPtr += 4;
zPtr += 4;
}
number = points * 4;
for(;number < len; number++){
z[number] = x[number] + y[number];
}
#endif
}
static inline __m128 _mm_complexmul_ps(__m128 x, __m128 y) {
__m128 yl, yh, tmp1, tmp2;
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
tmp1 = _mm_mul_ps(x, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
x = _mm_shuffle_ps(x, x, 0xB1); // Re-arrange x to be ai,ar,bi,br
tmp2 = _mm_mul_ps(x, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
return _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
}
void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len)
{
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int halfPoints = len / 2;
__m128 xVal, yVal, zVal;
float* zPtr = (float*) z;
const float* xPtr = (const float*) x;
const float* yPtr = (const float*) y;
for(; number < halfPoints; number++){
xVal = _mm_load_ps(xPtr);
yVal = _mm_load_ps(yPtr);
zVal = _mm_complexmul_ps(xVal, yVal);
_mm_store_ps(zPtr, zVal);
xPtr += 4;
yPtr += 4;
zPtr += 4;
}
if((len % 2) != 0){
*zPtr = (*xPtr) * (*yPtr);
}
#endif
}
static inline __m128 _mm_complexmulconj_ps(__m128 x, __m128 y) {
const __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
y = _mm_xor_ps(y, conjugator);
return _mm_complexmul_ps(x, y);
}
void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int halfPoints = len / 2;
__m128 xVal, yVal, zVal;
float* zPtr = (float*) z;
const float* xPtr = (const float*) x;
const float* yPtr = (const float*) y;
for(; number < halfPoints; number++){
xVal = _mm_load_ps(xPtr);
yVal = _mm_load_ps(yPtr);
zVal = _mm_complexmulconj_ps(xVal, yVal);
_mm_store_ps(zPtr, zVal);
xPtr += 4;
yPtr += 4;
zPtr += 4;
}
if((len % 2) != 0){
*zPtr = (*xPtr) * (*yPtr);
}
#endif
}
void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, uint32_t len) {
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int halfPoints = len / 2;
__m128 xVal, yl, yh, zVal, tmp1, tmp2;
float* zPtr = (float*) z;
const float* xPtr = (const float*) x;
// Set up constant scalar vector
yl = _mm_set_ps1(creal(h));
yh = _mm_set_ps1(cimag(h));
for(;number < halfPoints; number++){
xVal = _mm_load_ps(xPtr);
tmp1 = _mm_mul_ps(xVal,yl);
xVal = _mm_shuffle_ps(xVal,xVal,0xB1);
tmp2 = _mm_mul_ps(xVal,yh);
zVal = _mm_addsub_ps(tmp1,tmp2);
_mm_storeu_ps(zPtr,zVal);
xPtr += 4;
zPtr += 4;
}
if((len % 2) != 0) {
*zPtr = (*xPtr) * h;
}
#endif
}
void srslte_vec_abs_square_cf_simd(cf_t *x, float *z, uint32_t len) {
#ifdef LV_HAVE_SSE
unsigned int number = 0;
const unsigned int quarterPoints = len / 4;
const float* xPtr = (const float*) x;
float* zPtr = z;
__m128 xVal1, xVal2, zVal;
for(; number < quarterPoints; number++){
xVal1 = _mm_load_ps(xPtr);
xPtr += 4;
xVal2 = _mm_load_ps(xPtr);
xPtr += 4;
xVal1 = _mm_mul_ps(xVal1, xVal1);
xVal2 = _mm_mul_ps(xVal2, xVal2);
zVal = _mm_hadd_ps(xVal1, xVal2);
_mm_store_ps(zPtr, zVal);
zPtr += 4;
}
number = quarterPoints * 4;
for(; number < len; number++){
float val1Real = *xPtr++;
float val1Imag = *xPtr++;
*zPtr++ = (val1Real * val1Real) + (val1Imag * val1Imag);
}
#endif
}