mirror of https://github.com/PentHertz/srsLTE.git
removed volk dependency. Checked and working
This commit is contained in:
parent
fe867af319
commit
86750b2db7
|
@ -1,144 +0,0 @@
|
|||
# FindVolk: locate the VOLK headers and library.
#
# Optional inputs:
#   ENV{VOLK_DIR}          installation prefix that is searched first
#   CMAKE_INSTALL_PREFIX   also used as a search hint
#
# Result variables:
#   VOLK_FOUND             set by find_package_handle_standard_args
#   VOLK_INCLUDE_DIRS      directory that contains volk/volk.h
#   VOLK_LIBRARIES         path of the volk library found by find_library

# pkg-config is used only as a source of search hints, so its absence is not
# an error.
find_package(PkgConfig QUIET)
if(PKG_CONFIG_FOUND)
  pkg_check_modules(PC_VOLK QUIET volk)
endif()

# pkg_check_modules() publishes <PREFIX>_INCLUDEDIR and <PREFIX>_INCLUDE_DIRS;
# there is no <PREFIX>_INCLUDE_DIR, so the previous hint was always empty.
find_path(
  VOLK_INCLUDE_DIRS
  NAMES volk/volk.h
  HINTS $ENV{VOLK_DIR}/include
        ${CMAKE_INSTALL_PREFIX}/include
        ${PC_VOLK_INCLUDEDIR}
        ${PC_VOLK_INCLUDE_DIRS}
  PATHS /usr/local/include
        /usr/include
)

find_library(
  VOLK_LIBRARIES
  NAMES volk
  HINTS $ENV{VOLK_DIR}/lib
        ${CMAKE_INSTALL_PREFIX}/lib
        ${CMAKE_INSTALL_PREFIX}/lib64
        ${PC_VOLK_LIBDIR}
        ${PC_VOLK_LIBRARY_DIRS}
  PATHS /usr/local/lib
        /usr/local/lib64
        /usr/lib
        /usr/lib64
)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(VOLK DEFAULT_MSG VOLK_LIBRARIES VOLK_INCLUDE_DIRS)

# Only the two cache entries created above are marked; VOLK_DEFINITIONS is a
# normal variable, not a cache entry.
mark_as_advanced(VOLK_LIBRARIES VOLK_INCLUDE_DIRS)
|
||||
|
||||
# Probe the installed VOLK for each kernel the library can use and publish one
# preprocessor definition per available kernel in VOLK_DEFINITIONS.
#
# check_function_exists_math() is not defined in this file; the including
# project must have loaded it before find_package(Volk) is called.
if(VOLK_FOUND)
  # The probes have to link against VOLK and libm. Save the caller's value and
  # restore it afterwards so later, unrelated checks do not link VOLK.
  set(volk_saved_required_libraries "${CMAKE_REQUIRED_LIBRARIES}")
  set(CMAKE_REQUIRED_LIBRARIES ${VOLK_LIBRARIES} m)

  check_function_exists_math(volk_16i_s32f_convert_32f HAVE_VOLK_CONVERT_IF_FUNCTION)
  check_function_exists_math(volk_32f_index_max_16u HAVE_VOLK_MAX_FUNCTION)
  check_function_exists_math(volk_32f_x2_max_32f HAVE_VOLK_MAX_VEC_FUNCTION)
  check_function_exists_math(volk_32f_accumulator_s32f HAVE_VOLK_ACC_FUNCTION)
  check_function_exists_math(volk_32fc_s32fc_multiply_32fc HAVE_VOLK_MULT_FUNCTION)
  check_function_exists_math(volk_32fc_conjugate_32fc HAVE_VOLK_CONJ_FUNCTION)
  check_function_exists_math(volk_32fc_x2_multiply_32fc HAVE_VOLK_MULT2_FUNCTION)
  check_function_exists_math(volk_32fc_x2_multiply_conjugate_32fc HAVE_VOLK_MULT2_CONJ_FUNCTION)
  check_function_exists_math(volk_32fc_32f_multiply_32fc HAVE_VOLK_MULT_REAL_FUNCTION)
  check_function_exists_math(volk_32f_s32f_multiply_32f HAVE_VOLK_MULT_FLOAT_FUNCTION)
  check_function_exists_math(volk_32fc_magnitude_32f HAVE_VOLK_MAG_FUNCTION)
  check_function_exists_math(volk_32fc_magnitude_squared_32f HAVE_VOLK_MAG_SQUARE_FUNCTION)
  check_function_exists_math(volk_32f_x2_divide_32f HAVE_VOLK_DIVIDE_FUNCTION)
  check_function_exists_math(volk_32fc_x2_dot_prod_32fc HAVE_VOLK_DOTPROD_FC_FUNCTION)
  check_function_exists_math(volk_32fc_32f_dot_prod_32fc HAVE_VOLK_DOTPROD_CFC_FUNCTION)
  check_function_exists_math(volk_32fc_x2_conjugate_dot_prod_32fc HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION)
  check_function_exists_math(volk_32f_x2_dot_prod_32f HAVE_VOLK_DOTPROD_F_FUNCTION)
  check_function_exists_math(volk_32fc_s32f_atan2_32f HAVE_VOLK_ATAN_FUNCTION)
  check_function_exists_math(volk_32f_s32f_convert_16i HAVE_VOLK_CONVERT_FI_FUNCTION)
  check_function_exists_math(volk_32fc_deinterleave_32f_x2 HAVE_VOLK_DEINTERLEAVE_FUNCTION)
  check_function_exists_math(volk_32f_x2_interleave_32fc HAVE_VOLK_INTERLEAVE_FUNCTION)
  check_function_exists_math(volk_32f_x2_subtract_32f HAVE_VOLK_SUB_FLOAT_FUNCTION)
  check_function_exists_math(volk_32f_x2_add_32f HAVE_VOLK_ADD_FLOAT_FUNCTION)
  check_function_exists_math(volk_32fc_x2_square_dist_32f HAVE_VOLK_SQUARE_DIST_FUNCTION)
  # This probe used to store its result in HAVE_VOLK_DEINTERLEAVE_FUNCTION, the
  # same variable as the volk_32fc_deinterleave_32f_x2 probe above; it now has
  # its own result variable.
  check_function_exists_math(volk_32fc_deinterleave_real_32f HAVE_VOLK_DEINTERLEAVE_REAL_FUNCTION)
  check_function_exists_math(volk_32fc_index_max_16u HAVE_VOLK_MAX_ABS_FUNCTION)
  check_function_exists_math(volk_32f_x2_multiply_32f HAVE_VOLK_MULT_REAL2_FUNCTION)
  check_function_exists_math(volk_16i_max_star_16i HAVE_VOLK_MAX_STAR_S_FUNCTION)
  check_function_exists_math(volk_8i_convert_16i HAVE_VOLK_CONVERT_CI_FUNCTION)

  set(CMAKE_REQUIRED_LIBRARIES "${volk_saved_required_libraries}")
  unset(volk_saved_required_libraries)

  # Build VOLK_DEFINITIONS as a proper CMake list. The previous string
  # concatenation with "; " gave every element after the first a leading space.
  #
  # The second HAVE_VOLK_DEINTERLEAVE_FUNCTION entry of the old list is
  # replaced by HAVE_VOLK_DEINTERLEAVE_REAL_FUNCTION.
  #
  # NOTE(review): HAVE_VOLK_CONVERT_IF_FUNCTION, HAVE_VOLK_DOTPROD_CFC_FUNCTION
  # and HAVE_VOLK_MULT_REAL2_FUNCTION are probed above but were never exported;
  # that is kept unchanged here -- confirm whether the omission is intentional.
  set(VOLK_DEFINITIONS "HAVE_VOLK")
  foreach(volk_feature
      CONVERT_CI
      MAX_STAR_S
      MAX_ABS
      MAX_VEC
      DOTPROD_CONJ_FC
      MAG_SQUARE
      SQUARE_DIST
      DEINTERLEAVE
      INTERLEAVE
      SUB_FLOAT
      ADD_FLOAT
      MULT2_CONJ
      DEINTERLEAVE_REAL
      CONVERT_FI
      MAX
      ACC
      MULT
      CONJ
      MULT2
      MULT_FLOAT
      MULT_REAL
      MAG
      DIVIDE
      DOTPROD_FC
      DOTPROD_F
      ATAN)
    # if(<variable>) dereferences the name itself; no ${} is needed (and ${}
    # would evaluate the variable's value a second time).
    if(HAVE_VOLK_${volk_feature}_FUNCTION)
      list(APPEND VOLK_DEFINITIONS "HAVE_VOLK_${volk_feature}_FUNCTION")
    endif()
  endforeach()
endif()
|
|
@ -74,25 +74,6 @@ else(BLADERF_FOUND OR UHD_FOUND)
|
|||
add_definitions(-DDISABLE_RF)
|
||||
endif(BLADERF_FOUND OR UHD_FOUND)
|
||||
|
||||
include(CheckFunctionExistsMath)

# VOLK is searched for unless DISABLE_VOLK is set to a true value.
# if(DISABLE_VOLK) tests the variable directly: an unset, empty or false value
# selects the search. The former inner "EQUAL 0" branch could never be taken
# (a true value is never equal to 0) and has been removed.
if(DISABLE_VOLK)
  message(STATUS "VOLK library disabled (DISABLE_VOLK=1)")
else()
  find_package(Volk)
endif()

if(VOLK_FOUND)
  include_directories(${VOLK_INCLUDE_DIRS})
  # NOTE(review): VOLK_LIBRARY_DIRS does not appear to be set by the find
  # module, which would make this call a no-op -- confirm before removing.
  link_directories(${VOLK_LIBRARY_DIRS})
  message(STATUS " Compiling with VOLK SIMD library.")
else()
  message(STATUS " VOLK SIMD library NOT found. Using generic implementation.")
endif()
|
||||
|
||||
########################################################################
|
||||
# Add subdirectories
|
||||
########################################################################
|
||||
|
|
|
@ -49,6 +49,26 @@ SRSLTE_API void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y,
|
|||
|
||||
SRSLTE_API void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len);
|
||||
|
||||
SRSLTE_API void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len);
|
||||
|
||||
SRSLTE_API void srslte_vec_abs_square_cf_simd(cf_t *x, float *abs_square, uint32_t len);
|
||||
|
||||
SRSLTE_API cf_t srslte_vec_dot_prod_ccc_simd(cf_t *x, cf_t *y, uint32_t len);
|
||||
|
||||
SRSLTE_API void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len);
|
||||
|
||||
SRSLTE_API cf_t srslte_vec_dot_prod_conj_ccc_simd(cf_t *x, cf_t *y, uint32_t len);
|
||||
|
||||
SRSLTE_API void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len);
|
||||
|
||||
SRSLTE_API void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, uint32_t len);
|
||||
|
||||
SRSLTE_API float srslte_vec_acc_ff_simd(float *x, uint32_t len);
|
||||
|
||||
SRSLTE_API cf_t srslte_vec_dot_prod_cfc_simd(cf_t *x, float *y, uint32_t len);
|
||||
|
||||
SRSLTE_API void srslte_vec_convert_if_simd(int16_t *x, float *z, float scale, uint32_t len);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -90,13 +90,6 @@ if(RF_FOUND)
|
|||
endif(BLADERF_FOUND)
|
||||
endif(RF_FOUND)
|
||||
|
||||
# Link the VOLK library into srslte and, unless DisableMEX is set, into
# srslte_static as well.
# NOTE(review): presumably srslte_static only exists when MEX support is
# enabled -- confirm against the target definitions.
if(VOLK_FOUND)
  set(volk_link_targets srslte)
  if(NOT DisableMEX)
    list(APPEND volk_link_targets srslte_static)
  endif()
  foreach(volk_link_target ${volk_link_targets})
    target_link_libraries(${volk_link_target} ${VOLK_LIBRARIES})
  endforeach()
  unset(volk_link_targets)
endif()
|
||||
|
||||
INSTALL(TARGETS srslte DESTINATION ${LIBRARY_DIR})
|
||||
SRSLTE_SET_PIC(srslte)
|
||||
|
||||
|
|
|
@ -35,10 +35,6 @@
|
|||
#include "srslte/utils/vector_simd.h"
|
||||
#include "srslte/utils/bit.h"
|
||||
|
||||
#ifdef HAVE_VOLK
|
||||
#include "volk/volk.h"
|
||||
#endif
|
||||
|
||||
int srslte_vec_acc_ii(int *x, uint32_t len) {
|
||||
int i;
|
||||
int z=0;
|
||||
|
@ -48,19 +44,14 @@ int srslte_vec_acc_ii(int *x, uint32_t len) {
|
|||
return z;
|
||||
}
|
||||
|
||||
// Used in PRACH detector
|
||||
float srslte_vec_acc_ff(float *x, uint32_t len) {
|
||||
#ifdef HAVE_VOLK_ACC_FUNCTION
|
||||
float result;
|
||||
volk_32f_accumulator_s32f(&result,x,len);
|
||||
return result;
|
||||
#else
|
||||
int i;
|
||||
float z=0;
|
||||
for (i=0;i<len;i++) {
|
||||
z+=x[i];
|
||||
}
|
||||
return z;
|
||||
#endif
|
||||
}
|
||||
|
||||
void srslte_vec_ema_filter(cf_t *new_data, cf_t *average, cf_t *output, float coeff, uint32_t len) {
|
||||
|
@ -79,27 +70,19 @@ cf_t srslte_vec_acc_cc(cf_t *x, uint32_t len) {
|
|||
}
|
||||
|
||||
void srslte_vec_square_dist(cf_t symbol, cf_t *points, float *distance, uint32_t npoints) {
|
||||
#ifndef HAVE_VOLK_SQUARE_DIST_FUNCTION
|
||||
uint32_t i;
|
||||
cf_t diff;
|
||||
for (i=0;i<npoints;i++) {
|
||||
diff = symbol - points[i];
|
||||
distance[i] = crealf(diff) * crealf(diff) + cimagf(diff) * cimagf(diff);
|
||||
}
|
||||
#else
|
||||
volk_32fc_x2_square_dist_32f(distance,&symbol,points,npoints);
|
||||
#endif
|
||||
}
|
||||
|
||||
void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len) {
|
||||
#ifndef HAVE_VOLK_SUB_FLOAT_FUNCTION
|
||||
int i;
|
||||
for (i=0;i<len;i++) {
|
||||
z[i] = x[i]-y[i];
|
||||
}
|
||||
#else
|
||||
volk_32f_x2_subtract_32f(z,x,y,len);
|
||||
#endif
|
||||
}
|
||||
|
||||
void srslte_vec_sub_sss(short *x, short *y, short *z, uint32_t len) {
|
||||
|
@ -117,14 +100,15 @@ void srslte_vec_sub_ccc(cf_t *x, cf_t *y, cf_t *z, uint32_t len) {
|
|||
return srslte_vec_sub_fff((float*) x,(float*) y,(float*) z, 2*len);
|
||||
}
|
||||
|
||||
// Used in PSS/SSS and sum_ccc
|
||||
void srslte_vec_sum_fff(float *x, float *y, float *z, uint32_t len) {
|
||||
#ifndef HAVE_VOLK_ADD_FLOAT_FUNCTION
|
||||
#ifndef LV_HAVE_SSE
|
||||
int i;
|
||||
for (i=0;i<len;i++) {
|
||||
z[i] = x[i]+y[i];
|
||||
}
|
||||
#else
|
||||
volk_32f_x2_add_32f(z,x,y,len);
|
||||
srslte_vec_sum_fff_simd(x, y, z, len);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -179,14 +163,10 @@ void srslte_vec_sc_add_sss(int16_t *x, int16_t h, int16_t *z, uint32_t len) {
|
|||
}
|
||||
|
||||
void srslte_vec_sc_prod_fff(float *x, float h, float *z, uint32_t len) {
|
||||
#ifndef HAVE_VOLK_MULT_FLOAT_FUNCTION
|
||||
int i;
|
||||
for (i=0;i<len;i++) {
|
||||
z[i] = x[i]*h;
|
||||
}
|
||||
#else
|
||||
volk_32f_s32f_multiply_32f(z,x,h,len);
|
||||
#endif
|
||||
}
|
||||
|
||||
void srslte_vec_sc_prod_sfs(short *x, float h, short *z, uint32_t len) {
|
||||
|
@ -219,8 +199,9 @@ void srslte_vec_norm_cfc(cf_t *x, float amplitude, cf_t *y, uint32_t len) {
|
|||
srslte_vec_sc_prod_cfc(x, amplitude/max, y, len);
|
||||
}
|
||||
|
||||
// Used throughout
|
||||
void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len) {
|
||||
#ifndef HAVE_VOLK_MULT_FUNCTION
|
||||
#ifndef LV_HAVE_SSE
|
||||
int i;
|
||||
for (i=0;i<len;i++) {
|
||||
z[i] = x[i]*h;
|
||||
|
@ -229,42 +210,36 @@ void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len) {
|
|||
cf_t hh;
|
||||
__real__ hh = h;
|
||||
__imag__ hh = 0;
|
||||
volk_32fc_s32fc_multiply_32fc(z,x,hh,len);
|
||||
srslte_vec_sc_prod_ccc_simd(x,hh,z,len);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Chest UL
|
||||
void srslte_vec_sc_prod_ccc(cf_t *x, cf_t h, cf_t *z, uint32_t len) {
|
||||
#ifndef HAVE_VOLK_MULT_FUNCTION
|
||||
#ifndef LV_HAVE_SSE
|
||||
int i;
|
||||
for (i=0;i<len;i++) {
|
||||
z[i] = x[i]*h;
|
||||
}
|
||||
#else
|
||||
volk_32fc_s32fc_multiply_32fc(z,x,h,len);
|
||||
srslte_vec_sc_prod_ccc_simd(x,h,z,len);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Used in turbo decoder
|
||||
void srslte_vec_convert_if(int16_t *x, float *z, float scale, uint32_t len) {
|
||||
#ifndef HAVE_VOLK_CONVERT_IF_FUNCTION
|
||||
int i;
|
||||
for (i=0;i<len;i++) {
|
||||
z[i] = ((float) x[i])/scale;
|
||||
}
|
||||
#else
|
||||
volk_16i_s32f_convert_32f(z,x,scale,len);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void srslte_vec_convert_ci(int8_t *x, int16_t *z, uint32_t len) {
|
||||
#ifndef HAVE_VOLK_CONVERT_CI_FUNCTION
|
||||
int i;
|
||||
for (i=0;i<len;i++) {
|
||||
z[i] = ((int16_t) x[i]);
|
||||
}
|
||||
#else
|
||||
volk_8i_convert_16i(z,x,len);
|
||||
#endif
|
||||
}
|
||||
|
||||
void srslte_vec_convert_fi(float *x, int16_t *z, float scale, uint32_t len) {
|
||||
|
@ -295,37 +270,25 @@ void srslte_vec_lut_sss(short *x, unsigned short *lut, short *y, uint32_t len) {
|
|||
}
|
||||
|
||||
void srslte_vec_interleave_cf(float *real, float *imag, cf_t *x, uint32_t len) {
|
||||
#ifdef HAVE_VOLK_INTERLEAVE_FUNCTION
|
||||
volk_32f_x2_interleave_32fc(x, real, imag, len);
|
||||
#else
|
||||
int i;
|
||||
for (i=0;i<len;i++) {
|
||||
x[i] = real[i] + _Complex_I*imag[i];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void srslte_vec_deinterleave_cf(cf_t *x, float *real, float *imag, uint32_t len) {
|
||||
#ifdef HAVE_VOLK_DEINTERLEAVE_FUNCTION
|
||||
volk_32fc_deinterleave_32f_x2(real, imag, x, len);
|
||||
#else
|
||||
int i;
|
||||
for (i=0;i<len;i++) {
|
||||
real[i] = __real__ x[i];
|
||||
imag[i] = __imag__ x[i];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void srslte_vec_deinterleave_real_cf(cf_t *x, float *real, uint32_t len) {
|
||||
#ifdef HAVE_VOLK_DEINTERLEAVE_REAL_FUNCTION
|
||||
volk_32fc_deinterleave_real_32f(real, x, len);
|
||||
#else
|
||||
int i;
|
||||
for (i=0;i<len;i++) {
|
||||
real[i] = __real__ x[i];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Note: We align memory to 32 bytes (for AVX compatibility)
|
||||
|
@ -335,7 +298,7 @@ void srslte_vec_deinterleave_real_cf(cf_t *x, float *real, uint32_t len) {
|
|||
*/
|
||||
void *srslte_vec_malloc(uint32_t size) {
|
||||
void *ptr;
|
||||
if (posix_memalign(&ptr,32,size)) {
|
||||
if (posix_memalign(&ptr,64,size)) {
|
||||
return NULL;
|
||||
} else {
|
||||
return ptr;
|
||||
|
@ -343,11 +306,11 @@ void *srslte_vec_malloc(uint32_t size) {
|
|||
}
|
||||
|
||||
void *srslte_vec_realloc(void *ptr, uint32_t old_size, uint32_t new_size) {
|
||||
#ifndef HAVE_VOLK
|
||||
#ifndef LV_HAVE_SSE
|
||||
return realloc(ptr, new_size);
|
||||
#else
|
||||
void *new_ptr;
|
||||
if (posix_memalign(&new_ptr,volk_get_alignment(),new_size)) {
|
||||
if (posix_memalign(&new_ptr,64,new_size)) {
|
||||
return NULL;
|
||||
} else {
|
||||
memcpy(new_ptr, ptr, old_size);
|
||||
|
@ -468,40 +431,31 @@ void srslte_vec_load_file(char *filename, void *buffer, uint32_t len) {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
// Used in PSS
|
||||
void srslte_vec_conj_cc(cf_t *x, cf_t *y, uint32_t len) {
|
||||
#ifndef HAVE_VOLK_CONJ_FUNCTION
|
||||
int i;
|
||||
for (i=0;i<len;i++) {
|
||||
y[i] = conjf(x[i]);
|
||||
}
|
||||
#else
|
||||
volk_32fc_conjugate_32fc(y,x,len);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Used in scrambling complex
|
||||
void srslte_vec_prod_cfc(cf_t *x, float *y, cf_t *z, uint32_t len) {
|
||||
#ifndef HAVE_VOLK_MULT_REAL_FUNCTION
|
||||
int i;
|
||||
for (i=0;i<len;i++) {
|
||||
z[i] = x[i]*y[i];
|
||||
}
|
||||
#else
|
||||
volk_32fc_32f_multiply_32fc(z,x,y,len);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Used in scrambling float
|
||||
void srslte_vec_prod_fff(float *x, float *y, float *z, uint32_t len) {
|
||||
#ifndef HAVE_VOLK_MULT_REAL2_FUNCTION
|
||||
int i;
|
||||
for (i=0;i<len;i++) {
|
||||
z[i] = x[i]*y[i];
|
||||
}
|
||||
#else
|
||||
volk_32f_x2_multiply_32f(z,x,y,len);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Scrambling Short
|
||||
void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len) {
|
||||
#ifndef LV_HAVE_SSE
|
||||
int i;
|
||||
|
@ -513,26 +467,27 @@ void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len) {
|
|||
#endif
|
||||
}
|
||||
|
||||
// CFO and OFDM processing
|
||||
void srslte_vec_prod_ccc(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
|
||||
#ifndef HAVE_VOLK_MULT2_FUNCTION
|
||||
#ifndef LV_HAVE_SSE
|
||||
int i;
|
||||
for (i=0;i<len;i++) {
|
||||
z[i] = x[i]*y[i];
|
||||
}
|
||||
#else
|
||||
volk_32fc_x2_multiply_32fc(z,x,y,len);
|
||||
srslte_vec_prod_ccc_simd(x,y,z,len);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// PRACH, CHEST UL, etc.
|
||||
void srslte_vec_prod_conj_ccc(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
|
||||
#ifndef HAVE_VOLK_MULT2_CONJ_FUNCTION
|
||||
#ifndef LV_HAVE_SSE
|
||||
int i;
|
||||
for (i=0;i<len;i++) {
|
||||
z[i] = x[i]*conjf(y[i]);
|
||||
}
|
||||
#else
|
||||
volk_32fc_x2_multiply_conjugate_32fc(z,x,y,len);
|
||||
srslte_vec_prod_conj_ccc_simd(x,y,z,len);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -568,75 +523,48 @@ void srslte_vec_div_cfc(cf_t *x, float *y, cf_t *z, float *z_real, float *z_imag
|
|||
}
|
||||
|
||||
void srslte_vec_div_fff(float *x, float *y, float *z, uint32_t len) {
|
||||
#ifdef HAVE_VOLK_DIVIDE_FUNCTION
|
||||
volk_32f_x2_divide_32f(z, x, y, len);
|
||||
#else
|
||||
int i;
|
||||
for (i=0;i<len;i++) {
|
||||
z[i] = x[i] / y[i];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
cf_t srslte_vec_dot_prod_ccc(cf_t *x, cf_t *y, uint32_t len) {
|
||||
#ifdef HAVE_VOLK_DOTPROD_FC_FUNCTION
|
||||
cf_t res;
|
||||
volk_32fc_x2_dot_prod_32fc(&res, x, y, len);
|
||||
return res;
|
||||
#else
|
||||
uint32_t i;
|
||||
cf_t res = 0;
|
||||
for (i=0;i<len;i++) {
|
||||
res += x[i]*y[i];
|
||||
}
|
||||
return res;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Convolution filter
|
||||
cf_t srslte_vec_dot_prod_cfc(cf_t *x, float *y, uint32_t len) {
|
||||
#ifdef HAVE_VOLK_DOTPROD_CFC_FUNCTION
|
||||
cf_t res;
|
||||
volk_32fc_32f_dot_prod_32fc(&res, x, y, len);
|
||||
return res;
|
||||
#else
|
||||
uint32_t i;
|
||||
cf_t res = 0;
|
||||
for (i=0;i<len;i++) {
|
||||
res += x[i]*y[i];
|
||||
}
|
||||
return res;
|
||||
#endif
|
||||
}
|
||||
|
||||
cf_t srslte_vec_dot_prod_conj_ccc(cf_t *x, cf_t *y, uint32_t len) {
|
||||
#ifdef HAVE_VOLK_DOTPROD_CONJ_FC_FUNCTION
|
||||
cf_t res;
|
||||
volk_32fc_x2_conjugate_dot_prod_32fc(&res, x, y, len);
|
||||
return res;
|
||||
#else
|
||||
uint32_t i;
|
||||
cf_t res = 0;
|
||||
for (i=0;i<len;i++) {
|
||||
res += x[i]*conjf(y[i]);
|
||||
}
|
||||
return res;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// PHICH
|
||||
float srslte_vec_dot_prod_fff(float *x, float *y, uint32_t len) {
|
||||
#ifdef HAVE_VOLK_DOTPROD_F_FUNCTION
|
||||
float res;
|
||||
volk_32f_x2_dot_prod_32f(&res, x, y, len);
|
||||
return res;
|
||||
#else
|
||||
uint32_t i;
|
||||
float res = 0;
|
||||
for (i=0;i<len;i++) {
|
||||
res += x[i]*y[i];
|
||||
}
|
||||
return res;
|
||||
#endif
|
||||
}
|
||||
|
||||
int32_t srslte_vec_dot_prod_sss(int16_t *x, int16_t *y, uint32_t len) {
|
||||
|
@ -656,48 +584,35 @@ float srslte_vec_avg_power_cf(cf_t *x, uint32_t len) {
|
|||
return crealf(srslte_vec_dot_prod_conj_ccc(x,x,len)) / len;
|
||||
}
|
||||
|
||||
// PSS
|
||||
void srslte_vec_abs_cf(cf_t *x, float *abs, uint32_t len) {
|
||||
#ifndef HAVE_VOLK_MAG_FUNCTION
|
||||
int i;
|
||||
for (i=0;i<len;i++) {
|
||||
abs[i] = cabsf(x[i]);
|
||||
}
|
||||
#else
|
||||
volk_32fc_magnitude_32f(abs,x,len);
|
||||
#endif
|
||||
}
|
||||
|
||||
// PRACH
|
||||
void srslte_vec_abs_square_cf(cf_t *x, float *abs_square, uint32_t len) {
|
||||
#ifndef HAVE_VOLK_MAG_SQUARE_FUNCTION
|
||||
#ifndef LV_HAVE_SSE
|
||||
int i;
|
||||
for (i=0;i<len;i++) {
|
||||
abs_square[i] = crealf(x[i])*crealf(x[i])+cimagf(x[i])*cimagf(x[i]);
|
||||
}
|
||||
#else
|
||||
volk_32fc_magnitude_squared_32f(abs_square,x,len);
|
||||
srslte_vec_abs_square_cf_simd(x,abs_square,len);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
void srslte_vec_arg_cf(cf_t *x, float *arg, uint32_t len) {
|
||||
#ifndef HAVE_VOLK_ATAN_FUNCTION
|
||||
int i;
|
||||
for (i=0;i<len;i++) {
|
||||
arg[i] = cargf(x[i]);
|
||||
}
|
||||
#else
|
||||
volk_32fc_s32f_atan2_32f(arg,x,1,len);
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
uint32_t srslte_vec_max_fi(float *x, uint32_t len) {
|
||||
#ifdef HAVE_VOLK_MAX_FUNCTION
|
||||
uint32_t target=0;
|
||||
volk_32f_index_max_16u(&target,x,len);
|
||||
return target;
|
||||
|
||||
#else
|
||||
uint32_t i;
|
||||
float m=-FLT_MAX;
|
||||
uint32_t p=0;
|
||||
|
@ -708,16 +623,9 @@ uint32_t srslte_vec_max_fi(float *x, uint32_t len) {
|
|||
}
|
||||
}
|
||||
return p;
|
||||
#endif
|
||||
}
|
||||
|
||||
int16_t srslte_vec_max_star_si(int16_t *x, uint32_t len) {
|
||||
#ifdef HAVE_VOLK_MAX_STAR_S_FUNCTION
|
||||
int16_t target=0;
|
||||
volk_16i_max_star_16i(&target,x,len);
|
||||
return target;
|
||||
|
||||
#else
|
||||
uint32_t i;
|
||||
int16_t m=-INT16_MIN;
|
||||
for (i=0;i<len;i++) {
|
||||
|
@ -726,7 +634,6 @@ int16_t srslte_vec_max_star_si(int16_t *x, uint32_t len) {
|
|||
}
|
||||
}
|
||||
return m;
|
||||
#endif
|
||||
}
|
||||
|
||||
int16_t srslte_vec_max_abs_star_si(int16_t *x, uint32_t len) {
|
||||
|
@ -741,9 +648,6 @@ int16_t srslte_vec_max_abs_star_si(int16_t *x, uint32_t len) {
|
|||
}
|
||||
|
||||
void srslte_vec_max_fff(float *x, float *y, float *z, uint32_t len) {
|
||||
#ifdef HAVE_VOLK_MAX_VEC_FUNCTION
|
||||
volk_32f_x2_max_32f(z,x,y,len);
|
||||
#else
|
||||
uint32_t i;
|
||||
for (i=0;i<len;i++) {
|
||||
if (x[i] > y[i]) {
|
||||
|
@ -752,17 +656,11 @@ void srslte_vec_max_fff(float *x, float *y, float *z, uint32_t len) {
|
|||
z[i] = y[i];
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// CP autocorr
|
||||
uint32_t srslte_vec_max_abs_ci(cf_t *x, uint32_t len) {
|
||||
#ifdef HAVE_VOLK_MAX_ABS_FUNCTION
|
||||
uint32_t target=0;
|
||||
volk_32fc_index_max_16u(&target,x,len);
|
||||
return target;
|
||||
|
||||
#else
|
||||
uint32_t i;
|
||||
float m=-FLT_MAX;
|
||||
uint32_t p=0;
|
||||
|
@ -775,7 +673,6 @@ uint32_t srslte_vec_max_abs_ci(cf_t *x, uint32_t len) {
|
|||
}
|
||||
}
|
||||
return p;
|
||||
#endif
|
||||
}
|
||||
|
||||
void srslte_vec_quant_fuc(float *in, uint8_t *out, float gain, float offset, float clip, uint32_t len) {
|
||||
|
|
|
@ -280,3 +280,173 @@ void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len)
|
|||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
// SIMD kernels used by the eNB now that the VOLK dependency has been removed
|
||||
/**
 * Element-wise sum of two float vectors: z[i] = x[i] + y[i] for i in [0, len).
 *
 * With LV_HAVE_SSE the bulk of the vector is processed four floats at a time;
 * the remaining (len % 4) elements -- or the whole vector when SSE support is
 * not compiled in -- are handled by a scalar loop, so the function always
 * produces a result.
 *
 * Unaligned loads/stores are used because nothing in this function guarantees
 * that the caller's pointers are 16-byte aligned.
 *
 * @param x    first input vector
 * @param y    second input vector
 * @param z    output vector (may be the same buffer as x or y)
 * @param len  number of floats in each vector
 */
void srslte_vec_sum_fff_simd(float *x, float *y, float *z, uint32_t len) {
  uint32_t done = 0;

#ifdef LV_HAVE_SSE
  const uint32_t blocks = len / 4;
  const float   *xPtr   = x;
  const float   *yPtr   = y;
  float         *zPtr   = z;
  uint32_t       b;

  for (b = 0; b < blocks; b++) {
    _mm_storeu_ps(zPtr, _mm_add_ps(_mm_loadu_ps(xPtr), _mm_loadu_ps(yPtr)));

    xPtr += 4;
    yPtr += 4;
    zPtr += 4;
  }
  done = blocks * 4;
#endif

  /* Scalar tail (and complete fallback when LV_HAVE_SSE is undefined). */
  for (; done < len; done++) {
    z[done] = x[done] + y[done];
  }
}
|
||||
|
||||
static inline __m128 _mm_complexmul_ps(__m128 x, __m128 y) {
|
||||
__m128 yl, yh, tmp1, tmp2;
|
||||
yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
|
||||
yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
|
||||
tmp1 = _mm_mul_ps(x, yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
|
||||
x = _mm_shuffle_ps(x, x, 0xB1); // Re-arrange x to be ai,ar,bi,br
|
||||
tmp2 = _mm_mul_ps(x, yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
|
||||
return _mm_addsub_ps(tmp1, tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
|
||||
}
|
||||
|
||||
void srslte_vec_prod_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len)
|
||||
{
|
||||
#ifdef LV_HAVE_SSE
|
||||
unsigned int number = 0;
|
||||
const unsigned int halfPoints = len / 2;
|
||||
|
||||
__m128 xVal, yVal, zVal;
|
||||
float* zPtr = (float*) z;
|
||||
const float* xPtr = (const float*) x;
|
||||
const float* yPtr = (const float*) y;
|
||||
|
||||
for(; number < halfPoints; number++){
|
||||
xVal = _mm_load_ps(xPtr);
|
||||
yVal = _mm_load_ps(yPtr);
|
||||
zVal = _mm_complexmul_ps(xVal, yVal);
|
||||
_mm_store_ps(zPtr, zVal);
|
||||
|
||||
xPtr += 4;
|
||||
yPtr += 4;
|
||||
zPtr += 4;
|
||||
}
|
||||
|
||||
if((len % 2) != 0){
|
||||
*zPtr = (*xPtr) * (*yPtr);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline __m128 _mm_complexmulconj_ps(__m128 x, __m128 y) {
|
||||
const __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
|
||||
y = _mm_xor_ps(y, conjugator);
|
||||
return _mm_complexmul_ps(x, y);
|
||||
}
|
||||
|
||||
void srslte_vec_prod_conj_ccc_simd(cf_t *x,cf_t *y, cf_t *z, uint32_t len) {
|
||||
#ifdef LV_HAVE_SSE
|
||||
unsigned int number = 0;
|
||||
const unsigned int halfPoints = len / 2;
|
||||
|
||||
__m128 xVal, yVal, zVal;
|
||||
float* zPtr = (float*) z;
|
||||
const float* xPtr = (const float*) x;
|
||||
const float* yPtr = (const float*) y;
|
||||
|
||||
for(; number < halfPoints; number++){
|
||||
xVal = _mm_load_ps(xPtr);
|
||||
yVal = _mm_load_ps(yPtr);
|
||||
zVal = _mm_complexmulconj_ps(xVal, yVal);
|
||||
_mm_store_ps(zPtr, zVal);
|
||||
|
||||
xPtr += 4;
|
||||
yPtr += 4;
|
||||
zPtr += 4;
|
||||
}
|
||||
|
||||
if((len % 2) != 0){
|
||||
*zPtr = (*xPtr) * (*yPtr);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
 * Scale a complex vector by a complex constant: z[i] = x[i] * h.
 *
 * With LV_HAVE_SSE two complex samples are processed per iteration; the real
 * and imaginary parts of h are broadcast once before the loop. Loads and
 * stores are both unaligned (the store already was). An odd trailing sample
 * -- or the whole vector when SSE is not compiled in -- is computed with
 * ordinary complex arithmetic. (The previous tail multiplied a single float
 * by h and stored only the real part of that product.)
 *
 * @param x    input vector
 * @param h    complex scale factor
 * @param z    output vector
 * @param len  number of complex samples in x and z
 */
void srslte_vec_sc_prod_ccc_simd(cf_t *x, cf_t h, cf_t *z, uint32_t len) {
  uint32_t done = 0;

#ifdef LV_HAVE_SSE
  const uint32_t pairs = len / 2;
  const __m128   hRe   = _mm_set_ps1(crealf(h));
  const __m128   hIm   = _mm_set_ps1(cimagf(h));
  const float   *xPtr  = (const float*) x;
  float         *zPtr  = (float*) z;
  uint32_t       p;

  for (p = 0; p < pairs; p++) {
    const __m128 xVal  = _mm_loadu_ps(xPtr);                 /* ar, ai, br, bi */
    const __m128 xSwap = _mm_shuffle_ps(xVal, xVal, 0xB1);   /* ai, ar, bi, br */

    /* even lanes: xr*hr - xi*hi ; odd lanes: xi*hr + xr*hi */
    _mm_storeu_ps(zPtr, _mm_addsub_ps(_mm_mul_ps(xVal, hRe), _mm_mul_ps(xSwap, hIm)));

    xPtr += 4;
    zPtr += 4;
  }
  done = 2 * pairs;
#endif

  for (; done < len; done++) {
    z[done] = x[done] * h;
  }
}
|
||||
|
||||
/**
 * Squared magnitude of each complex sample: z[i] = re(x[i])^2 + im(x[i])^2.
 *
 * With LV_HAVE_SSE four complex samples (eight floats) are consumed per
 * iteration: both registers are squared lane-wise and a horizontal add
 * combines each re^2/im^2 pair into one output float. Unaligned loads/stores
 * are used because nothing here guarantees 16-byte aligned pointers. The
 * remaining (len % 4) samples -- or the whole vector when SSE is not compiled
 * in -- are handled by the scalar loop.
 *
 * @param x    input vector of complex samples
 * @param z    output vector of squared magnitudes
 * @param len  number of complex samples in x (and floats in z)
 */
void srslte_vec_abs_square_cf_simd(cf_t *x, float *z, uint32_t len) {
  /* x viewed as interleaved (re, im) floats */
  const float *xPtr = (const float*) x;
  uint32_t     done = 0;

#ifdef LV_HAVE_SSE
  const uint32_t quads = len / 4;
  float         *zPtr  = z;
  uint32_t       q;

  for (q = 0; q < quads; q++) {
    __m128 lo = _mm_loadu_ps(xPtr);       /* samples 0 and 1 */
    __m128 hi = _mm_loadu_ps(xPtr + 4);   /* samples 2 and 3 */

    lo = _mm_mul_ps(lo, lo);
    hi = _mm_mul_ps(hi, hi);

    /* hadd: (lo0+lo1, lo2+lo3, hi0+hi1, hi2+hi3) */
    _mm_storeu_ps(zPtr, _mm_hadd_ps(lo, hi));

    xPtr += 8;
    zPtr += 4;
  }
  done = 4 * quads;
#endif

  for (; done < len; done++) {
    const float re = *xPtr++;
    const float im = *xPtr++;
    z[done] = (re * re) + (im * im);
  }
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue