mirror of https://github.com/PentHertz/srsLTE.git
Conditional SSE compilation
This commit is contained in:
parent
438a5aa240
commit
6c194dc078
|
@ -83,8 +83,18 @@ IF(CMAKE_COMPILER_IS_GNUCXX)
|
|||
#Any additional flags for CXX
|
||||
ENDIF(CMAKE_COMPILER_IS_GNUCXX)
|
||||
|
||||
FIND_PACKAGE(SSE)
|
||||
|
||||
IF(CMAKE_COMPILER_IS_GNUCC)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-write-strings -Wno-format-extra-args -Winline -Wno-unused-result -Wno-format -std=c99 -D_GNU_SOURCE -g -mfpmath=sse -mavx -O3")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wno-write-strings -Wno-format-extra-args -Winline -Wno-unused-result -Wno-format -std=c99 -D_GNU_SOURCE -g -march=native -O3")
|
||||
# Enable the widest SIMD extension that was detected and publish it to the
# C sources through the LV_HAVE_* macros. AVX implies the SSE code paths too.
# (AVX_FOUND / SSE4_x_FOUND are presumably set by FIND_PACKAGE(SSE) above.)
# Note: the GCC option names are -msse4.2 / -msse4.1 (no underscore).
IF(AVX_FOUND)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpmath=sse -mavx -DLV_HAVE_AVX -DLV_HAVE_SSE")
ELSEIF(SSE4_2_FOUND)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpmath=sse -msse4.2 -DLV_HAVE_SSE")
ELSEIF(SSE4_1_FOUND)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpmath=sse -msse4.1 -DLV_HAVE_SSE")
ENDIF(AVX_FOUND)
|
||||
|
||||
# IF(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
|
||||
# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror -Wno-error=implicit-function-declaration -Wno-error=unused-but-set-variable")
|
||||
# ENDIF(${CMAKE_BUILD_TYPE} STREQUAL "Debug")
|
||||
|
|
|
@ -47,6 +47,7 @@ CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_subtract_32f HAVE_VOLK_SUB_FLOAT_FUNCTION
|
|||
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_x2_square_dist_32f HAVE_VOLK_SQUARE_DIST_FUNCTION)
|
||||
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_deinterleave_real_32f HAVE_VOLK_DEINTERLEAVE_FUNCTION)
|
||||
CHECK_FUNCTION_EXISTS_MATH(volk_32fc_index_max_16u HAVE_VOLK_MAX_ABS_FUNCTION)
|
||||
CHECK_FUNCTION_EXISTS_MATH(volk_16i_s32f_convert_32f HAVE_VOLK_CONVERT_IF_FUNCTION)
|
||||
|
||||
INCLUDE(FindPackageHandleStandardArgs)
|
||||
FIND_PACKAGE_HANDLE_STANDARD_ARGS(VOLK DEFAULT_MSG VOLK_LIBRARIES VOLK_INCLUDE_DIRS)
|
||||
|
@ -54,6 +55,7 @@ MARK_AS_ADVANCED(VOLK_LIBRARIES VOLK_INCLUDE_DIRS VOLK_DEFINITIONS)
|
|||
|
||||
IF(VOLK_FOUND)
|
||||
SET(CMAKE_REQUIRED_LIBRARIES ${VOLK_LIBRARIES} m)
|
||||
CHECK_FUNCTION_EXISTS_MATH(volk_16i_s32f_convert_32f HAVE_VOLK_CONVERT_IF_FUNCTION)
|
||||
CHECK_FUNCTION_EXISTS_MATH(volk_32f_index_max_16u HAVE_VOLK_MAX_FUNCTION)
|
||||
CHECK_FUNCTION_EXISTS_MATH(volk_32f_x2_max_32f HAVE_VOLK_MAX_VEC_FUNCTION)
|
||||
CHECK_FUNCTION_EXISTS_MATH(volk_32f_accumulator_s32f HAVE_VOLK_ACC_FUNCTION)
|
||||
|
|
|
@ -85,8 +85,7 @@ SRSLTE_API int srslte_precoding_type(srslte_precoding_t *q,
|
|||
|
||||
/* Estimates the vector "x" based on the received signal "y" and the channel estimates "h"
|
||||
*/
|
||||
SRSLTE_API int srslte_predecoding_single(srslte_precoding_t *q,
|
||||
cf_t *y,
|
||||
SRSLTE_API int srslte_predecoding_single(cf_t *y,
|
||||
cf_t *h,
|
||||
cf_t *x,
|
||||
int nof_symbols,
|
||||
|
|
|
@ -40,8 +40,7 @@
|
|||
#include "srslte/common/phy_common.h"
|
||||
#include "srslte/fec/rm_turbo.h"
|
||||
#include "srslte/fec/turbocoder.h"
|
||||
#include "srslte/fec/turbodecoder_gen.h"
|
||||
#include "srslte/fec/turbodecoder_sse.h"
|
||||
#include "srslte/fec/turbodecoder.h"
|
||||
#include "srslte/fec/crc.h"
|
||||
#include "srslte/phch/pdsch_cfg.h"
|
||||
#include "srslte/phch/pusch_cfg.h"
|
||||
|
@ -66,12 +65,12 @@ typedef struct SRSLTE_API {
|
|||
uint8_t *parity_bits;
|
||||
void *e;
|
||||
uint8_t *temp_g_bits;
|
||||
uint32_t *ul_interleaver;
|
||||
uint16_t *ul_interleaver;
|
||||
srslte_uci_bit_t ack_ri_bits[12*288];
|
||||
uint32_t nof_ri_ack_bits;
|
||||
|
||||
srslte_tcod_t encoder;
|
||||
srslte_tdec_sse_t decoder;
|
||||
srslte_tdec_t decoder;
|
||||
srslte_crc_t crc_tb;
|
||||
srslte_crc_t crc_cb;
|
||||
|
||||
|
|
|
@ -63,8 +63,7 @@
|
|||
#include "srslte/fec/crc.h"
|
||||
#include "srslte/fec/tc_interl.h"
|
||||
#include "srslte/fec/turbocoder.h"
|
||||
#include "srslte/fec/turbodecoder_sse.h"
|
||||
#include "srslte/fec/turbodecoder_gen.h"
|
||||
#include "srslte/fec/turbodecoder.h"
|
||||
#include "srslte/fec/cbsegm.h"
|
||||
#include "srslte/fec/rm_conv.h"
|
||||
#include "srslte/fec/rm_turbo.h"
|
||||
|
|
|
@ -109,7 +109,7 @@ SRSLTE_API void srslte_vec_sc_div2_sss(short *x, int pow2_div, short *z, uint32_
|
|||
SRSLTE_API void srslte_vec_norm_cfc(cf_t *x, float amplitude, cf_t *y, uint32_t len);
|
||||
|
||||
SRSLTE_API void srslte_vec_convert_fi(float *x, int16_t *z, float scale, uint32_t len);
|
||||
|
||||
SRSLTE_API void srslte_vec_convert_if(int16_t *x, float *z, float scale, uint32_t len);
|
||||
|
||||
SRSLTE_API void srslte_vec_lut_fuf(float *x, uint32_t *lut, float *y, uint32_t len);
|
||||
SRSLTE_API void srslte_vec_lut_sss(short *x, unsigned short *lut, short *y, uint32_t len);
|
||||
|
|
|
@ -102,24 +102,24 @@ int main(int argc, char **argv) {
|
|||
|
||||
num_re = 2 * cell.nof_prb * SRSLTE_NRE * SRSLTE_CP_NSYMB(cell.cp);
|
||||
|
||||
input = malloc(num_re * sizeof(cf_t));
|
||||
input = srslte_vec_malloc(num_re * sizeof(cf_t));
|
||||
if (!input) {
|
||||
perror("malloc");
|
||||
perror("srslte_vec_malloc");
|
||||
goto do_exit;
|
||||
}
|
||||
output = malloc(num_re * sizeof(cf_t));
|
||||
output = srslte_vec_malloc(num_re * sizeof(cf_t));
|
||||
if (!output) {
|
||||
perror("malloc");
|
||||
perror("srslte_vec_malloc");
|
||||
goto do_exit;
|
||||
}
|
||||
h = malloc(num_re * sizeof(cf_t));
|
||||
h = srslte_vec_malloc(num_re * sizeof(cf_t));
|
||||
if (!h) {
|
||||
perror("malloc");
|
||||
perror("srslte_vec_malloc");
|
||||
goto do_exit;
|
||||
}
|
||||
ce = malloc(num_re * sizeof(cf_t));
|
||||
ce = srslte_vec_malloc(num_re * sizeof(cf_t));
|
||||
if (!ce) {
|
||||
perror("malloc");
|
||||
perror("srslte_vec_malloc");
|
||||
goto do_exit;
|
||||
}
|
||||
|
||||
|
@ -173,7 +173,7 @@ int main(int argc, char **argv) {
|
|||
|
||||
gettimeofday(&t[1], NULL);
|
||||
for (int j=0;j<100;j++) {
|
||||
srslte_predecoding_single(&cheq, input, ce, output, num_re, 0);
|
||||
srslte_predecoding_single(input, ce, output, num_re, 0);
|
||||
}
|
||||
gettimeofday(&t[2], NULL);
|
||||
get_time_interval(t);
|
||||
|
@ -188,7 +188,7 @@ int main(int argc, char **argv) {
|
|||
|
||||
gettimeofday(&t[1], NULL);
|
||||
for (int j=0;j<100;j++) {
|
||||
srslte_predecoding_single(&cheq, input, ce, output, num_re, srslte_chest_dl_get_noise_estimate(&est));
|
||||
srslte_predecoding_single(input, ce, output, num_re, srslte_chest_dl_get_noise_estimate(&est));
|
||||
}
|
||||
gettimeofday(&t[2], NULL);
|
||||
get_time_interval(t);
|
||||
|
|
|
@ -189,7 +189,7 @@ void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
|
|||
}
|
||||
|
||||
if (cell.nof_ports == 1) {
|
||||
srslte_predecoding_single(&cheq, input_signal, ce[0], output_signal2, nof_re, srslte_chest_dl_get_noise_estimate(&chest));
|
||||
srslte_predecoding_single(input_signal, ce[0], output_signal2, nof_re, srslte_chest_dl_get_noise_estimate(&chest));
|
||||
} else {
|
||||
srslte_predecoding_diversity(&cheq, input_signal, ce, output_signal, cell.nof_ports, nof_re, srslte_chest_dl_get_noise_estimate(&chest));
|
||||
srslte_layerdemap_diversity(output_signal, output_signal2, cell.nof_ports, nof_re/cell.nof_ports);
|
||||
|
|
|
@ -181,7 +181,7 @@ void srslte_ofdm_rx_sf(srslte_ofdm_t *q, cf_t *input, cf_t *output) {
|
|||
srslte_vec_prod_ccc(input, q->shift_buffer, input, 2*q->slot_sz);
|
||||
}
|
||||
for (n=0;n<2;n++) {
|
||||
srslte_ofdm_rx_slot_zerocopy(q, &input[n*q->slot_sz], &output[n*q->nof_re*q->nof_symbols]);
|
||||
srslte_ofdm_rx_slot(q, &input[n*q->slot_sz], &output[n*q->nof_re*q->nof_symbols]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -37,14 +37,16 @@
|
|||
#include "srslte/utils/vector.h"
|
||||
#include "srslte/fec/cbsegm.h"
|
||||
|
||||
#define HAVE_SIMD
|
||||
|
||||
#ifdef HAVE_SIMD
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <xmmintrin.h>
|
||||
#include <tmmintrin.h>
|
||||
|
||||
int srslte_rm_turbo_rx_lut_simd(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx);
|
||||
#include <pmmintrin.h>
|
||||
int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx);
|
||||
#endif
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
#include <immintrin.h>
|
||||
int srslte_rm_turbo_rx_lut_avx(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx);
|
||||
#endif
|
||||
|
||||
#define NCOLS 32
|
||||
|
@ -286,29 +288,32 @@ int srslte_rm_turbo_tx_lut(uint8_t *w_buff, uint8_t *systematic, uint8_t *parity
|
|||
|
||||
int srslte_rm_turbo_rx_lut(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
|
||||
{
|
||||
#ifndef HAVE_SIMD
|
||||
if (rv_idx < 4 && cb_idx < SRSLTE_NOF_TC_CB_SIZES) {
|
||||
uint32_t out_len = 3*srslte_cbsegm_cbsize(cb_idx)+12;
|
||||
uint16_t *deinter = deinterleaver[cb_idx][rv_idx];
|
||||
|
||||
for (int i=0;i<in_len;i++) {
|
||||
//printf("i=%d=%d goes to %d\n", i%out_len, input[i], deinter[i%out_len]);
|
||||
output[deinter[i%out_len]] += input[i];
|
||||
#ifdef LV_HAVE_AVX
|
||||
return srslte_rm_turbo_rx_lut_avx(input, output, in_len, cb_idx, rv_idx);
|
||||
#else
|
||||
#ifdef LV_HAVE_SSE
|
||||
return srslte_rm_turbo_rx_lut_sse(input, output, in_len, cb_idx, rv_idx);
|
||||
#else
|
||||
if (rv_idx < 4 && cb_idx < SRSLTE_NOF_TC_CB_SIZES) {
|
||||
uint32_t out_len = 3*srslte_cbsegm_cbsize(cb_idx)+12;
|
||||
uint16_t *deinter = deinterleaver[cb_idx][rv_idx];
|
||||
|
||||
for (int i=0;i<in_len;i++) {
|
||||
//printf("i=%d=%d goes to %d\n", i%out_len, input[i], deinter[i%out_len]);
|
||||
output[deinter[i%out_len]] += input[i];
|
||||
}
|
||||
return 0;
|
||||
} else {
|
||||
printf("Invalid inputs rv_idx=%d, cb_idx=%d\n", rv_idx, cb_idx);
|
||||
return SRSLTE_ERROR_INVALID_INPUTS;
|
||||
}
|
||||
return 0;
|
||||
} else {
|
||||
printf("Invalid inputs rv_idx=%d, cb_idx=%d\n", rv_idx, cb_idx);
|
||||
return SRSLTE_ERROR_INVALID_INPUTS;
|
||||
}
|
||||
#else
|
||||
return srslte_rm_turbo_rx_lut_simd(input, output, in_len, cb_idx, rv_idx);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef HAVE_SIMD
|
||||
#ifdef LV_HAVE_SSE
|
||||
|
||||
|
||||
int srslte_rm_turbo_rx_lut_simd(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
|
||||
int srslte_rm_turbo_rx_lut_sse(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
|
||||
{
|
||||
if (rv_idx < 4 && cb_idx < SRSLTE_NOF_TC_CB_SIZES) {
|
||||
uint32_t out_len = 3*srslte_cbsegm_cbsize(cb_idx)+12;
|
||||
|
@ -381,7 +386,116 @@ int srslte_rm_turbo_rx_lut_simd(int16_t *input, int16_t *output, uint32_t in_len
|
|||
#endif
|
||||
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
|
||||
#define SAVE_OUTPUT(j) x = (int16_t) _mm256_extract_epi16(xVal, j);\
|
||||
l = (uint16_t) _mm256_extract_epi16(lutVal, j);\
|
||||
output[l] += x;
|
||||
|
||||
|
||||
/* AVX variant of the rate-matching receive look-up.
 *
 * For every input sample i, adds input[i] into
 * output[deinterleaver[cb_idx][rv_idx][i % out_len]], where
 * out_len = 3*srslte_cbsegm_cbsize(cb_idx)+12. Samples and table entries are
 * fetched 16 at a time with unaligned 256-bit loads and scattered one lane at
 * a time through SAVE_OUTPUT; leftovers are handled by scalar loops.
 *
 * Returns 0 on success. If rv_idx >= 4 or cb_idx >= SRSLTE_NOF_TC_CB_SIZES it
 * prints a message and returns SRSLTE_ERROR_INVALID_INPUTS.
 */
int srslte_rm_turbo_rx_lut_avx(int16_t *input, int16_t *output, uint32_t in_len, uint32_t cb_idx, uint32_t rv_idx)
|
||||
{
|
||||
if (rv_idx < 4 && cb_idx < SRSLTE_NOF_TC_CB_SIZES) {
|
||||
uint32_t out_len = 3*srslte_cbsegm_cbsize(cb_idx)+12;
|
||||
uint16_t *deinter = deinterleaver[cb_idx][rv_idx];
|
||||
|
||||
const __m256i* xPtr = (const __m256i*) input;
|
||||
const __m256i* lutPtr = (const __m256i*) deinter;
|
||||
__m256i xVal, lutVal;
|
||||
|
||||
/* Scratch variables written by SAVE_OUTPUT */
int16_t x;
|
||||
uint16_t l;
|
||||
|
||||
/* Simplified path when the table never has to wrap (i.e. high rates):
   in_len <= out_len, so input and table advance in lock-step */
|
||||
if (in_len <= out_len) {
|
||||
for (int i=0;i<in_len/16;i++) {
|
||||
xVal = _mm256_loadu_si256(xPtr);
|
||||
lutVal = _mm256_loadu_si256(lutPtr);
|
||||
SAVE_OUTPUT(0);
|
||||
SAVE_OUTPUT(1);
|
||||
SAVE_OUTPUT(2);
|
||||
SAVE_OUTPUT(3);
|
||||
SAVE_OUTPUT(4);
|
||||
SAVE_OUTPUT(5);
|
||||
SAVE_OUTPUT(6);
|
||||
SAVE_OUTPUT(7);
|
||||
|
||||
SAVE_OUTPUT(8);
|
||||
SAVE_OUTPUT(9);
|
||||
SAVE_OUTPUT(10);
|
||||
SAVE_OUTPUT(11);
|
||||
SAVE_OUTPUT(12);
|
||||
SAVE_OUTPUT(13);
|
||||
SAVE_OUTPUT(14);
|
||||
SAVE_OUTPUT(15);
|
||||
|
||||
xPtr ++;
|
||||
lutPtr ++;
|
||||
}
|
||||
/* Scalar remainder: the in_len % 16 samples not covered by full vectors */
for (int i=16*(in_len/16);i<in_len;i++) {
|
||||
output[deinter[i%out_len]] += input[i];
|
||||
}
|
||||
} else {
|
||||
/* Wrapping path (in_len > out_len).
   intCnt   : table entries consumed in the current pass, plus 16
   inputCnt : input samples consumed so far
   nwrapps  : number of completed passes over the table */
int intCnt = 16;
|
||||
int inputCnt = 0;
|
||||
int nwrapps = 0;
|
||||
/* NOTE(review): in_len is unsigned, so in_len - 16 would wrap around if
   in_len < 16; this branch is only entered with in_len > out_len, which is
   presumably always well above 16 - confirm */
while(inputCnt < in_len - 16) {
|
||||
xVal = _mm256_loadu_si256(xPtr);
|
||||
lutVal = _mm256_loadu_si256(lutPtr);
|
||||
|
||||
SAVE_OUTPUT(0);
|
||||
SAVE_OUTPUT(1);
|
||||
SAVE_OUTPUT(2);
|
||||
SAVE_OUTPUT(3);
|
||||
SAVE_OUTPUT(4);
|
||||
SAVE_OUTPUT(5);
|
||||
SAVE_OUTPUT(6);
|
||||
SAVE_OUTPUT(7);
|
||||
|
||||
SAVE_OUTPUT(8);
|
||||
SAVE_OUTPUT(9);
|
||||
SAVE_OUTPUT(10);
|
||||
SAVE_OUTPUT(11);
|
||||
SAVE_OUTPUT(12);
|
||||
SAVE_OUTPUT(13);
|
||||
SAVE_OUTPUT(14);
|
||||
SAVE_OUTPUT(15);
|
||||
xPtr++;
|
||||
lutPtr++;
|
||||
intCnt += 16;
|
||||
inputCnt += 16;
|
||||
/* Another 16-entry load would reach or pass the end of the table, and
   there is still more than one vector of input left: finish this pass
   with scalar adds, then restart the table */
if (intCnt >= out_len && inputCnt < in_len - 16) {
|
||||
/* Copy the last elements of this pass. Only tails of 12 or 4 entries are
   handled - assumes out_len % 16 is always 12 or 4; TODO confirm against
   the code block size table */
|
||||
if ((out_len%16) == 12) {
|
||||
for (int j=(nwrapps+1)*out_len-12;j<(nwrapps+1)*out_len;j++) {
|
||||
output[deinter[j%out_len]] += input[j];
|
||||
inputCnt++;
|
||||
}
|
||||
} else {
|
||||
for (int j=(nwrapps+1)*out_len-4;j<(nwrapps+1)*out_len;j++) {
|
||||
output[deinter[j%out_len]] += input[j];
|
||||
inputCnt++;
|
||||
}
|
||||
}
|
||||
/* Wrap: input continues at nwrapps*out_len, table restarts at entry 0 */
|
||||
nwrapps++;
|
||||
intCnt = 16;
|
||||
xPtr = (const __m256i*) &input[nwrapps*out_len];
|
||||
lutPtr = (const __m256i*) deinter;
|
||||
}
|
||||
}
|
||||
/* Scalar remainder for whatever the vector loop left unprocessed */
for (int i=inputCnt;i<in_len;i++) {
|
||||
output[deinter[i%out_len]] += input[i];
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
} else {
|
||||
printf("Invalid inputs rv_idx=%d, cb_idx=%d\n", rv_idx, cb_idx);
|
||||
return SRSLTE_ERROR_INVALID_INPUTS;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -391,7 +391,7 @@ int srslte_tdec_gen_run_all(srslte_tdec_gen_t * h, float * input, uint8_t *outpu
|
|||
iter++;
|
||||
} while (iter < nof_iterations);
|
||||
|
||||
srslte_tdec_gen_decision(h, output, long_cb);
|
||||
srslte_tdec_gen_decision_byte(h, output, long_cb);
|
||||
|
||||
return SRSLTE_SUCCESS;
|
||||
}
|
||||
|
|
|
@ -37,8 +37,11 @@
|
|||
|
||||
#include <inttypes.h>
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <emmintrin.h>
|
||||
#include <immintrin.h>
|
||||
#include <nmmintrin.h>
|
||||
#endif
|
||||
|
||||
|
||||
#define NUMSTATES 8
|
||||
#define NINPUTS 2
|
||||
|
@ -55,11 +58,13 @@
|
|||
*
|
||||
************************************************/
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
|
||||
static inline int16_t hMax(__m128i buffer)
|
||||
{
|
||||
__m128i tmp1 = _mm_sub_epi8(_mm_set1_epi16(0x7FFF), buffer);
|
||||
__m128i tmp3 = _mm_minpos_epu16(tmp1);
|
||||
return (int16_t)(_mm_cvtsi128_si32(tmp3));
|
||||
__m128i tmp1 = _mm_sub_epi8(_mm_set1_epi16(0x7FFF), buffer);
|
||||
__m128i tmp3 = _mm_minpos_epu16(tmp1);
|
||||
return (int16_t)(_mm_cvtsi128_si32(tmp3));
|
||||
}
|
||||
|
||||
void srslte_map_gen_beta(srslte_map_gen_t * s, int16_t * output, uint32_t long_cb)
|
||||
|
@ -626,3 +631,7 @@ int srslte_tdec_sse_run_all(srslte_tdec_sse_t * h, int16_t * input, uint8_t *out
|
|||
|
||||
return SRSLTE_SUCCESS;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
|
|
@ -118,12 +118,10 @@ int main(int argc, char **argv) {
|
|||
float var[SNR_POINTS];
|
||||
uint32_t snr_points;
|
||||
uint32_t errors;
|
||||
uint32_t errors_gen;
|
||||
uint32_t coded_length;
|
||||
struct timeval tdata[3];
|
||||
float mean_usec, mean_usec_gen;
|
||||
srslte_tdec_sse_t tdec;
|
||||
srslte_tdec_gen_t tdec_gen;
|
||||
float mean_usec;
|
||||
srslte_tdec_t tdec;
|
||||
srslte_tcod_t tcod;
|
||||
|
||||
parse_args(argc, argv);
|
||||
|
@ -189,12 +187,7 @@ int main(int argc, char **argv) {
|
|||
exit(-1);
|
||||
}
|
||||
|
||||
if (srslte_tdec_sse_init(&tdec, frame_length)) {
|
||||
fprintf(stderr, "Error initiating Turbo decoder\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
if (srslte_tdec_gen_init(&tdec_gen, frame_length)) {
|
||||
if (srslte_tdec_init(&tdec, frame_length)) {
|
||||
fprintf(stderr, "Error initiating Turbo decoder\n");
|
||||
exit(-1);
|
||||
}
|
||||
|
@ -216,9 +209,7 @@ int main(int argc, char **argv) {
|
|||
for (i = 0; i < snr_points; i++) {
|
||||
|
||||
mean_usec = 0;
|
||||
mean_usec_gen = 0;
|
||||
errors = 0;
|
||||
errors_gen = 0;
|
||||
frame_cnt = 0;
|
||||
while (frame_cnt < nof_frames) {
|
||||
/* generate data_tx */
|
||||
|
@ -249,8 +240,7 @@ int main(int argc, char **argv) {
|
|||
llr_s[j] = (int16_t) (100*llr[j]);
|
||||
}
|
||||
/* decoder */
|
||||
srslte_tdec_sse_reset(&tdec, frame_length);
|
||||
srslte_tdec_gen_reset(&tdec_gen, frame_length);
|
||||
srslte_tdec_reset(&tdec, frame_length);
|
||||
|
||||
uint32_t t;
|
||||
if (nof_iterations == -1) {
|
||||
|
@ -261,7 +251,7 @@ int main(int argc, char **argv) {
|
|||
|
||||
gettimeofday(&tdata[1], NULL);
|
||||
for (int k=0;k<nof_repetitions;k++) {
|
||||
srslte_tdec_sse_run_all(&tdec, llr_s, data_rx_bytes, t, frame_length);
|
||||
srslte_tdec_run_all(&tdec, llr_s, data_rx_bytes, t, frame_length);
|
||||
}
|
||||
gettimeofday(&tdata[2], NULL);
|
||||
get_time_interval(tdata);
|
||||
|
@ -270,24 +260,11 @@ int main(int argc, char **argv) {
|
|||
srslte_bit_unpack_vector(data_rx_bytes, data_rx, frame_length);
|
||||
|
||||
errors += srslte_bit_diff(data_tx, data_rx, frame_length);
|
||||
|
||||
gettimeofday(&tdata[1], NULL);
|
||||
for (int k=0;k<nof_repetitions;k++) {
|
||||
srslte_tdec_gen_run_all(&tdec_gen, llr, data_rx, t, frame_length);
|
||||
}
|
||||
gettimeofday(&tdata[2], NULL);
|
||||
get_time_interval(tdata);
|
||||
mean_usec_gen = (float) mean_usec_gen * 0.9 + (float) (tdata[0].tv_usec/nof_repetitions) * 0.1;
|
||||
|
||||
/* check errors */
|
||||
errors_gen += srslte_bit_diff(data_tx, data_rx, frame_length);
|
||||
|
||||
frame_cnt++;
|
||||
printf("Eb/No: %2.2f %10d/%d ", SNR_MIN + i * ebno_inc, frame_cnt, nof_frames);
|
||||
printf("BER: %.2e ", (float) errors / (frame_cnt * frame_length));
|
||||
printf("BER_gen: %.2e ", (float) errors_gen / (frame_cnt * frame_length));
|
||||
printf("%3.1f Mbps (%6.2f usec) -- gen: ", (float) frame_length / mean_usec, mean_usec);
|
||||
printf("%3.1f Mbps (%6.2f usec)", (float) frame_length / mean_usec_gen, mean_usec_gen);
|
||||
printf("%3.1f Mbps (%6.2f usec)", (float) frame_length / mean_usec, mean_usec);
|
||||
printf("\r");
|
||||
|
||||
}
|
||||
|
@ -297,10 +274,7 @@ int main(int argc, char **argv) {
|
|||
printf("\n");
|
||||
if (snr_points == 1) {
|
||||
if (errors) {
|
||||
printf("%d Errors in SSE\n", errors);
|
||||
}
|
||||
if (errors_gen) {
|
||||
printf("%d Errors in GEN\n", errors_gen);
|
||||
printf("%d Errors\n", errors);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -311,8 +285,7 @@ int main(int argc, char **argv) {
|
|||
free(llr_c);
|
||||
free(data_rx);
|
||||
|
||||
srslte_tdec_sse_free(&tdec);
|
||||
srslte_tdec_gen_free(&tdec_gen);
|
||||
srslte_tdec_free(&tdec);
|
||||
srslte_tcod_free(&tcod);
|
||||
|
||||
printf("\n");
|
||||
|
|
|
@ -35,6 +35,18 @@
|
|||
#include "srslte/mimo/precoding.h"
|
||||
#include "srslte/utils/vector.h"
|
||||
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <xmmintrin.h>
|
||||
#include <pmmintrin.h>
|
||||
int srslte_predecoding_single_sse(cf_t *y, cf_t *h, cf_t *x, int nof_symbols, float noise_estimate);
|
||||
#endif
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
#include <immintrin.h>
|
||||
int srslte_predecoding_single_avx(cf_t *y, cf_t *h, cf_t *x, int nof_symbols, float noise_estimate);
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
/************************************************
|
||||
*
|
||||
|
@ -117,23 +129,138 @@ void srslte_precoding_free(srslte_precoding_t *q) {
|
|||
bzero(q, sizeof(srslte_precoding_t));
|
||||
}
|
||||
|
||||
/* ZF/MMSE SISO equalizer x=y(h'h+no)^(-1)h' (ZF if n0=0.0)*/
|
||||
int srslte_predecoding_single(srslte_precoding_t *q, cf_t *y, cf_t *h, cf_t *x, int nof_symbols, float noise_estimate) {
|
||||
if (nof_symbols <= q->max_frame_len) {
|
||||
// h'h
|
||||
srslte_vec_abs_square_cf(h, q->y_mod, nof_symbols);
|
||||
if (noise_estimate > 0.0) {
|
||||
// (h'h + n0)
|
||||
srslte_vec_sc_add_fff(q->y_mod, noise_estimate, q->y_mod, nof_symbols);
|
||||
#ifdef LV_HAVE_SSE
|
||||
|
||||
#define PROD(a,b) _mm_addsub_ps(_mm_mul_ps(a,_mm_moveldup_ps(b)),_mm_mul_ps(_mm_shuffle_ps(a,a,0xB1),_mm_movehdup_ps(b)))
|
||||
|
||||
int srslte_predecoding_single_sse(cf_t *y, cf_t *h, cf_t *x, int nof_symbols, float noise_estimate) {
|
||||
|
||||
float *xPtr = (float*) x;
|
||||
const float *hPtr = (const float*) h;
|
||||
const float *yPtr = (const float*) y;
|
||||
|
||||
__m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
|
||||
|
||||
__m128 noise = _mm_set1_ps(noise_estimate);
|
||||
__m128 h1Val, h2Val, y1Val, y2Val, h12square, h1square, h2square, h1conj, h2conj, x1Val, x2Val;
|
||||
for (int i=0;i<nof_symbols/4;i++) {
|
||||
y1Val = _mm_load_ps(yPtr); yPtr+=4;
|
||||
y2Val = _mm_load_ps(yPtr); yPtr+=4;
|
||||
h1Val = _mm_load_ps(hPtr); hPtr+=4;
|
||||
h2Val = _mm_load_ps(hPtr); hPtr+=4;
|
||||
|
||||
h12square = _mm_hadd_ps(_mm_mul_ps(h1Val, h1Val), _mm_mul_ps(h2Val, h2Val));
|
||||
if (noise_estimate > 0) {
|
||||
h12square = _mm_add_ps(h12square, noise);
|
||||
}
|
||||
// y*h'
|
||||
srslte_vec_prod_conj_ccc(y, h, x, nof_symbols);
|
||||
// divide by (h'h+no)
|
||||
srslte_vec_div_cfc(x,q->y_mod,x,q->z_real,q->z_imag, nof_symbols);
|
||||
return nof_symbols;
|
||||
} else {
|
||||
return SRSLTE_ERROR;
|
||||
|
||||
h1square = _mm_shuffle_ps(h12square, h12square, _MM_SHUFFLE(1, 1, 0, 0));
|
||||
h2square = _mm_shuffle_ps(h12square, h12square, _MM_SHUFFLE(3, 3, 2, 2));
|
||||
|
||||
/* Conjugate channel */
|
||||
h1conj = _mm_xor_ps(h1Val, conjugator);
|
||||
h2conj = _mm_xor_ps(h2Val, conjugator);
|
||||
|
||||
/* Complex product */
|
||||
x1Val = PROD(y1Val, h1conj);
|
||||
x2Val = PROD(y2Val, h2conj);
|
||||
|
||||
x1Val = _mm_div_ps(x1Val, h1square);
|
||||
x2Val = _mm_div_ps(x2Val, h2square);
|
||||
|
||||
_mm_store_ps(xPtr, x1Val); xPtr+=4;
|
||||
_mm_store_ps(xPtr, x2Val); xPtr+=4;
|
||||
}
|
||||
/* Scalar tail. The vector loop above runs nof_symbols/4 times and consumes
 * 8 floats (4 complex symbols, assuming cf_t is a float complex) per pass,
 * so it has already written symbols [0, 4*(nof_symbols/4)). Start exactly
 * there: restarting at 8*(nof_symbols/8) would redo up to 4 symbols, which
 * is wasted work and gives wrong results when x aliases y. */
for (int i=4*(nof_symbols/4);i<nof_symbols;i++) {
  x[i] = y[i]*conj(h[i])/(conj(h[i])*h[i]+noise_estimate);
}
|
||||
return nof_symbols;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef LV_HAVE_AVX
|
||||
|
||||
#define PROD_AVX(a,b) _mm256_addsub_ps(_mm256_mul_ps(a,_mm256_moveldup_ps(b)),_mm256_mul_ps(_mm256_shuffle_ps(a,a,0xB1),_mm256_movehdup_ps(b)))
|
||||
|
||||
|
||||
|
||||
/* Single-port ZF/MMSE equalizer, AVX path: x = y*conj(h) / (|h|^2 + n0).
 *
 * y              received symbols
 * h              channel estimates
 * x              equalized output
 * nof_symbols    number of complex symbols to process
 * noise_estimate n0; the vector loop adds it only when it is > 0, the scalar
 *                tail adds it unconditionally
 *
 * The vector loop uses _mm256_load_ps/_mm256_store_ps, which require 32-byte
 * aligned addresses, so y, h and x must be 32-byte aligned.
 *
 * Returns nof_symbols.
 */
int srslte_predecoding_single_avx(cf_t *y, cf_t *h, cf_t *x, int nof_symbols, float noise_estimate) {

  float *xPtr = (float*) x;
  const float *hPtr = (const float*) h;
  const float *yPtr = (const float*) y;

  /* Sign-bit mask on every odd (imaginary) lane: XOR with it conjugates */
  __m256 conjugator = _mm256_setr_ps(0, -0.f, 0, -0.f, 0, -0.f, 0, -0.f);

  __m256 noise = _mm256_set1_ps(noise_estimate);
  __m256 h1Val, h2Val, y1Val, y2Val, h12square, h1square, h2square, h1_p, h2_p, h1conj, h2conj, x1Val, x2Val;

  /* Each pass loads two 8-float vectors per operand, i.e. 8 complex symbols */
  for (int i=0;i<nof_symbols/8;i++) {
    y1Val = _mm256_load_ps(yPtr); yPtr+=8;
    y2Val = _mm256_load_ps(yPtr); yPtr+=8;
    h1Val = _mm256_load_ps(hPtr); hPtr+=8;
    h2Val = _mm256_load_ps(hPtr); hPtr+=8;

    /* |h|^2 for the 8 symbols: low 128-bit lane holds h1's four magnitudes,
       high lane holds h2's (hadd works per 128-bit lane, hence the permutes) */
    __m256 t1 = _mm256_mul_ps(h1Val, h1Val);
    __m256 t2 = _mm256_mul_ps(h2Val, h2Val);
    h12square = _mm256_hadd_ps(_mm256_permute2f128_ps(t1, t2, 0x20), _mm256_permute2f128_ps(t1, t2, 0x31));
    if (noise_estimate > 0) {
      h12square = _mm256_add_ps(h12square, noise);
    }
    /* Duplicate each magnitude so it lines up with both the real and the
       imaginary float of its symbol */
    h1_p = _mm256_permute_ps(h12square, _MM_SHUFFLE(1, 1, 0, 0));
    h2_p = _mm256_permute_ps(h12square, _MM_SHUFFLE(3, 3, 2, 2));
    h1square = _mm256_permute2f128_ps(h1_p, h2_p, 2<<4);
    h2square = _mm256_permute2f128_ps(h1_p, h2_p, 3<<4 | 1);

    /* Conjugate channel */
    h1conj = _mm256_xor_ps(h1Val, conjugator);
    h2conj = _mm256_xor_ps(h2Val, conjugator);

    /* Complex product */
    x1Val = PROD_AVX(y1Val, h1conj);
    x2Val = PROD_AVX(y2Val, h2conj);

    x1Val = _mm256_div_ps(x1Val, h1square);
    x2Val = _mm256_div_ps(x2Val, h2square);

    _mm256_store_ps(xPtr, x1Val); xPtr+=8;
    _mm256_store_ps(xPtr, x2Val); xPtr+=8;
  }
  /* Scalar tail. The vector loop has written symbols [0, 8*(nof_symbols/8)),
     so resume exactly there; starting at 16*(nof_symbols/16) would redo up to
     8 symbols and give wrong results when x aliases y. */
  for (int i=8*(nof_symbols/8);i<nof_symbols;i++) {
    x[i] = y[i]*conj(h[i])/(conj(h[i])*h[i]+noise_estimate);
  }
  return nof_symbols;
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Portable (non-SIMD) single-port equalizer.
 * Writes x[k] = y[k]*conj(h[k]) / (conj(h[k])*h[k] + noise_estimate) for
 * every k in [0, nof_symbols) and returns nof_symbols. */
int srslte_predecoding_single_gen(cf_t *y, cf_t *h, cf_t *x, int nof_symbols, float noise_estimate) {
  int k = 0;
  while (k < nof_symbols) {
    x[k] = y[k]*conj(h[k])/(conj(h[k])*h[k]+noise_estimate);
    k++;
  }
  return nof_symbols;
}
|
||||
|
||||
/* ZF/MMSE SISO equalizer x=y(h'h+no)^(-1)h' (ZF if n0=0.0)*/
|
||||
int srslte_predecoding_single(cf_t *y, cf_t *h, cf_t *x, int nof_symbols, float noise_estimate) {
|
||||
#ifdef LV_HAVE_AVX
|
||||
if (nof_symbols > 32) {
|
||||
return srslte_predecoding_single_avx(y, h, x, nof_symbols, noise_estimate);
|
||||
} else {
|
||||
return srslte_predecoding_single_gen(y, h, x, nof_symbols, noise_estimate);
|
||||
}
|
||||
#else
|
||||
#ifdef LV_HAVE_SSE
|
||||
if (nof_symbols > 32) {
|
||||
return srslte_predecoding_single_sse(y, h, x, nof_symbols, noise_estimate);
|
||||
} else {
|
||||
return srslte_predecoding_single_gen(y, h, x, nof_symbols, noise_estimate);
|
||||
}
|
||||
#else
|
||||
return srslte_predecoding_single_gen(y, h, x, nof_symbols, noise_estimate);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ZF/MMSE STBC equalizer x=y(H'H+n0·I)^(-1)H' (ZF if n0=0.0)
|
||||
|
@ -257,7 +384,7 @@ int srslte_predecoding_type(srslte_precoding_t *q, cf_t *y, cf_t *h[SRSLTE_MAX_P
|
|||
switch (type) {
|
||||
case SRSLTE_MIMO_TYPE_SINGLE_ANTENNA:
|
||||
if (nof_ports == 1 && nof_layers == 1) {
|
||||
return srslte_predecoding_single(q, y, h[0], x[0], nof_symbols, noise_estimate);
|
||||
return srslte_predecoding_single(y, h[0], x[0], nof_symbols, noise_estimate);
|
||||
} else {
|
||||
fprintf(stderr,
|
||||
"Number of ports and layers must be 1 for transmission on single antenna ports\n");
|
||||
|
|
|
@ -102,7 +102,7 @@ int main(int argc, char **argv) {
|
|||
perror("srslte_vec_malloc");
|
||||
exit(-1);
|
||||
}
|
||||
xr[i] = calloc(1,sizeof(cf_t) * nof_symbols);
|
||||
xr[i] = srslte_vec_malloc(sizeof(cf_t) * nof_symbols);
|
||||
if (!xr[i]) {
|
||||
perror("srslte_vec_malloc");
|
||||
exit(-1);
|
||||
|
@ -186,7 +186,6 @@ int main(int argc, char **argv) {
|
|||
mse = 0;
|
||||
for (i = 0; i < nof_layers; i++) {
|
||||
for (j = 0; j < nof_symbols; j++) {
|
||||
printf("%f - %f\n", crealf(xr[i][j]), crealf(x[i][j]));
|
||||
mse += cabsf(xr[i][j] - x[i][j]);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,16 +33,16 @@
|
|||
#include "srslte/utils/bit.h"
|
||||
#include "srslte/modem/demod_soft.h"
|
||||
|
||||
#define HAVE_SIMD
|
||||
// AVX implementation not useful for integers. Wait for AVX2
|
||||
|
||||
#ifdef HAVE_SIMD
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <xmmintrin.h>
|
||||
#include <pmmintrin.h>
|
||||
#include <tmmintrin.h>
|
||||
void demod_16qam_lte_s_sse(const cf_t *symbols, short *llr, int nsymbols);
|
||||
#endif
|
||||
|
||||
|
||||
//#define SCALE_DEMOD16QAM
|
||||
|
||||
#define SCALE_SHORT_CONV_QPSK 100
|
||||
#define SCALE_SHORT_CONV_QAM16 400
|
||||
#define SCALE_SHORT_CONV_QAM64 700
|
||||
|
@ -72,48 +72,17 @@ void demod_16qam_lte(const cf_t *symbols, float *llr, int nsymbols) {
|
|||
float yre = crealf(symbols[i]);
|
||||
float yim = cimagf(symbols[i]);
|
||||
|
||||
#ifdef SCALE_DEMOD16QAM
|
||||
|
||||
llr[4*i+2] = (fabsf(yre)-2/sqrt(10))*sqrt(10);
|
||||
llr[4*i+3] = (fabsf(yim)-2/sqrt(10))*sqrt(10);
|
||||
|
||||
if (llr[4*i+2] > 0) {
|
||||
llr[4*i+0] = -yre/(3/sqrt(10));
|
||||
} else {
|
||||
llr[4*i+0] = -yre/(1/sqrt(10));
|
||||
}
|
||||
if (llr[4*i+3] > 0) {
|
||||
llr[4*i+1] = -yim/(3/sqrt(10));
|
||||
} else {
|
||||
llr[4*i+1] = -yim/(1/sqrt(10));
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
llr[4*i+0] = -yre;
|
||||
llr[4*i+1] = -yim;
|
||||
llr[4*i+2] = fabsf(yre)-2/sqrt(10);
|
||||
llr[4*i+3] = fabsf(yim)-2/sqrt(10);
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void demod_16qam_lte_s(const cf_t *symbols, short *llr, int nsymbols) {
|
||||
#ifndef HAVE_SIMD
|
||||
for (int i=0;i<nsymbols;i++) {
|
||||
short yre = (short) (SCALE_SHORT_CONV_QAM16*crealf(symbols[i]));
|
||||
short yim = (short) (SCALE_SHORT_CONV_QAM16*cimagf(symbols[i]));
|
||||
|
||||
llr[4*i+0] = -yre;
|
||||
llr[4*i+1] = -yim;
|
||||
llr[4*i+2] = abs(yre)-2*SCALE_SHORT_CONV_QAM16/sqrt(10);
|
||||
llr[4*i+3] = abs(yim)-2*SCALE_SHORT_CONV_QAM16/sqrt(10);
|
||||
}
|
||||
#else
|
||||
#ifdef LV_HAVE_SSE
|
||||
|
||||
float *symbolsPtr = (float*) symbols;
|
||||
void demod_16qam_lte_s_sse(const cf_t *symbols, short *llr, int nsymbols) {
|
||||
float *symbolsPtr = (float*) symbols;
|
||||
__m128i *resultPtr = (__m128i*) llr;
|
||||
__m128 symbol1, symbol2;
|
||||
__m128i symbol_i1, symbol_i2, symbol_i, symbol_abs;
|
||||
|
@ -148,6 +117,22 @@ void demod_16qam_lte_s(const cf_t *symbols, short *llr, int nsymbols) {
|
|||
short yre = (short) (SCALE_SHORT_CONV_QAM16*crealf(symbols[i]));
|
||||
short yim = (short) (SCALE_SHORT_CONV_QAM16*cimagf(symbols[i]));
|
||||
|
||||
llr[4*i+0] = -yre;
|
||||
llr[4*i+1] = -yim;
|
||||
llr[4*i+2] = abs(yre)-2*SCALE_SHORT_CONV_QAM16/sqrt(10);
|
||||
llr[4*i+3] = abs(yim)-2*SCALE_SHORT_CONV_QAM16/sqrt(10);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void demod_16qam_lte_s(const cf_t *symbols, short *llr, int nsymbols) {
|
||||
#ifdef LV_HAVE_SSE
|
||||
demod_16qam_lte_s_sse(symbols, llr, nsymbols);
|
||||
#else
|
||||
for (int i=0;i<nsymbols;i++) {
|
||||
short yre = (short) (SCALE_SHORT_CONV_QAM16*crealf(symbols[i]));
|
||||
short yim = (short) (SCALE_SHORT_CONV_QAM16*cimagf(symbols[i]));
|
||||
|
||||
llr[4*i+0] = -yre;
|
||||
llr[4*i+1] = -yim;
|
||||
llr[4*i+2] = abs(yre)-2*SCALE_SHORT_CONV_QAM16/sqrt(10);
|
||||
|
@ -172,21 +157,10 @@ void demod_64qam_lte(const cf_t *symbols, float *llr, int nsymbols)
|
|||
|
||||
}
|
||||
|
||||
void demod_64qam_lte_s(const cf_t *symbols, short *llr, int nsymbols)
|
||||
{
|
||||
#ifndef HAVE_SIMD
|
||||
for (int i=0;i<nsymbols;i++) {
|
||||
float yre = (short) (SCALE_SHORT_CONV_QAM64*crealf(symbols[i]));
|
||||
float yim = (short) (SCALE_SHORT_CONV*cimagf(symbols[i]));
|
||||
#ifdef LV_HAVE_SSE
|
||||
|
||||
llr[6*i+0] = -yre;
|
||||
llr[6*i+1] = -yim;
|
||||
llr[6*i+2] = abs(yre)-4*SCALE_SHORT_CONV_QAM64/sqrt(42);
|
||||
llr[6*i+3] = abs(yim)-4*SCALE_SHORT_CONV_QAM64/sqrt(42);
|
||||
llr[6*i+4] = abs(llr[6*i+2])-2*SCALE_SHORT_CONV_QAM64/sqrt(42);
|
||||
llr[6*i+5] = abs(llr[6*i+3])-2*SCALE_SHORT_CONV_QAM64/sqrt(42);
|
||||
}
|
||||
#else
|
||||
void demod_64qam_lte_s_sse(const cf_t *symbols, short *llr, int nsymbols)
|
||||
{
|
||||
float *symbolsPtr = (float*) symbols;
|
||||
__m128i *resultPtr = (__m128i*) llr;
|
||||
__m128 symbol1, symbol2;
|
||||
|
@ -239,6 +213,26 @@ void demod_64qam_lte_s(const cf_t *symbols, short *llr, int nsymbols)
|
|||
float yre = (short) (SCALE_SHORT_CONV_QAM64*crealf(symbols[i]));
|
||||
float yim = (short) (SCALE_SHORT_CONV_QAM64*cimagf(symbols[i]));
|
||||
|
||||
llr[6*i+0] = -yre;
|
||||
llr[6*i+1] = -yim;
|
||||
llr[6*i+2] = abs(yre)-4*SCALE_SHORT_CONV_QAM64/sqrt(42);
|
||||
llr[6*i+3] = abs(yim)-4*SCALE_SHORT_CONV_QAM64/sqrt(42);
|
||||
llr[6*i+4] = abs(llr[6*i+2])-2*SCALE_SHORT_CONV_QAM64/sqrt(42);
|
||||
llr[6*i+5] = abs(llr[6*i+3])-2*SCALE_SHORT_CONV_QAM64/sqrt(42);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void demod_64qam_lte_s(const cf_t *symbols, short *llr, int nsymbols)
|
||||
{
|
||||
#ifdef LV_HAVE_SSE
|
||||
demod_64qam_lte_s_sse(symbols, llr, nsymbols);
|
||||
#else
|
||||
for (int i=0;i<nsymbols;i++) {
|
||||
float yre = (short) (SCALE_SHORT_CONV_QAM64*crealf(symbols[i]));
|
||||
float yim = (short) (SCALE_SHORT_CONV_QAM64*cimagf(symbols[i]));
|
||||
|
||||
llr[6*i+0] = -yre;
|
||||
llr[6*i+1] = -yim;
|
||||
llr[6*i+2] = abs(yre)-4*SCALE_SHORT_CONV_QAM64/sqrt(42);
|
||||
|
|
|
@ -470,8 +470,7 @@ int srslte_pbch_decode(srslte_pbch_t *q, cf_t *slot1_symbols, cf_t *ce_slot1[SRS
|
|||
/* in control channels, only diversity is supported */
|
||||
if (nant == 1) {
|
||||
/* no need for layer demapping */
|
||||
srslte_predecoding_single(&q->precoding, q->symbols[0], q->ce[0], q->d,
|
||||
q->nof_symbols, noise_estimate);
|
||||
srslte_predecoding_single(q->symbols[0], q->ce[0], q->d, q->nof_symbols, noise_estimate);
|
||||
} else {
|
||||
srslte_predecoding_diversity(&q->precoding, q->symbols[0], q->ce, x, nant,
|
||||
q->nof_symbols, noise_estimate);
|
||||
|
|
|
@ -193,8 +193,7 @@ int srslte_pcfich_decode(srslte_pcfich_t *q, cf_t *slot_symbols, cf_t *ce[SRSLTE
|
|||
/* in control channels, only diversity is supported */
|
||||
if (q->cell.nof_ports == 1) {
|
||||
/* no need for layer demapping */
|
||||
srslte_predecoding_single(&q->precoding, q->symbols[0], q->ce[0], q->d,
|
||||
q->nof_symbols, noise_estimate);
|
||||
srslte_predecoding_single(q->symbols[0], q->ce[0], q->d, q->nof_symbols, noise_estimate);
|
||||
} else {
|
||||
srslte_predecoding_diversity(&q->precoding, q->symbols[0], ce_precoding, x,
|
||||
q->cell.nof_ports, q->nof_symbols, noise_estimate);
|
||||
|
|
|
@ -408,7 +408,7 @@ int srslte_pdcch_extract_llr(srslte_pdcch_t *q, cf_t *sf_symbols, cf_t *ce[SRSLT
|
|||
/* in control channels, only diversity is supported */
|
||||
if (q->cell.nof_ports == 1) {
|
||||
/* no need for layer demapping */
|
||||
srslte_predecoding_single(&q->precoding, q->symbols[0], q->ce[0], q->d, nof_symbols, noise_estimate);
|
||||
srslte_predecoding_single(q->symbols[0], q->ce[0], q->d, nof_symbols, noise_estimate);
|
||||
} else {
|
||||
srslte_predecoding_diversity(&q->precoding, q->symbols[0], q->ce, x, q->cell.nof_ports, nof_symbols, noise_estimate);
|
||||
srslte_layerdemap_diversity(x, q->d, q->cell.nof_ports, nof_symbols / q->cell.nof_ports);
|
||||
|
|
|
@ -404,8 +404,7 @@ int srslte_pdsch_decode_rnti(srslte_pdsch_t *q,
|
|||
/* TODO: only diversity is supported */
|
||||
if (q->cell.nof_ports == 1) {
|
||||
/* no need for layer demapping */
|
||||
srslte_predecoding_single(&q->precoding, q->symbols[0], q->ce[0], q->d,
|
||||
cfg->nbits.nof_re, noise_estimate);
|
||||
srslte_predecoding_single(q->symbols[0], q->ce[0], q->d, cfg->nbits.nof_re, noise_estimate);
|
||||
} else {
|
||||
srslte_predecoding_diversity(&q->precoding, q->symbols[0], q->ce, x, q->cell.nof_ports,
|
||||
cfg->nbits.nof_re, noise_estimate);
|
||||
|
|
|
@ -216,8 +216,7 @@ int srslte_phich_decode(srslte_phich_t *q, cf_t *slot_symbols, cf_t *ce[SRSLTE_M
|
|||
/* in control channels, only diversity is supported */
|
||||
if (q->cell.nof_ports == 1) {
|
||||
/* no need for layer demapping */
|
||||
srslte_predecoding_single(&q->precoding, q->symbols[0], q->ce[0], q->d0,
|
||||
SRSLTE_PHICH_MAX_NSYMB, noise_estimate);
|
||||
srslte_predecoding_single(q->symbols[0], q->ce[0], q->d0, SRSLTE_PHICH_MAX_NSYMB, noise_estimate);
|
||||
} else {
|
||||
srslte_predecoding_diversity(&q->precoding, q->symbols[0], ce_precoding, x,
|
||||
q->cell.nof_ports, SRSLTE_PHICH_MAX_NSYMB, noise_estimate);
|
||||
|
|
|
@ -438,8 +438,7 @@ int srslte_pusch_decode(srslte_pusch_t *q,
|
|||
return SRSLTE_ERROR;
|
||||
}
|
||||
|
||||
srslte_predecoding_single(&q->equalizer, q->d, q->ce, q->z,
|
||||
cfg->nbits.nof_re, noise_estimate);
|
||||
srslte_predecoding_single(q->d, q->ce, q->z, cfg->nbits.nof_re, noise_estimate);
|
||||
|
||||
srslte_dft_predecoding(&q->dft_precoding, q->z, q->d, cfg->grant.L_prb, cfg->nbits.nof_symb);
|
||||
|
||||
|
|
|
@ -111,7 +111,7 @@ int srslte_sch_init(srslte_sch_t *q) {
|
|||
fprintf(stderr, "Error initiating Turbo Coder\n");
|
||||
goto clean;
|
||||
}
|
||||
if (srslte_tdec_sse_init(&q->decoder, SRSLTE_TCOD_MAX_LEN_CB)) {
|
||||
if (srslte_tdec_init(&q->decoder, SRSLTE_TCOD_MAX_LEN_CB)) {
|
||||
fprintf(stderr, "Error initiating Turbo Decoder\n");
|
||||
goto clean;
|
||||
}
|
||||
|
@ -133,7 +133,7 @@ int srslte_sch_init(srslte_sch_t *q) {
|
|||
goto clean;
|
||||
}
|
||||
bzero(q->temp_g_bits, SRSLTE_MAX_PRB*12*12*12);
|
||||
q->ul_interleaver = srslte_vec_malloc(sizeof(uint32_t)*SRSLTE_MAX_PRB*12*12*12);
|
||||
q->ul_interleaver = srslte_vec_malloc(sizeof(uint16_t)*SRSLTE_MAX_PRB*12*12*12);
|
||||
if (!q->ul_interleaver) {
|
||||
goto clean;
|
||||
}
|
||||
|
@ -163,7 +163,7 @@ void srslte_sch_free(srslte_sch_t *q) {
|
|||
if (q->ul_interleaver) {
|
||||
free(q->ul_interleaver);
|
||||
}
|
||||
srslte_tdec_sse_free(&q->decoder);
|
||||
srslte_tdec_free(&q->decoder);
|
||||
srslte_tcod_free(&q->encoder);
|
||||
srslte_uci_cqi_free(&q->uci_cqi);
|
||||
bzero(q, sizeof(srslte_sch_t));
|
||||
|
@ -413,10 +413,10 @@ static int decode_tb(srslte_sch_t *q,
|
|||
srslte_crc_t *crc_ptr;
|
||||
early_stop = false;
|
||||
|
||||
srslte_tdec_sse_reset(&q->decoder, cb_len);
|
||||
srslte_tdec_reset(&q->decoder, cb_len);
|
||||
|
||||
do {
|
||||
srslte_tdec_sse_iteration(&q->decoder, softbuffer->buffer_f[i], cb_len);
|
||||
srslte_tdec_iteration(&q->decoder, softbuffer->buffer_f[i], cb_len);
|
||||
q->nof_iterations++;
|
||||
|
||||
if (cb_segm->C > 1) {
|
||||
|
@ -427,10 +427,10 @@ static int decode_tb(srslte_sch_t *q,
|
|||
crc_ptr = &q->crc_tb;
|
||||
}
|
||||
|
||||
srslte_tdec_sse_decision_byte(&q->decoder, q->cb_in, cb_len);
|
||||
srslte_tdec_decision_byte(&q->decoder, q->cb_in, cb_len);
|
||||
|
||||
if (i == 9) {
|
||||
srslte_tdec_sse_decision(&q->decoder, q->temp_data, cb_len);
|
||||
srslte_tdec_decision(&q->decoder, q->temp_data, cb_len);
|
||||
}
|
||||
/* Check Codeblock CRC and stop early if incorrect */
|
||||
if (!srslte_crc_checksum_byte(crc_ptr, q->cb_in, len_crc)) {
|
||||
|
@ -525,7 +525,7 @@ int srslte_ulsch_decode(srslte_sch_t *q, srslte_pusch_cfg_t *cfg, srslte_softbuf
|
|||
/* UL-SCH channel interleaver according to 5.2.2.8 of 36.212 */
|
||||
void ulsch_interleave(uint8_t *g_bits, uint32_t Qm, uint32_t H_prime_total,
|
||||
uint32_t N_pusch_symbs, uint8_t *q_bits, srslte_uci_bit_t *ri_bits, uint32_t nof_ri_bits,
|
||||
uint32_t *interleaver_buffer, uint8_t *temp_buffer, uint32_t buffer_sz)
|
||||
uint16_t *interleaver_buffer, uint8_t *temp_buffer, uint32_t buffer_sz)
|
||||
{
|
||||
|
||||
uint32_t rows = H_prime_total/N_pusch_symbs;
|
||||
|
|
|
@ -147,24 +147,24 @@ int main(int argc, char **argv) {
|
|||
|
||||
/* init memory */
|
||||
for (i=0;i<cell.nof_ports;i++) {
|
||||
ce[i] = malloc(sizeof(cf_t) * SRSLTE_SF_LEN_RE(cell.nof_prb, cell.cp));
|
||||
ce[i] = srslte_vec_malloc(sizeof(cf_t) * SRSLTE_SF_LEN_RE(cell.nof_prb, cell.cp));
|
||||
if (!ce[i]) {
|
||||
perror("malloc");
|
||||
perror("srslte_vec_malloc");
|
||||
goto quit;
|
||||
}
|
||||
for (j=0;j<SRSLTE_SF_LEN_RE(cell.nof_prb, cell.cp);j++) {
|
||||
ce[i][j] = 1;
|
||||
}
|
||||
slot_symbols[i] = calloc(sizeof(cf_t) , SRSLTE_SF_LEN_RE(cell.nof_prb, cell.cp));
|
||||
slot_symbols[i] = srslte_vec_malloc(sizeof(cf_t)*SRSLTE_SF_LEN_RE(cell.nof_prb, cell.cp));
|
||||
if (!slot_symbols[i]) {
|
||||
perror("malloc");
|
||||
perror("srslte_vec_malloc");
|
||||
goto quit;
|
||||
}
|
||||
}
|
||||
|
||||
data = srslte_vec_malloc(sizeof(uint8_t) * grant.mcs.tbs/8);
|
||||
if (!data) {
|
||||
perror("malloc");
|
||||
perror("srslte_vec_malloc");
|
||||
goto quit;
|
||||
}
|
||||
|
||||
|
|
|
@ -240,6 +240,17 @@ void srslte_vec_sc_prod_ccc(cf_t *x, cf_t h, cf_t *z, uint32_t len) {
|
|||
#endif
|
||||
}
|
||||
|
||||
/**
 * Converts a vector of 16-bit integers to floats, dividing each sample by
 * `scale`.
 *
 * @param x      input samples (len elements)
 * @param z      output buffer (len elements)
 * @param scale  divisor applied to every converted sample
 * @param len    number of samples to convert; 0 leaves z untouched
 *
 * When HAVE_VOLK_CONVERT_IF_FUNCTION is defined the work is delegated to
 * volk_16i_s32f_convert_32f, which divides by its scalar argument. The
 * portable fallback must do the same, otherwise the result would depend on
 * whether VOLK was available at build time.
 */
void srslte_vec_convert_if(int16_t *x, float *z, float scale, uint32_t len) {
#ifndef HAVE_VOLK_CONVERT_IF_FUNCTION
  /* Unsigned index: len is uint32_t, avoid a signed/unsigned comparison. */
  for (uint32_t i = 0; i < len; i++) {
    z[i] = ((float) x[i]) / scale;
  }
#else
  volk_16i_s32f_convert_32f(z,x,scale,len);
#endif
}
|
||||
|
||||
void srslte_vec_convert_fi(float *x, int16_t *z, float scale, uint32_t len) {
|
||||
#ifndef HAVE_VECTOR_SIMD
|
||||
int i;
|
||||
|
@ -303,7 +314,12 @@ void srslte_vec_deinterleave_real_cf(cf_t *x, float *real, uint32_t len) {
|
|||
|
||||
void *srslte_vec_malloc(uint32_t size) {
|
||||
#ifndef HAVE_VOLK
|
||||
return malloc(size);
|
||||
void *ptr;
|
||||
if (posix_memalign(&ptr,32,size)) {
|
||||
return NULL;
|
||||
} else {
|
||||
return ptr;
|
||||
}
|
||||
#else
|
||||
void *ptr;
|
||||
if (posix_memalign(&ptr,volk_get_alignment(),size)) {
|
||||
|
|
|
@ -37,18 +37,15 @@
|
|||
#include <inttypes.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <xmmintrin.h>
|
||||
#ifdef LV_HAVE_SSE
|
||||
#include <emmintrin.h>
|
||||
#include <nmmintrin.h>
|
||||
#endif
|
||||
|
||||
/**
 * Debug helper: prints the eight signed 16-bit lanes of a 128-bit integer
 * vector to stdout on a single line.
 *
 * @param var  vector whose lanes are printed, lowest lane first
 */
void print128_num(__m128i var)
{
  /* Read the lanes through a union instead of casting &var to int16_t*:
   * accessing a __m128i object through an int16_t lvalue breaks the
   * strict-aliasing rule, whereas union type punning is well defined in C. */
  union {
    __m128i vec;
    int16_t lane[8];
  } view;

  view.vec = var;
  printf("Numerical: %d %d %d %d %d %d %d %d \n",
         view.lane[0], view.lane[1], view.lane[2], view.lane[3],
         view.lane[4], view.lane[5], view.lane[6], view.lane[7]);
}
|
||||
|
||||
void srslte_vec_sum_sss_simd(short *x, short *y, short *z, uint32_t len)
|
||||
{
|
||||
#ifdef LV_HAVE_SSE
|
||||
unsigned int number = 0;
|
||||
const unsigned int points = len / 8;
|
||||
|
||||
|
@ -75,10 +72,13 @@ void srslte_vec_sum_sss_simd(short *x, short *y, short *z, uint32_t len)
|
|||
for(;number < len; number++){
|
||||
z[number] = x[number] + y[number];
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
void srslte_vec_sub_sss_simd(short *x, short *y, short *z, uint32_t len)
|
||||
{
|
||||
#ifdef LV_HAVE_SSE
|
||||
unsigned int number = 0;
|
||||
const unsigned int points = len / 8;
|
||||
|
||||
|
@ -105,10 +105,12 @@ void srslte_vec_sub_sss_simd(short *x, short *y, short *z, uint32_t len)
|
|||
for(;number < len; number++){
|
||||
z[number] = x[number] - y[number];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void srslte_vec_prod_sss_simd(short *x, short *y, short *z, uint32_t len)
|
||||
{
|
||||
#ifdef LV_HAVE_SSE
|
||||
unsigned int number = 0;
|
||||
const unsigned int points = len / 8;
|
||||
|
||||
|
@ -135,10 +137,12 @@ void srslte_vec_prod_sss_simd(short *x, short *y, short *z, uint32_t len)
|
|||
for(;number < len; number++){
|
||||
z[number] = x[number] * y[number];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void srslte_vec_sc_div2_sss_simd(short *x, int k, short *z, uint32_t len)
|
||||
{
|
||||
#ifdef LV_HAVE_SSE
|
||||
unsigned int number = 0;
|
||||
const unsigned int points = len / 8;
|
||||
|
||||
|
@ -163,10 +167,13 @@ void srslte_vec_sc_div2_sss_simd(short *x, int k, short *z, uint32_t len)
|
|||
for(;number < len; number++){
|
||||
z[number] = x[number] / divn;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/* No improvement with AVX */
|
||||
void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t len)
|
||||
{
|
||||
#ifdef LV_HAVE_SSE
|
||||
unsigned int number = 0;
|
||||
const unsigned int points = len / 8;
|
||||
|
||||
|
@ -192,12 +199,13 @@ void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t l
|
|||
for(;number < len; number++){
|
||||
y[lut[number]] = x[number];
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Modified from volk_32f_s32f_convert_16i_a_sse2. Removed clipping */
|
||||
/* Modified from volk_32f_s32f_convert_16i_a_sse2. Removed clipping */
|
||||
void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len)
|
||||
{
|
||||
#ifdef LV_HAVE_SSE
|
||||
unsigned int number = 0;
|
||||
|
||||
const unsigned int eighthPoints = len / 8;
|
||||
|
@ -230,5 +238,5 @@ void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len)
|
|||
for(; number < len; number++){
|
||||
z[number] = (int16_t) (x[number] * scale);
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
Loading…
Reference in New Issue