diff --git a/matlab/common/write_int16.m b/matlab/common/write_int16.m new file mode 100644 index 000000000..afd6e0ff4 --- /dev/null +++ b/matlab/common/write_int16.m @@ -0,0 +1,17 @@ +function [ out ] = write_int16( filename, x) +%WRITE_INT16 Write the elements of x to a binary file as int16 values. +% out = WRITE_INT16(filename, x) opens filename for writing, writes x +% with fwrite using 'int16' precision and closes the file. out is the +% element count returned by fwrite, or [] if the file cannot be opened. + + [tidin msg]=fopen(filename,'w'); + if (tidin==-1) + fprintf('error opening %s: %s\n',filename, msg); + out=[]; + return + end + + out=fwrite(tidin,x,'int16'); + fclose(tidin); +end + diff --git a/matlab/common/write_real.m b/matlab/common/write_real.m new file mode 100644 index 000000000..039906b16 --- /dev/null +++ b/matlab/common/write_real.m @@ -0,0 +1,17 @@ +function [ out ] = write_real( filename, x) +%WRITE_REAL Write the elements of x to a binary file as single values. +% out = WRITE_REAL(filename, x) opens filename for writing, writes x +% with fwrite using 'single' precision and closes the file. out is the +% element count returned by fwrite, or [] if the file cannot be opened. + + [tidin msg]=fopen(filename,'w'); + if (tidin==-1) + fprintf('error opening %s: %s\n',filename, msg); + out=[]; + return + end + + out=fwrite(tidin,x,'single'); + fclose(tidin); +end + diff --git a/matlab/common/write_uchar.m b/matlab/common/write_uchar.m new file mode 100644 index 000000000..7ba41fb46 --- /dev/null +++ b/matlab/common/write_uchar.m @@ -0,0 +1,17 @@ +function [ out ] = write_uchar( filename, x) +%WRITE_UCHAR Write the elements of x to a binary file as uint8 values. +% out = WRITE_UCHAR(filename, x) opens filename for writing, writes x +% with fwrite using 'uint8' precision and closes the file. out is the +% element count returned by fwrite, or [] if the file cannot be opened. + + [tidin msg]=fopen(filename,'w'); + if (tidin==-1) + fprintf('error opening %s: %s\n',filename, msg); + out=[]; + return + end + + out=fwrite(tidin,x,'uint8'); + fclose(tidin); +end + diff --git a/srslte/lib/fec/src/rm_turbo.c b/srslte/lib/fec/src/rm_turbo.c index 7cdeb182c..7e4467994 100644 --- a/srslte/lib/fec/src/rm_turbo.c +++ b/srslte/lib/fec/src/rm_turbo.c @@ -37,7 +37,7 @@ #include "srslte/utils/vector.h" #include "srslte/fec/cbsegm.h" -//#define HAVE_SIMD +#define HAVE_SIMD #ifdef HAVE_SIMD #include @@ -321,45 +321,60 @@ int srslte_rm_turbo_rx_lut_simd(int16_t *input, int16_t *output, uint32_t in_len const __m128i* xPtr = (const __m128i*) input; const __m128i* lutPtr = (const __m128i*) deinter; - printf("\nin_len=%d, 
out_len=%d\n", in_len, out_len); - srslte_vec_fprint_s(stdout, input, in_len); __m128i xVal, lutVal; - int intCnt = 8; - int nwrapps = 0; - for (int i=0;i= out_len) { - /* Copy last elements */ - for (int j=nwrapps*out_len+intCnt-8;j<(nwrapps+1)*out_len;j++) { - printf("coping element %d (in=%d)\n", j, input[j]); - output[deinter[j]] += input[j]; - } - /* And wrap pointers */ - nwrapps++; - printf("--- Wrapping: intCnt=%d, nwrap=%d\n",intCnt, nwrapps); - intCnt = 8; - xPtr = (const __m128i*) &input[nwrapps*out_len]; - lutPtr = (const __m128i*) deinter; - } + /* Simplify load if we do not need to wrap (ie high rates) */ + if (in_len <= out_len) { + for (int i=0;i= out_len && inputCnt < in_len - 8) { + /* Copy last elements */ + for (int j=(nwrapps+1)*out_len-4;j<(nwrapps+1)*out_len;j++) { + output[deinter[j%out_len]] += input[j]; + inputCnt++; + } + /* And wrap pointers */ + nwrapps++; + intCnt = 8; + xPtr = (const __m128i*) &input[nwrapps*out_len]; + lutPtr = (const __m128i*) deinter; + } + } + for (int i=inputCnt;i