Added SIMD turbo rate recovery

This commit is contained in:
ismagom 2015-10-14 23:56:53 +02:00
parent 88cd40420a
commit 9e685eca32
5 changed files with 93 additions and 36 deletions

View File

@ -0,0 +1,14 @@
function [ out ] = write_int16( filename, x)
%READ_COMPLEX Summary of this function goes here
% Detailed explanation goes here
[tidin msg]=fopen(filename,'w');
if (tidin==-1)
fprintf('error opening %s: %s\n',filename, msg);
out=[];
return
end
fwrite(tidin,x,'int16');
end

View File

@ -0,0 +1,14 @@
function [ out ] = write_complex( filename, x)
%READ_COMPLEX Summary of this function goes here
% Detailed explanation goes here
[tidin msg]=fopen(filename,'w');
if (tidin==-1)
fprintf('error opening %s: %s\n',filename, msg);
out=[];
return
end
fwrite(tidin,x,'single');
end

View File

@ -0,0 +1,14 @@
function [ out ] = write_uchar( filename, x)
%READ_COMPLEX Summary of this function goes here
% Detailed explanation goes here
[tidin msg]=fopen(filename,'w');
if (tidin==-1)
fprintf('error opening %s: %s\n',filename, msg);
out=[];
return
end
fwrite(tidin,x,'uint8');
end

View File

@ -37,7 +37,7 @@
#include "srslte/utils/vector.h"
#include "srslte/fec/cbsegm.h"
//#define HAVE_SIMD
#define HAVE_SIMD
#ifdef HAVE_SIMD
#include <xmmintrin.h>
@ -321,45 +321,60 @@ int srslte_rm_turbo_rx_lut_simd(int16_t *input, int16_t *output, uint32_t in_len
const __m128i* xPtr = (const __m128i*) input;
const __m128i* lutPtr = (const __m128i*) deinter;
printf("\nin_len=%d, out_len=%d\n", in_len, out_len);
srslte_vec_fprint_s(stdout, input, in_len);
__m128i xVal, lutVal;
int intCnt = 8;
int nwrapps = 0;
for (int i=0;i<in_len/8-1-nwrapps/2;i++) {
xVal = _mm_loadu_si128(xPtr);
lutVal = _mm_load_si128(lutPtr);
for (int j=0;j<8;j++) {
int16_t x = (int16_t) _mm_extract_epi16(xVal, j);
uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, j);
output[l] += x;
}
printf("x: "); print128_num(xVal);
printf("l: "); print128_num(lutVal);
xPtr ++;
lutPtr ++;
intCnt += 8;
if (intCnt >= out_len) {
/* Copy last elements */
for (int j=nwrapps*out_len+intCnt-8;j<(nwrapps+1)*out_len;j++) {
printf("coping element %d (in=%d)\n", j, input[j]);
output[deinter[j]] += input[j];
}
/* And wrap pointers */
nwrapps++;
printf("--- Wrapping: intCnt=%d, nwrap=%d\n",intCnt, nwrapps);
intCnt = 8;
xPtr = (const __m128i*) &input[nwrapps*out_len];
lutPtr = (const __m128i*) deinter;
}
/* Simplify load if we do not need to wrap (ie high rates) */
if (in_len <= out_len) {
for (int i=0;i<in_len/8;i++) {
xVal = _mm_load_si128(xPtr);
lutVal = _mm_load_si128(lutPtr);
for (int j=0;j<8;j++) {
int16_t x = (int16_t) _mm_extract_epi16(xVal, j);
uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, j);
output[l] += x;
}
xPtr ++;
lutPtr ++;
}
for (int i=8*(in_len/8);i<in_len;i++) {
output[deinter[i%out_len]] += input[i];
}
} else {
int intCnt = 8;
int inputCnt = 0;
int nwrapps = 0;
while(inputCnt < in_len - 8) {
xVal = _mm_loadu_si128(xPtr);
lutVal = _mm_load_si128(lutPtr);
for (int j=0;j<8;j++) {
int16_t x = (int16_t) _mm_extract_epi16(xVal, j);
uint16_t l = (uint16_t) _mm_extract_epi16(lutVal, j);
output[l] += x;
}
xPtr++;
lutPtr++;
intCnt += 8;
inputCnt += 8;
if (intCnt >= out_len && inputCnt < in_len - 8) {
/* Copy last elements */
for (int j=(nwrapps+1)*out_len-4;j<(nwrapps+1)*out_len;j++) {
output[deinter[j%out_len]] += input[j];
inputCnt++;
}
/* And wrap pointers */
nwrapps++;
intCnt = 8;
xPtr = (const __m128i*) &input[nwrapps*out_len];
lutPtr = (const __m128i*) deinter;
}
}
for (int i=inputCnt;i<in_len;i++) {
output[deinter[i%out_len]] += input[i];
}
}
for (int i=8*(in_len/8-1)+(((8*(in_len/8))%out_len)%8);i<in_len;i++) {
printf("copying i=%d, val=%d, t=%d\n",i, input[i],deinter[i%out_len]);
output[deinter[i%out_len]] += input[i];
}
return 0;
} else {

View File

@ -176,7 +176,7 @@ int main(int argc, char **argv) {
printf("OK TX...");
for (int i=0;i<nof_e_bits;i++) {
rm_bits_f[i] = i;//rand()%10-5;
rm_bits_f[i] = rand()%10-5;
rm_bits_s[i] = (short) rm_bits_f[i];
}