mirror of https://github.com/PentHertz/srsLTE.git
Turbo decoder working with hard decision
This commit is contained in:
parent
ad06998d91
commit
19256c261e
|
@ -43,6 +43,10 @@ SRSLTE_API void srslte_vec_sub_sss_simd(short *x, short *y, short *z, uint32_t l
|
|||
|
||||
SRSLTE_API void srslte_vec_sc_div2_sss_simd(short *x, int n_rightshift, short *z, uint32_t len);
|
||||
|
||||
SRSLTE_API void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t len);
|
||||
|
||||
SRSLTE_API void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -207,7 +207,7 @@ void srslte_map_gen_alpha(srslte_map_gen_t * s, uint32_t long_cb)
|
|||
__m128i gv;
|
||||
__m128i *gPtr = (__m128i*) s->branch;
|
||||
__m128i g, ap, an;
|
||||
|
||||
|
||||
__m128i alpha_k = _mm_set_epi16(-INF, -INF, -INF, -INF, -INF, -INF, -INF, 0);
|
||||
|
||||
#define ALPHA_STEP(c) g = _mm_shuffle_epi8(gv, shuf_g[c]); \
|
||||
|
@ -564,13 +564,13 @@ void srslte_tdec_iteration(srslte_tdec_t * h, float * input, uint32_t long_cb)
|
|||
}
|
||||
|
||||
// Interleave extrinsic output of DEC1 to form apriori info for decoder 2
|
||||
srslte_vec_lut_sss(h->ext1, inter, h->app2, long_cb);
|
||||
srslte_vec_lut_sss(h->ext1, deinter, h->app2, long_cb);
|
||||
|
||||
// Run MAP DEC #2. 2nd decoder uses apriori information as systematic bits
|
||||
srslte_map_gen_dec(&h->dec, h->app2, NULL, h->parity1, h->ext2, long_cb);
|
||||
|
||||
// Deinterleaved extrinsic bits become apriori info for decoder 1
|
||||
srslte_vec_lut_sss(h->ext2, deinter, h->app1, long_cb);
|
||||
srslte_vec_lut_sss(h->ext2, inter, h->app1, long_cb);
|
||||
|
||||
h->n_iter++;
|
||||
} else {
|
||||
|
@ -626,14 +626,14 @@ void srslte_tdec_decision_byte(srslte_tdec_t * h, uint8_t *output, uint32_t long
|
|||
|
||||
// long_cb is always byte aligned
|
||||
for (uint32_t i = 0; i < long_cb/8; i++) {
|
||||
uint8_t out0 = h->app1[i+0]>0?mask[0]:0;
|
||||
uint8_t out1 = h->app1[i+1]>0?mask[1]:0;
|
||||
uint8_t out2 = h->app1[i+2]>0?mask[2]:0;
|
||||
uint8_t out3 = h->app1[i+3]>0?mask[3]:0;
|
||||
uint8_t out4 = h->app1[i+4]>0?mask[4]:0;
|
||||
uint8_t out5 = h->app1[i+5]>0?mask[5]:0;
|
||||
uint8_t out6 = h->app1[i+6]>0?mask[6]:0;
|
||||
uint8_t out7 = h->app1[i+7]>0?mask[7]:0;
|
||||
uint8_t out0 = h->app1[8*i+0]>0?mask[0]:0;
|
||||
uint8_t out1 = h->app1[8*i+1]>0?mask[1]:0;
|
||||
uint8_t out2 = h->app1[8*i+2]>0?mask[2]:0;
|
||||
uint8_t out3 = h->app1[8*i+3]>0?mask[3]:0;
|
||||
uint8_t out4 = h->app1[8*i+4]>0?mask[4]:0;
|
||||
uint8_t out5 = h->app1[8*i+5]>0?mask[5]:0;
|
||||
uint8_t out6 = h->app1[8*i+6]>0?mask[6]:0;
|
||||
uint8_t out7 = h->app1[8*i+7]>0?mask[7]:0;
|
||||
|
||||
output[i] = out0 | out1 | out2 | out3 | out4 | out5 | out6 | out7;
|
||||
}
|
||||
|
@ -650,7 +650,7 @@ int srslte_tdec_run_all(srslte_tdec_t * h, float * input, uint8_t *output,
|
|||
srslte_tdec_iteration(h, input, long_cb);
|
||||
} while (h->n_iter < nof_iterations);
|
||||
|
||||
srslte_tdec_decision(h, output, long_cb);
|
||||
srslte_tdec_decision_byte(h, output, long_cb);
|
||||
|
||||
return SRSLTE_SUCCESS;
|
||||
}
|
||||
|
|
|
@ -113,7 +113,7 @@ int main(int argc, char **argv) {
|
|||
uint32_t frame_cnt;
|
||||
float *llr;
|
||||
uint8_t *llr_c;
|
||||
uint8_t *data_tx, *data_rx, *symbols;
|
||||
uint8_t *data_tx, *data_rx, *data_rx_bytes, *symbols;
|
||||
uint32_t i, j;
|
||||
float var[SNR_POINTS];
|
||||
uint32_t snr_points;
|
||||
|
@ -157,6 +157,11 @@ int main(int argc, char **argv) {
|
|||
perror("malloc");
|
||||
exit(-1);
|
||||
}
|
||||
data_rx_bytes = srslte_vec_malloc(frame_length * sizeof(uint8_t));
|
||||
if (!data_rx_bytes) {
|
||||
perror("malloc");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
symbols = srslte_vec_malloc(coded_length * sizeof(uint8_t));
|
||||
if (!symbols) {
|
||||
|
@ -248,12 +253,14 @@ int main(int argc, char **argv) {
|
|||
|
||||
gettimeofday(&tdata[1], NULL);
|
||||
for (int k=0;k<nof_repetitions;k++) {
|
||||
srslte_tdec_run_all(&tdec, llr, data_rx, t, frame_length);
|
||||
srslte_tdec_run_all(&tdec, llr, data_rx_bytes, t, frame_length);
|
||||
}
|
||||
gettimeofday(&tdata[2], NULL);
|
||||
get_time_interval(tdata);
|
||||
mean_usec = (float) mean_usec * 0.9 + (float) (tdata[0].tv_usec/nof_repetitions) * 0.1;
|
||||
|
||||
srslte_bit_unpack_vector(data_rx_bytes, data_rx, frame_length);
|
||||
|
||||
errors += srslte_bit_diff(data_tx, data_rx, frame_length);
|
||||
|
||||
gettimeofday(&tdata[1], NULL);
|
||||
|
|
|
@ -228,7 +228,7 @@ int main(int argc, char **argv) {
|
|||
int r = srslte_pdsch_decode(&pdsch, &pdsch_cfg, &softbuffer_rx, slot_symbols[0], ce, 0, data);
|
||||
gettimeofday(&t[2], NULL);
|
||||
get_time_interval(t);
|
||||
printf("DECODED %d in %d:%d (%.2f Mbps)\n", r?"Error":"OK",
|
||||
printf("DECODED %s in %d:%d (%.2f Mbps)\n", r?"Error":"OK",
|
||||
(int) t[0].tv_sec, (int) t[0].tv_usec, (float) grant.mcs.tbs/t[0].tv_usec);
|
||||
if (r) {
|
||||
ret = -1;
|
||||
|
|
|
@ -241,26 +241,30 @@ void srslte_vec_sc_prod_ccc(cf_t *x, cf_t h, cf_t *z, uint32_t len) {
|
|||
}
|
||||
|
||||
void srslte_vec_convert_fi(float *x, int16_t *z, float scale, uint32_t len) {
|
||||
#ifdef HAVE_VOLK_CONVERT_FI_FUNCTION
|
||||
volk_32f_s32f_convert_16i(z, x, scale, len);
|
||||
#else
|
||||
#ifndef HAVE_VECTOR_SIMD
|
||||
int i;
|
||||
for (i=0;i<len;i++) {
|
||||
z[i] = (int16_t) (x[i]*scale);
|
||||
}
|
||||
#else
|
||||
srslte_vec_convert_fi_simd(x, z, scale, len);
|
||||
#endif
|
||||
}
|
||||
|
||||
void srslte_vec_lut_fuf(float *x, uint32_t *lut, float *y, uint32_t len) {
|
||||
for (int i=0;i<len;i++) {
|
||||
y[i] = x[lut[i]];
|
||||
y[lut[i]] = x[i];
|
||||
}
|
||||
}
|
||||
|
||||
void srslte_vec_lut_sss(short *x, unsigned short *lut, short *y, uint32_t len) {
|
||||
#ifndef HAVE_VECTOR_SIMD
|
||||
for (int i=0;i<len;i++) {
|
||||
y[i] = x[lut[i]];
|
||||
y[lut[i]] = x[i];
|
||||
}
|
||||
#else
|
||||
srslte_vec_lut_sss_simd(x, lut, y, len);
|
||||
#endif
|
||||
}
|
||||
|
||||
void srslte_vec_interleave_cf(float *real, float *imag, cf_t *x, uint32_t len) {
|
||||
|
|
|
@ -135,3 +135,82 @@ void srslte_vec_sc_div2_sss_simd(short *x, int k, short *z, uint32_t len)
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Scatters 16-bit samples through a lookup table: y[lut[i]] = x[i] for every
 * i in [0, len).
 *
 * Samples are processed in groups of eight using 128-bit loads; any remainder
 * (len % 8) is handled by a scalar loop.
 *
 * @param x   Input samples. Read with _mm_load_si128, so the buffer must be
 *            16-byte aligned.
 * @param lut Destination index in y for each input sample. Read with
 *            _mm_load_si128, so the buffer must be 16-byte aligned.
 * @param y   Output buffer. Must be large enough to hold every index found in
 *            lut; entries whose index never appears in lut are left untouched.
 * @param len Number of samples in x and lut.
 */
void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t len)
{
  const unsigned int points = len / 8;

  const __m128i *xPtr   = (const __m128i*) x;
  const __m128i *lutPtr = (const __m128i*) lut;

  /* _mm_extract_epi16 encodes the lane selector as an immediate operand, so it
   * must be a compile-time constant. Indexing it with a loop counter only
   * builds when the optimiser happens to unroll the loop, hence the explicit
   * per-lane expansion below. */
#define LUT_SSS_SCATTER_LANE(lane) \
  y[(uint16_t) _mm_extract_epi16(lutVal, lane)] = (short) _mm_extract_epi16(xVal, lane)

  for (unsigned int number = 0; number < points; number++) {
    const __m128i xVal   = _mm_load_si128(xPtr);
    const __m128i lutVal = _mm_load_si128(lutPtr);

    LUT_SSS_SCATTER_LANE(0);
    LUT_SSS_SCATTER_LANE(1);
    LUT_SSS_SCATTER_LANE(2);
    LUT_SSS_SCATTER_LANE(3);
    LUT_SSS_SCATTER_LANE(4);
    LUT_SSS_SCATTER_LANE(5);
    LUT_SSS_SCATTER_LANE(6);
    LUT_SSS_SCATTER_LANE(7);

    xPtr++;
    lutPtr++;
  }

#undef LUT_SSS_SCATTER_LANE

  /* Scalar tail for the last len % 8 samples. */
  for (unsigned int number = points * 8; number < len; number++) {
    y[lut[number]] = x[number];
  }
}
|
||||
|
||||
/* Modified from volk_32f_s32f_convert_16i_a_sse2.
 *
 * Converts len floats to 16-bit integers: z[i] = round(x[i] * scale).
 *
 * The vector path has no explicit min/max clamp; out-of-range results are
 * saturated to [-32768, 32767] by _mm_packs_epi32. The scalar tail clamps
 * explicitly to the same range. Both paths round according to the current
 * SSE rounding mode.
 *
 * x is read with _mm_load_ps and z is written with _mm_store_si128, so both
 * buffers must be 16-byte aligned.
 *
 * @param x     Input samples.
 * @param z     Output buffer with room for len values.
 * @param scale Multiplier applied to every sample before conversion.
 * @param len   Number of samples to convert.
 */
void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len)
{
  const unsigned int eighthPoints = len / 8;

  const float *inputVectorPtr  = (const float*) x;
  int16_t     *outputVectorPtr = z;

  const float min_val = -32768;
  const float max_val = 32767;

  const __m128 vScalar = _mm_set_ps1(scale);

  for (unsigned int number = 0; number < eighthPoints; number++) {
    const __m128 inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
    const __m128 inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;

    // Scale, then round to 32-bit integers
    const __m128i intInputVal1 = _mm_cvtps_epi32(_mm_mul_ps(inputVal1, vScalar));
    const __m128i intInputVal2 = _mm_cvtps_epi32(_mm_mul_ps(inputVal2, vScalar));

    // Narrow to 16 bits with signed saturation
    _mm_store_si128((__m128i*) outputVectorPtr, _mm_packs_epi32(intInputVal1, intInputVal2));
    outputVectorPtr += 8;
  }

  // Scalar tail for the last len % 8 samples
  for (unsigned int number = eighthPoints * 8; number < len; number++) {
    float r = x[number] * scale;
    if (r > max_val) {
      r = max_val;
    } else if (r < min_val) {
      r = min_val;
    }
    // Same rounding instruction family as the vector path, so both agree
    z[number] = (int16_t) _mm_cvtss_si32(_mm_set_ss(r));
  }
}
|
Loading…
Reference in New Issue