diff --git a/lib/include/srslte/phy/fec/turbodecoder.h b/lib/include/srslte/phy/fec/turbodecoder.h index 24e38d09e..d42a20ae6 100644 --- a/lib/include/srslte/phy/fec/turbodecoder.h +++ b/lib/include/srslte/phy/fec/turbodecoder.h @@ -102,17 +102,14 @@ SRSLTE_API int srslte_tdec_run_all(srslte_tdec_t * h, SRSLTE_API void srslte_tdec_iteration_par(srslte_tdec_t * h, int16_t* input[SRSLTE_TDEC_NPAR], - uint32_t nof_cb, uint32_t long_cb); SRSLTE_API void srslte_tdec_decision_par(srslte_tdec_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], - uint32_t nof_cb, uint32_t long_cb); SRSLTE_API void srslte_tdec_decision_byte_par(srslte_tdec_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], - uint32_t nof_cb, uint32_t long_cb); SRSLTE_API void srslte_tdec_decision_byte_par_cb(srslte_tdec_t * h, @@ -124,7 +121,6 @@ SRSLTE_API int srslte_tdec_run_all_par(srslte_tdec_t * h, int16_t * input[SRSLTE_TDEC_NPAR], uint8_t *output[SRSLTE_TDEC_NPAR], uint32_t nof_iterations, - uint32_t nof_cb, uint32_t long_cb); #endif diff --git a/lib/src/phy/fec/test/turbodecoder_test.c b/lib/src/phy/fec/test/turbodecoder_test.c index 839bc4202..df1d4b884 100644 --- a/lib/src/phy/fec/test/turbodecoder_test.c +++ b/lib/src/phy/fec/test/turbodecoder_test.c @@ -258,13 +258,17 @@ int main(int argc, char **argv) { uint8_t *output[SRSLTE_TDEC_NPAR]; for (int n=0;ntdec_simd, input, nof_cb, long_cb); + srslte_tdec_simd_iteration(&h->tdec_simd, input, long_cb); #else srslte_vec_convert_if(input[0], h->input_conv, 0.01, 3*long_cb+12); srslte_tdec_gen_iteration(&h->tdec_gen, h->input_conv, long_cb); @@ -103,12 +103,12 @@ void srslte_tdec_iteration_par(srslte_tdec_t * h, int16_t* input[SRSLTE_TDEC_NPA void srslte_tdec_iteration(srslte_tdec_t * h, int16_t* input, uint32_t long_cb) { int16_t *input_par[SRSLTE_TDEC_NPAR]; input_par[0] = input; - return srslte_tdec_iteration_par(h, input_par, 1, long_cb); + return srslte_tdec_iteration_par(h, input_par, long_cb); } -void srslte_tdec_decision_par(srslte_tdec_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], uint32_t nof_cb, uint32_t long_cb) { +void srslte_tdec_decision_par(srslte_tdec_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], uint32_t long_cb) { #ifdef LV_HAVE_SSE - return srslte_tdec_simd_decision(&h->tdec_simd, output, nof_cb, long_cb); + return srslte_tdec_simd_decision(&h->tdec_simd, output, long_cb); #else return srslte_tdec_gen_decision(&h->tdec_gen, output[0], long_cb); #endif @@ -117,12 +117,12 @@ void srslte_tdec_decision_par(srslte_tdec_t * h, uint8_t *output[SRSLTE_TDEC_NPA void srslte_tdec_decision(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) { uint8_t *output_par[SRSLTE_TDEC_NPAR]; output_par[0] = output; - srslte_tdec_decision_par(h, output_par, 1, long_cb); + srslte_tdec_decision_par(h, output_par, long_cb); } -void srslte_tdec_decision_byte_par(srslte_tdec_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], uint32_t nof_cb, uint32_t long_cb) { +void srslte_tdec_decision_byte_par(srslte_tdec_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], uint32_t long_cb) { #ifdef LV_HAVE_SSE - srslte_tdec_simd_decision_byte(&h->tdec_simd, output, nof_cb, long_cb); + srslte_tdec_simd_decision_byte(&h->tdec_simd, output, long_cb); #else srslte_tdec_gen_decision_byte(&h->tdec_gen, output[0], long_cb); #endif @@ -139,14 +139,14 @@ void srslte_tdec_decision_byte_par_cb(srslte_tdec_t * h, uint8_t *output, uint32 void srslte_tdec_decision_byte(srslte_tdec_t * h, uint8_t *output, uint32_t long_cb) { uint8_t *output_par[SRSLTE_TDEC_NPAR]; output_par[0] = output; - srslte_tdec_decision_byte_par(h, output_par, 1, long_cb); + srslte_tdec_decision_byte_par(h, output_par, long_cb); } int srslte_tdec_run_all_par(srslte_tdec_t * h, int16_t * input[SRSLTE_TDEC_NPAR], uint8_t *output[SRSLTE_TDEC_NPAR], - uint32_t nof_iterations, uint32_t nof_cb, uint32_t long_cb) { + uint32_t nof_iterations, uint32_t long_cb) { #ifdef LV_HAVE_SSE - return srslte_tdec_simd_run_all(&h->tdec_simd, input, output, nof_iterations, nof_cb, long_cb); + return srslte_tdec_simd_run_all(&h->tdec_simd, input, output, nof_iterations, long_cb); #else srslte_vec_convert_if(input[0], h->input_conv, 0.01, 3*long_cb+12); return srslte_tdec_gen_run_all(&h->tdec_gen, h->input_conv, output[0], nof_iterations, long_cb); @@ -160,7 +160,7 @@ int srslte_tdec_run_all(srslte_tdec_t * h, int16_t * input, uint8_t *output, uin int16_t *input_par[SRSLTE_TDEC_NPAR]; input_par[0] = input; - return srslte_tdec_run_all_par(h, input_par, output_par, nof_iterations, 1, long_cb); + return srslte_tdec_run_all_par(h, input_par, output_par, nof_iterations, long_cb); } diff --git a/lib/src/phy/fec/turbodecoder_avx.c b/lib/src/phy/fec/turbodecoder_avx.c index e25f3a1e7..2a2f6f925 100644 --- a/lib/src/phy/fec/turbodecoder_avx.c +++ b/lib/src/phy/fec/turbodecoder_avx.c @@ -175,8 +175,8 @@ void map_avx_beta(map_gen_t * s, int16_t * output[SRSLTE_TDEC_NPAR], uint32_t lo bn = _mm256_add_epi16(bn, alpha_k);\ bn = _mm256_sub_epi16(_mm256_set1_epi16(0x7FFF), bn);\ bp = _mm256_sub_epi16(_mm256_set1_epi16(0x7FFF), bp);\ - if (output[0]) output[0][k-d] = hMax0(bn) - hMax0(bp);\ - if (output[1]) output[1][k-d] = hMax1(bn) - hMax1(bp); + output[0][k-d] = hMax0(bn) - hMax0(bp);\ + output[1][k-d] = hMax1(bn) - hMax1(bp); /* The tail does not require to load alpha or produce outputs. Only update * beta metrics accordingly */ diff --git a/lib/src/phy/fec/turbodecoder_simd.c b/lib/src/phy/fec/turbodecoder_simd.c index 24f550f08..e245c84a4 100644 --- a/lib/src/phy/fec/turbodecoder_simd.c +++ b/lib/src/phy/fec/turbodecoder_simd.c @@ -129,19 +129,38 @@ void map_simd_free(map_gen_t * h) /* Runs one instance of a decoder */ void map_simd_dec(map_gen_t * h, int16_t * input[SRSLTE_TDEC_NPAR], int16_t *app[SRSLTE_TDEC_NPAR], int16_t * parity[SRSLTE_TDEC_NPAR], - int16_t *output[SRSLTE_TDEC_NPAR], uint32_t nof_cb, uint32_t long_cb) + int16_t *output[SRSLTE_TDEC_NPAR], uint32_t cb_mask, uint32_t long_cb) { + uint32_t nof_cb = 1; + int16_t *outptr[SRSLTE_TDEC_NPAR]; + // Compute branch metrics - for (int i=0;iinterleaver[i], srslte_cbsegm_cbsize(i)); } h->current_cbidx = -1; + h->cb_mask = 0; ret = 0; clean_and_exit:if (ret == -1) { srslte_tdec_simd_free(h); @@ -334,7 +354,7 @@ void deinterleave_input_simd(srslte_tdec_simd_t *h, int16_t *input, uint32_t cbi } /* Runs 1 turbo decoder iteration */ -void srslte_tdec_simd_iteration(srslte_tdec_simd_t * h, int16_t * input[SRSLTE_TDEC_NPAR], uint32_t nof_cb, uint32_t long_cb) +void srslte_tdec_simd_iteration(srslte_tdec_simd_t * h, int16_t * input[SRSLTE_TDEC_NPAR], uint32_t long_cb) { int16_t *tmp_app[SRSLTE_TDEC_NPAR]; @@ -343,47 +363,64 @@ void srslte_tdec_simd_iteration(srslte_tdec_simd_t * h, int16_t * input[SRSLTE_T uint16_t *inter = h->interleaver[h->current_cbidx].forward; uint16_t *deinter = h->interleaver[h->current_cbidx].reverse; - for (int i=0;icb_mask = (input[0]?1:0) | (input[1]?2:0); +#else + h->cb_mask = input[0]?1:0; +#endif + + for (int i=0;imax_par_cb;i++) { if (h->n_iter[i] == 0 && input[i]) { + //printf("deinterleaveing %d\n",i); deinterleave_input_simd(h, input[i], i, long_cb); } } // Add apriori information to decoder 1 - for (int i=0;in_iter[i] > 0) { + for (int i=0;imax_par_cb;i++) { + if (h->n_iter[i] > 0 && input[i]) { srslte_vec_sub_sss(h->app1[i], h->ext1[i], h->app1[i], long_cb); } } // Run MAP DEC #1 for (int i=0;imax_par_cb;i++) { - tmp_app[i] = h->n_iter[i]?h->app1[i]:NULL; + if (input[i]) { + tmp_app[i] = h->n_iter[i]?h->app1[i]:NULL; + } else { + tmp_app[i] = NULL; + } } - map_simd_dec(&h->dec, h->syst, tmp_app, h->parity0, h->ext1, nof_cb, long_cb); + map_simd_dec(&h->dec, h->syst, tmp_app, h->parity0, h->ext1, h->cb_mask, long_cb); // Convert aposteriori information into extrinsic information - for (int i=0;in_iter[i] > 0) { + for (int i=0;imax_par_cb;i++) { + if (h->n_iter[i] > 0 && input[i]) { srslte_vec_sub_sss(h->ext1[i], h->app1[i], h->ext1[i], long_cb); } } // Interleave extrinsic output of DEC1 to form apriori info for decoder 2 - for (int i=0;iext1[i], deinter, h->app2[i], long_cb); + for (int i=0;imax_par_cb;i++) { + if (input[i]) { + srslte_vec_lut_sss(h->ext1[i], deinter, h->app2[i], long_cb); + } } // Run MAP DEC #2. 2nd decoder uses apriori information as systematic bits - map_simd_dec(&h->dec, h->app2, NULL, h->parity1, h->ext2, nof_cb, long_cb); + map_simd_dec(&h->dec, h->app2, NULL, h->parity1, h->ext2, h->cb_mask, long_cb); // Deinterleaved extrinsic bits become apriori info for decoder 1 - for (int i=0;iext2[i], inter, h->app1[i], long_cb); + for (int i=0;imax_par_cb;i++) { + if (input[i]) { + srslte_vec_lut_sss(h->ext2[i], inter, h->app1[i], long_cb); + } } for (int i=0;imax_par_cb;i++) { - h->n_iter[i]++; + if (input[i]) { + h->n_iter[i]++; + } } } else { fprintf(stderr, "Error CB index not set (call srslte_tdec_simd_reset() first\n"); @@ -401,6 +438,7 @@ int srslte_tdec_simd_reset(srslte_tdec_simd_t * h, uint32_t long_cb) for (int i=0;imax_par_cb;i++) { h->n_iter[i] = 0; } + h->cb_mask = 0; h->current_cbidx = srslte_cbsegm_cbindex(long_cb); if (h->current_cbidx < 0) { fprintf(stderr, "Invalid CB length %d\n", long_cb); @@ -446,9 +484,9 @@ void tdec_simd_decision(srslte_tdec_simd_t * h, uint8_t *output, uint32_t cbidx, } } -void srslte_tdec_simd_decision(srslte_tdec_simd_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], uint32_t nof_cb, uint32_t long_cb) +void srslte_tdec_simd_decision(srslte_tdec_simd_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], uint32_t long_cb) { - for (int i=0;imax_par_cb;i++) { tdec_simd_decision(h, output[i], i, long_cb); } } @@ -472,9 +510,9 @@ void srslte_tdec_simd_decision_byte_cb(srslte_tdec_simd_t * h, uint8_t *output, } } -void srslte_tdec_simd_decision_byte(srslte_tdec_simd_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], uint32_t nof_cb, uint32_t long_cb) +void srslte_tdec_simd_decision_byte(srslte_tdec_simd_t * h, uint8_t *output[SRSLTE_TDEC_NPAR], uint32_t long_cb) { - for (int i=0;imax_par_cb;i++) { srslte_tdec_simd_decision_byte_cb(h, output[i], i, long_cb); } } @@ -482,17 +520,17 @@ void srslte_tdec_simd_decision_byte(srslte_tdec_simd_t * h, uint8_t *output[SRSL /* Runs nof_iterations iterations and decides the output bits */ int srslte_tdec_simd_run_all(srslte_tdec_simd_t * h, int16_t * input[SRSLTE_TDEC_NPAR], uint8_t *output[SRSLTE_TDEC_NPAR], - uint32_t nof_iterations, uint32_t nof_cb, uint32_t long_cb) + uint32_t nof_iterations, uint32_t long_cb) { if (srslte_tdec_simd_reset(h, long_cb)) { return SRSLTE_ERROR; } do { - srslte_tdec_simd_iteration(h, input, nof_cb, long_cb); + srslte_tdec_simd_iteration(h, input, long_cb); } while (h->n_iter[0] < nof_iterations); - srslte_tdec_simd_decision_byte(h, output, nof_cb, long_cb); + srslte_tdec_simd_decision_byte(h, output, long_cb); return SRSLTE_SUCCESS; } diff --git a/lib/src/phy/phch/sch.c b/lib/src/phy/phch/sch.c index f08054988..dceab2506 100644 --- a/lib/src/phy/phch/sch.c +++ b/lib/src/phy/phch/sch.c @@ -321,7 +321,6 @@ bool decode_tb_cb(srslte_sch_t *q, bool cb_map[SRSLTE_MAX_CODEBLOCKS]; uint32_t cb_idx[SRSLTE_TDEC_NPAR]; - bool cb_in_use[SRSLTE_TDEC_NPAR]; int16_t *decoder_input[SRSLTE_TDEC_NPAR]; uint32_t nof_cb = cb_size_group?cb_segm->C2:cb_segm->C1; @@ -340,9 +339,8 @@ bool decode_tb_cb(srslte_sch_t *q, } for (int i=0;i0) { - uint32_t npar = SRSLTE_MIN(remaining_cb, SRSLTE_TDEC_NPAR); // Unratematch the codeblocks left to decode - for (int i=0;i 0) { // Find an unprocessed CB cb_idx[i]=first_cb; while(cb_idx[i] cb_segm->C - gamma) { - n_e2 = n_e+Qm; - rp = (cb_segm->C - gamma)*n_e + (cb_idx[i]-(cb_segm->C - gamma))*n_e2; - } - - INFO("CB %d: rp=%d, n_e=%d, i=%d\n", cb_idx[i], rp, n_e2, i); - if (srslte_rm_turbo_rx_lut(&e_bits[rp], softbuffer->buffer_f[cb_idx[i]], n_e2, cb_len_idx, rv)) { - fprintf(stderr, "Error in rate matching\n"); - return SRSLTE_ERROR; - } + if (cb_map[cb_idx[i]] == false) { + cb_map[cb_idx[i]] = true; + + uint32_t rp = cb_idx[i]*n_e; + uint32_t n_e2 = n_e; + + if (cb_idx[i] > cb_segm->C - gamma) { + n_e2 = n_e+Qm; + rp = (cb_segm->C - gamma)*n_e + (cb_idx[i]-(cb_segm->C - gamma))*n_e2; + } + + INFO("CB %d: rp=%d, n_e=%d, i=%d\n", cb_idx[i], rp, n_e2, i); + if (srslte_rm_turbo_rx_lut(&e_bits[rp], softbuffer->buffer_f[cb_idx[i]], n_e2, cb_len_idx, rv)) { + fprintf(stderr, "Error in rate matching\n"); + return SRSLTE_ERROR; + } - decoder_input[i] = softbuffer->buffer_f[cb_idx[i]]; + decoder_input[i] = softbuffer->buffer_f[cb_idx[i]]; + } } } - + // Run 1 iteration for up to TDEC_NPAR codeblocks - if (SRSLTE_TDEC_NPAR > 1) { - INFO("Processing %d CBs, index %d,%d\n", npar, cb_idx[0], cb_idx[1]); - } - srslte_tdec_iteration_par(&q->decoder, decoder_input, npar, cb_len); + srslte_tdec_iteration_par(&q->decoder, decoder_input, cb_len); q->nof_iterations = srslte_tdec_get_nof_iterations_cb(&q->decoder, 0); // Decide output bits and compute CRC - for (int i=0;idecoder, q->cb_in, i, cb_len); uint32_t len_crc; @@ -418,13 +413,14 @@ bool decode_tb_cb(srslte_sch_t *q, // Reset number of iterations for that CB in the decoder srslte_tdec_reset_cb(&q->decoder, i); remaining_cb--; - cb_in_use[i] = false; + decoder_input[i] = NULL; + cb_idx[i] = 0; // CRC is error and exceeded maximum iterations for this CB. // Early stop the whole transport block. } else if (srslte_tdec_get_nof_iterations_cb(&q->decoder, i) >= q->max_iterations) { - INFO("CB %d: Error. CB is erroneous. remaining_cb=%d, i=%d, first_cb=%d, nof_cb=%d, npar=%d\n", - cb_idx[i], remaining_cb, i, first_cb, nof_cb, npar); + INFO("CB %d: Error. CB is erroneous. remaining_cb=%d, i=%d, first_cb=%d, nof_cb=%d\n", + cb_idx[i], remaining_cb, i, first_cb, nof_cb); return false; } }