From f2b40c57aee7be0a7c828784ac47af745b4e61d7 Mon Sep 17 00:00:00 2001 From: ismagom Date: Mon, 12 Oct 2015 19:03:20 +0200 Subject: [PATCH] Soft demodulator produces 16-bit fixed point --- srslte/include/srslte/fec/turbodecoder.h | 4 +- srslte/include/srslte/modem/demod_soft.h | 5 + srslte/lib/fec/src/turbodecoder.c | 57 ++------ srslte/lib/fec/test/turbodecoder_test.c | 15 +- srslte/lib/modem/src/demod_soft.c | 174 +++++++++++++++++++++++ srslte/lib/modem/test/soft_demod_test.c | 34 ++++- srslte/lib/utils/src/vector_simd.c | 21 +-- 7 files changed, 239 insertions(+), 71 deletions(-) diff --git a/srslte/include/srslte/fec/turbodecoder.h b/srslte/include/srslte/fec/turbodecoder.h index 6a91b6178..9f892d6cc 100644 --- a/srslte/include/srslte/fec/turbodecoder.h +++ b/srslte/include/srslte/fec/turbodecoder.h @@ -84,7 +84,7 @@ SRSLTE_API void srslte_tdec_free(srslte_tdec_t * h); SRSLTE_API int srslte_tdec_reset(srslte_tdec_t * h, uint32_t long_cb); SRSLTE_API void srslte_tdec_iteration(srslte_tdec_t * h, - float * input, + llr_t * input, uint32_t long_cb); SRSLTE_API void srslte_tdec_decision(srslte_tdec_t * h, @@ -96,7 +96,7 @@ SRSLTE_API void srslte_tdec_decision_byte(srslte_tdec_t * h, uint32_t long_cb); SRSLTE_API int srslte_tdec_run_all(srslte_tdec_t * h, - float * input, + llr_t * input, uint8_t *output, uint32_t nof_iterations, uint32_t long_cb); diff --git a/srslte/include/srslte/modem/demod_soft.h b/srslte/include/srslte/modem/demod_soft.h index 80f63b3b5..d29ffa7f3 100644 --- a/srslte/include/srslte/modem/demod_soft.h +++ b/srslte/include/srslte/modem/demod_soft.h @@ -49,6 +49,11 @@ SRSLTE_API int srslte_demod_soft_demodulate(srslte_mod_t modulation, float* llr, int nsymbols); +SRSLTE_API int srslte_demod_soft_demodulate_s(srslte_mod_t modulation, + const cf_t* symbols, + short* llr, + int nsymbols); + /* High-level API */ typedef struct SRSLTE_API { srslte_modem_table_t table; diff --git a/srslte/lib/fec/src/turbodecoder.c b/srslte/lib/fec/src/turbodecoder.c index a9ddd7d4c..b906f5aa4 100644 --- a/srslte/lib/fec/src/turbodecoder.c +++ b/srslte/lib/fec/src/turbodecoder.c @@ -47,22 +47,6 @@ #define INF 10000 #define ZERO 0 -#define SCALE 100 - -static void print128_num(__m128i var) -{ - int16_t *val = (int16_t*) &var;//can also use uint32_t instead of 16_t - printf("[%d %d %d %d %d %d %d %d]\n", - val[0], val[1], val[2], val[3], val[4], val[5], - val[6], val[7]); -} - -void print128f_num(__m128 var) -{ - float *val = (float*) &var; - printf("[%f %f %f %f]\n", - val[0], val[1], val[2], val[3]); -} /************************************************ @@ -434,11 +418,10 @@ void srslte_tdec_free(srslte_tdec_t * h) bzero(h, sizeof(srslte_tdec_t)); } -void deinterleave_input(srslte_tdec_t *h, float *input, uint32_t long_cb) { +void deinterleave_input(srslte_tdec_t *h, short *input, uint32_t long_cb) { uint32_t i; - float *inputPtr = input; - __m128 inf0, inf1, inf2, inf3, inf4, inf5; + __m128i *inputPtr = (__m128i*) input; __m128i in0, in1, in2; __m128i s0, s1, s2, s; __m128i p00, p01, p02, p0; @@ -468,30 +451,14 @@ void deinterleave_input(srslte_tdec_t *h, float *input, uint32_t long_cb) { __m128i p11_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,13,12,7,6,1,0,0xff,0xff,0xff,0xff); // pick bits 1, 4, 7 from 3rd word __m128i p12_mask = _mm_set_epi8(15,14,9,8,3,2,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff); - - __m128 vScalar = _mm_set1_ps(SCALE); - + // Split systematic and parity bits for (i = 0; i < long_cb/8; i++) { - inf0 = _mm_load_ps(inputPtr); inputPtr+=4; - inf1 = _mm_load_ps(inputPtr); inputPtr+=4; - inf2 = _mm_load_ps(inputPtr); inputPtr+=4; - inf3 = _mm_load_ps(inputPtr); inputPtr+=4; - inf4 = _mm_load_ps(inputPtr); inputPtr+=4; - inf5 = _mm_load_ps(inputPtr); inputPtr+=4; - - inf0 = _mm_mul_ps(inf0, vScalar); - inf1 = _mm_mul_ps(inf1, vScalar); - inf2 = _mm_mul_ps(inf2, vScalar); - inf3 = _mm_mul_ps(inf3, vScalar); - inf4 = _mm_mul_ps(inf4, vScalar); - inf5 = _mm_mul_ps(inf5, vScalar); + in0 = _mm_load_si128(inputPtr); inputPtr++; + in1 = _mm_load_si128(inputPtr); inputPtr++; + in2 = _mm_load_si128(inputPtr); inputPtr++; - in0 = _mm_packs_epi32(_mm_cvtps_epi32(inf0), _mm_cvtps_epi32(inf1)); - in1 = _mm_packs_epi32(_mm_cvtps_epi32(inf2), _mm_cvtps_epi32(inf3)); - in2 = _mm_packs_epi32(_mm_cvtps_epi32(inf4), _mm_cvtps_epi32(inf5)); - /* Deinterleave Systematic bits */ s0 = _mm_shuffle_epi8(in0, s0_mask); s1 = _mm_shuffle_epi8(in1, s1_mask); @@ -525,17 +492,17 @@ void deinterleave_input(srslte_tdec_t *h, float *input, uint32_t long_cb) { } for (i = 0; i < 3; i++) { - h->syst[i+long_cb] = (llr_t) SCALE*input[3*long_cb + 2*i]; - h->parity0[i+long_cb] = (llr_t) SCALE*input[3*long_cb + 2*i + 1]; + h->syst[i+long_cb] = input[3*long_cb + 2*i]; + h->parity0[i+long_cb] = input[3*long_cb + 2*i + 1]; } for (i = 0; i < 3; i++) { - h->app2[i+long_cb] = (llr_t) SCALE*input[3*long_cb + 6 + 2*i]; - h->parity1[i+long_cb] = (llr_t) SCALE*input[3*long_cb + 6 + 2*i + 1]; + h->app2[i+long_cb] = input[3*long_cb + 6 + 2*i]; + h->parity1[i+long_cb] = input[3*long_cb + 6 + 2*i + 1]; } } -void srslte_tdec_iteration(srslte_tdec_t * h, float * input, uint32_t long_cb) +void srslte_tdec_iteration(srslte_tdec_t * h, short * input, uint32_t long_cb) { if (h->current_cbidx >= 0) { @@ -639,7 +606,7 @@ void srslte_tdec_decision_byte(srslte_tdec_t * h, uint8_t *output, uint32_t long } } -int srslte_tdec_run_all(srslte_tdec_t * h, float * input, uint8_t *output, +int srslte_tdec_run_all(srslte_tdec_t * h, short * input, uint8_t *output, uint32_t nof_iterations, uint32_t long_cb) { if (srslte_tdec_reset(h, long_cb)) { diff --git a/srslte/lib/fec/test/turbodecoder_test.c b/srslte/lib/fec/test/turbodecoder_test.c index 21c55869b..a9d3be2b9 100644 --- a/srslte/lib/fec/test/turbodecoder_test.c +++ b/srslte/lib/fec/test/turbodecoder_test.c @@ -112,6 +112,7 @@ void parse_args(int argc, char **argv) { int main(int argc, char **argv) { uint32_t frame_cnt; float *llr; + short *llr_s; uint8_t *llr_c; uint8_t *data_tx, *data_rx, *data_rx_bytes, *symbols; uint32_t i, j; @@ -173,6 +174,11 @@ int main(int argc, char **argv) { perror("malloc"); exit(-1); } + llr_s = srslte_vec_malloc(coded_length * sizeof(short)); + if (!llr_s) { + perror("malloc"); + exit(-1); + } llr_c = srslte_vec_malloc(coded_length * sizeof(uint8_t)); if (!llr_c) { perror("malloc"); @@ -239,7 +245,10 @@ int main(int argc, char **argv) { } srslte_ch_awgn_f(llr, llr, var[i], coded_length); - + + for (j=0;j +#include +#endif + + //#define SCALE_DEMOD16QAM +#define SCALE_SHORT_CONV 100 + +void demod_bpsk_lte_s(const cf_t *symbols, short *llr, int nsymbols) { + for (int i=0;i 0) { mean_texec = SRSLTE_VEC_CMA((float) t[0].tv_usec, mean_texec, n-1); } + + gettimeofday(&t[1], NULL); + srslte_demod_soft_demodulate_s(modulation, symbols, llr_s, num_bits / mod.nbits_x_symbol); + gettimeofday(&t[2], NULL); + get_time_interval(t); + + if (n > 0) { + mean_texec_s = SRSLTE_VEC_CMA((float) t[0].tv_usec, mean_texec_s, n-1); + } if (SRSLTE_VERBOSE_ISDEBUG()) { printf("bits="); @@ -180,6 +197,10 @@ int main(int argc, char **argv) { printf("llr="); srslte_vec_fprint_f(stdout, llr, num_bits); + + printf("llr_s="); + srslte_vec_fprint_s(stdout, llr_s, num_bits); + } // Check demodulation errors @@ -200,6 +221,7 @@ clean_exit: srslte_modem_table_free(&mod); - printf("Mean Throughput: %.2f. Mbps ExTime: %.2f us\n", num_bits/mean_texec, mean_texec); + printf("Mean Throughput: %.2f/%.2f. Mbps ExTime: %.2f/%.2f us\n", + num_bits/mean_texec, num_bits/mean_texec_s, mean_texec, mean_texec_s); exit(ret); } diff --git a/srslte/lib/utils/src/vector_simd.c b/srslte/lib/utils/src/vector_simd.c index f979b6e2a..d62fe75f9 100644 --- a/srslte/lib/utils/src/vector_simd.c +++ b/srslte/lib/utils/src/vector_simd.c @@ -168,35 +168,31 @@ void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t l /* Modified from volk_32f_s32f_convert_16i_a_sse2. Removed clipping */ void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len) { - unsigned int number = 0; + unsigned int number = 0; const unsigned int eighthPoints = len / 8; const float* inputVectorPtr = (const float*)x; int16_t* outputVectorPtr = z; - float min_val = -32768; - float max_val = 32767; - float r; - __m128 vScalar = _mm_set_ps1(scale); __m128 inputVal1, inputVal2; __m128i intInputVal1, intInputVal2; __m128 ret1, ret2; - __m128 vmin_val = _mm_set_ps1(min_val); - __m128 vmax_val = _mm_set_ps1(max_val); for(;number < eighthPoints; number++){ inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4; inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4; - // Scale and clip ret1 = _mm_mul_ps(inputVal1, vScalar); ret2 = _mm_mul_ps(inputVal2, vScalar); intInputVal1 = _mm_cvtps_epi32(ret1); intInputVal2 = _mm_cvtps_epi32(ret2); + printf("intinput: "); print128_num(intInputVal1); + printf("intinput2: "); print128_num(intInputVal2); + intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2); _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1); @@ -204,13 +200,8 @@ void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len) } number = eighthPoints * 8; - for(; number < num_points; number++){ - r = inputVector[number] * scalar; - if(r > max_val) - r = max_val; - else if(r < min_val) - r = min_val; - outputVector[number] = (int16_t)rintf(r); + for(; number < len; number++){ + z[number] = (int16_t) (x[number] * scale); } } \ No newline at end of file