mirror of https://github.com/PentHertz/srsLTE.git
Soft demodulator produces 16-bit fixed point
This commit is contained in:
parent
19256c261e
commit
f2b40c57ae
|
@ -84,7 +84,7 @@ SRSLTE_API void srslte_tdec_free(srslte_tdec_t * h);
|
|||
SRSLTE_API int srslte_tdec_reset(srslte_tdec_t * h, uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_iteration(srslte_tdec_t * h,
|
||||
float * input,
|
||||
llr_t * input,
|
||||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API void srslte_tdec_decision(srslte_tdec_t * h,
|
||||
|
@ -96,7 +96,7 @@ SRSLTE_API void srslte_tdec_decision_byte(srslte_tdec_t * h,
|
|||
uint32_t long_cb);
|
||||
|
||||
SRSLTE_API int srslte_tdec_run_all(srslte_tdec_t * h,
|
||||
float * input,
|
||||
llr_t * input,
|
||||
uint8_t *output,
|
||||
uint32_t nof_iterations,
|
||||
uint32_t long_cb);
|
||||
|
|
|
@ -49,6 +49,11 @@ SRSLTE_API int srslte_demod_soft_demodulate(srslte_mod_t modulation,
|
|||
float* llr,
|
||||
int nsymbols);
|
||||
|
||||
/* Soft-demodulates nsymbols complex symbols into 16-bit integer values
 * written to llr.
 *
 * The implementation dispatches on modulation (BPSK, QPSK, 16QAM, 64QAM) and
 * writes 1, 2, 4 or 6 shorts per symbol respectively, so llr must hold
 * nsymbols times that many entries. Outputs are scaled by the
 * implementation's SCALE_SHORT_CONV factor (100) before conversion to short.
 *
 * The 16QAM and 64QAM implementations use aligned SSE loads on symbols and
 * aligned SSE stores on llr when built with SIMD enabled.
 *
 * Returns 0 on success, or -1 (after printing a message to stderr) when
 * modulation is not one of the four supported values.
 */
SRSLTE_API int srslte_demod_soft_demodulate_s(srslte_mod_t modulation,
|
||||
const cf_t* symbols,
|
||||
short* llr,
|
||||
int nsymbols);
|
||||
|
||||
/* High-level API */
|
||||
typedef struct SRSLTE_API {
|
||||
srslte_modem_table_t table;
|
||||
|
|
|
@ -47,22 +47,6 @@
|
|||
|
||||
#define INF 10000
|
||||
#define ZERO 0
|
||||
#define SCALE 100
|
||||
|
||||
/* Debug helper: prints the eight signed 16-bit lanes of an SSE integer
 * register to stdout, lowest lane first, formatted as "[a b c d e f g h]". */
static void print128_num(__m128i var)
{
  /* Copy the lanes into a properly typed array instead of reading the
   * register object through a casted int16_t pointer (type punning). */
  int16_t val[8];
  _mm_storeu_si128((__m128i*) val, var);

  printf("[%d %d %d %d %d %d %d %d]\n",
         val[0], val[1], val[2], val[3], val[4], val[5],
         val[6], val[7]);
}
|
||||
|
||||
/* Debug helper: prints the four single-precision lanes of an SSE register
 * to stdout, lowest lane first, formatted as "[a b c d]". */
void print128f_num(__m128 var)
{
  /* Store the lanes into a float array rather than aliasing the register
   * object through a casted float pointer. */
  float val[4];
  _mm_storeu_ps(val, var);

  printf("[%f %f %f %f]\n",
         val[0], val[1], val[2], val[3]);
}
|
||||
|
||||
|
||||
/************************************************
|
||||
|
@ -434,11 +418,10 @@ void srslte_tdec_free(srslte_tdec_t * h)
|
|||
bzero(h, sizeof(srslte_tdec_t));
|
||||
}
|
||||
|
||||
void deinterleave_input(srslte_tdec_t *h, float *input, uint32_t long_cb) {
|
||||
void deinterleave_input(srslte_tdec_t *h, short *input, uint32_t long_cb) {
|
||||
uint32_t i;
|
||||
|
||||
float *inputPtr = input;
|
||||
__m128 inf0, inf1, inf2, inf3, inf4, inf5;
|
||||
__m128i *inputPtr = (__m128i*) input;
|
||||
__m128i in0, in1, in2;
|
||||
__m128i s0, s1, s2, s;
|
||||
__m128i p00, p01, p02, p0;
|
||||
|
@ -468,30 +451,14 @@ void deinterleave_input(srslte_tdec_t *h, float *input, uint32_t long_cb) {
|
|||
__m128i p11_mask = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,13,12,7,6,1,0,0xff,0xff,0xff,0xff);
|
||||
// pick bits 1, 4, 7 from 3rd word
|
||||
__m128i p12_mask = _mm_set_epi8(15,14,9,8,3,2,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
|
||||
|
||||
__m128 vScalar = _mm_set1_ps(SCALE);
|
||||
|
||||
|
||||
// Split systematic and parity bits
|
||||
for (i = 0; i < long_cb/8; i++) {
|
||||
|
||||
inf0 = _mm_load_ps(inputPtr); inputPtr+=4;
|
||||
inf1 = _mm_load_ps(inputPtr); inputPtr+=4;
|
||||
inf2 = _mm_load_ps(inputPtr); inputPtr+=4;
|
||||
inf3 = _mm_load_ps(inputPtr); inputPtr+=4;
|
||||
inf4 = _mm_load_ps(inputPtr); inputPtr+=4;
|
||||
inf5 = _mm_load_ps(inputPtr); inputPtr+=4;
|
||||
|
||||
inf0 = _mm_mul_ps(inf0, vScalar);
|
||||
inf1 = _mm_mul_ps(inf1, vScalar);
|
||||
inf2 = _mm_mul_ps(inf2, vScalar);
|
||||
inf3 = _mm_mul_ps(inf3, vScalar);
|
||||
inf4 = _mm_mul_ps(inf4, vScalar);
|
||||
inf5 = _mm_mul_ps(inf5, vScalar);
|
||||
in0 = _mm_load_si128(inputPtr); inputPtr++;
|
||||
in1 = _mm_load_si128(inputPtr); inputPtr++;
|
||||
in2 = _mm_load_si128(inputPtr); inputPtr++;
|
||||
|
||||
in0 = _mm_packs_epi32(_mm_cvtps_epi32(inf0), _mm_cvtps_epi32(inf1));
|
||||
in1 = _mm_packs_epi32(_mm_cvtps_epi32(inf2), _mm_cvtps_epi32(inf3));
|
||||
in2 = _mm_packs_epi32(_mm_cvtps_epi32(inf4), _mm_cvtps_epi32(inf5));
|
||||
|
||||
/* Deinterleave Systematic bits */
|
||||
s0 = _mm_shuffle_epi8(in0, s0_mask);
|
||||
s1 = _mm_shuffle_epi8(in1, s1_mask);
|
||||
|
@ -525,17 +492,17 @@ void deinterleave_input(srslte_tdec_t *h, float *input, uint32_t long_cb) {
|
|||
}
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
h->syst[i+long_cb] = (llr_t) SCALE*input[3*long_cb + 2*i];
|
||||
h->parity0[i+long_cb] = (llr_t) SCALE*input[3*long_cb + 2*i + 1];
|
||||
h->syst[i+long_cb] = input[3*long_cb + 2*i];
|
||||
h->parity0[i+long_cb] = input[3*long_cb + 2*i + 1];
|
||||
}
|
||||
for (i = 0; i < 3; i++) {
|
||||
h->app2[i+long_cb] = (llr_t) SCALE*input[3*long_cb + 6 + 2*i];
|
||||
h->parity1[i+long_cb] = (llr_t) SCALE*input[3*long_cb + 6 + 2*i + 1];
|
||||
h->app2[i+long_cb] = input[3*long_cb + 6 + 2*i];
|
||||
h->parity1[i+long_cb] = input[3*long_cb + 6 + 2*i + 1];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void srslte_tdec_iteration(srslte_tdec_t * h, float * input, uint32_t long_cb)
|
||||
void srslte_tdec_iteration(srslte_tdec_t * h, short * input, uint32_t long_cb)
|
||||
{
|
||||
|
||||
if (h->current_cbidx >= 0) {
|
||||
|
@ -639,7 +606,7 @@ void srslte_tdec_decision_byte(srslte_tdec_t * h, uint8_t *output, uint32_t long
|
|||
}
|
||||
}
|
||||
|
||||
int srslte_tdec_run_all(srslte_tdec_t * h, float * input, uint8_t *output,
|
||||
int srslte_tdec_run_all(srslte_tdec_t * h, short * input, uint8_t *output,
|
||||
uint32_t nof_iterations, uint32_t long_cb)
|
||||
{
|
||||
if (srslte_tdec_reset(h, long_cb)) {
|
||||
|
|
|
@ -112,6 +112,7 @@ void parse_args(int argc, char **argv) {
|
|||
int main(int argc, char **argv) {
|
||||
uint32_t frame_cnt;
|
||||
float *llr;
|
||||
short *llr_s;
|
||||
uint8_t *llr_c;
|
||||
uint8_t *data_tx, *data_rx, *data_rx_bytes, *symbols;
|
||||
uint32_t i, j;
|
||||
|
@ -173,6 +174,11 @@ int main(int argc, char **argv) {
|
|||
perror("malloc");
|
||||
exit(-1);
|
||||
}
|
||||
llr_s = srslte_vec_malloc(coded_length * sizeof(short));
|
||||
if (!llr_s) {
|
||||
perror("malloc");
|
||||
exit(-1);
|
||||
}
|
||||
llr_c = srslte_vec_malloc(coded_length * sizeof(uint8_t));
|
||||
if (!llr_c) {
|
||||
perror("malloc");
|
||||
|
@ -239,7 +245,10 @@ int main(int argc, char **argv) {
|
|||
}
|
||||
|
||||
srslte_ch_awgn_f(llr, llr, var[i], coded_length);
|
||||
|
||||
|
||||
for (j=0;j<coded_length;j++) {
|
||||
llr_s[j] = (int16_t) (100*llr[j]);
|
||||
}
|
||||
/* decoder */
|
||||
srslte_tdec_reset(&tdec, frame_length);
|
||||
srslte_tdec_vl_reset(&tdec_vl, frame_length);
|
||||
|
@ -253,7 +262,7 @@ int main(int argc, char **argv) {
|
|||
|
||||
gettimeofday(&tdata[1], NULL);
|
||||
for (int k=0;k<nof_repetitions;k++) {
|
||||
srslte_tdec_run_all(&tdec, llr, data_rx_bytes, t, frame_length);
|
||||
srslte_tdec_run_all(&tdec, llr_s, data_rx_bytes, t, frame_length);
|
||||
}
|
||||
gettimeofday(&tdata[2], NULL);
|
||||
get_time_interval(tdata);
|
||||
|
@ -262,7 +271,7 @@ int main(int argc, char **argv) {
|
|||
srslte_bit_unpack_vector(data_rx_bytes, data_rx, frame_length);
|
||||
|
||||
errors += srslte_bit_diff(data_tx, data_rx, frame_length);
|
||||
|
||||
|
||||
gettimeofday(&tdata[1], NULL);
|
||||
for (int k=0;k<nof_repetitions;k++) {
|
||||
srslte_tdec_vl_run_all(&tdec_vl, llr, data_rx, t, frame_length);
|
||||
|
|
|
@ -33,8 +33,23 @@
|
|||
#include "srslte/utils/bit.h"
|
||||
#include "srslte/modem/demod_soft.h"
|
||||
|
||||
#define HAVE_SIMD
|
||||
|
||||
#ifdef HAVE_SIMD
|
||||
#include <xmmintrin.h>
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
|
||||
//#define SCALE_DEMOD16QAM
|
||||
|
||||
#define SCALE_SHORT_CONV 100
|
||||
|
||||
void demod_bpsk_lte_s(const cf_t *symbols, short *llr, int nsymbols) {
|
||||
for (int i=0;i<nsymbols;i++) {
|
||||
llr[i] = (short) -SCALE_SHORT_CONV*(crealf(symbols[i]) + cimagf(symbols[i]))/sqrt(2);
|
||||
}
|
||||
}
|
||||
|
||||
void demod_bpsk_lte(const cf_t *symbols, float *llr, int nsymbols) {
|
||||
for (int i=0;i<nsymbols;i++) {
|
||||
|
@ -42,6 +57,11 @@ void demod_bpsk_lte(const cf_t *symbols, float *llr, int nsymbols) {
|
|||
}
|
||||
}
|
||||
|
||||
void demod_qpsk_lte_s(const cf_t *symbols, short *llr, int nsymbols) {
|
||||
srslte_vec_fprint_f(stdout, (float*) symbols, nsymbols*2);
|
||||
srslte_vec_convert_fi((float*) symbols, llr, -SCALE_SHORT_CONV*sqrt(2), nsymbols*2);
|
||||
}
|
||||
|
||||
/* QPSK soft demodulation to floats.
 *
 * Views the complex symbol array as nsymbols*2 consecutive floats and
 * multiplies each by -sqrt(2) via srslte_vec_sc_prod_fff, writing the
 * products to llr.
 */
void demod_qpsk_lte(const cf_t *symbols, float *llr, int nsymbols) {
  float *iq = (float*) symbols;
  const int nof_values = nsymbols*2;

  srslte_vec_sc_prod_fff(iq, -sqrt(2), llr, nof_values);
}
|
||||
|
@ -79,6 +99,62 @@ void demod_16qam_lte(const cf_t *symbols, float *llr, int nsymbols) {
|
|||
}
|
||||
}
|
||||
|
||||
void demod_16qam_lte_s(const cf_t *symbols, short *llr, int nsymbols) {
|
||||
#ifndef HAVE_SIMD
|
||||
for (int i=0;i<nsymbols;i++) {
|
||||
short yre = (short) (SCALE_SHORT_CONV*crealf(symbols[i]));
|
||||
short yim = (short) (SCALE_SHORT_CONV*cimagf(symbols[i]));
|
||||
|
||||
llr[4*i+0] = -yre;
|
||||
llr[4*i+1] = -yim;
|
||||
llr[4*i+2] = abs(yre)-2*SCALE_SHORT_CONV/sqrt(10);
|
||||
llr[4*i+3] = abs(yim)-2*SCALE_SHORT_CONV/sqrt(10);
|
||||
}
|
||||
#else
|
||||
|
||||
float *symbolsPtr = (float*) symbols;
|
||||
__m128i *resultPtr = (__m128i*) llr;
|
||||
__m128 symbol1, symbol2;
|
||||
__m128i symbol_i1, symbol_i2, symbol_i, symbol_abs;
|
||||
__m128i offset = _mm_set1_epi16(2*SCALE_SHORT_CONV/sqrt(10));
|
||||
__m128i result11, result12, result22, result21;
|
||||
__m128 scale_v = _mm_set1_ps(-SCALE_SHORT_CONV);
|
||||
__m128i shuffle_negated_1 = _mm_set_epi8(0xff,0xff,0xff,0xff,7,6,5,4,0xff,0xff,0xff,0xff,3,2,1,0);
|
||||
__m128i shuffle_negated_2 = _mm_set_epi8(0xff,0xff,0xff,0xff,15,14,13,12,0xff,0xff,0xff,0xff,11,10,9,8);
|
||||
__m128i shuffle_abs_1 = _mm_set_epi8(7,6,5,4,0xff,0xff,0xff,0xff,3,2,1,0,0xff,0xff,0xff,0xff);
|
||||
__m128i shuffle_abs_2 = _mm_set_epi8(15,14,13,12,0xff,0xff,0xff,0xff,11,10,9,8,0xff,0xff,0xff,0xff);
|
||||
for (int i=0;i<nsymbols/4;i++) {
|
||||
symbol1 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
|
||||
symbol2 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
|
||||
symbol_i1 = _mm_cvtps_epi32(_mm_mul_ps(symbol1, scale_v));
|
||||
symbol_i2 = _mm_cvtps_epi32(_mm_mul_ps(symbol2, scale_v));
|
||||
symbol_i = _mm_packs_epi32(symbol_i1, symbol_i2);
|
||||
|
||||
symbol_abs = _mm_abs_epi16(symbol_i);
|
||||
symbol_abs = _mm_sub_epi16(symbol_abs, offset);
|
||||
|
||||
result11 = _mm_shuffle_epi8(symbol_i, shuffle_negated_1);
|
||||
result12 = _mm_shuffle_epi8(symbol_abs, shuffle_abs_1);
|
||||
|
||||
result21 = _mm_shuffle_epi8(symbol_i, shuffle_negated_2);
|
||||
result22 = _mm_shuffle_epi8(symbol_abs, shuffle_abs_2);
|
||||
|
||||
_mm_store_si128(resultPtr, _mm_or_si128(result11, result12)); resultPtr++;
|
||||
_mm_store_si128(resultPtr, _mm_or_si128(result21, result22)); resultPtr++;
|
||||
}
|
||||
// Demodulate last symbols
|
||||
for (int i=4*(nsymbols/4);i<nsymbols;i++) {
|
||||
short yre = (short) (SCALE_SHORT_CONV*crealf(symbols[i]));
|
||||
short yim = (short) (SCALE_SHORT_CONV*cimagf(symbols[i]));
|
||||
|
||||
llr[4*i+0] = -yre;
|
||||
llr[4*i+1] = -yim;
|
||||
llr[4*i+2] = abs(yre)-2*SCALE_SHORT_CONV/sqrt(10);
|
||||
llr[4*i+3] = abs(yim)-2*SCALE_SHORT_CONV/sqrt(10);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void demod_64qam_lte(const cf_t *symbols, float *llr, int nsymbols)
|
||||
{
|
||||
for (int i=0;i<nsymbols;i++) {
|
||||
|
@ -95,6 +171,83 @@ void demod_64qam_lte(const cf_t *symbols, float *llr, int nsymbols)
|
|||
|
||||
}
|
||||
|
||||
void demod_64qam_lte_s(const cf_t *symbols, short *llr, int nsymbols)
|
||||
{
|
||||
#ifndef HAVE_SIMD
|
||||
for (int i=0;i<nsymbols;i++) {
|
||||
float yre = (short) (SCALE_SHORT_CONV*crealf(symbols[i]));
|
||||
float yim = (short) (SCALE_SHORT_CONV*cimagf(symbols[i]));
|
||||
|
||||
llr[6*i+0] = -yre;
|
||||
llr[6*i+1] = -yim;
|
||||
llr[6*i+2] = abs(yre)-4*SCALE_SHORT_CONV/sqrt(42);
|
||||
llr[6*i+3] = abs(yim)-4*SCALE_SHORT_CONV/sqrt(42);
|
||||
llr[6*i+4] = abs(llr[6*i+2])-2*SCALE_SHORT_CONV/sqrt(42);
|
||||
llr[6*i+5] = abs(llr[6*i+3])-2*SCALE_SHORT_CONV/sqrt(42);
|
||||
}
|
||||
#else
|
||||
float *symbolsPtr = (float*) symbols;
|
||||
__m128i *resultPtr = (__m128i*) llr;
|
||||
__m128 symbol1, symbol2;
|
||||
__m128i symbol_i1, symbol_i2, symbol_i, symbol_abs, symbol_abs2;
|
||||
__m128i offset1 = _mm_set1_epi16(4*SCALE_SHORT_CONV/sqrt(42));
|
||||
__m128i offset2 = _mm_set1_epi16(2*SCALE_SHORT_CONV/sqrt(42));
|
||||
__m128 scale_v = _mm_set1_ps(-SCALE_SHORT_CONV);
|
||||
__m128i result11, result12, result13, result22, result21,result23, result31, result32, result33;
|
||||
|
||||
__m128i shuffle_negated_1 = _mm_set_epi8(7,6,5,4,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,3,2,1,0);
|
||||
__m128i shuffle_negated_2 = _mm_set_epi8(0xff,0xff,0xff,0xff,11,10,9,8,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
|
||||
__m128i shuffle_negated_3 = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,15,14,13,12,0xff,0xff,0xff,0xff);
|
||||
|
||||
__m128i shuffle_abs_1 = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,3,2,1,0,0xff,0xff,0xff,0xff);
|
||||
__m128i shuffle_abs_2 = _mm_set_epi8(11,10,9,8,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,7,6,5,4);
|
||||
__m128i shuffle_abs_3 = _mm_set_epi8(0xff,0xff,0xff,0xff,15,14,13,12,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
|
||||
|
||||
__m128i shuffle_abs2_1 = _mm_set_epi8(0xff,0xff,0xff,0xff,3,2,1,0,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff);
|
||||
__m128i shuffle_abs2_2 = _mm_set_epi8(0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,7,6,5,4,0xff,0xff,0xff,0xff);
|
||||
__m128i shuffle_abs2_3 = _mm_set_epi8(15,14,13,12,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,11,10,9,8);
|
||||
|
||||
for (int i=0;i<nsymbols/4;i++) {
|
||||
symbol1 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
|
||||
symbol2 = _mm_load_ps(symbolsPtr); symbolsPtr+=4;
|
||||
symbol_i1 = _mm_cvtps_epi32(_mm_mul_ps(symbol1, scale_v));
|
||||
symbol_i2 = _mm_cvtps_epi32(_mm_mul_ps(symbol2, scale_v));
|
||||
symbol_i = _mm_packs_epi32(symbol_i1, symbol_i2);
|
||||
|
||||
symbol_abs = _mm_abs_epi16(symbol_i);
|
||||
symbol_abs = _mm_sub_epi16(symbol_abs, offset1);
|
||||
symbol_abs2 = _mm_sub_epi16(_mm_abs_epi16(symbol_abs), offset2);
|
||||
|
||||
result11 = _mm_shuffle_epi8(symbol_i, shuffle_negated_1);
|
||||
result12 = _mm_shuffle_epi8(symbol_abs, shuffle_abs_1);
|
||||
result13 = _mm_shuffle_epi8(symbol_abs2, shuffle_abs2_1);
|
||||
|
||||
result21 = _mm_shuffle_epi8(symbol_i, shuffle_negated_2);
|
||||
result22 = _mm_shuffle_epi8(symbol_abs, shuffle_abs_2);
|
||||
result23 = _mm_shuffle_epi8(symbol_abs2, shuffle_abs2_2);
|
||||
|
||||
result31 = _mm_shuffle_epi8(symbol_i, shuffle_negated_3);
|
||||
result32 = _mm_shuffle_epi8(symbol_abs, shuffle_abs_3);
|
||||
result33 = _mm_shuffle_epi8(symbol_abs2, shuffle_abs2_3);
|
||||
|
||||
_mm_store_si128(resultPtr, _mm_or_si128(_mm_or_si128(result11, result12),result13)); resultPtr++;
|
||||
_mm_store_si128(resultPtr, _mm_or_si128(_mm_or_si128(result21, result22),result23)); resultPtr++;
|
||||
_mm_store_si128(resultPtr, _mm_or_si128(_mm_or_si128(result31, result32),result33)); resultPtr++;
|
||||
}
|
||||
for (int i=4*(nsymbols/4);i<nsymbols;i++) {
|
||||
float yre = (short) (SCALE_SHORT_CONV*crealf(symbols[i]));
|
||||
float yim = (short) (SCALE_SHORT_CONV*cimagf(symbols[i]));
|
||||
|
||||
llr[6*i+0] = -yre;
|
||||
llr[6*i+1] = -yim;
|
||||
llr[6*i+2] = abs(yre)-4*SCALE_SHORT_CONV/sqrt(42);
|
||||
llr[6*i+3] = abs(yim)-4*SCALE_SHORT_CONV/sqrt(42);
|
||||
llr[6*i+4] = abs(llr[6*i+2])-2*SCALE_SHORT_CONV/sqrt(42);
|
||||
llr[6*i+5] = abs(llr[6*i+3])-2*SCALE_SHORT_CONV/sqrt(42);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
int srslte_demod_soft_demodulate(srslte_mod_t modulation, const cf_t* symbols, float* llr, int nsymbols) {
|
||||
switch(modulation) {
|
||||
case SRSLTE_MOD_BPSK:
|
||||
|
@ -116,6 +269,27 @@ int srslte_demod_soft_demodulate(srslte_mod_t modulation, const cf_t* symbols, f
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Soft-demodulates nsymbols symbols into 16-bit values in llr, selecting the
 * per-modulation routine (BPSK, QPSK, 16QAM or 64QAM) from modulation.
 *
 * Returns 0 after running the selected routine, or -1 after reporting an
 * unsupported modulation value on stderr (llr is left untouched then).
 */
int srslte_demod_soft_demodulate_s(srslte_mod_t modulation, const cf_t* symbols, short* llr, int nsymbols) {
  void (*demod_fn)(const cf_t *, short *, int) = NULL;

  switch(modulation) {
    case SRSLTE_MOD_BPSK:  demod_fn = demod_bpsk_lte_s;  break;
    case SRSLTE_MOD_QPSK:  demod_fn = demod_qpsk_lte_s;  break;
    case SRSLTE_MOD_16QAM: demod_fn = demod_16qam_lte_s; break;
    case SRSLTE_MOD_64QAM: demod_fn = demod_64qam_lte_s; break;
    default:
      break;
  }

  if (demod_fn == NULL) {
    fprintf(stderr, "Invalid modulation %d\n", modulation);
    return -1;
  }

  demod_fn(symbols, llr, nsymbols);
  return 0;
}
|
||||
|
||||
|
||||
/* High-Level API */
|
||||
int srslte_demod_soft_initialize(srslte_demod_soft_hl* hl) {
|
||||
|
|
|
@ -112,6 +112,7 @@ int main(int argc, char **argv) {
|
|||
uint8_t *input, *output;
|
||||
cf_t *symbols;
|
||||
float *llr;
|
||||
short *llr_s;
|
||||
|
||||
parse_args(argc, argv);
|
||||
|
||||
|
@ -125,34 +126,41 @@ int main(int argc, char **argv) {
|
|||
num_bits = mod.nbits_x_symbol * (num_bits / mod.nbits_x_symbol);
|
||||
|
||||
/* allocate buffers */
|
||||
input = malloc(sizeof(uint8_t) * num_bits);
|
||||
input = srslte_vec_malloc(sizeof(uint8_t) * num_bits);
|
||||
if (!input) {
|
||||
perror("malloc");
|
||||
exit(-1);
|
||||
}
|
||||
output = malloc(sizeof(uint8_t) * num_bits);
|
||||
output = srslte_vec_malloc(sizeof(uint8_t) * num_bits);
|
||||
if (!output) {
|
||||
perror("malloc");
|
||||
exit(-1);
|
||||
}
|
||||
symbols = malloc(sizeof(cf_t) * num_bits / mod.nbits_x_symbol);
|
||||
symbols = srslte_vec_malloc(sizeof(cf_t) * num_bits / mod.nbits_x_symbol);
|
||||
if (!symbols) {
|
||||
perror("malloc");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
llr = malloc(sizeof(float) * num_bits);
|
||||
llr = srslte_vec_malloc(sizeof(float) * num_bits);
|
||||
if (!llr) {
|
||||
perror("malloc");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
llr_s = srslte_vec_malloc(sizeof(short) * num_bits);
|
||||
if (!llr_s) {
|
||||
perror("malloc");
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
/* generate random data */
|
||||
srand(0);
|
||||
|
||||
int ret = -1;
|
||||
struct timeval t[3];
|
||||
float mean_texec = 0.0;
|
||||
float mean_texec_s = 0.0;
|
||||
for (int n=0;n<nof_frames;n++) {
|
||||
for (i=0;i<num_bits;i++) {
|
||||
input[i] = rand()%2;
|
||||
|
@ -165,11 +173,20 @@ int main(int argc, char **argv) {
|
|||
srslte_demod_soft_demodulate(modulation, symbols, llr, num_bits / mod.nbits_x_symbol);
|
||||
gettimeofday(&t[2], NULL);
|
||||
get_time_interval(t);
|
||||
|
||||
|
||||
/* update the running mean of the execution time (SRSLTE_VEC_CMA); the first frame is skipped */
|
||||
if (n > 0) {
|
||||
mean_texec = SRSLTE_VEC_CMA((float) t[0].tv_usec, mean_texec, n-1);
|
||||
}
|
||||
|
||||
gettimeofday(&t[1], NULL);
|
||||
srslte_demod_soft_demodulate_s(modulation, symbols, llr_s, num_bits / mod.nbits_x_symbol);
|
||||
gettimeofday(&t[2], NULL);
|
||||
get_time_interval(t);
|
||||
|
||||
if (n > 0) {
|
||||
mean_texec_s = SRSLTE_VEC_CMA((float) t[0].tv_usec, mean_texec_s, n-1);
|
||||
}
|
||||
|
||||
if (SRSLTE_VERBOSE_ISDEBUG()) {
|
||||
printf("bits=");
|
||||
|
@ -180,6 +197,10 @@ int main(int argc, char **argv) {
|
|||
|
||||
printf("llr=");
|
||||
srslte_vec_fprint_f(stdout, llr, num_bits);
|
||||
|
||||
printf("llr_s=");
|
||||
srslte_vec_fprint_s(stdout, llr_s, num_bits);
|
||||
|
||||
}
|
||||
|
||||
// Check demodulation errors
|
||||
|
@ -200,6 +221,7 @@ clean_exit:
|
|||
|
||||
srslte_modem_table_free(&mod);
|
||||
|
||||
printf("Mean Throughput: %.2f. Mbps ExTime: %.2f us\n", num_bits/mean_texec, mean_texec);
|
||||
printf("Mean Throughput: %.2f/%.2f. Mbps ExTime: %.2f/%.2f us\n",
|
||||
num_bits/mean_texec, num_bits/mean_texec_s, mean_texec, mean_texec_s);
|
||||
exit(ret);
|
||||
}
|
||||
|
|
|
@ -168,35 +168,31 @@ void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t l
|
|||
/* Modified from volk_32f_s32f_convert_16i_a_sse2. Removed clipping */
|
||||
void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len)
|
||||
{
|
||||
unsigned int number = 0;
|
||||
unsigned int number = 0;
|
||||
|
||||
const unsigned int eighthPoints = len / 8;
|
||||
|
||||
const float* inputVectorPtr = (const float*)x;
|
||||
int16_t* outputVectorPtr = z;
|
||||
|
||||
float min_val = -32768;
|
||||
float max_val = 32767;
|
||||
float r;
|
||||
|
||||
__m128 vScalar = _mm_set_ps1(scale);
|
||||
__m128 inputVal1, inputVal2;
|
||||
__m128i intInputVal1, intInputVal2;
|
||||
__m128 ret1, ret2;
|
||||
__m128 vmin_val = _mm_set_ps1(min_val);
|
||||
__m128 vmax_val = _mm_set_ps1(max_val);
|
||||
|
||||
for(;number < eighthPoints; number++){
|
||||
inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
|
||||
inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4;
|
||||
|
||||
// Scale and clip
|
||||
ret1 = _mm_mul_ps(inputVal1, vScalar);
|
||||
ret2 = _mm_mul_ps(inputVal2, vScalar);
|
||||
|
||||
intInputVal1 = _mm_cvtps_epi32(ret1);
|
||||
intInputVal2 = _mm_cvtps_epi32(ret2);
|
||||
|
||||
printf("intinput: "); print128_num(intInputVal1);
|
||||
printf("intinput2: "); print128_num(intInputVal2);
|
||||
|
||||
intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2);
|
||||
|
||||
_mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
|
||||
|
@ -204,13 +200,8 @@ void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len)
|
|||
}
|
||||
|
||||
number = eighthPoints * 8;
|
||||
for(; number < num_points; number++){
|
||||
r = inputVector[number] * scalar;
|
||||
if(r > max_val)
|
||||
r = max_val;
|
||||
else if(r < min_val)
|
||||
r = min_val;
|
||||
outputVector[number] = (int16_t)rintf(r);
|
||||
for(; number < len; number++){
|
||||
z[number] = (int16_t) (x[number] * scale);
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue