diff --git a/lib/include/srslte/phy/ch_estimation/chest_dl.h b/lib/include/srslte/phy/ch_estimation/chest_dl.h index 0bd6e9afe..c1f5579a1 100644 --- a/lib/include/srslte/phy/ch_estimation/chest_dl.h +++ b/lib/include/srslte/phy/ch_estimation/chest_dl.h @@ -79,6 +79,7 @@ typedef struct { srslte_interp_linsrslte_vec_t srslte_interp_linvec; srslte_interp_lin_t srslte_interp_lin; + srslte_interp_lin_t srslte_interp_lin_3; srslte_interp_lin_t srslte_interp_lin_mbsfn; float rssi[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS]; float rsrp[SRSLTE_MAX_PORTS][SRSLTE_MAX_PORTS]; diff --git a/lib/include/srslte/phy/utils/vector.h b/lib/include/srslte/phy/utils/vector.h index a4028e495..210d3eef0 100644 --- a/lib/include/srslte/phy/utils/vector.h +++ b/lib/include/srslte/phy/utils/vector.h @@ -150,6 +150,10 @@ SRSLTE_API void srslte_vec_abs_square_cf(const cf_t *x, float *abs_square, const /* Copy 256 bit aligned vector */ SRSLTE_API void srs_vec_cf_cpy(const cf_t *src, cf_t *dst, const int len); +SRSLTE_API void srslte_vec_interleave(const cf_t *x, const cf_t *y, cf_t *z, const int len); + +SRSLTE_API void srslte_vec_interleave_add(const cf_t *x, const cf_t *y, cf_t *z, const int len); + #ifdef __cplusplus } #endif diff --git a/lib/include/srslte/phy/utils/vector_simd.h b/lib/include/srslte/phy/utils/vector_simd.h index 54ac55f98..31725edb3 100644 --- a/lib/include/srslte/phy/utils/vector_simd.h +++ b/lib/include/srslte/phy/utils/vector_simd.h @@ -122,6 +122,9 @@ SRSLTE_API void srslte_vec_convert_fi_simd(const float *x, int16_t *z, const flo SRSLTE_API void srslte_vec_cp_simd(const cf_t *src, cf_t *dst, int len); +SRSLTE_API void srslte_vec_interleave_simd(const cf_t *x, const cf_t *y, cf_t *z, const int len); + +SRSLTE_API void srslte_vec_interleave_add_simd(const cf_t *x, const cf_t *y, cf_t *z, const int len); /* SIMD Find Max functions */ SRSLTE_API uint32_t srslte_vec_max_fi_simd(const float *x, const int len); diff --git a/lib/src/phy/ch_estimation/chest_dl.c b/lib/src/phy/ch_estimation/chest_dl.c index 113870cc9..f26c0d7f4 100644 --- a/lib/src/phy/ch_estimation/chest_dl.c +++ b/lib/src/phy/ch_estimation/chest_dl.c @@ -141,6 +141,11 @@ int srslte_chest_dl_init(srslte_chest_dl_t *q, uint32_t max_prb) goto clean_exit; } + if (srslte_interp_linear_init(&q->srslte_interp_lin_3, 4*max_prb, SRSLTE_NRE/4)) { + fprintf(stderr, "Error initializing interpolator\n"); + goto clean_exit; + } + if (srslte_interp_linear_init(&q->srslte_interp_lin_mbsfn, 6*max_prb, SRSLTE_NRE/6)) { fprintf(stderr, "Error initializing interpolator\n"); goto clean_exit; @@ -185,6 +190,7 @@ void srslte_chest_dl_free(srslte_chest_dl_t *q) } srslte_interp_linear_vector_free(&q->srslte_interp_linvec); srslte_interp_linear_free(&q->srslte_interp_lin); + srslte_interp_linear_free(&q->srslte_interp_lin_3); srslte_interp_linear_free(&q->srslte_interp_lin_mbsfn); if (q->pilot_estimates) { free(q->pilot_estimates); @@ -238,6 +244,11 @@ int srslte_chest_dl_set_cell(srslte_chest_dl_t *q, srslte_cell_t cell) return SRSLTE_ERROR; } + if (srslte_interp_linear_resize(&q->srslte_interp_lin_3, 4 * q->cell.nof_prb, SRSLTE_NRE / 4)) { + fprintf(stderr, "Error initializing interpolator\n"); + return SRSLTE_ERROR; + } + } ret = SRSLTE_SUCCESS; } @@ -337,10 +348,18 @@ static void interpolate_pilots(srslte_chest_dl_t *q, cf_t *pilot_estimates, cf_t fidx_offset, SRSLTE_NRE/6-fidx_offset); } } else { - fidx_offset = srslte_refsignal_cs_fidx(q->cell, l, port_id, 0); - srslte_interp_linear_offset(&q->srslte_interp_lin, &pilot_estimates[2*q->cell.nof_prb*l], - &ce[srslte_refsignal_cs_nsymbol(l,q->cell.cp, port_id) * q->cell.nof_prb * SRSLTE_NRE], - fidx_offset, SRSLTE_NRE/2-fidx_offset); + if (q->average_subframe) { + fidx_offset = SRSLTE_MIN(srslte_refsignal_cs_fidx(q->cell, 0, port_id, 0), + srslte_refsignal_cs_fidx(q->cell, 1, port_id, 0)); + srslte_interp_linear_offset(&q->srslte_interp_lin_3, &pilot_estimates[q->cell.nof_prb * l], + &ce[srslte_refsignal_cs_nsymbol(l, q->cell.cp, port_id) * q->cell.nof_prb + * SRSLTE_NRE], fidx_offset, SRSLTE_NRE / 4 - fidx_offset); + } else { + fidx_offset = srslte_refsignal_cs_fidx(q->cell, l, port_id, 0); + srslte_interp_linear_offset(&q->srslte_interp_lin, &pilot_estimates[2 * q->cell.nof_prb * l], + &ce[srslte_refsignal_cs_nsymbol(l, q->cell.cp, port_id) * q->cell.nof_prb + * SRSLTE_NRE], fidx_offset, SRSLTE_NRE / 2 - fidx_offset); + } } } @@ -417,11 +436,31 @@ static void average_pilots(srslte_chest_dl_t *q, cf_t *input, cf_t *output, uint // Average in the time domain if enabled if (q->average_subframe) { - for (int l=1;lcell, 0, port_id, 0) < 3) { + srslte_vec_interleave(input, &input[nref], temp, nref); + for (int l = 2; l < nsymbols - 1; l += 2) { + srslte_vec_interleave_add(&input[l * nref], &input[(l + 1) * nref], temp, nref); + } + } else { + srslte_vec_interleave(&input[nref], input, temp, nref); + for (int l = 2; l < nsymbols - 1; l += 2) { + srslte_vec_interleave_add(&input[(l + 1) * nref], &input[l * nref], temp, nref); + } + } + srslte_vec_sc_prod_cfc(temp, 2.0f / (float) nsymbols, temp, 2 * nref); + srslte_conv_same_cf(temp, q->smooth_filter, output, 2 * nref, q->smooth_filter_len); + + return; } - srslte_vec_sc_prod_cfc(input, 1.0/((float) nsymbols), input, nref); - nsymbols = 1; } // Average in the frequency domain diff --git a/lib/src/phy/utils/vector.c b/lib/src/phy/utils/vector.c index 35457fcb5..6f5bc48e5 100644 --- a/lib/src/phy/utils/vector.c +++ b/lib/src/phy/utils/vector.c @@ -394,3 +394,11 @@ void srslte_vec_quant_suc(const int16_t *in, uint8_t *out, const float gain, con void srs_vec_cf_cpy(const cf_t *dst, cf_t *src, int len) { srslte_vec_cp_simd(dst, src, len); } + +void srslte_vec_interleave(const cf_t *x, const cf_t *y, cf_t *z, const int len) { + srslte_vec_interleave_simd(x, y, z, len); +} + +void srslte_vec_interleave_add(const cf_t *x, const cf_t *y, cf_t *z, const int len) { + srslte_vec_interleave_add_simd(x, y, z, len); +} \ No newline at end of file diff --git a/lib/src/phy/utils/vector_simd.c b/lib/src/phy/utils/vector_simd.c index 78286a81a..6bf8ddc1d 100644 --- a/lib/src/phy/utils/vector_simd.c +++ b/lib/src/phy/utils/vector_simd.c @@ -1131,3 +1131,89 @@ uint32_t srslte_vec_max_ci_simd(const cf_t *x, const int len) { return max_index; } + +void srslte_vec_interleave_simd(const cf_t *x, const cf_t *y, cf_t *z, const int len) { + uint32_t i = 0, k = 0; + +#ifdef LV_HAVE_SSE + if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { + for (; i < len - 2 + 1; i += 2) { + __m128i a = _mm_load_si128((__m128i *) &x[i]); + __m128i b = _mm_load_si128((__m128i *) &y[i]); + + __m128i r1 = _mm_unpacklo_epi64(a, b); + _mm_store_si128((__m128i *) &z[k], r1); + k += 2; + + __m128i r2 = _mm_unpackhi_epi64(a, b); + _mm_store_si128((__m128i *) &z[k], r2); + k += 2; + } + } else { + for (; i < len - 2 + 1; i += 2) { + __m128i a = _mm_loadu_si128((__m128i *) &x[i]); + __m128i b = _mm_loadu_si128((__m128i *) &y[i]); + + __m128i r1 = _mm_unpacklo_epi64(a, b); + _mm_storeu_si128((__m128i *) &z[k], r1); + k += 2; + + __m128i r2 = _mm_unpackhi_epi64(a, b); + _mm_storeu_si128((__m128i *) &z[k], r2); + k += 2; + } + } +#endif /* LV_HAVE_SSE */ + + for (;i < len; i++) { + z[k++] = x[i]; + z[k++] = y[i]; + } +} + +void srslte_vec_interleave_add_simd(const cf_t *x, const cf_t *y, cf_t *z, const int len) { + uint32_t i = 0, k = 0; + +#ifdef LV_HAVE_SSE + if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(y) && SRSLTE_IS_ALIGNED(z)) { + for (; i < len - 2 + 1; i += 2) { + __m128i a = _mm_load_si128((__m128i *) &x[i]); + __m128i b = _mm_load_si128((__m128i *) &y[i]); + + __m128 r1 = (__m128) _mm_unpacklo_epi64(a, b); + __m128 z1 = _mm_load_ps((float *) &z[k]); + r1 = _mm_add_ps((__m128) r1, z1); + _mm_store_ps((float *) &z[k], r1); + k += 2; + + __m128 r2 = (__m128) _mm_unpackhi_epi64(a, b); + __m128 z2 = _mm_load_ps((float *) &z[k]); + r2 = _mm_add_ps((__m128) r2, z2); + _mm_store_ps((float *) &z[k], r2); + k += 2; + } + } else { + for (; i < len - 2 + 1; i += 2) { + __m128i a = _mm_loadu_si128((__m128i *) &x[i]); + __m128i b = _mm_loadu_si128((__m128i *) &y[i]); + + __m128 r1 = (__m128) _mm_unpacklo_epi64(a, b); + __m128 z1 = _mm_loadu_ps((float *) &z[k]); + r1 = _mm_add_ps((__m128) r1, z1); + _mm_storeu_ps((float *) &z[k], r1); + k += 2; + + __m128 r2 = (__m128) _mm_unpackhi_epi64(a, b); + __m128 z2 = _mm_loadu_ps((float *) &z[k]); + r2 = _mm_add_ps((__m128) r2, z2); + _mm_storeu_ps((float *) &z[k], r2); + k += 2; + } + } +#endif /* LV_HAVE_SSE */ + + for (;i < len; i++) { + z[k++] += x[i]; + z[k++] += y[i]; + } +} \ No newline at end of file