diff --git a/lib/src/phy/utils/vector_simd.c b/lib/src/phy/utils/vector_simd.c index 266e0fab5..c14a3a4f2 100644 --- a/lib/src/phy/utils/vector_simd.c +++ b/lib/src/phy/utils/vector_simd.c @@ -1684,21 +1684,21 @@ void srsran_vec_apply_cfo_simd(const cf_t* x, float cfo, cf_t* z, int len) { const float TWOPI = 2.0f * (float)M_PI; int i = 0; + cf_t osc = cexpf(_Complex_I * TWOPI * cfo); + cf_t phase = 1.0f; #if SRSRAN_SIMD_CF_SIZE - srsran_simd_aligned cf_t _osc[SRSRAN_SIMD_CF_SIZE]; + // Load initial phases and oscillator srsran_simd_aligned cf_t _phase[SRSRAN_SIMD_CF_SIZE]; - - if (i < len - SRSRAN_SIMD_CF_SIZE + 1) { - for (int k = 0; k < SRSRAN_SIMD_CF_SIZE; k++) { - _osc[k] = cexpf(_Complex_I * TWOPI * cfo * SRSRAN_SIMD_CF_SIZE); - _phase[k] = cexpf(_Complex_I * TWOPI * cfo * k); - } + _phase[0] = phase; + for (int k = 1; k < SRSRAN_SIMD_CF_SIZE; k++) { + _phase[k] = _phase[k - 1] * osc; } - simd_cf_t _simd_osc = srsran_simd_cfi_load(_osc); + simd_cf_t _simd_osc = srsran_simd_cf_set1(_phase[SRSRAN_SIMD_CF_SIZE - 1] * osc); simd_cf_t _simd_phase = srsran_simd_cfi_load(_phase); if (SRSRAN_IS_ALIGNED(x) && SRSRAN_IS_ALIGNED(z)) { + // For aligned memory for (; i < len - SRSRAN_SIMD_CF_SIZE + 1; i += SRSRAN_SIMD_CF_SIZE) { simd_cf_t a = srsran_simd_cfi_load(&x[i]); @@ -1709,6 +1709,7 @@ void srsran_vec_apply_cfo_simd(const cf_t* x, float cfo, cf_t* z, int len) _simd_phase = srsran_simd_cf_prod(_simd_phase, _simd_osc); } } else { + // For unaligned memory for (; i < len - SRSRAN_SIMD_F_SIZE + 1; i += SRSRAN_SIMD_F_SIZE) { simd_cf_t a = srsran_simd_cfi_loadu(&x[i]); @@ -1719,9 +1720,12 @@ void srsran_vec_apply_cfo_simd(const cf_t* x, float cfo, cf_t* z, int len) _simd_phase = srsran_simd_cf_prod(_simd_phase, _simd_osc); } } + + // Stores the next phase + srsran_simd_cfi_store(_phase, _simd_phase); + phase = _phase[0]; #endif - cf_t osc = cexpf(_Complex_I * TWOPI * cfo); - cf_t phase = cexpf(_Complex_I * TWOPI * cfo * i); + for (; i < len; i++) { z[i] = x[i] * phase;