Vector: added conjugate complex float to short

This commit is contained in:
Xavier Arteaga 2020-09-22 10:15:20 +02:00 committed by Xavier Arteaga
parent 8566cbb079
commit ede58319ae
5 changed files with 104 additions and 2 deletions

View File

@ -135,6 +135,7 @@ SRSLTE_API void srslte_vec_sc_prod_ccc(const cf_t* x, const cf_t h, cf_t* z, con
SRSLTE_API void srslte_vec_sc_prod_fff(const float* x, const float h, float* z, const uint32_t len);
/* Float to int16_t conversion entry point; takes the scale before the output pointer. */
SRSLTE_API void srslte_vec_convert_fi(const float* x, const float scale, int16_t* z, const uint32_t len);
/*
 * Complex float to int16_t conversion with conjugation.
 * x holds len complex samples; z receives 2 * len int16_t values laid out as
 * (re, im) pairs, where each pair is the conjugate of the input sample multiplied by scale.
 */
SRSLTE_API void srslte_vec_convert_conj_cs(const cf_t* x, const float scale, int16_t* z, const uint32_t len);
SRSLTE_API void srslte_vec_convert_if(const int16_t* x, const float scale, float* z, const uint32_t len);
/* Float to int8_t conversion entry point; takes the scale before the output pointer. */
SRSLTE_API void srslte_vec_convert_fb(const float* x, const float scale, int8_t* z, const uint32_t len);

View File

@ -120,6 +120,8 @@ SRSLTE_API void srslte_vec_convert_if_simd(const int16_t* x, float* z, const flo
SRSLTE_API void srslte_vec_convert_fi_simd(const float* x, int16_t* z, const float scale, const int len);
/*
 * SIMD kernel behind srslte_vec_convert_conj_cs(). Note the argument order: the output
 * pointer z comes before scale. len counts complex samples, so 2 * len int16_t values
 * are written to z.
 */
SRSLTE_API void srslte_vec_convert_conj_cs_simd(const cf_t* x, int16_t* z, const float scale, const int len);
SRSLTE_API void srslte_vec_convert_fb_simd(const float* x, int8_t* z, const float scale, const int len);
SRSLTE_API void srslte_vec_interleave_simd(const cf_t* x, const cf_t* y, cf_t* z, const int len);

View File

@ -408,6 +408,30 @@ TEST(srslte_vec_convert_fi, MALLOC(float, x); MALLOC(short, z); float scale = 10
free(x);
free(z);)
TEST(
    srslte_vec_convert_conj_cs, MALLOC(cf_t, x); int16_t* z = srslte_vec_i16_malloc(block_size * 2);
    float scale = 1000.0f;

    short gold_re;
    short gold_im;

    /*
     * Keep the complete complex sample. Casting the random value to float would drop its
     * imaginary part, and the sign flip performed by the conjugation would then never be
     * exercised by this test.
     */
    for (int i = 0; i < block_size; i++) { x[i] = RANDOM_CF(); }

    TEST_CALL(srslte_vec_convert_conj_cs(x, scale, z, block_size))

    /*
     * Reference: scaled real part and scaled, negated imaginary part, both truncated to
     * short. z stores each sample as two consecutive int16 values (re at 2 * i, im at
     * 2 * i + 1). The largest absolute deviation is kept in mse.
     */
    for (int i = 0; i < block_size; i++) {
      gold_re = (short)(crealf(x[i]) * scale);
      gold_im = (short)(cimagf(-x[i]) * scale);

      cf_t   t1  = (float)gold_re + I * (float)gold_im;
      cf_t   t2  = (float)z[2 * i] + I * (float)z[2 * i + 1];
      double err = cabsf(t1 - t2);
      if (err > mse) {
        mse = err;
      }
    }

    free(x);
    free(z);)
TEST(srslte_vec_convert_if, MALLOC(int16_t, x); MALLOC(float, z); float scale = 1000.0f;
float gold;
@ -591,6 +615,22 @@ TEST(
free(y);
free(z);)
TEST(
    srslte_vec_conj_cc, MALLOC(cf_t, x); MALLOC(cf_t, z);

    cf_t ref;

    /* Populate the input with random complex samples. */
    for (int k = 0; k < block_size; k++) { x[k] = RANDOM_CF(); }

    TEST_CALL(srslte_vec_conj_cc(x, z, block_size))

    /* Accumulate the magnitude of the difference against the conjf() reference. */
    for (int k = 0; k < block_size; k++) {
      ref = conjf(x[k]);
      mse += cabsf(ref - z[k]);
    }

    free(x);
    free(z);)
TEST(
srslte_vec_max_fi, MALLOC(float, x);
@ -802,6 +842,10 @@ int main(int argc, char** argv)
test_srslte_vec_convert_fi(func_names[func_count], &timmings[func_count][size_count], block_size);
func_count++;
passed[func_count][size_count] =
test_srslte_vec_convert_conj_cs(func_names[func_count], &timmings[func_count][size_count], block_size);
func_count++;
passed[func_count][size_count] =
test_srslte_vec_convert_if(func_names[func_count], &timmings[func_count][size_count], block_size);
func_count++;
@ -858,6 +902,10 @@ int main(int argc, char** argv)
test_srslte_vec_div_fff(func_names[func_count], &timmings[func_count][size_count], block_size);
func_count++;
passed[func_count][size_count] =
test_srslte_vec_conj_cc(func_names[func_count], &timmings[func_count][size_count], block_size);
func_count++;
passed[func_count][size_count] =
test_srslte_vec_max_fi(func_names[func_count], &timmings[func_count][size_count], block_size);
func_count++;

View File

@ -113,6 +113,11 @@ void srslte_vec_convert_fi(const float* x, const float scale, int16_t* z, const
srslte_vec_convert_fi_simd(x, z, scale, len);
}
/**
 * Public entry point for the conjugating complex-float to int16_t conversion.
 *
 * Forwards to srslte_vec_convert_conj_cs_simd(), which takes the output pointer before
 * the scale and a signed length.
 *
 * @param x     Input complex samples.
 * @param scale Scaling factor handed to the kernel.
 * @param z     Output int16_t buffer.
 * @param len   Number of complex samples in x.
 */
void srslte_vec_convert_conj_cs(const cf_t* x, const float scale, int16_t* z, const uint32_t len)
{
  const int nof_samples = (int)len;

  srslte_vec_convert_conj_cs_simd(x, z, scale, nof_samples);
}
void srslte_vec_convert_fb(const float* x, const float scale, int8_t* z, const uint32_t len)
{
srslte_vec_convert_fb_simd(x, z, scale, len);

View File

@ -471,6 +471,52 @@ void srslte_vec_convert_fi_simd(const float* x, int16_t* z, const float scale, c
}
}
/**
 * Converts complex float samples to 16-bit integers while conjugating them.
 *
 * The input is processed as a flat float array of 2 * len_ values. Values at even indices
 * are multiplied by +scale and values at odd indices by -scale before being converted to
 * int16_t, so the sign of every second component is flipped.
 *
 * @param x_    Input complex samples (len_ entries), read as consecutive floats.
 * @param z     Output buffer; 2 * len_ int16_t values are written.
 * @param scale Scaling factor applied before the integer conversion.
 * @param len_  Number of complex samples.
 */
void srslte_vec_convert_conj_cs_simd(const cf_t* x_, int16_t* z, const float scale, const int len_)
{
  int i = 0;

  const float* x   = (const float*)x_;
  const int    len = len_ * 2;

#if SRSLTE_SIMD_F_SIZE && SRSLTE_SIMD_S_SIZE
  // Alternating +scale / -scale pattern: even lanes keep their sign, odd lanes are negated
  srslte_simd_aligned float scale_v[SRSLTE_SIMD_F_SIZE];
  for (uint32_t j = 0; j < SRSLTE_SIMD_F_SIZE; j++) {
    scale_v[j] = (j % 2 == 0) ? +scale : -scale;
  }

  simd_f_t s = srslte_simd_f_load(scale_v);

  // Each iteration consumes two float registers and produces one int16 register
  if (SRSLTE_IS_ALIGNED(x) && SRSLTE_IS_ALIGNED(z)) {
    for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) {
      simd_f_t a = srslte_simd_f_load(&x[i]);
      simd_f_t b = srslte_simd_f_load(&x[i + SRSLTE_SIMD_F_SIZE]);

      simd_f_t sa = srslte_simd_f_mul(a, s);
      simd_f_t sb = srslte_simd_f_mul(b, s);

      simd_s_t i16 = srslte_simd_convert_2f_s(sa, sb);

      srslte_simd_s_store(&z[i], i16);
    }
  } else {
    for (; i < len - SRSLTE_SIMD_S_SIZE + 1; i += SRSLTE_SIMD_S_SIZE) {
      simd_f_t a = srslte_simd_f_loadu(&x[i]);
      simd_f_t b = srslte_simd_f_loadu(&x[i + SRSLTE_SIMD_F_SIZE]);

      simd_f_t sa = srslte_simd_f_mul(a, s);
      simd_f_t sb = srslte_simd_f_mul(b, s);

      simd_s_t i16 = srslte_simd_convert_2f_s(sa, sb);

      srslte_simd_s_storeu(&z[i], i16);
    }
  }
#endif /* SRSLTE_SIMD_F_SIZE && SRSLTE_SIMD_S_SIZE */

  // Scalar tail (and the whole range when no SIMD is available). x is a plain float array
  // here, so conjf() on a single element cannot flip anything; apply the same sign pattern
  // as scale_v instead: +scale on even indices, -scale on odd indices.
  for (; i < len; i++) {
    const float signed_scale = (i % 2 == 0) ? scale : -scale;
    z[i]                     = (int16_t)(x[i] * signed_scale);
  }
}
/* Evaluates to non-zero when the low four bits of the address PTR are all zero, i.e. PTR is a multiple of 16 bytes. */
#define SRSLTE_IS_ALIGNED_SSE(PTR) (((size_t)(PTR)&0x0F) == 0)
void srslte_vec_convert_fb_simd(const float* x, int8_t* z, const float scale, const int len)
@ -1691,8 +1737,8 @@ float srslte_vec_estimate_frequency_simd(const cf_t* x, int len)
}
// Accumulate using horizontal addition
simd_f_t _sum_re = srslte_simd_cf_re(_sum);
simd_f_t _sum_im = srslte_simd_cf_im(_sum);
simd_f_t _sum_re = srslte_simd_cf_re(_sum);
simd_f_t _sum_im = srslte_simd_cf_im(_sum);
simd_f_t _sum_re_im = srslte_simd_f_hadd(_sum_re, _sum_im);
for (int j = 2; j < SRSLTE_SIMD_F_SIZE; j *= 2) {
_sum_re_im = srslte_simd_f_hadd(_sum_re_im, _sum_re_im);