From eef3fac8636341a6cbf88d4310dda654d48d6628 Mon Sep 17 00:00:00 2001 From: Xavier Arteaga Date: Wed, 28 Apr 2021 12:44:53 +0200 Subject: [PATCH] Improved float XOR readability --- lib/src/phy/common/sequence.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/lib/src/phy/common/sequence.c b/lib/src/phy/common/sequence.c index 2ec871398..c0a47667a 100644 --- a/lib/src/phy/common/sequence.c +++ b/lib/src/phy/common/sequence.c @@ -211,10 +211,17 @@ void srsran_sequence_state_init(srsran_sequence_state_t* s, uint32_t seed) s->x2 = sequence_get_x2_init(seed); } +#define FLOAT_U32_XOR(DST, SRC, U32_MASK) \ + do { \ + uint32_t temp_u32; \ + memcpy(&temp_u32, &(SRC), 4); \ + temp_u32 ^= (U32_MASK); \ + memcpy(&(DST), &temp_u32, 4); \ + } while (false) + void srsran_sequence_state_gen_f(srsran_sequence_state_t* s, float value, float* out, uint32_t length) { - uint32_t i = 0; - const float xor [2] = {+0.0F, -0.0F}; + uint32_t i = 0; if (length >= SEQUENCE_PAR_BITS) { for (; i < length - (SEQUENCE_PAR_BITS - 1); i += SEQUENCE_PAR_BITS) { @@ -246,7 +253,7 @@ void srsran_sequence_state_gen_f(srsran_sequence_state_t* s, float value, float* #endif // Finish the parallel bits with generic code for (; j < SEQUENCE_PAR_BITS; j++) { - *((uint32_t*)&out[i + j]) = *((uint32_t*)&value) ^ *((uint32_t*)&xor[(c >> j) & 1U]); + FLOAT_U32_XOR(out[i + j], value, (c << (31U - j)) & 0x80000000); } // Step sequences @@ -256,7 +263,7 @@ void srsran_sequence_state_gen_f(srsran_sequence_state_t* s, float value, float* } for (; i < length; i++) { - *((uint32_t*)&out[i]) = *((uint32_t*)&value) ^ *((uint32_t*)&xor[(s->x1 ^ s->x2) & 1U]); + FLOAT_U32_XOR(out[i], value, (s->x1 ^ s->x2) << 31U); // Step sequences s->x1 = sequence_gen_LTE_pr_memless_step_x1(s->x1); @@ -266,8 +273,7 @@ void srsran_sequence_state_gen_f(srsran_sequence_state_t* s, float value, float* void srsran_sequence_state_apply_f(srsran_sequence_state_t* s, const float* in, float* out, uint32_t length) { - uint32_t i = 0; - const float xor [2] = {+0.0F, -0.0F}; + uint32_t i = 0; if (length >= SEQUENCE_PAR_BITS) { for (; i < length - (SEQUENCE_PAR_BITS - 1); i += SEQUENCE_PAR_BITS) { @@ -289,17 +295,17 @@ void srsran_sequence_state_apply_f(srsran_sequence_state_t* s, const float* in, mask = _mm_and_si128(mask, (__m128i)_mm_set1_ps(-0.0F)); // Load input - __m128 v = _mm_load_ps(in + i + j); + __m128 v = _mm_loadu_ps(in + i + j); // Loads input and perform sign XOR v = _mm_xor_ps((__m128)mask, v); _mm_storeu_ps(out + i + j, v); } -#endif +#endif // LV_HAVE_SSE // Finish the parallel bits with generic code for (; j < SEQUENCE_PAR_BITS; j++) { - *((uint32_t*)&out[i + j]) = *((uint32_t*)&in[i + j]) ^ *((uint32_t*)&xor[(c >> j) & 1U]); + FLOAT_U32_XOR(out[i + j], in[i + j], (c << (31U - j)) & 0x80000000); } // Step sequences @@ -309,7 +315,7 @@ void srsran_sequence_state_apply_f(srsran_sequence_state_t* s, const float* in, } for (; i < length; i++) { - *((uint32_t*)&out[i]) = *((uint32_t*)&in[i]) ^ *((uint32_t*)&xor[(s->x1 ^ s->x2) & 1U]); + FLOAT_U32_XOR(out[i], in[i], (s->x1 ^ s->x2) << 31U); // Step sequences s->x1 = sequence_gen_LTE_pr_memless_step_x1(s->x1); @@ -744,7 +750,7 @@ void srsran_sequence_apply_packed(const uint8_t* in, uint8_t* out, uint32_t leng out[i] = in[i] ^ reverse_lut[buffer & ((1U << rem8) - 1U) & 255U]; } -#else // SEQUENCE_PAR_BITS % 8 == 0 +#else // SEQUENCE_PAR_BITS % 8 == 0 while (i < (length / 8 - (SEQUENCE_PAR_BITS - 1) / 8)) { uint32_t c = (uint32_t)(x1 ^ x2);