Improved float XOR readability

2021-04-28 12:44:53 +02:00 · 2021-04-28 12:44:53 +02:00 · eef3fac863
parent 751b6858b3
commit eef3fac863
1 changed files with 17 additions and 11 deletions
--- a/lib/src/phy/common/sequence.c
+++ b/lib/src/phy/common/sequence.c
@ -211,10 +211,17 @@ void srsran_sequence_state_init(srsran_sequence_state_t* s, uint32_t seed)
  s->x2 = sequence_get_x2_init(seed);
 }

+#define FLOAT_U32_XOR(DST, SRC, U32_MASK)                                                                              \
+  do {                                                                                                                 \
+    uint32_t temp_u32;                                                                                                 \
+    memcpy(&temp_u32, &(SRC), 4);                                                                                      \
+    temp_u32 ^= (U32_MASK);                                                                                            \
+    memcpy(&(DST), &temp_u32, 4);                                                                                      \
+  } while (false)
+
 void srsran_sequence_state_gen_f(srsran_sequence_state_t* s, float value, float* out, uint32_t length)
 {
-  uint32_t i          = 0;
-  const float xor [2] = {+0.0F, -0.0F};
+  uint32_t i = 0;

  if (length >= SEQUENCE_PAR_BITS) {
    for (; i < length - (SEQUENCE_PAR_BITS - 1); i += SEQUENCE_PAR_BITS) {
@ -246,7 +253,7 @@ void srsran_sequence_state_gen_f(srsran_sequence_state_t* s, float value, float*
 #endif
      // Finish the parallel bits with generic code
      for (; j < SEQUENCE_PAR_BITS; j++) {
-        *((uint32_t*)&out[i + j]) = *((uint32_t*)&value) ^ *((uint32_t*)&xor[(c >> j) & 1U]);
+        FLOAT_U32_XOR(out[i + j], value, (c << (31U - j)) & 0x80000000);
      }

      // Step sequences
@ -256,7 +263,7 @@ void srsran_sequence_state_gen_f(srsran_sequence_state_t* s, float value, float*
  }

  for (; i < length; i++) {
-    *((uint32_t*)&out[i]) = *((uint32_t*)&value) ^ *((uint32_t*)&xor[(s->x1 ^ s->x2) & 1U]);
+    FLOAT_U32_XOR(out[i], value, (s->x1 ^ s->x2) << 31U);

    // Step sequences
    s->x1 = sequence_gen_LTE_pr_memless_step_x1(s->x1);
@ -266,8 +273,7 @@ void srsran_sequence_state_gen_f(srsran_sequence_state_t* s, float value, float*

 void srsran_sequence_state_apply_f(srsran_sequence_state_t* s, const float* in, float* out, uint32_t length)
 {
-  uint32_t i          = 0;
-  const float xor [2] = {+0.0F, -0.0F};
+  uint32_t i = 0;

  if (length >= SEQUENCE_PAR_BITS) {
    for (; i < length - (SEQUENCE_PAR_BITS - 1); i += SEQUENCE_PAR_BITS) {
@ -289,17 +295,17 @@ void srsran_sequence_state_apply_f(srsran_sequence_state_t* s, const float* in,
        mask = _mm_and_si128(mask, (__m128i)_mm_set1_ps(-0.0F));

        // Load input
-        __m128 v = _mm_load_ps(in + i + j);
+        __m128 v = _mm_loadu_ps(in + i + j);

        // Loads input and perform sign XOR
        v = _mm_xor_ps((__m128)mask, v);

        _mm_storeu_ps(out + i + j, v);
      }
-#endif
+#endif // LV_HAVE_SSE
      // Finish the parallel bits with generic code
      for (; j < SEQUENCE_PAR_BITS; j++) {
-        *((uint32_t*)&out[i + j]) = *((uint32_t*)&in[i + j]) ^ *((uint32_t*)&xor[(c >> j) & 1U]);
+        FLOAT_U32_XOR(out[i + j], in[i + j], (c << (31U - j)) & 0x80000000);
      }

      // Step sequences
@ -309,7 +315,7 @@ void srsran_sequence_state_apply_f(srsran_sequence_state_t* s, const float* in,
  }

  for (; i < length; i++) {
-    *((uint32_t*)&out[i]) = *((uint32_t*)&in[i]) ^ *((uint32_t*)&xor[(s->x1 ^ s->x2) & 1U]);
+    FLOAT_U32_XOR(out[i], in[i], (s->x1 ^ s->x2) << 31U);

    // Step sequences
    s->x1 = sequence_gen_LTE_pr_memless_step_x1(s->x1);
@ -744,7 +750,7 @@ void srsran_sequence_apply_packed(const uint8_t* in, uint8_t* out, uint32_t leng

    out[i] = in[i] ^ reverse_lut[buffer & ((1U << rem8) - 1U) & 255U];
  }
-#else  // SEQUENCE_PAR_BITS % 8 == 0
+#else // SEQUENCE_PAR_BITS % 8 == 0
  while (i < (length / 8 - (SEQUENCE_PAR_BITS - 1) / 8)) {
    uint32_t c = (uint32_t)(x1 ^ x2);