Removed redundant code

This commit is contained in:
Xavier Arteaga 2020-04-15 12:57:57 +02:00 committed by Andre Puschmann
parent 01e224348c
commit 60c4daccab
2 changed files with 52 additions and 89 deletions

View File

@ -74,29 +74,6 @@ SRSLTE_API __m128 srslte_mat_cf_recip_sse(__m128 a);
/* SSE implementation for 2x2 determinant */ /* SSE implementation for 2x2 determinant */
SRSLTE_API __m128 srslte_mat_2x2_det_sse(__m128 a00, __m128 a01, __m128 a10, __m128 a11); SRSLTE_API __m128 srslte_mat_2x2_det_sse(__m128 a00, __m128 a01, __m128 a10, __m128 a11);
/* SSE implementation for Zero Forcing (ZF) solver */
SRSLTE_API void srslte_mat_2x2_zf_sse(__m128 y0,
__m128 y1,
__m128 h00,
__m128 h01,
__m128 h10,
__m128 h11,
__m128* x0,
__m128* x1,
float norm);
/* SSE implementation for Minimum Mean Squared Error (MMSE) solver */
SRSLTE_API void srslte_mat_2x2_mmse_sse(__m128 y0,
__m128 y1,
__m128 h00,
__m128 h01,
__m128 h10,
__m128 h11,
__m128* x0,
__m128* x1,
float noise_estimate,
float norm);
#endif /* LV_HAVE_SSE */ #endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_AVX #ifdef LV_HAVE_AVX
@ -107,29 +84,6 @@ SRSLTE_API __m256 srslte_mat_cf_recip_avx(__m256 a);
/* AVX implementation for 2x2 determinant */ /* AVX implementation for 2x2 determinant */
SRSLTE_API __m256 srslte_mat_2x2_det_avx(__m256 a00, __m256 a01, __m256 a10, __m256 a11); SRSLTE_API __m256 srslte_mat_2x2_det_avx(__m256 a00, __m256 a01, __m256 a10, __m256 a11);
/* AVX implementation for Zero Forcing (ZF) solver */
SRSLTE_API void srslte_mat_2x2_zf_avx(__m256 y0,
__m256 y1,
__m256 h00,
__m256 h01,
__m256 h10,
__m256 h11,
__m256* x0,
__m256* x1,
float norm);
/* AVX implementation for Minimum Mean Squared Error (MMSE) solver */
SRSLTE_API void srslte_mat_2x2_mmse_avx(__m256 y0,
__m256 y1,
__m256 h00,
__m256 h01,
__m256 h10,
__m256 h11,
__m256* x0,
__m256* x1,
float noise_estimate,
float norm);
#endif /* LV_HAVE_AVX */ #endif /* LV_HAVE_AVX */
#if SRSLTE_SIMD_CF_SIZE != 0 #if SRSLTE_SIMD_CF_SIZE != 0

View File

@ -34,7 +34,11 @@
static bool zf_solver = false; static bool zf_solver = false;
static bool mmse_solver = false; static bool mmse_solver = false;
static bool verbose = false; static bool verbose = false;
static srslte_random_t random_gen = NULL; static srslte_random_t random_gen = NULL;
#define MAXIMUM_ERROR (1e-6f)
#define RANDOM_F() srslte_random_uniform_real_dist(random_gen, -1.0f, +1.0f)
#define RANDOM_CF() srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f)
double elapsed_us(struct timeval* ts_start, struct timeval* ts_end) double elapsed_us(struct timeval* ts_start, struct timeval* ts_end)
{ {
@ -47,7 +51,7 @@ double elapsed_us(struct timeval* ts_start, struct timeval* ts_end)
} }
} }
#define BLOCK_SIZE 100 #define BLOCK_SIZE 1000
#define RUN_TEST(FUNCTION) /*TYPE NAME (void)*/ \ #define RUN_TEST(FUNCTION) /*TYPE NAME (void)*/ \
do { \ do { \
int i; \ int i; \
@ -97,16 +101,16 @@ void parse_args(int argc, char** argv)
} }
} }
bool test_zf_solver_gen(void) static bool test_zf_solver_gen(void)
{ {
cf_t x0, x1, cf_error0, cf_error1; cf_t x0, x1, cf_error0, cf_error1;
float error; float error;
cf_t x0_gold = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); cf_t x0_gold = RANDOM_CF();
cf_t x1_gold = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); cf_t x1_gold = RANDOM_CF();
cf_t h00 = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); cf_t h00 = RANDOM_CF();
cf_t h01 = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); cf_t h01 = RANDOM_CF();
cf_t h10 = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); cf_t h10 = RANDOM_CF();
cf_t h11 = (1 - h01 * h10) / h00; cf_t h11 = (1 - h01 * h10) / h00;
cf_t y0 = x0_gold * h00 + x1_gold * h01; cf_t y0 = x0_gold * h00 + x1_gold * h01;
cf_t y1 = x0_gold * h10 + x1_gold * h11; cf_t y1 = x0_gold * h10 + x1_gold * h11;
@ -118,20 +122,20 @@ bool test_zf_solver_gen(void)
error = crealf(cf_error0) * crealf(cf_error0) + cimagf(cf_error0) * cimagf(cf_error0) + error = crealf(cf_error0) * crealf(cf_error0) + cimagf(cf_error0) * cimagf(cf_error0) +
crealf(cf_error1) * crealf(cf_error1) + cimagf(cf_error1) * cimagf(cf_error1); crealf(cf_error1) * crealf(cf_error1) + cimagf(cf_error1) * cimagf(cf_error1);
return (error < 1e-6); return (error < MAXIMUM_ERROR);
} }
bool test_mmse_solver_gen(void) static bool test_mmse_solver_gen(void)
{ {
cf_t x0, x1, cf_error0, cf_error1; cf_t x0, x1, cf_error0, cf_error1;
float error; float error;
cf_t x0_gold = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); cf_t x0_gold = RANDOM_CF();
cf_t x1_gold = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); cf_t x1_gold = RANDOM_CF();
cf_t h00 = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); cf_t h00 = RANDOM_CF();
cf_t h01 = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); cf_t h01 = RANDOM_CF();
cf_t h10 = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); cf_t h10 = RANDOM_CF();
cf_t h11 = (1 - h01 * h10) / h00; cf_t h11 = (1 - h01 * h10) * conjf(h00);
cf_t y0 = x0_gold * h00 + x1_gold * h01; cf_t y0 = x0_gold * h00 + x1_gold * h01;
cf_t y1 = x0_gold * h10 + x1_gold * h11; cf_t y1 = x0_gold * h10 + x1_gold * h11;
@ -142,22 +146,22 @@ bool test_mmse_solver_gen(void)
error = crealf(cf_error0) * crealf(cf_error0) + cimagf(cf_error0) * cimagf(cf_error0) + error = crealf(cf_error0) * crealf(cf_error0) + cimagf(cf_error0) * cimagf(cf_error0) +
crealf(cf_error1) * crealf(cf_error1) + cimagf(cf_error1) * cimagf(cf_error1); crealf(cf_error1) * crealf(cf_error1) + cimagf(cf_error1) * cimagf(cf_error1);
return (error < 1e-6); return (error < MAXIMUM_ERROR);
} }
#if SRSLTE_SIMD_CF_SIZE != 0 #if SRSLTE_SIMD_CF_SIZE != 0
bool test_zf_solver_simd(void) static bool test_zf_solver_simd(void)
{ {
cf_t cf_error0, cf_error1; cf_t cf_error0, cf_error1;
float error = 0.0f; float error = 0.0f;
cf_t x0_gold_1 = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); cf_t x0_gold_1 = RANDOM_CF();
cf_t x1_gold_1 = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); cf_t x1_gold_1 = RANDOM_CF();
cf_t h00_1 = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); cf_t h00_1 = RANDOM_CF();
cf_t h01_1 = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); cf_t h01_1 = RANDOM_CF();
cf_t h10_1 = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); cf_t h10_1 = RANDOM_CF();
cf_t h11_1 = (1 - h01_1 * h10_1) / h00_1; cf_t h11_1 = (1 - h01_1 * h10_1) * conjf(h00_1);
cf_t y0_1 = x0_gold_1 * h00_1 + x1_gold_1 * h01_1; cf_t y0_1 = x0_gold_1 * h00_1 + x1_gold_1 * h01_1;
cf_t y1_1 = x0_gold_1 * h10_1 + x1_gold_1 * h11_1; cf_t y1_1 = x0_gold_1 * h10_1 + x1_gold_1 * h11_1;
@ -184,10 +188,10 @@ bool test_zf_solver_simd(void)
error += crealf(cf_error0) * crealf(cf_error0) + cimagf(cf_error0) * cimagf(cf_error0) + error += crealf(cf_error0) * crealf(cf_error0) + cimagf(cf_error0) * cimagf(cf_error0) +
crealf(cf_error1) * crealf(cf_error1) + cimagf(cf_error1) * cimagf(cf_error1); crealf(cf_error1) * crealf(cf_error1) + cimagf(cf_error1) * cimagf(cf_error1);
return (error < 1e-3); return (error < MAXIMUM_ERROR);
} }
bool test_mmse_solver_simd(void) static bool test_mmse_solver_simd(void)
{ {
cf_t cf_error0, cf_error1; cf_t cf_error0, cf_error1;
float error = 0.0f; float error = 0.0f;
@ -200,13 +204,14 @@ bool test_mmse_solver_simd(void)
cf_t h11[SRSLTE_SIMD_CF_SIZE]; cf_t h11[SRSLTE_SIMD_CF_SIZE];
cf_t y0[SRSLTE_SIMD_CF_SIZE]; cf_t y0[SRSLTE_SIMD_CF_SIZE];
cf_t y1[SRSLTE_SIMD_CF_SIZE]; cf_t y1[SRSLTE_SIMD_CF_SIZE];
for (int i = 0; i < SRSLTE_SIMD_CF_SIZE; i++) { for (int i = 0; i < SRSLTE_SIMD_CF_SIZE; i++) {
x0_gold[i] = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); x0_gold[i] = RANDOM_CF();
x1_gold[i] = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); x1_gold[i] = RANDOM_CF();
h00[i] = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); h00[i] = RANDOM_CF();
h01[i] = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); h01[i] = RANDOM_CF();
h10[i] = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); h10[i] = RANDOM_CF();
h11[i] = (1 - h01[i] * h10[i]) / h00[i]; h11[i] = (1 - h01[i] * h10[i]) * conjf(h00[i]);
y0[i] = x0_gold[i] * h00[i] + x1_gold[i] * h01[i]; y0[i] = x0_gold[i] * h00[i] + x1_gold[i] * h01[i];
y1[i] = x0_gold[i] * h10[i] + x1_gold[i] * h11[i]; y1[i] = x0_gold[i] * h10[i] + x1_gold[i] * h11[i];
} }
@ -231,23 +236,26 @@ bool test_mmse_solver_simd(void)
cf_error0 = x0[1] - x0_gold[1]; cf_error0 = x0[1] - x0_gold[1];
cf_error1 = x1[1] - x1_gold[1]; cf_error1 = x1[1] - x1_gold[1];
error += crealf(cf_error0) * crealf(cf_error0) + cimagf(cf_error0) * cimagf(cf_error0) + error += __real__ cf_error0 * __real__ cf_error0;
crealf(cf_error1) * crealf(cf_error1) + cimagf(cf_error1) * cimagf(cf_error1); error += __imag__ cf_error0 * __imag__ cf_error0;
error += __real__ cf_error1 * __real__ cf_error1;
error += __imag__ cf_error1 * __imag__ cf_error1;
error /= 2.0f;
return (error < 1e-3); return (error < MAXIMUM_ERROR);
} }
#endif /* SRSLTE_SIMD_CF_SIZE != 0 */ #endif /* SRSLTE_SIMD_CF_SIZE != 0 */
bool test_vec_dot_prod_ccc(void) static bool test_vec_dot_prod_ccc(void)
{ {
__attribute__((aligned(256))) cf_t a[14]; __attribute__((aligned(256))) cf_t a[14];
__attribute__((aligned(256))) cf_t b[14]; __attribute__((aligned(256))) cf_t b[14];
cf_t res = 0, gold = 0; cf_t res = 0, gold = 0;
for (int i = 0; i < 14; i++) { for (int i = 0; i < 14; i++) {
a[i] = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); a[i] = RANDOM_CF();
b[i] = srslte_random_uniform_complex_dist(random_gen, -1.0f, +1.0f); b[i] = RANDOM_CF();
} }
res = srslte_vec_dot_prod_ccc(a, b, 14); res = srslte_vec_dot_prod_ccc(a, b, 14);
@ -256,8 +264,7 @@ bool test_vec_dot_prod_ccc(void)
gold += a[i] * b[i]; gold += a[i] * b[i];
} }
float err = cabsf(res - gold); return (cabsf(res - gold) < MAXIMUM_ERROR);
return (err < 1e-3);
} }
int main(int argc, char** argv) int main(int argc, char** argv)
@ -272,7 +279,7 @@ int main(int argc, char** argv)
if (zf_solver) { if (zf_solver) {
RUN_TEST(test_zf_solver_gen); RUN_TEST(test_zf_solver_gen);
#if SRSLTE_SIMD_CF_SIZE #if SRSLTE_SIMD_CF_SIZE != 0
RUN_TEST(test_zf_solver_simd); RUN_TEST(test_zf_solver_simd);
#endif /* SRSLTE_SIMD_CF_SIZE != 0*/ #endif /* SRSLTE_SIMD_CF_SIZE != 0*/
} }
@ -280,7 +287,7 @@ int main(int argc, char** argv)
if (mmse_solver) { if (mmse_solver) {
RUN_TEST(test_mmse_solver_gen); RUN_TEST(test_mmse_solver_gen);
#if SRSLTE_SIMD_CF_SIZE #if SRSLTE_SIMD_CF_SIZE != 0
RUN_TEST(test_mmse_solver_simd); RUN_TEST(test_mmse_solver_simd);
#endif /* SRSLTE_SIMD_CF_SIZE != 0*/ #endif /* SRSLTE_SIMD_CF_SIZE != 0*/
} }
@ -289,9 +296,11 @@ int main(int argc, char** argv)
printf("%s!\n", (passed) ? "Ok" : "Failed"); printf("%s!\n", (passed) ? "Ok" : "Failed");
srslte_random_free(random_gen);
if (!passed) { if (!passed) {
ret = SRSLTE_ERROR; ret = SRSLTE_ERROR;
} }
exit(ret); return ret;
} }