mirror of https://github.com/PentHertz/srsLTE.git
Added missing AVX512 intrinsics and flags. Fixes #291.
This commit is contained in:
parent
05abdc9516
commit
8c3a0153b9
|
@ -306,8 +306,8 @@ if(CMAKE_C_COMPILER_ID MATCHES "GNU" OR CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||||
endif (HAVE_FMA)
|
endif (HAVE_FMA)
|
||||||
|
|
||||||
if (HAVE_AVX512)
|
if (HAVE_AVX512)
|
||||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512cd -DLV_HAVE_AVX512")
|
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mavx512f -mavx512cd -mavx512bw -mavx512dq -DLV_HAVE_AVX512")
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512cd -DLV_HAVE_AVX512")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mavx512cd -mavx512bw -mavx512dq -DLV_HAVE_AVX512")
|
||||||
endif(HAVE_AVX512)
|
endif(HAVE_AVX512)
|
||||||
|
|
||||||
if(NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
|
if(NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
|
||||||
|
|
|
@ -142,7 +142,7 @@ if (ENABLE_SSE)
|
||||||
# Check compiler for AVX intrinsics
|
# Check compiler for AVX intrinsics
|
||||||
#
|
#
|
||||||
if (CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
|
if (CMAKE_COMPILER_IS_GNUCC OR (CMAKE_C_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang"))
|
||||||
set(CMAKE_REQUIRED_FLAGS "-mavx512f")
|
set(CMAKE_REQUIRED_FLAGS "-mavx512f -mavx512cd -mavx512bw -mavx512dq -DLV_HAVE_AVX512")
|
||||||
check_c_source_runs("
|
check_c_source_runs("
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
int main()
|
int main()
|
||||||
|
|
|
@ -1341,7 +1341,13 @@ static inline simd_s_t srslte_simd_s_mul(simd_s_t a, simd_s_t b) {
|
||||||
|
|
||||||
static inline simd_s_t srslte_simd_s_neg(simd_s_t a, simd_s_t b) {
|
static inline simd_s_t srslte_simd_s_neg(simd_s_t a, simd_s_t b) {
|
||||||
#ifdef LV_HAVE_AVX512
|
#ifdef LV_HAVE_AVX512
|
||||||
#error sign instruction not available in avx512
|
__m256i a0 = _mm512_extracti64x4_epi64(a, 0);
|
||||||
|
__m256i a1 = _mm512_extracti64x4_epi64(a, 1);
|
||||||
|
__m256i b0 = _mm512_extracti64x4_epi64(b, 0);
|
||||||
|
__m256i b1 = _mm512_extracti64x4_epi64(b, 1);
|
||||||
|
__m256i r0 = _mm256_sign_epi16(a0, b0);
|
||||||
|
__m256i r1 = _mm256_sign_epi16(a1, b1);
|
||||||
|
return _mm512_inserti64x4(_mm512_broadcast_i64x4(r0), r1, 1);
|
||||||
#else /* LV_HAVE_AVX512 */
|
#else /* LV_HAVE_AVX512 */
|
||||||
#ifdef LV_HAVE_AVX2
|
#ifdef LV_HAVE_AVX2
|
||||||
return _mm256_sign_epi16(a, b);
|
return _mm256_sign_epi16(a, b);
|
||||||
|
@ -1814,7 +1820,13 @@ static inline simd_s_t srslte_simd_b_sub(simd_s_t a, simd_s_t b) {
|
||||||
|
|
||||||
static inline simd_s_t srslte_simd_b_neg(simd_b_t a, simd_b_t b) {
|
static inline simd_s_t srslte_simd_b_neg(simd_b_t a, simd_b_t b) {
|
||||||
#ifdef LV_HAVE_AVX512
|
#ifdef LV_HAVE_AVX512
|
||||||
#error sign instruction not available in avx512
|
__m256i a0 = _mm512_extracti64x4_epi64(a, 0);
|
||||||
|
__m256i a1 = _mm512_extracti64x4_epi64(a, 1);
|
||||||
|
__m256i b0 = _mm512_extracti64x4_epi64(b, 0);
|
||||||
|
__m256i b1 = _mm512_extracti64x4_epi64(b, 1);
|
||||||
|
__m256i r0 = _mm256_sign_epi8(a0, b0);
|
||||||
|
__m256i r1 = _mm256_sign_epi8(a1, b1);
|
||||||
|
return _mm512_inserti64x4(_mm512_broadcast_i64x4(r0), r1, 1);
|
||||||
#else /* LV_HAVE_AVX512 */
|
#else /* LV_HAVE_AVX512 */
|
||||||
#ifdef LV_HAVE_AVX2
|
#ifdef LV_HAVE_AVX2
|
||||||
return _mm256_sign_epi8(a, b);
|
return _mm256_sign_epi8(a, b);
|
||||||
|
|
|
@ -226,6 +226,29 @@ TEST(srslte_vec_prod_sss,
|
||||||
free(z);
|
free(z);
|
||||||
)
|
)
|
||||||
|
|
||||||
|
TEST(srslte_vec_neg_sss,
|
||||||
|
MALLOC(int16_t, x);
|
||||||
|
MALLOC(int16_t, y);
|
||||||
|
MALLOC(int16_t, z);
|
||||||
|
|
||||||
|
int16_t gold = 0.0f;
|
||||||
|
for (int i = 0; i < block_size; i++) {
|
||||||
|
x[i] = RANDOM_S();
|
||||||
|
do { y[i] = RANDOM_S(); } while (!y[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_CALL(srslte_vec_neg_sss(x, y, z, block_size))
|
||||||
|
|
||||||
|
for (int i = 0; i < block_size; i++) {
|
||||||
|
gold = y[i] < 0 ? -x[i] : x[i];
|
||||||
|
mse += abs(gold - z[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
free(x);
|
||||||
|
free(y);
|
||||||
|
free(z);
|
||||||
|
)
|
||||||
|
|
||||||
TEST(srslte_vec_acc_cc,
|
TEST(srslte_vec_acc_cc,
|
||||||
MALLOC(cf_t, x);
|
MALLOC(cf_t, x);
|
||||||
cf_t z;
|
cf_t z;
|
||||||
|
@ -868,6 +891,9 @@ int main(int argc, char **argv) {
|
||||||
passed[func_count][size_count] = test_srslte_vec_prod_sss(func_names[func_count], &timmings[func_count][size_count], block_size);
|
passed[func_count][size_count] = test_srslte_vec_prod_sss(func_names[func_count], &timmings[func_count][size_count], block_size);
|
||||||
func_count++;
|
func_count++;
|
||||||
|
|
||||||
|
passed[func_count][size_count] = test_srslte_vec_neg_sss(func_names[func_count], &timmings[func_count][size_count], block_size);
|
||||||
|
func_count++;
|
||||||
|
|
||||||
passed[func_count][size_count] = test_srslte_vec_acc_cc(func_names[func_count], &timmings[func_count][size_count], block_size);
|
passed[func_count][size_count] = test_srslte_vec_acc_cc(func_names[func_count], &timmings[func_count][size_count], block_size);
|
||||||
func_count++;
|
func_count++;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue