updating avx vectors and viterbi

This commit is contained in:
yagoda 2017-05-31 22:19:26 +01:00
parent e75a9865de
commit d48dcc25b4
5 changed files with 81 additions and 59 deletions

View File

@ -35,46 +35,46 @@ extern "C" {
#include <stdint.h>
#include "srslte/config.h"
/*
 * Prototypes of the SIMD kernels that the generic srslte_vec_* wrappers
 * dispatch to.
 *
 * NOTE(review): every operation below is declared under two spellings per
 * instruction set (*_simd / *_simd_avx and *_sse / *_avx). This looks like
 * the pre- and post-rename prototypes of a single change listed together;
 * confirm which names the implementation file actually defines before
 * relying on either spelling.
 */

/* Dot product of two short vectors, returned as int. */
SRSLTE_API int srslte_vec_dot_prod_sss_simd(short *x, short *y, uint32_t len);
SRSLTE_API int srslte_vec_dot_prod_sss_sse(short *x, short *y, uint32_t len);
SRSLTE_API int srslte_vec_dot_prod_sss_simd_avx(short *x, short *y, uint32_t len);
SRSLTE_API int srslte_vec_dot_prod_sss_avx(short *x, short *y, uint32_t len);
/* Element-wise sum of x and y written to z. */
SRSLTE_API void srslte_vec_sum_sss_simd(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sum_sss_sse(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sum_sss_simd_avx(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sum_sss_avx(short *x, short *y, short *z, uint32_t len);
/* Element-wise difference x - y written to z. */
SRSLTE_API void srslte_vec_sub_sss_simd(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sub_sss_sse(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sub_sss_simd_avx(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sub_sss_avx(short *x, short *y, short *z, uint32_t len);
/* Element-wise product of x and y written to z. */
SRSLTE_API void srslte_vec_prod_sss_simd(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_sss_simd_avx(short *x, short *y, short *z, uint32_t len);
SRSLTE_API void srslte_vec_prod_sss_avx(short *x, short *y, short *z, uint32_t len);
/* Scaling of x by a power of two; the second argument (n_rightshift / k) is the exponent. */
SRSLTE_API void srslte_vec_sc_div2_sss_simd(short *x, int n_rightshift, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_div2_sss_sse(short *x, int n_rightshift, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_div2_sss_simd_avx(short *x, int k, short *z, uint32_t len);
SRSLTE_API void srslte_vec_sc_div2_sss_avx(short *x, int k, short *z, uint32_t len);
/* Scatter through a look-up table of output indices (no AVX variant is declared). */
SRSLTE_API void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t len);
SRSLTE_API void srslte_vec_lut_sss_sse(short *x, unsigned short *lut, short *y, uint32_t len);
/* Float to int16_t conversion with a scale factor. */
SRSLTE_API void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len);
SRSLTE_API void srslte_vec_convert_fi_sse(float *x, int16_t *z, float scale, uint32_t len);
/* Complex vector times real scalar; note the output pointer z comes first. */
SRSLTE_API void srslte_32fc_s32f_multiply_32fc_avx( cf_t *z,const cf_t *x,const float h,const uint32_t len);
SRSLTE_API void srslte_vec_mult_scalar_cf_f_avx( cf_t *z,const cf_t *x,const float h,const uint32_t len);
#ifdef __cplusplus
}
#endif

View File

@ -213,7 +213,7 @@ int main(int argc, char **argv) {
gettimeofday(&t[1], NULL);
int M = 1;
srslte_vec_fprint_b(stdout, data_tx, frame_length);
//srslte_vec_fprint_b(stdout, data_tx, frame_length);
for (int i=0;i<M;i++) {
srslte_viterbi_decode_uc(&dec, llr_c, data_rx, frame_length);

View File

@ -43,13 +43,6 @@
#define DEFAULT_GAIN 100
/*
 * Local switch for the AVX decoder path: USE_AVX ends up defined only when
 * both LV_HAVE_AVX and AVX_ON are defined. AVX_ON is defined unconditionally
 * here, so in practice USE_AVX follows LV_HAVE_AVX.
 * NOTE(review): the same listing also selects the decoder with LV_HAVE_AVX
 * directly, which would make these macros redundant; confirm whether they
 * are still needed.
 */
#define AVX_ON
#ifdef LV_HAVE_AVX
#ifdef AVX_ON
#define USE_AVX
#endif
#endif
//#undef LV_HAVE_SSE
int decode37(void *o, uint8_t *symbols, uint8_t *data, uint32_t frame_length) {
@ -391,7 +384,7 @@ int srslte_viterbi_init(srslte_viterbi_t *q, srslte_viterbi_type_t type, int pol
switch (type) {
case SRSLTE_VITERBI_37:
#ifdef LV_HAVE_SSE
#ifdef USE_AVX
#ifdef LV_HAVE_AVX
return init37_avx2(q, poly, max_frame_length, tail_bitting);
#else
return init37_sse(q, poly, max_frame_length, tail_bitting);

View File

@ -103,13 +103,17 @@ void srslte_vec_sub_fff(float *x, float *y, float *z, uint32_t len) {
}
/**
 * Element-wise subtraction of two 16-bit vectors: z[i] = x[i] - y[i].
 *
 * The implementation is picked at compile time, widest instruction set
 * first: LV_HAVE_AVX selects srslte_vec_sub_sss_avx, LV_HAVE_SSE selects
 * srslte_vec_sub_sss_sse, otherwise a portable scalar loop is used.
 *
 * @param x   minuend samples (len elements)
 * @param y   subtrahend samples (len elements)
 * @param z   output buffer (len elements)
 * @param len number of elements to process; 0 leaves z untouched
 */
void srslte_vec_sub_sss(short *x, short *y, short *z, uint32_t len) {
#if defined(LV_HAVE_AVX)
  srslte_vec_sub_sss_avx(x, y, z, len);
#elif defined(LV_HAVE_SSE)
  srslte_vec_sub_sss_sse(x, y, z, len);
#else
  /* Portable fallback; unsigned index matches the type of len. */
  for (uint32_t i = 0; i < len; i++) {
    z[i] = x[i] - y[i];
  }
#endif
}
@ -129,13 +133,17 @@ void srslte_vec_sum_fff(float *x, float *y, float *z, uint32_t len) {
}
/**
 * Element-wise addition of two 16-bit vectors: z[i] = x[i] + y[i].
 *
 * The implementation is picked at compile time, widest instruction set
 * first: LV_HAVE_AVX selects srslte_vec_sum_sss_avx, LV_HAVE_SSE selects
 * srslte_vec_sum_sss_sse, otherwise a portable scalar loop is used.
 *
 * @param x   first addend (len elements)
 * @param y   second addend (len elements)
 * @param z   output buffer (len elements)
 * @param len number of elements to process; 0 leaves z untouched
 */
void srslte_vec_sum_sss(short *x, short *y, short *z, uint32_t len) {
#if defined(LV_HAVE_AVX)
  srslte_vec_sum_sss_avx(x, y, z, len);
#elif defined(LV_HAVE_SSE)
  srslte_vec_sum_sss_sse(x, y, z, len);
#else
  /* Portable fallback; unsigned index matches the type of len. */
  for (uint32_t i = 0; i < len; i++) {
    z[i] = x[i] + y[i];
  }
#endif
}
@ -197,14 +205,18 @@ void srslte_vec_sc_prod_sfs(short *x, float h, short *z, uint32_t len) {
}
/**
 * Divides every sample of a 16-bit vector by 2^n_rightshift.
 *
 * The implementation is picked at compile time, widest instruction set
 * first: LV_HAVE_AVX selects srslte_vec_sc_div2_sss_avx, LV_HAVE_SSE selects
 * srslte_vec_sc_div2_sss_sse, otherwise a portable scalar loop is used.
 *
 * The scalar path uses integer division, so negative samples round toward
 * zero. NOTE(review): the SIMD kernels are not visible here; if they use an
 * arithmetic shift they round toward minus infinity instead - confirm.
 *
 * @param x            input samples (len elements)
 * @param n_rightshift exponent of the power-of-two divisor; must be
 *                     non-negative and smaller than the width of int
 * @param z            output buffer (len elements)
 * @param len          number of elements to process; 0 leaves z untouched
 */
void srslte_vec_sc_div2_sss(short *x, int n_rightshift, short *z, uint32_t len) {
#if defined(LV_HAVE_AVX)
  srslte_vec_sc_div2_sss_avx(x, n_rightshift, z, len);
#elif defined(LV_HAVE_SSE)
  srslte_vec_sc_div2_sss_sse(x, n_rightshift, z, len);
#else
  /* Divisor is loop-invariant, so compute it once. */
  const int pow2_div = 1 << n_rightshift;
  for (uint32_t i = 0; i < len; i++) {
    z[i] = x[i] / pow2_div;
  }
#endif
}
@ -220,13 +232,14 @@ void srslte_vec_norm_cfc(cf_t *x, float amplitude, cf_t *y, uint32_t len) {
}
/**
 * Multiplies a complex vector by a real scalar: z[i] = x[i] * h.
 *
 * With LV_HAVE_AVX the work is delegated to srslte_vec_mult_scalar_cf_f_avx,
 * whose argument order is (output, input, scalar, length); otherwise a
 * portable scalar loop is used.
 *
 * @param x   input samples (len elements)
 * @param h   real scale factor
 * @param z   output buffer (len elements)
 * @param len number of elements to process; 0 leaves z untouched
 */
void srslte_vec_sc_prod_cfc(cf_t *x, float h, cf_t *z, uint32_t len) {
#ifdef LV_HAVE_AVX
  /* Note the kernel takes the output pointer first. */
  srslte_vec_mult_scalar_cf_f_avx(z, x, h, len);
#else
  for (uint32_t i = 0; i < len; i++) {
    z[i] = x[i] * h;
  }
#endif
}
@ -271,7 +284,7 @@ void srslte_vec_convert_fi(float *x, int16_t *z, float scale, uint32_t len) {
z[i] = (int16_t) (x[i]*scale);
}
#else
srslte_vec_convert_fi_simd(x, z, scale, len);
srslte_vec_convert_fi_sse(x, z, scale, len);
#endif
}
@ -284,14 +297,13 @@ void srslte_vec_lut_fuf(float *x, uint32_t *lut, float *y, uint32_t len) {
/**
 * Scatters a 16-bit vector through a look-up table of output positions:
 * y[lut[i]] = x[i] for i in [0, len).
 *
 * The SSE kernel srslte_vec_lut_sss_sse is used only when LV_HAVE_SSE is
 * defined and DEBUG_MODE is not; every other configuration runs the
 * portable scalar loop, so the output is always written.
 *
 * @param x   input samples (len elements)
 * @param lut destination index in y for each input sample (len elements);
 *            every entry must be a valid index into y
 * @param y   output buffer
 * @param len number of input samples to scatter; 0 leaves y untouched
 */
void srslte_vec_lut_sss(short *x, unsigned short *lut, short *y, uint32_t len) {
#if defined(LV_HAVE_SSE) && !defined(DEBUG_MODE)
  srslte_vec_lut_sss_sse(x, lut, y, len);
#else
#ifdef DEBUG_MODE
#warning FIXME: Disabling SSE/AVX in srslte_vec_lut_sss
#endif
  /* Portable fallback, also used when SIMD is disabled for debugging. */
  for (uint32_t i = 0; i < len; i++) {
    y[lut[i]] = x[i];
  }
#endif
}
@ -505,13 +517,19 @@ void srslte_vec_prod_fff(float *x, float *y, float *z, uint32_t len) {
}
/**
 * Element-wise product of two 16-bit vectors: z[i] = x[i] * y[i].
 *
 * The implementation is picked at compile time, widest instruction set
 * first: LV_HAVE_AVX selects srslte_vec_prod_sss_avx, LV_HAVE_SSE selects
 * srslte_vec_prod_sss_sse, otherwise a portable scalar loop is used.
 * The scalar path computes the product in int and converts it back to
 * short on assignment.
 *
 * @param x   first factor (len elements)
 * @param y   second factor (len elements)
 * @param z   output buffer (len elements)
 * @param len number of elements to process; 0 leaves z untouched
 */
void srslte_vec_prod_sss(short *x, short *y, short *z, uint32_t len) {
#if defined(LV_HAVE_AVX)
  srslte_vec_prod_sss_avx(x, y, z, len);
#elif defined(LV_HAVE_SSE)
  srslte_vec_prod_sss_sse(x, y, z, len);
#else
  /* Portable fallback; unsigned index matches the type of len. */
  for (uint32_t i = 0; i < len; i++) {
    z[i] = x[i] * y[i];
  }
#endif
}
@ -642,15 +660,19 @@ float srslte_vec_dot_prod_fff(float *x, float *y, uint32_t len) {
}
/**
 * Dot product of two 16-bit vectors, accumulated in 32 bits.
 *
 * The implementation is picked at compile time, widest instruction set
 * first: LV_HAVE_AVX selects srslte_vec_dot_prod_sss_avx, LV_HAVE_SSE
 * selects srslte_vec_dot_prod_sss_sse, otherwise a portable scalar loop is
 * used. Exactly one branch is compiled, so there is a single return path.
 *
 * @param x   first vector (len elements)
 * @param y   second vector (len elements)
 * @param len number of elements to process
 * @return    sum over i of x[i] * y[i]; 0 when len is 0
 */
int32_t srslte_vec_dot_prod_sss(int16_t *x, int16_t *y, uint32_t len) {
#if defined(LV_HAVE_AVX)
  return srslte_vec_dot_prod_sss_avx(x, y, len);
#elif defined(LV_HAVE_SSE)
  return srslte_vec_dot_prod_sss_sse(x, y, len);
#else
  int32_t res = 0;
  for (uint32_t i = 0; i < len; i++) {
    res += x[i] * y[i];
  }
  return res;
#endif
}

View File

@ -45,7 +45,7 @@
#endif
int srslte_vec_dot_prod_sss_simd(short *x, short *y, uint32_t len)
int srslte_vec_dot_prod_sss_sse(short *x, short *y, uint32_t len)
{
int result = 0;
#ifdef LV_HAVE_SSE
@ -87,7 +87,7 @@ int srslte_vec_dot_prod_sss_simd(short *x, short *y, uint32_t len)
}
int srslte_vec_dot_prod_sss_simd_avx(short *x, short *y, uint32_t len)
int srslte_vec_dot_prod_sss_avx(short *x, short *y, uint32_t len)
{
int result = 0;
#ifdef LV_HAVE_AVX
@ -127,7 +127,7 @@ int srslte_vec_dot_prod_sss_simd_avx(short *x, short *y, uint32_t len)
void srslte_vec_sum_sss_simd(short *x, short *y, short *z, uint32_t len)
void srslte_vec_sum_sss_sse(short *x, short *y, short *z, uint32_t len)
{
#ifdef LV_HAVE_SSE
unsigned int number = 0;
@ -160,7 +160,7 @@ void srslte_vec_sum_sss_simd(short *x, short *y, short *z, uint32_t len)
}
void srslte_vec_sum_sss_simd_avx(short *x, short *y, short *z, uint32_t len)
void srslte_vec_sum_sss_avx(short *x, short *y, short *z, uint32_t len)
{
#ifdef LV_HAVE_SSE
unsigned int number = 0;
@ -193,7 +193,7 @@ void srslte_vec_sum_sss_simd_avx(short *x, short *y, short *z, uint32_t len)
}
void srslte_vec_sub_sss_simd(short *x, short *y, short *z, uint32_t len)
void srslte_vec_sub_sss_sse(short *x, short *y, short *z, uint32_t len)
{
#ifdef LV_HAVE_SSE
unsigned int number = 0;
@ -225,7 +225,7 @@ void srslte_vec_sub_sss_simd(short *x, short *y, short *z, uint32_t len)
#endif
}
void srslte_vec_sub_sss_simd_avx(short *x, short *y, short *z, uint32_t len)
void srslte_vec_sub_sss_avx(short *x, short *y, short *z, uint32_t len)
{
#ifdef LV_HAVE_AVX
unsigned int number = 0;
@ -260,7 +260,7 @@ void srslte_vec_sub_sss_simd_avx(short *x, short *y, short *z, uint32_t len)
void srslte_vec_prod_sss_simd(short *x, short *y, short *z, uint32_t len)
void srslte_vec_prod_sss_sse(short *x, short *y, short *z, uint32_t len)
{
#ifdef LV_HAVE_SSE
unsigned int number = 0;
@ -292,7 +292,7 @@ void srslte_vec_prod_sss_simd(short *x, short *y, short *z, uint32_t len)
#endif
}
void srslte_vec_prod_sss_simd_avx(short *x, short *y, short *z, uint32_t len)
void srslte_vec_prod_sss_avx(short *x, short *y, short *z, uint32_t len)
{
#ifdef LV_HAVE_SSE
unsigned int number = 0;
@ -324,7 +324,12 @@ void srslte_vec_prod_sss_simd_avx(short *x, short *y, short *z, uint32_t len)
#endif
}
void srslte_vec_sc_div2_sss_simd(short *x, int k, short *z, uint32_t len)
void srslte_vec_sc_div2_sss_sse(short *x, int k, short *z, uint32_t len)
{
#ifdef LV_HAVE_SSE
unsigned int number = 0;
@ -354,7 +359,7 @@ void srslte_vec_sc_div2_sss_simd(short *x, int k, short *z, uint32_t len)
#endif
}
void srslte_vec_sc_div2_sss_simd_avx(short *x, int k, short *z, uint32_t len)
void srslte_vec_sc_div2_sss_avx(short *x, int k, short *z, uint32_t len)
{
#ifdef LV_HAVE_AVX
unsigned int number = 0;
@ -384,8 +389,10 @@ void srslte_vec_sc_div2_sss_simd_avx(short *x, int k, short *z, uint32_t len)
#endif
}
/* No improvement with AVX */
void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t len)
void srslte_vec_lut_sss_sse(short *x, unsigned short *lut, short *y, uint32_t len)
{
#ifndef DEBUG_MODE
#ifdef LV_HAVE_SSE
@ -419,7 +426,7 @@ void srslte_vec_lut_sss_simd(short *x, unsigned short *lut, short *y, uint32_t l
}
/* Modified from volk_32f_s32f_convert_16i_a_simd2. Removed clipping */
void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len)
void srslte_vec_convert_fi_sse(float *x, int16_t *z, float scale, uint32_t len)
{
#ifdef LV_HAVE_SSE
unsigned int number = 0;
@ -457,8 +464,8 @@ void srslte_vec_convert_fi_simd(float *x, int16_t *z, float scale, uint32_t len)
#endif
}
void srslte_32fc_s32f_multiply_32fc_avx( cf_t *z,const cf_t *x,const float h,const uint32_t len)
//srslte_32fc_s32f_multiply_32fc_avx
void srslte_vec_mult_scalar_cf_f_avx( cf_t *z,const cf_t *x,const float h,const uint32_t len)
{
#ifdef LV_HAVE_AVX