mirror of https://github.com/PentHertz/srsLTE.git
Added srs_vec_cf_cpy for aligned copy, which slightly improves performance for aligned data
This commit is contained in:
parent
a9d9c92205
commit
48d508aeba
|
@ -172,6 +172,9 @@ SRSLTE_API void srslte_vec_abs_square_cf(cf_t *x, float *abs_square, uint32_t le
|
|||
/* argument of each vector element */
|
||||
SRSLTE_API void srslte_vec_arg_cf(cf_t *x, float *arg, uint32_t len);
|
||||
|
||||
/* Copy 256 bit aligned vector */
|
||||
SRSLTE_API void srs_vec_cf_cpy(cf_t *src, cf_t *dst, int len);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <srslte/phy/utils/vector.h>
|
||||
|
||||
#include "srslte/phy/common/phy_common.h"
|
||||
#include "srslte/phy/mimo/layermap.h"
|
||||
|
@ -51,7 +52,12 @@ int srslte_layermap_diversity(cf_t *d, cf_t *x[SRSLTE_MAX_LAYERS], int nof_layer
|
|||
|
||||
int srslte_layermap_multiplex(cf_t *d[SRSLTE_MAX_CODEWORDS], cf_t *x[SRSLTE_MAX_LAYERS], int nof_cw, int nof_layers,
|
||||
int nof_symbols[SRSLTE_MAX_CODEWORDS]) {
|
||||
if (nof_cw == 1) {
|
||||
if (nof_cw == nof_layers) {
|
||||
for (int i = 0; i < nof_cw; i++) {
|
||||
srs_vec_cf_cpy(x[i], d[i], (uint32_t) nof_symbols[0]);
|
||||
}
|
||||
return nof_symbols[0];
|
||||
} else if (nof_cw == 1) {
|
||||
return srslte_layermap_diversity(d[0], x, nof_layers, nof_symbols[0]);
|
||||
} else {
|
||||
int n[2];
|
||||
|
|
|
@ -96,19 +96,19 @@ int main(int argc, char **argv) {
|
|||
}
|
||||
|
||||
for (i=0;i<nof_cw;i++) {
|
||||
d[i] = malloc(sizeof(cf_t) * nof_symb_cw[i]);
|
||||
d[i] = srslte_vec_malloc(sizeof(cf_t) * nof_symb_cw[i]);
|
||||
if (!d[i]) {
|
||||
perror("malloc");
|
||||
exit(-1);
|
||||
}
|
||||
dp[i] = malloc(sizeof(cf_t) * nof_symb_cw[i]);
|
||||
dp[i] = srslte_vec_malloc(sizeof(cf_t) * nof_symb_cw[i]);
|
||||
if (!dp[i]) {
|
||||
perror("malloc");
|
||||
exit(-1);
|
||||
}
|
||||
}
|
||||
for (i=0;i<nof_layers;i++) {
|
||||
x[i] = malloc(sizeof(cf_t) * nof_symbols);
|
||||
x[i] = srslte_vec_malloc(sizeof(cf_t) * nof_symbols);
|
||||
if (!x[i]) {
|
||||
perror("malloc");
|
||||
exit(-1);
|
||||
|
|
|
@ -843,3 +843,24 @@ void srslte_vec_quant_suc(int16_t *in, uint8_t *out, float gain, int16_t offset,
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Copies len complex samples from src to dst, using aligned SIMD moves when
 * the build enables them.
 *
 * @param dst  Destination buffer, at least len elements.
 * @param src  Source buffer, at least len elements.
 * @param len  Number of cf_t elements to copy; values <= 0 copy nothing.
 *
 * Alignment: with LV_HAVE_AVX the aligned 256-bit load/store intrinsics are
 * used, so both pointers must be 32-byte aligned; with only LV_HAVE_SSE they
 * must be 16-byte aligned. Passing unaligned buffers faults.
 * NOTE(review): the per-iteration element counts below assume cf_t is a pair
 * of floats (8 bytes) -- confirm against the cf_t typedef.
 */
void srs_vec_cf_cpy(cf_t *dst, cf_t *src, int len) {
  int i = 0;

#ifdef LV_HAVE_AVX
  /* Four complex samples (256 bits) per iteration */
  for (; i < len - 3; i += 4) {
    _mm256_store_ps((float *) &dst[i], _mm256_load_ps((float *) &src[i]));
  }
#endif /* LV_HAVE_AVX */

#ifdef LV_HAVE_SSE
  /* Two complex samples (128 bits) per iteration; after the AVX loop i is a
   * multiple of 4, so these accesses stay 16-byte aligned */
  for (; i < len - 1; i += 2) {
    _mm_store_ps((float *) &dst[i], _mm_load_ps((float *) &src[i]));
  }
#endif /* LV_HAVE_SSE */

  /* Scalar tail, shared by every configuration. A plain element assignment is
   * used instead of going through __m64: MMX moves would need _mm_empty()
   * before any later x87 floating-point code and add nothing for one sample. */
  for (; i < len; i++) {
    dst[i] = src[i];
  }
}
|
||||
|
|
Loading…
Reference in New Issue