mirror of https://github.com/PentHertz/srsLTE.git
Solve Issue 1: Invalid read in LDPC AVX2 Encoder/Decoder
This commit is contained in:
parent
d014a89cfb
commit
c9cdb0d53c
|
@ -72,16 +72,19 @@ static const int8_t infinity7 = (1U << 6U) - 1;
|
|||
struct ldpc_regs_c_avx2long {
|
||||
__m256i scaling_fctr; /*!< \brief Scaling factor for the normalized min-sum decoding algorithm. */
|
||||
|
||||
bg_node_t* soft_bits; /*!< \brief A-posteriori log-likelihood ratios. */
|
||||
__m256i* check_to_var; /*!< \brief Check-to-variable messages. */
|
||||
__m256i* var_to_check; /*!< \brief Variable-to-check messages. */
|
||||
bg_node_t* soft_bits; /*!< \brief A-posteriori log-likelihood ratios. */
|
||||
__m256i* check_to_var; /*!< \brief Check-to-variable messages. */
|
||||
__m256i* var_to_check; /*!< \brief Variable-to-check messages. */
|
||||
__m256i* var_to_check_to_free; /*!< \brief the Variable-to-check messages with one extra _mm256 allocated space. */
|
||||
|
||||
__m256i* rotated_v2c; /*!< \brief To store a rotated version of the variable-to-check messages. */
|
||||
__m256i* this_c2v_epi8; /*!< \brief Helper register for the current c2v node. */
|
||||
__m256i* minp_v2c_epi8; /*!< \brief Helper register for the minimum v2c message. */
|
||||
__m256i* mins_v2c_epi8; /*!< \brief Helper register for the second minimum v2c message. */
|
||||
__m256i* prod_v2c_epi8; /*!< \brief Helper register for the sign of the product of all v2c messages. */
|
||||
__m256i* min_ix_epi8; /*!< \brief Helper register for the index of the minimum v2c message. */
|
||||
__m256i* rotated_v2c; /*!< \brief To store a rotated version of the variable-to-check messages. */
|
||||
__m256i* this_c2v_epi8; /*!< \brief Helper register for the current c2v node. */
|
||||
__m256i* this_c2v_epi8_to_free; /*!< \brief Helper register for the current c2v node with one extra __m256 allocated
|
||||
space. */
|
||||
__m256i* minp_v2c_epi8; /*!< \brief Helper register for the minimum v2c message. */
|
||||
__m256i* mins_v2c_epi8; /*!< \brief Helper register for the second minimum v2c message. */
|
||||
__m256i* prod_v2c_epi8; /*!< \brief Helper register for the sign of the product of all v2c messages. */
|
||||
__m256i* min_ix_epi8; /*!< \brief Helper register for the index of the minimum v2c message. */
|
||||
|
||||
uint16_t ls; /*!< \brief Lifting size. */
|
||||
uint8_t hrr; /*!< \brief Number of variable nodes in the high-rate region (before lifting). */
|
||||
|
@ -153,15 +156,16 @@ void* create_ldpc_dec_c_avx2long(uint8_t bgN, uint8_t bgM, uint16_t ls, float sc
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if ((vp->var_to_check = srslte_vec_malloc((hrr + 1) * n_subnodes * sizeof(__m256i))) == NULL) {
|
||||
if ((vp->var_to_check_to_free = srslte_vec_malloc(((hrr + 1) * n_subnodes + 2) * sizeof(__m256i))) == NULL) {
|
||||
free(vp->check_to_var);
|
||||
free(vp->soft_bits);
|
||||
free(vp);
|
||||
return NULL;
|
||||
}
|
||||
vp->var_to_check = &vp->var_to_check_to_free[1];
|
||||
|
||||
if ((vp->minp_v2c_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i))) == NULL) {
|
||||
free(vp->var_to_check);
|
||||
free(vp->var_to_check_to_free);
|
||||
free(vp->check_to_var);
|
||||
free(vp->soft_bits);
|
||||
free(vp);
|
||||
|
@ -170,7 +174,7 @@ void* create_ldpc_dec_c_avx2long(uint8_t bgN, uint8_t bgM, uint16_t ls, float sc
|
|||
|
||||
if ((vp->mins_v2c_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i))) == NULL) {
|
||||
free(vp->minp_v2c_epi8);
|
||||
free(vp->var_to_check);
|
||||
free(vp->var_to_check_to_free);
|
||||
free(vp->check_to_var);
|
||||
free(vp->soft_bits);
|
||||
free(vp);
|
||||
|
@ -180,7 +184,7 @@ void* create_ldpc_dec_c_avx2long(uint8_t bgN, uint8_t bgM, uint16_t ls, float sc
|
|||
if ((vp->prod_v2c_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i))) == NULL) {
|
||||
free(vp->mins_v2c_epi8);
|
||||
free(vp->minp_v2c_epi8);
|
||||
free(vp->var_to_check);
|
||||
free(vp->var_to_check_to_free);
|
||||
free(vp->check_to_var);
|
||||
free(vp->soft_bits);
|
||||
free(vp);
|
||||
|
@ -191,7 +195,7 @@ void* create_ldpc_dec_c_avx2long(uint8_t bgN, uint8_t bgM, uint16_t ls, float sc
|
|||
free(vp->prod_v2c_epi8);
|
||||
free(vp->mins_v2c_epi8);
|
||||
free(vp->minp_v2c_epi8);
|
||||
free(vp->var_to_check);
|
||||
free(vp->var_to_check_to_free);
|
||||
free(vp->check_to_var);
|
||||
free(vp->soft_bits);
|
||||
free(vp);
|
||||
|
@ -203,25 +207,27 @@ void* create_ldpc_dec_c_avx2long(uint8_t bgN, uint8_t bgM, uint16_t ls, float sc
|
|||
free(vp->prod_v2c_epi8);
|
||||
free(vp->mins_v2c_epi8);
|
||||
free(vp->minp_v2c_epi8);
|
||||
free(vp->var_to_check);
|
||||
free(vp->var_to_check_to_free);
|
||||
free(vp->check_to_var);
|
||||
free(vp->soft_bits);
|
||||
free(vp);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ((vp->this_c2v_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i))) == NULL) {
|
||||
if ((vp->this_c2v_epi8_to_free = srslte_vec_malloc((n_subnodes + 2) * sizeof(__m256i))) == NULL) {
|
||||
free(vp->rotated_v2c);
|
||||
free(vp->min_ix_epi8);
|
||||
free(vp->prod_v2c_epi8);
|
||||
free(vp->mins_v2c_epi8);
|
||||
free(vp->minp_v2c_epi8);
|
||||
free(vp->var_to_check);
|
||||
free(vp->var_to_check_to_free);
|
||||
free(vp->check_to_var);
|
||||
free(vp->soft_bits);
|
||||
free(vp);
|
||||
return NULL;
|
||||
}
|
||||
vp->this_c2v_epi8 =
|
||||
&vp->this_c2v_epi8_to_free[1]; //+1 to support reading negative position in this_c2v_epi8 at rotate_node_rigth
|
||||
|
||||
vp->bgM = bgM;
|
||||
vp->bgN = bgN;
|
||||
|
@ -240,13 +246,13 @@ void delete_ldpc_dec_c_avx2long(void* p)
|
|||
struct ldpc_regs_c_avx2long* vp = p;
|
||||
|
||||
if (vp != NULL) {
|
||||
free(vp->this_c2v_epi8);
|
||||
free(vp->this_c2v_epi8_to_free);
|
||||
free(vp->rotated_v2c);
|
||||
free(vp->min_ix_epi8);
|
||||
free(vp->prod_v2c_epi8);
|
||||
free(vp->mins_v2c_epi8);
|
||||
free(vp->minp_v2c_epi8);
|
||||
free(vp->var_to_check);
|
||||
free(vp->var_to_check_to_free);
|
||||
free(vp->check_to_var);
|
||||
free(vp->soft_bits);
|
||||
free(vp);
|
||||
|
|
|
@ -72,17 +72,20 @@ static const int8_t infinity7 = (1U << 6U) - 1;
|
|||
struct ldpc_regs_c_avx2long_flood {
|
||||
__m256i scaling_fctr; /*!< \brief Scaling factor for the normalized min-sum decoding algorithm. */
|
||||
|
||||
bg_node_t* soft_bits; /*!< \brief A-posteriori log-likelihood ratios. */
|
||||
__m256i* llrs; /*!< \brief A-priori log-likelihood ratios. */
|
||||
__m256i* check_to_var; /*!< \brief Check-to-variable messages. */
|
||||
__m256i* var_to_check; /*!< \brief Variable-to-check messages. */
|
||||
bg_node_t* soft_bits; /*!< \brief A-posteriori log-likelihood ratios. */
|
||||
__m256i* llrs; /*!< \brief A-priori log-likelihood ratios. */
|
||||
__m256i* check_to_var; /*!< \brief Check-to-variable messages. */
|
||||
__m256i* var_to_check; /*!< \brief Variable-to-check messages. */
|
||||
__m256i* var_to_check_to_free; /*!< \brief Auxiliar variable-to-check messages, with 2 extra __m256 space. */
|
||||
|
||||
__m256i* rotated_v2c; /*!< \brief To store a rotated version of the variable-to-check messages. */
|
||||
__m256i* this_c2v_epi8; /*!< \brief Helper register for the current c2v node. */
|
||||
__m256i* minp_v2c_epi8; /*!< \brief Helper register for the minimum v2c message. */
|
||||
__m256i* mins_v2c_epi8; /*!< \brief Helper register for the second minimum v2c message. */
|
||||
__m256i* prod_v2c_epi8; /*!< \brief Helper register for the sign of the product of all v2c messages. */
|
||||
__m256i* min_ix_epi8; /*!< \brief Helper register for the index of the minimum v2c message. */
|
||||
__m256i*
|
||||
this_c2v_epi8_to_free; /*!< \brief Auxiliar helper register for the current c2v node, with 2 extra _mm256 space */
|
||||
__m256i* minp_v2c_epi8; /*!< \brief Helper register for the minimum v2c message. */
|
||||
__m256i* mins_v2c_epi8; /*!< \brief Helper register for the second minimum v2c message. */
|
||||
__m256i* prod_v2c_epi8; /*!< \brief Helper register for the sign of the product of all v2c messages. */
|
||||
__m256i* min_ix_epi8; /*!< \brief Helper register for the index of the minimum v2c message. */
|
||||
|
||||
uint16_t ls; /*!< \brief Lifting size. */
|
||||
uint8_t n_subnodes; /*!< \brief Number of subnodes. */
|
||||
|
@ -160,16 +163,17 @@ void* create_ldpc_dec_c_avx2long_flood(uint8_t bgN, uint8_t bgM, uint16_t ls, fl
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if ((vp->var_to_check = srslte_vec_malloc((hrr + 1) * bgM * n_subnodes * sizeof(__m256i))) == NULL) {
|
||||
if ((vp->var_to_check_to_free = srslte_vec_malloc(((hrr + 1) * bgM * n_subnodes + 2) * sizeof(__m256i))) == NULL) {
|
||||
free(vp->check_to_var);
|
||||
free(vp->soft_bits);
|
||||
free(vp->llrs);
|
||||
free(vp);
|
||||
return NULL;
|
||||
}
|
||||
vp->var_to_check = &vp->var_to_check_to_free[1];
|
||||
|
||||
if ((vp->minp_v2c_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i))) == NULL) {
|
||||
free(vp->var_to_check);
|
||||
free(vp->var_to_check_to_free);
|
||||
free(vp->check_to_var);
|
||||
free(vp->soft_bits);
|
||||
free(vp->llrs);
|
||||
|
@ -179,7 +183,7 @@ void* create_ldpc_dec_c_avx2long_flood(uint8_t bgN, uint8_t bgM, uint16_t ls, fl
|
|||
|
||||
if ((vp->mins_v2c_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i))) == NULL) {
|
||||
free(vp->minp_v2c_epi8);
|
||||
free(vp->var_to_check);
|
||||
free(vp->var_to_check_to_free);
|
||||
free(vp->check_to_var);
|
||||
free(vp->soft_bits);
|
||||
free(vp->llrs);
|
||||
|
@ -190,7 +194,7 @@ void* create_ldpc_dec_c_avx2long_flood(uint8_t bgN, uint8_t bgM, uint16_t ls, fl
|
|||
if ((vp->prod_v2c_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i))) == NULL) {
|
||||
free(vp->mins_v2c_epi8);
|
||||
free(vp->minp_v2c_epi8);
|
||||
free(vp->var_to_check);
|
||||
free(vp->var_to_check_to_free);
|
||||
free(vp->check_to_var);
|
||||
free(vp->soft_bits);
|
||||
free(vp->llrs);
|
||||
|
@ -202,7 +206,7 @@ void* create_ldpc_dec_c_avx2long_flood(uint8_t bgN, uint8_t bgM, uint16_t ls, fl
|
|||
free(vp->prod_v2c_epi8);
|
||||
free(vp->mins_v2c_epi8);
|
||||
free(vp->minp_v2c_epi8);
|
||||
free(vp->var_to_check);
|
||||
free(vp->var_to_check_to_free);
|
||||
free(vp->check_to_var);
|
||||
free(vp->soft_bits);
|
||||
free(vp->llrs);
|
||||
|
@ -215,7 +219,7 @@ void* create_ldpc_dec_c_avx2long_flood(uint8_t bgN, uint8_t bgM, uint16_t ls, fl
|
|||
free(vp->prod_v2c_epi8);
|
||||
free(vp->mins_v2c_epi8);
|
||||
free(vp->minp_v2c_epi8);
|
||||
free(vp->var_to_check);
|
||||
free(vp->var_to_check_to_free);
|
||||
free(vp->check_to_var);
|
||||
free(vp->soft_bits);
|
||||
free(vp->llrs);
|
||||
|
@ -223,19 +227,20 @@ void* create_ldpc_dec_c_avx2long_flood(uint8_t bgN, uint8_t bgM, uint16_t ls, fl
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if ((vp->this_c2v_epi8 = srslte_vec_malloc(n_subnodes * sizeof(__m256i))) == NULL) {
|
||||
if ((vp->this_c2v_epi8_to_free = srslte_vec_malloc((n_subnodes + 2) * sizeof(__m256i))) == NULL) {
|
||||
free(vp->rotated_v2c);
|
||||
free(vp->min_ix_epi8);
|
||||
free(vp->prod_v2c_epi8);
|
||||
free(vp->mins_v2c_epi8);
|
||||
free(vp->minp_v2c_epi8);
|
||||
free(vp->var_to_check);
|
||||
free(vp->var_to_check_to_free);
|
||||
free(vp->check_to_var);
|
||||
free(vp->soft_bits);
|
||||
free(vp->llrs);
|
||||
free(vp);
|
||||
return NULL;
|
||||
}
|
||||
vp->this_c2v_epi8 = &vp->this_c2v_epi8_to_free[1];
|
||||
|
||||
vp->bgM = bgM;
|
||||
vp->bgN = bgN;
|
||||
|
@ -254,13 +259,13 @@ void delete_ldpc_dec_c_avx2long_flood(void* p)
|
|||
struct ldpc_regs_c_avx2long_flood* vp = p;
|
||||
|
||||
if (vp != NULL) {
|
||||
free(vp->this_c2v_epi8);
|
||||
free(vp->this_c2v_epi8_to_free);
|
||||
free(vp->rotated_v2c);
|
||||
free(vp->min_ix_epi8);
|
||||
free(vp->prod_v2c_epi8);
|
||||
free(vp->mins_v2c_epi8);
|
||||
free(vp->minp_v2c_epi8);
|
||||
free(vp->var_to_check);
|
||||
free(vp->var_to_check_to_free);
|
||||
free(vp->check_to_var);
|
||||
free(vp->soft_bits);
|
||||
free(vp->llrs);
|
||||
|
|
|
@ -710,6 +710,11 @@ int srslte_ldpc_decoder_init(srslte_ldpc_decoder_t* q,
|
|||
}
|
||||
|
||||
q->var_indices = srslte_vec_malloc(q->bgM * sizeof(int8_t[MAX_CNCT]));
|
||||
if (!q->var_indices) {
|
||||
free(q->pcm);
|
||||
perror("malloc");
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (create_compact_pcm(q->pcm, q->var_indices, q->bg, q->ls) != 0) {
|
||||
perror("Create PCM");
|
||||
|
|
|
@ -56,11 +56,14 @@ typedef union bg_node_t {
|
|||
* \brief Inner registers for the optimized LDPC encoder.
|
||||
*/
|
||||
struct ldpc_enc_avx2long {
|
||||
bg_node_t* codeword; /*!< \brief Contains the entire codeword, before puncturing. */
|
||||
__m256i* aux; /*!< \brief Auxiliary register. */
|
||||
__m256i* rotated_node; /*!< \brief To store rotated versions of the nodes. */
|
||||
|
||||
uint8_t n_subnodes; /*!< \brief Number of subnodes. */
|
||||
bg_node_t* codeword; /*!< \brief Contains the entire codeword, before puncturing. */
|
||||
bg_node_t* codeword_to_free; /*!< \brief Auxiliary pointer with a free memory of size CTTC_AVX2_B_SIZE previous to
|
||||
codeword */
|
||||
__m256i* aux; /*!< \brief Auxiliary register. */
|
||||
__m256i* rotated_node; /*!< \brief To store rotated versions of the nodes. */
|
||||
__m256i* rotated_node_to_free; /*!< \brief Auxiliary pointer to store rotated versions of the nodes with extra free
|
||||
memory of size CTTC_AVX2_B_SIZE previous to rotated_node */
|
||||
uint8_t n_subnodes; /*!< \brief Number of subnodes. */
|
||||
};
|
||||
|
||||
/*!
|
||||
|
@ -86,25 +89,25 @@ void* create_ldpc_enc_avx2long(srslte_ldpc_encoder_t* q)
|
|||
int left_out = q->ls % SRSLTE_AVX2_B_SIZE;
|
||||
vp->n_subnodes = q->ls / SRSLTE_AVX2_B_SIZE + (left_out > 0);
|
||||
|
||||
if ((vp->codeword = srslte_vec_malloc(q->bgN * vp->n_subnodes * sizeof(bg_node_t))) == NULL) {
|
||||
if ((vp->codeword_to_free = srslte_vec_malloc((q->bgN * vp->n_subnodes + 1) * sizeof(bg_node_t))) == NULL) {
|
||||
free(vp);
|
||||
return NULL;
|
||||
}
|
||||
vp->codeword = &vp->codeword_to_free[1];
|
||||
|
||||
if ((vp->aux = srslte_vec_malloc(q->bgM * vp->n_subnodes * sizeof(__m256i))) == NULL) {
|
||||
free(vp->codeword);
|
||||
free(vp->codeword_to_free);
|
||||
free(vp);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// for some reason, the software stops with a segmentation fault when ls is a multiple of 32
|
||||
// if we don't add the extra block.
|
||||
if ((vp->rotated_node = srslte_vec_malloc((vp->n_subnodes + 1) * sizeof(__m256i))) == NULL) {
|
||||
if ((vp->rotated_node_to_free = srslte_vec_malloc((vp->n_subnodes + 2) * sizeof(__m256i))) == NULL) {
|
||||
free(vp->aux);
|
||||
free(vp->codeword);
|
||||
free(vp->codeword_to_free);
|
||||
free(vp);
|
||||
return NULL;
|
||||
}
|
||||
vp->rotated_node = &vp->rotated_node_to_free[1];
|
||||
|
||||
return vp;
|
||||
}
|
||||
|
@ -114,9 +117,9 @@ void delete_ldpc_enc_avx2long(void* p)
|
|||
struct ldpc_enc_avx2long* vp = p;
|
||||
|
||||
if (vp != NULL) {
|
||||
free(vp->rotated_node);
|
||||
free(vp->rotated_node_to_free);
|
||||
free(vp->aux);
|
||||
free(vp->codeword);
|
||||
free(vp->codeword_to_free);
|
||||
free(vp);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -157,8 +157,9 @@ function(ldpc_rm_unit_tests)
|
|||
endforeach()
|
||||
endforeach()
|
||||
endfunction()
|
||||
set(test_name LDPC-RM-BG1)
|
||||
|
||||
set(test_name LDPC-RM)
|
||||
set(test_command ldpc_rm_test)
|
||||
ldpc_rm_unit_tests(${lifting_sizes})
|
||||
|
||||
add_test(NAME LDPC-RM-chain COMMAND ldpc_rm_chain_test -E 1 -B 1)
|
||||
add_test(NAME LDPC-RM-chain COMMAND ldpc_rm_chain_test -E 1 -B 1)
|
||||
|
|
|
@ -214,6 +214,9 @@ int main(int argc, char** argv)
|
|||
|
||||
uint32_t F = encoder.bgK - 5; // This value is arbitrary
|
||||
|
||||
finalK = encoder.liftK;
|
||||
finalN = encoder.liftN - 2 * lift_size;
|
||||
|
||||
if (rm_length == 0) {
|
||||
rm_length = finalN - F;
|
||||
}
|
||||
|
@ -235,9 +238,6 @@ int main(int argc, char** argv)
|
|||
1.0 * (encoder.liftK - F) / rm_length);
|
||||
printf("\n Signal-to-Noise Ratio -> %.2f dB\n", snr);
|
||||
|
||||
finalK = encoder.liftK;
|
||||
finalN = encoder.liftN - 2 * lift_size;
|
||||
|
||||
messages_true = malloc(finalK * batch_size * sizeof(uint8_t));
|
||||
messages_sim_f = malloc(finalK * batch_size * sizeof(uint8_t));
|
||||
messages_sim_s = malloc(finalK * batch_size * sizeof(uint8_t));
|
||||
|
@ -315,8 +315,6 @@ int main(int argc, char** argv)
|
|||
int n_useful_symbols =
|
||||
(rm_length + F) % lift_size ? ((rm_length + F) / lift_size + 1) * lift_size : (rm_length + F);
|
||||
|
||||
printf("n_useful_symbols = %d\n", n_useful_symbols);
|
||||
|
||||
// Encode messages
|
||||
gettimeofday(&t[1], NULL);
|
||||
for (j = 0; j < batch_size; j++) {
|
||||
|
|
Loading…
Reference in New Issue