diff --git a/ip_cores/util/src/rtl/barret_mod_pipe.sv b/ip_cores/util/src/rtl/barret_mod_pipe.sv index 382ad25..f5ac22f 100644 --- a/ip_cores/util/src/rtl/barret_mod_pipe.sv +++ b/ip_cores/util/src/rtl/barret_mod_pipe.sv @@ -52,8 +52,8 @@ logic [DAT_BITS-1:0] dat; logic val; logic rdy; -if_axi_stream #(.DAT_BYTS(DAT_BITS/8)) fifo_in_if(i_clk); -if_axi_stream #(.DAT_BYTS(DAT_BITS/8)) fifo_out_if(i_clk); +if_axi_stream #(.DAT_BITS(2*K + 2), .CTL_BITS(CTL_BITS)) fifo_in_if(i_clk); +if_axi_stream #(.DAT_BITS(2*K + 2), .CTL_BITS(CTL_BITS)) fifo_out_if(i_clk); logic fifo_out_full; // Stage 1 multiplication @@ -67,6 +67,9 @@ always_ff @ (posedge i_clk) begin o_mult_if_0.reset_source(); fifo_in_if.reset_source(); end else begin + if (fifo_in_if.rdy) fifo_in_if.val <= 0; + if (o_mult_if_0.rdy) o_mult_if_0.val <= 0; + if (o_rdy) begin o_mult_if_0.sop <= 1; o_mult_if_0.eop <= 1; @@ -75,6 +78,7 @@ always_ff @ (posedge i_clk) begin o_mult_if_0.dat[0 +: DAT_BITS] <= i_dat >> (2*K - 2); o_mult_if_0.dat[DAT_BITS +: DAT_BITS] <= U; fifo_in_if.dat <= i_dat % (1 << (2*K + 2)); + fifo_in_if.ctl <= i_ctl; fifo_in_if.val <= i_val; end end @@ -89,6 +93,8 @@ always_ff @ (posedge i_clk) begin if (i_rst) begin o_mult_if_1.reset_source(); end else begin + if (o_mult_if_1.rdy) o_mult_if_1.val <= 0; + if (i_mult_if_0.rdy) begin o_mult_if_1.sop <= 1; o_mult_if_1.eop <= 1; @@ -103,6 +109,8 @@ end // Stage 3 subtraction to final result always_comb begin i_mult_if_1.rdy = (rdy && val) || ~val; +end +always_comb begin fifo_out_if.rdy = i_mult_if_1.val && i_mult_if_1.rdy; end @@ -110,6 +118,8 @@ always_ff @ (posedge i_clk) begin if (i_rst) begin val <= 0; end else begin + if (i_mult_if_1.rdy) val <= 0; + if (i_mult_if_1.rdy) begin val <= i_mult_if_1.val; ctl <= i_mult_if_1.ctl; @@ -128,6 +138,9 @@ always_ff @ (posedge i_clk) begin o_val <= 0; o_err <= 0; end else begin + + if (i_rdy) o_val <= 0; + if (~o_val || (o_val && i_rdy)) begin o_val <= val; o_ctl <= ctl; @@ -142,7 +155,8 @@ end // Fifo to store inputs (as we need to do final subtraction) axi_stream_fifo #( .SIZE ( 32 ), - .DAT_BITS ( DAT_BITS ) + .DAT_BITS ( 2*K + 2 ), + .CTL_BITS ( CTL_BITS ) ) axi_stream_fifo ( .i_clk ( i_clk ),