From 4c9fc76048d37ee7b621008c90f4c9c85366054c Mon Sep 17 00:00:00 2001 From: bsdevlin Date: Thu, 8 Aug 2019 12:40:05 +0800 Subject: [PATCH] Updates to resource multipliers and new stream version for fe6 multiplication --- ip_cores/ec/src/rtl/ec_fe12_mul_s.sv | 8 +- ip_cores/ec/src/rtl/ec_fe6_mul_s.sv | 382 ++++++++++++++++++ .../ec/src/rtl/fe6_mul_by_nonresidue_s.sv | 2 +- ip_cores/ec/src/tb/ec_fe12_mul_s_tb.sv | 12 +- ip_cores/util/src/rtl/adder_pipe.sv | 7 + ip_cores/util/src/rtl/resource_share.sv | 62 +-- ip_cores/util/src/rtl/subtracter_pipe.sv | 8 + 7 files changed, 446 insertions(+), 35 deletions(-) create mode 100644 ip_cores/ec/src/rtl/ec_fe6_mul_s.sv diff --git a/ip_cores/ec/src/rtl/ec_fe12_mul_s.sv b/ip_cores/ec/src/rtl/ec_fe12_mul_s.sv index 93e89da..78b1a88 100644 --- a/ip_cores/ec/src/rtl/ec_fe12_mul_s.sv +++ b/ip_cores/ec/src/rtl/ec_fe12_mul_s.sv @@ -69,7 +69,7 @@ always_comb begin endcase case (i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT]) inside - 0: i_sub_fe_if.rdy = (~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy)); + 0: i_sub_fe_if.rdy = (sub_cnt >= 6) && (~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy)); 1: i_sub_fe_if.rdy = (out_cnt >= 6) && (~o_mul_fe12_if.val || (o_mul_fe12_if.val && o_mul_fe12_if.rdy)); default: i_sub_fe_if.rdy = 0; endcase @@ -133,7 +133,7 @@ always_ff @ (posedge i_clk) begin if (i_add_fe_if.val && i_add_fe_if.rdy && i_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 1) begin b0 <= {i_add_fe_if.dat, b0[5:1]}; - b0_val <= 1; + b0_val <= i_add_fe_if.eop; end if (i_mul_fe12_if.rdy && i_mul_fe12_if.val) begin @@ -198,9 +198,7 @@ always_ff @ (posedge i_clk) begin start <= 0; end endcase - end - - + end end end diff --git a/ip_cores/ec/src/rtl/ec_fe6_mul_s.sv b/ip_cores/ec/src/rtl/ec_fe6_mul_s.sv new file mode 100644 index 0000000..0bdeebd --- /dev/null +++ b/ip_cores/ec/src/rtl/ec_fe6_mul_s.sv @@ -0,0 +1,382 @@ +/* + This provides the interface to perform + Fp^6 multiplication, over a Fp2 tower. 
+ + Copyright (C) 2019 Benjamin Devlin and Zcash Foundation + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. +*/ + +module ec_fe6_mul_s +#( + parameter type FE_TYPE, + parameter type FE2_TYPE, + parameter type FE6_TYPE, + parameter CTL_BITS = 14, + parameter OVR_WRT_BIT = 8 // From this bit 4 bits are used for internal control, 2 bits for resource sharing - 6 total +)( + input i_clk, i_rst, + // Interface to FE2_TYPE multiplier (mod P) + if_axi_stream.source o_mul_fe2_if, + if_axi_stream.sink i_mul_fe2_if, + // Interface to FE2_TYPE adder (mod P) + if_axi_stream.source o_add_fe_if, + if_axi_stream.sink i_add_fe_if, + // Interface to FE2_TYPE subtractor (mod P) + if_axi_stream.source o_sub_fe_if, + if_axi_stream.sink i_sub_fe_if, + // Interface to FE2_TYPE multiply by non-residue + if_axi_stream.source o_mnr_fe2_if, + if_axi_stream.sink i_mnr_fe2_if, + // Interface to FE6_TYPE multiplier (mod P) + if_axi_stream.source o_mul_fe6_if, + if_axi_stream.sink i_mul_fe6_if +); + +localparam NUM_OVR_WRT_BIT = 5; + +// Multiplications are calculated using the formula in bls12_381.pkg::fe6_mul() +FE2_TYPE a_a, b_b, c_c, t; +FE6_TYPE out, in_a, in_b; + +logic [22:0] eq_val, eq_wait; +logic mul_cnt, add_cnt, sub_cnt, mnr_cnt; +logic mul_en, add_en, sub_en, mnr_en; +logic [4:0] nxt_fe2_mul, nxt_fe2_mnr, nxt_fe_add, nxt_fe_sub; +logic [2:0] out_cnt; + +logic rdy_l; + +always_ff @ (posedge i_clk) begin + if (i_rst) begin + 
o_mul_fe6_if.reset_source(); + o_mnr_fe2_if.reset_source(); + o_mul_fe2_if.reset_source(); + o_sub_fe_if.reset_source(); + o_add_fe_if.reset_source(); + i_mul_fe6_if.rdy <= 0; + i_mul_fe2_if.rdy <= 0; + i_sub_fe_if.rdy <= 0; + i_add_fe_if.rdy <= 0; + i_mnr_fe2_if.rdy <= 0; + eq_val <= 0; + eq_wait <= 0; + rdy_l <= 0; + a_a <= 0; + b_b <= 0; + c_c <= 0; + t <= 0; + out <= 0; + {out_cnt, mul_cnt, add_cnt, sub_cnt, mnr_cnt} <= 0; + {nxt_fe2_mul, nxt_fe2_mnr, nxt_fe_add, nxt_fe_sub} <= 0; + {mul_en, add_en, sub_en, mnr_en} <= 0; + {in_a, in_b} <= 0; + + end else begin + + i_mul_fe2_if.rdy <= 1; + i_sub_fe_if.rdy <= 1; + i_add_fe_if.rdy <= 1; + i_mnr_fe2_if.rdy <= 1; + + if (o_mul_fe6_if.rdy) o_mul_fe6_if.val <= 0; + if (o_mul_fe2_if.rdy) o_mul_fe2_if.val <= 0; + if (o_sub_fe_if.rdy) o_sub_fe_if.val <= 0; + if (o_add_fe_if.rdy) o_add_fe_if.val <= 0; + if (o_mnr_fe2_if.rdy) o_mnr_fe2_if.val <= 0; + + if (~sub_en) get_next_sub(); + if (~add_en) get_next_add(); + if (~mul_en) get_next_fe2_mul(); + if (~mnr_en) get_next_fe2_mnr(); + + if (rdy_l == 0) i_mul_fe6_if.rdy <= 1; + + if (~o_mul_fe6_if.val || (o_mul_fe6_if.val && o_mul_fe6_if.rdy)) begin + + if (eq_val[22] && eq_val[20] && eq_val[19]) begin + o_mul_fe6_if.val <= 1; + out_cnt <= out_cnt + 1; + end + + o_mul_fe6_if.sop <= out_cnt == 0; + o_mul_fe6_if.eop <= out_cnt == 5; + o_mul_fe6_if.dat <= out[out_cnt/2][out_cnt%2]; + + if(out_cnt == 5) begin + eq_val <= 0; + eq_wait <= 0; + rdy_l <= 0; + a_a <= 0; + b_b <= 0; + c_c <= 0; + t <= 0; + out <= 0; + {out_cnt, mul_cnt, add_cnt, sub_cnt} <= 0; + {nxt_fe2_mul, nxt_fe_add, nxt_fe_sub, nxt_fe2_mnr} <= 0; + {mul_en, add_en, sub_en, mnr_en} <= 0; + {in_a, in_b} <= 0; + end + end + + // Latch input + if (i_mul_fe6_if.rdy && i_mul_fe6_if.val) begin + in_a <= {i_mul_fe6_if.dat[0 +: $bits(FE_TYPE)], in_a[2:1], in_a[0][1]}; + in_b <= {i_mul_fe6_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)], in_b[2:1], in_b[0][1]}; + if (i_mul_fe6_if.eop) begin + i_mul_fe6_if.rdy <= 0; + rdy_l <= 1; + 
o_mul_fe6_if.ctl <= i_mul_fe6_if.ctl; + end + end + + // Check any results from multiplier + if (i_mul_fe2_if.val && i_mul_fe2_if.rdy) begin + if (i_mul_fe2_if.eop) eq_val[i_mul_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1; + case(i_mul_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside + 0: a_a[i_mul_fe2_if.eop] <= i_mul_fe2_if.dat; + 1: b_b[i_mul_fe2_if.eop] <= i_mul_fe2_if.dat; + 2: c_c[i_mul_fe2_if.eop] <= i_mul_fe2_if.dat; + 5: out[0][i_mul_fe2_if.eop] <= i_mul_fe2_if.dat; + 10: out[2][i_mul_fe2_if.eop] <= i_mul_fe2_if.dat; + 15: out[1][i_mul_fe2_if.eop] <= i_mul_fe2_if.dat; + default: o_mul_fe6_if.err <= 1; + endcase + end + + // Check any results from mnr + if (i_mnr_fe2_if.val && i_mnr_fe2_if.rdy) begin + if(i_mnr_fe2_if.eop) eq_val[i_mnr_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1; + case(i_mnr_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside + 18: out[0][i_mnr_fe2_if.eop] <= i_mnr_fe2_if.dat; + 21: c_c[i_mnr_fe2_if.eop] <= i_mnr_fe2_if.dat; + default: o_mul_fe6_if.err <= 1; + endcase + end + + // Check any results from sub + if (i_sub_fe_if.val && i_sub_fe_if.rdy) begin + if(i_sub_fe_if.eop) eq_val[i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1; + case(i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside + 6: out[0][i_sub_fe_if.eop] <= i_sub_fe_if.dat; + 7: out[0][i_sub_fe_if.eop] <= i_sub_fe_if.dat; + 11: out[2][i_sub_fe_if.eop] <= i_sub_fe_if.dat; + 16: out[1][i_sub_fe_if.eop] <= i_sub_fe_if.dat; + 17: out[1][i_sub_fe_if.eop] <= i_sub_fe_if.dat; + 20: out[2][i_sub_fe_if.eop] <= i_sub_fe_if.dat; + default: o_mul_fe6_if.err <= 1; + endcase + end + + // Check any results from add + if (i_add_fe_if.val && i_add_fe_if.rdy) begin + if (i_add_fe_if.eop) eq_val[i_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1; + case(i_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside + 3: out[0][i_add_fe_if.eop] <= i_add_fe_if.dat; + 4: t[i_add_fe_if.eop] <= i_add_fe_if.dat; + 8: out[2][i_add_fe_if.eop] <= i_add_fe_if.dat; + 9: t[i_add_fe_if.eop] 
<= i_add_fe_if.dat; + 12: out[2][i_add_fe_if.eop] <= i_add_fe_if.dat; + 13: out[1][i_add_fe_if.eop] <= i_add_fe_if.dat; + 14: t[i_add_fe_if.eop] <= i_add_fe_if.dat; + 19: out[0][i_add_fe_if.eop] <= i_add_fe_if.dat; + 22: out[1][i_add_fe_if.eop] <= i_add_fe_if.dat; + default: o_mul_fe6_if.err <= 1; + endcase + end + + // Issue new multiplies + if (mul_en) + case(nxt_fe2_mul) + 0: fe2_multiply(0, in_a[0], in_b[0]); + 1: fe2_multiply(1, in_a[1], in_b[1]); + 2: fe2_multiply(2, in_a[2], in_b[2]); + 5: fe2_multiply(5, out[0], t); + 10: fe2_multiply(10, out[2], t); + 15: fe2_multiply(15, out[1], t); + endcase + + // Issue new adds + if (add_en) + case(nxt_fe_add) + 3: fe2_addition(3, in_a[1], in_a[2]); + 4: fe2_addition(4, in_b[1], in_b[2]); + 8: fe2_addition(8, in_b[0], in_b[2]); + 9: fe2_addition(9, in_a[0], in_a[2]); + 12: fe2_addition(12, out[2], b_b); + 13: fe2_addition(13, in_b[0], in_b[1]); + 14: fe2_addition(14, in_a[0], in_a[1]); + 19: fe2_addition(19, out[0], a_a); + 22: fe2_addition(22, out[1], c_c); + endcase + + // Issue new sub + if (sub_en) + case(nxt_fe_sub) + 6: fe2_subtraction(6, out[0], b_b); + 7: fe2_subtraction(7, out[0], c_c); + 11: fe2_subtraction(11, out[2], a_a); + 16: fe2_subtraction(16, out[1], a_a); + 17: fe2_subtraction(17, out[1], b_b); + 20: fe2_subtraction(20, out[2], c_c); + endcase + + // Issue new mnr + if (mnr_en) + case(nxt_fe2_mnr) + 18: fe2_mnr(18, out[0]); + 21: fe2_mnr(21, c_c); + endcase + + end +end + +// Task for subtractions +task fe2_subtraction(input int unsigned ctl, input FE2_TYPE a, b); + if (~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy)) begin + o_sub_fe_if.val <= 1; + o_sub_fe_if.sop <= sub_cnt == 0; + o_sub_fe_if.eop <= sub_cnt == 1; + o_sub_fe_if.dat[0 +: $bits(FE_TYPE)] <= a[sub_cnt]; + o_sub_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= b[sub_cnt]; + o_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl; + eq_wait[ctl] <= 1; + if (sub_cnt == 1) begin + get_next_sub(); + end + sub_cnt <= sub_cnt + 1; + 
end +endtask + +// Task for addition +task fe2_addition(input int unsigned ctl, input FE2_TYPE a, b); + if (~o_add_fe_if.val || (o_add_fe_if.val && o_add_fe_if.rdy)) begin + o_add_fe_if.val <= 1; + o_add_fe_if.sop <= add_cnt == 0; + o_add_fe_if.eop <= add_cnt == 1; + o_add_fe_if.dat[0 +: $bits(FE_TYPE)] <= a[add_cnt]; + o_add_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= b[add_cnt]; + o_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl; + eq_wait[ctl] <= 1; + if (add_cnt == 1) begin + get_next_add(); + end + add_cnt <= add_cnt + 1; + end +endtask + +// Task for using mult +task fe2_multiply(input int unsigned ctl, input FE2_TYPE a, b); + if (~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy)) begin + o_mul_fe2_if.val <= 1; + o_mul_fe2_if.sop <= mul_cnt == 0; + o_mul_fe2_if.eop <= mul_cnt == 1; + o_mul_fe2_if.dat[0 +: $bits(FE_TYPE)] <= a[mul_cnt]; + o_mul_fe2_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= b[mul_cnt]; + o_mul_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl; + eq_wait[ctl] <= 1; + if (mul_cnt == 1) begin + get_next_fe2_mul(); + end + mul_cnt <= mul_cnt + 1; + end +endtask + +// Task for using mnr +task fe2_mnr(input int unsigned ctl, input FE2_TYPE a); + if (~o_mnr_fe2_if.val || (o_mnr_fe2_if.val && o_mnr_fe2_if.rdy)) begin + o_mnr_fe2_if.val <= 1; + o_mnr_fe2_if.sop <= mnr_cnt == 0; + o_mnr_fe2_if.eop <= mnr_cnt == 1; + o_mnr_fe2_if.dat <= a[mnr_cnt]; + o_mnr_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl; + eq_wait[ctl] <= 1; + if (mnr_cnt == 1) begin + get_next_fe2_mnr(); + end + mnr_cnt <= mnr_cnt + 1; + end +endtask + +task get_next_fe2_mul(); + mul_en <= 1; + if(~eq_wait[0] && rdy_l) + nxt_fe2_mul <= 0; + else if(~eq_wait[1] && rdy_l) + nxt_fe2_mul <= 1; + else if(~eq_wait[2] && rdy_l) + nxt_fe2_mul <= 2; + else if(~eq_wait[5] && eq_val[3] && eq_val[4]) + nxt_fe2_mul <= 5; + else if (~eq_wait[10] && eq_val[8] && eq_val[9]) + nxt_fe2_mul <= 10; + else if (~eq_wait[15] && eq_val[13] && eq_val[14]) + nxt_fe2_mul <= 15; + else + 
mul_en <= 0; +endtask + +task get_next_add(); + add_en <= 1; + if(~eq_wait[3] && rdy_l) + nxt_fe_add <= 3; + else if(~eq_wait[4] && rdy_l) + nxt_fe_add <= 4; + else if(~eq_wait[8] && rdy_l) + nxt_fe_add <= 8; + else if(~eq_wait[9] && eq_wait[5] && rdy_l) + nxt_fe_add <= 9; + else if (~eq_wait[12] && eq_val[11] && eq_val[1]) + nxt_fe_add <= 12; + else if(~eq_wait[13] && rdy_l) + nxt_fe_add <= 13; + else if(~eq_wait[14] && eq_wait[10] && rdy_l) + nxt_fe_add <= 14; + else if(~eq_wait[19] && eq_val[18] && eq_val[0]) + nxt_fe_add <= 19; + else if(~eq_wait[22] && eq_val[17] && eq_val[21]) + nxt_fe_add <= 22; + else + add_en <= 0; +endtask + +task get_next_sub(); + sub_en <= 1; + if(~eq_wait[6] && eq_val[5] && eq_val[1]) + nxt_fe_sub <= 6; + else if(~eq_wait[7] && eq_val[6] && eq_val[2]) + nxt_fe_sub <= 7; + else if (~eq_wait[11] && eq_val[10] && eq_val[0]) + nxt_fe_sub <= 11; + else if (~eq_wait[16] && eq_val[15] && eq_val[0]) + nxt_fe_sub <= 16; + else if (~eq_wait[17] && eq_val[16] && eq_val[1]) + nxt_fe_sub <= 17; + else if (~eq_wait[20] && eq_val[12] && eq_val[2]) + nxt_fe_sub <= 20; + else + sub_en <= 0; +endtask + +task get_next_fe2_mnr(); + mnr_en <= 1; + if(~eq_wait[18] && eq_val[7]) + nxt_fe2_mnr <= 18; + else if(~eq_wait[21] && eq_wait[20]) + nxt_fe2_mnr <= 21; + else + mnr_en <= 0; +endtask + +endmodule \ No newline at end of file diff --git a/ip_cores/ec/src/rtl/fe6_mul_by_nonresidue_s.sv b/ip_cores/ec/src/rtl/fe6_mul_by_nonresidue_s.sv index 3914737..a16cf20 100644 --- a/ip_cores/ec/src/rtl/fe6_mul_by_nonresidue_s.sv +++ b/ip_cores/ec/src/rtl/fe6_mul_by_nonresidue_s.sv @@ -35,7 +35,7 @@ FE_TYPE [3:0] t; always_comb begin case (mnr_cnt) inside 0,1,2,3: i_mnr_fe6_if.rdy = 1; - 4,5: i_mnr_fe6_if.rdy = ~o_mnr_fe6_if.val || (o_mnr_fe6_if.val && o_mnr_fe6_if.rdy); + 4,5: i_mnr_fe6_if.rdy = ~o_mnr_fe2_if.val || (o_mnr_fe2_if.val && o_mnr_fe2_if.rdy); default: i_mnr_fe6_if.rdy = 0; endcase diff --git a/ip_cores/ec/src/tb/ec_fe12_mul_s_tb.sv 
b/ip_cores/ec/src/tb/ec_fe12_mul_s_tb.sv index 06bff0c..2c73ea8 100644 --- a/ip_cores/ec/src/tb/ec_fe12_mul_s_tb.sv +++ b/ip_cores/ec/src/tb/ec_fe12_mul_s_tb.sv @@ -193,8 +193,8 @@ resource_share # ( .DAT_BITS ( 2*$bits(FE_TYPE) ), .CTL_BITS ( CTL_BITS ), .OVR_WRT_BIT ( 8 ), - .PIPELINE_IN ( 3 ), - .PIPELINE_OUT ( 2 ) + .PIPELINE_IN ( 1 ), + .PIPELINE_OUT ( 1 ) ) resource_share_fe_add ( .i_clk ( clk ), @@ -210,8 +210,8 @@ resource_share # ( .DAT_BITS ( 2*$bits(FE_TYPE) ), .CTL_BITS ( CTL_BITS ), .OVR_WRT_BIT ( 8 ), - .PIPELINE_IN ( 2 ), - .PIPELINE_OUT ( 4 ) + .PIPELINE_IN ( 1 ), + .PIPELINE_OUT ( 1 ) ) resource_share_fe_sub ( .i_clk ( clk ), @@ -227,8 +227,8 @@ resource_share # ( .DAT_BITS ( 2*$bits(FE_TYPE) ), .CTL_BITS ( CTL_BITS ), .OVR_WRT_BIT ( 12 ), - .PIPELINE_IN ( 2 ), - .PIPELINE_OUT ( 2 ) + .PIPELINE_IN ( 1 ), + .PIPELINE_OUT ( 1 ) ) resource_share_fe2_mnr ( .i_clk ( clk ), diff --git a/ip_cores/util/src/rtl/adder_pipe.sv b/ip_cores/util/src/rtl/adder_pipe.sv index 258b6a1..401653e 100644 --- a/ip_cores/util/src/rtl/adder_pipe.sv +++ b/ip_cores/util/src/rtl/adder_pipe.sv @@ -37,6 +37,7 @@ logic [DAT_BITS-1:0] P_; logic [LEVEL:0][DAT_BITS:0] result0, result1; logic [LEVEL:0][DAT_BITS:0] a, b; +logic [LEVEL:0][1:0] sop_eop; logic [LEVEL:0][CTL_BITS-1:0] ctl; // Top ctl bit we use to check if this needs a subtraction in P logic [LEVEL:0] val, rdy; logic [LEVEL:0] carry_neg; @@ -52,11 +53,15 @@ always_comb begin a[0] = i_add.dat[0 +: BITS]; b[0] = 0; b[0] = i_add.dat[BITS +: BITS]; + sop_eop[0][0] = i_add.sop; + sop_eop[0][1] = i_add.eop; result0[0] = 0; result1[0] = 0; rdy[LEVEL] = o_add.rdy; i_add.rdy = rdy[0]; o_add.copy_if_comb(carry_neg[LEVEL] ? 
result0[LEVEL] : result1[LEVEL], val[LEVEL], 1, 1, 0, 0, ctl[LEVEL]); + o_add.sop = sop_eop[LEVEL][0]; + o_add.eop = sop_eop[LEVEL][1]; end generate @@ -94,12 +99,14 @@ genvar g; b[g+1] <= 0; ctl[g+1] <= 0; carry_neg[g+1] <= 0; + sop_eop[g+1] <= 0; end else begin if (rdy[g]) begin val[g+1] <= val[g]; ctl[g+1] <= ctl[g]; a[g+1] <= a[g]; b[g+1] <= b[g]; + sop_eop[g+1] <= sop_eop[g]; result0[g+1] <= result0[g]; result0[g+1][g*BITS_LEVEL +: BITS_LEVEL + 1] <= add_res0; diff --git a/ip_cores/util/src/rtl/resource_share.sv b/ip_cores/util/src/rtl/resource_share.sv index 09e463c..b53253d 100644 --- a/ip_cores/util/src/rtl/resource_share.sv +++ b/ip_cores/util/src/rtl/resource_share.sv @@ -38,6 +38,8 @@ module resource_share # ( if_axi_stream.source o_axi [NUM_IN-1:0] ); +if_axi_stream #(.DAT_BYTS(DAT_BYTS), .DAT_BITS(DAT_BITS), .CTL_BITS(CTL_BITS)) out_int (i_clk); + // Arbitratation to the resource packet_arb # ( .DAT_BITS ( DAT_BITS ), @@ -45,47 +47,61 @@ packet_arb # ( .CTL_BITS ( CTL_BITS ), .NUM_IN ( NUM_IN ), .OVR_WRT_BIT ( OVR_WRT_BIT ), - .PIPELINE ( PIPELINE_IN ), + .PIPELINE ( 0 ), .PRIORITY_IN ( PRIORITY_IN ) ) packet_arb_mult ( - .i_clk ( i_clk ), - .i_rst ( i_rst ), - .i_axi ( i_axi ), - .o_axi ( o_res ) + .i_clk ( i_clk ), + .i_rst ( i_rst ), + .i_axi ( i_axi ), + .o_axi ( out_int ) +); + +pipeline_if #( + .DAT_BITS ( DAT_BITS ), + .DAT_BYTS ( DAT_BYTS ), + .CTL_BITS ( CTL_BITS ), + .NUM_STAGES ( PIPELINE_OUT ) +) +pipeline_if_out ( + .i_rst ( i_rst ), + .i_if ( out_int ), + .o_if ( o_res ) ); // Demuxing -if_axi_stream #(.DAT_BYTS(DAT_BYTS), .DAT_BITS(DAT_BITS), .CTL_BITS(CTL_BITS)) int_axi [NUM_IN-1:0] (i_clk); +if_axi_stream #(.DAT_BYTS(DAT_BYTS), .DAT_BITS(DAT_BITS), .CTL_BITS(CTL_BITS)) in_int (i_clk); genvar gen0; logic [NUM_IN-1:0] rdy; + +pipeline_if #( + .DAT_BITS ( DAT_BITS ), + .DAT_BYTS ( DAT_BYTS ), + .CTL_BITS ( CTL_BITS ), + .NUM_STAGES ( PIPELINE_IN ) +) +pipeline_if_in ( + .i_rst ( i_rst ), + .i_if ( i_res ), + .o_if ( in_int ) +); + 
generate for (gen0 = 0; gen0 < NUM_IN; gen0++) begin: GEN_DEMUX + always_comb begin - rdy[gen0] = int_axi[gen0].rdy; - int_axi[gen0].copy_if_comb(i_res.dat, i_res.val && i_res.ctl[OVR_WRT_BIT +: $clog2(NUM_IN)] == gen0, - i_res.sop, i_res.eop, i_res.err, i_res.mod, i_res.ctl); - int_axi[gen0].ctl[OVR_WRT_BIT +: $clog2(NUM_IN)] = 0; + rdy[gen0] = o_axi[gen0].rdy; + o_axi[gen0].copy_if_comb(in_int.dat, in_int.val && in_int.ctl[OVR_WRT_BIT +: $clog2(NUM_IN)] == gen0, + in_int.sop, in_int.eop, in_int.err, in_int.mod, in_int.ctl); + o_axi[gen0].ctl[OVR_WRT_BIT +: $clog2(NUM_IN)] = 0; end - pipeline_if #( - .DAT_BITS ( DAT_BITS ), - .DAT_BYTS ( DAT_BYTS ), - .CTL_BITS ( CTL_BITS ), - .NUM_STAGES ( PIPELINE_OUT ) - ) - pipeline_if ( - .i_rst ( i_rst ), - .i_if ( int_axi[gen0] ), - .o_if ( o_axi[gen0] ) - ); - end endgenerate always_comb begin - i_res.rdy = rdy[i_res.ctl[OVR_WRT_BIT +: $clog2(NUM_IN)]]; + in_int.rdy = rdy[in_int.ctl[OVR_WRT_BIT +: $clog2(NUM_IN)]]; end endmodule \ No newline at end of file diff --git a/ip_cores/util/src/rtl/subtracter_pipe.sv b/ip_cores/util/src/rtl/subtracter_pipe.sv index 42116b6..4ff96bb 100644 --- a/ip_cores/util/src/rtl/subtracter_pipe.sv +++ b/ip_cores/util/src/rtl/subtracter_pipe.sv @@ -38,6 +38,7 @@ logic [DAT_BITS-1:0] P_; logic [LEVEL:0][DAT_BITS:0] result0, result1; logic [LEVEL:0][DAT_BITS:0] a, b; logic [LEVEL:0][CTL_BITS-1:0] ctl; // Top ctl bit we use to check if this needs a subtraction in P +logic [LEVEL:0][1:0] sop_eop; logic [LEVEL:0] val, rdy; logic [LEVEL:0] carry_neg0, carry_neg1; @@ -49,6 +50,8 @@ always_comb begin carry_neg1[0] = 0; val[0] = i_sub.val; ctl[0] = i_sub.ctl; + sop_eop[0][0] = i_sub.sop; + sop_eop[0][1] = i_sub.eop; a[0] = 0; a[0] = i_sub.dat[0 +: BITS]; b[0] = 0; @@ -59,6 +62,8 @@ always_comb begin i_sub.rdy = rdy[0]; o_sub.dat = carry_neg1[LEVEL] ? result0[LEVEL] : result1[LEVEL]; o_sub.copy_if_comb(carry_neg1[LEVEL] ? 
result0[LEVEL] : result1[LEVEL], val[LEVEL], 1, 1, 0, 0, ctl[LEVEL]); + o_sub.sop = sop_eop[LEVEL][0]; + o_sub.eop = sop_eop[LEVEL][1]; end generate @@ -102,12 +107,15 @@ genvar g; a[g+1] <= 0; b[g+1] <= 0; ctl[g+1] <= 0; + sop_eop[g+1] <= 0; carry_neg0[g+1] <= 0; carry_neg1[g+1] <= 0; end else begin if (rdy[g]) begin val[g+1] <= val[g]; ctl[g+1] <= ctl[g]; + sop_eop[g+1] <= sop_eop[g]; + a[g+1] <= a[g]; b[g+1] <= b[g];