Updates to resource multipliers and new stream version for fe6

multiplication
This commit is contained in:
bsdevlin 2019-08-08 12:40:05 +08:00
parent 9a6268eb13
commit 4c9fc76048
7 changed files with 446 additions and 35 deletions

View File

@ -69,7 +69,7 @@ always_comb begin
endcase
case (i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT]) inside
0: i_sub_fe_if.rdy = (~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy));
0: i_sub_fe_if.rdy = (sub_cnt >= 6) && (~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy));
1: i_sub_fe_if.rdy = (out_cnt >= 6) && (~o_mul_fe12_if.val || (o_mul_fe12_if.val && o_mul_fe12_if.rdy));
default: i_sub_fe_if.rdy = 0;
endcase
@ -133,7 +133,7 @@ always_ff @ (posedge i_clk) begin
if (i_add_fe_if.val && i_add_fe_if.rdy && i_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 1) begin
b0 <= {i_add_fe_if.dat, b0[5:1]};
b0_val <= 1;
b0_val <= i_add_fe_if.eop;
end
if (i_mul_fe12_if.rdy && i_mul_fe12_if.val) begin
@ -198,9 +198,7 @@ always_ff @ (posedge i_clk) begin
start <= 0;
end
endcase
end
end
end
end

View File

@ -0,0 +1,382 @@
/*
This provides the interface to perform
Fp^6 multiplication, over a Fp2 tower.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Streaming Fp^6 multiplier over an Fp2 tower.
//
// Operands a and b are received on i_mul_fe6_if, one FE_TYPE coefficient of
// each per beat (a in the low $bits(FE_TYPE) bits of dat, b in the next
// $bits(FE_TYPE) bits), and shifted into in_a / in_b until eop.
// The product is sent on o_mul_fe6_if as six beats, out[k/2][k%2] for k = 0..5.
//
// The work is split into 23 numbered "equations". Each is issued to an
// external Fp2 multiplier, Fp adder, Fp subtractor or multiply-by-non-residue
// unit with its number placed in ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]; the
// result comes back carrying the same tag and is steered into the matching
// register. Every Fp2 value is streamed as two beats (coefficient 0, then 1).
//
//   eq  unit  operation                    eq  unit  operation
//    0  mul   a_a    = a[0] * b[0]         12  add   out[2] = out[2] + b_b
//    1  mul   b_b    = a[1] * b[1]         13  add   out[1] = b[0] + b[1]
//    2  mul   c_c    = a[2] * b[2]         14  add   t      = a[0] + a[1]
//    3  add   out[0] = a[1] + a[2]         15  mul   out[1] = out[1] * t
//    4  add   t      = b[1] + b[2]         16  sub   out[1] = out[1] - a_a
//    5  mul   out[0] = out[0] * t          17  sub   out[1] = out[1] - b_b
//    6  sub   out[0] = out[0] - b_b        18  mnr   out[0] = mnr(out[0])
//    7  sub   out[0] = out[0] - c_c        19  add   out[0] = out[0] + a_a
//    8  add   out[2] = b[0] + b[2]         20  sub   out[2] = out[2] - c_c
//    9  add   t      = a[0] + a[2]         21  mnr   c_c    = mnr(c_c)
//   10  mul   out[2] = out[2] * t          22  add   out[1] = out[1] + c_c
//   11  sub   out[2] = out[2] - a_a
//
// eq_wait[n] is set when equation n has been issued, eq_val[n] when its last
// result beat (eop) has been received. The result is complete once equations
// 19, 20 and 22 are valid.
module ec_fe6_mul_s
#(
  parameter type FE_TYPE,
  parameter type FE2_TYPE,
  parameter type FE6_TYPE,
  parameter CTL_BITS    = 14,
  // First ctl bit of the equation tag. This module uses NUM_OVR_WRT_BIT (5)
  // bits starting here.
  // NOTE(review): the original comment mentioned further bits for resource
  // sharing - confirm the parent's resource_share fields do not overlap.
  parameter OVR_WRT_BIT = 8
)(
  input i_clk, i_rst,
  // Interface to Fp2 multiplier (mod P) - one FE_TYPE coefficient of each operand per beat
  if_axi_stream.source o_mul_fe2_if,
  if_axi_stream.sink   i_mul_fe2_if,
  // Interface to Fp adder (mod P) - one FE_TYPE coefficient of each operand per beat
  if_axi_stream.source o_add_fe_if,
  if_axi_stream.sink   i_add_fe_if,
  // Interface to Fp subtractor (mod P) - one FE_TYPE coefficient of each operand per beat
  if_axi_stream.source o_sub_fe_if,
  if_axi_stream.sink   i_sub_fe_if,
  // Interface to Fp2 multiply by non-residue - one FE_TYPE coefficient per beat
  if_axi_stream.source o_mnr_fe2_if,
  if_axi_stream.sink   i_mnr_fe2_if,
  // Fp6 multiplier stream: o_ carries the product, i_ carries the operands
  if_axi_stream.source o_mul_fe6_if,
  if_axi_stream.sink   i_mul_fe6_if
);

// Width of the equation tag carried in ctl (23 equations -> 5 bits)
localparam NUM_OVR_WRT_BIT = 5;

// Multiplications are calculated using the formula in bls12_381.pkg::fe6_mul()
FE2_TYPE a_a, b_b, c_c, t;                  // Partial products and scratch sum
FE6_TYPE out, in_a, in_b;                   // Result accumulator and latched operands
logic [22:0] eq_val, eq_wait;               // Per-equation "result valid" / "issued" flags
logic mul_cnt, add_cnt, sub_cnt, mnr_cnt;   // Beat (coefficient) index for each unit, wraps after 2 beats
logic mul_en, add_en, sub_en, mnr_en;       // Set when nxt_* holds an equation ready to be issued
logic [4:0] nxt_fe2_mul, nxt_fe2_mnr, nxt_fe_add, nxt_fe_sub; // Next equation for each unit
logic [2:0] out_cnt;                        // Output beat counter, 0..5
logic rdy_l;                                // Set once a full operand pair has been latched

always_ff @ (posedge i_clk) begin
  if (i_rst) begin
    o_mul_fe6_if.reset_source();
    o_mnr_fe2_if.reset_source();
    o_mul_fe2_if.reset_source();
    o_sub_fe_if.reset_source();
    o_add_fe_if.reset_source();
    i_mul_fe6_if.rdy <= 0;
    i_mul_fe2_if.rdy <= 0;
    i_sub_fe_if.rdy <= 0;
    i_add_fe_if.rdy <= 0;
    i_mnr_fe2_if.rdy <= 0;
    eq_val <= 0;
    eq_wait <= 0;
    rdy_l <= 0;
    a_a <= 0;
    b_b <= 0;
    c_c <= 0;
    t <= 0;
    out <= 0;
    {out_cnt, mul_cnt, add_cnt, sub_cnt, mnr_cnt} <= 0;
    {nxt_fe2_mul, nxt_fe2_mnr, nxt_fe_add, nxt_fe_sub} <= 0;
    {mul_en, add_en, sub_en, mnr_en} <= 0;
    {in_a, in_b} <= 0;
  end else begin

    // Results from the arithmetic units are always accepted
    i_mul_fe2_if.rdy <= 1;
    i_sub_fe_if.rdy <= 1;
    i_add_fe_if.rdy <= 1;
    i_mnr_fe2_if.rdy <= 1;

    // Drop val once the downstream side has taken the beat. The issue tasks
    // and the output logic further down may re-assert val in the same cycle
    // (the later nonblocking assignment wins).
    if (o_mul_fe6_if.rdy) o_mul_fe6_if.val <= 0;
    if (o_mul_fe2_if.rdy) o_mul_fe2_if.val <= 0;
    if (o_sub_fe_if.rdy) o_sub_fe_if.val <= 0;
    if (o_add_fe_if.rdy) o_add_fe_if.val <= 0;
    if (o_mnr_fe2_if.rdy) o_mnr_fe2_if.val <= 0;

    // A unit with nothing scheduled polls for the next runnable equation
    if (~sub_en) get_next_sub();
    if (~add_en) get_next_add();
    if (~mul_en) get_next_fe2_mul();
    if (~mnr_en) get_next_fe2_mnr();

    // Accept operands only while no operand pair is latched
    if (rdy_l == 0) i_mul_fe6_if.rdy <= 1;

    // Stream out the result once the three final equations have completed
    if (~o_mul_fe6_if.val || (o_mul_fe6_if.val && o_mul_fe6_if.rdy)) begin
      if (eq_val[22] && eq_val[20] && eq_val[19]) begin
        o_mul_fe6_if.val <= 1;
        out_cnt <= out_cnt + 1;
      end
      o_mul_fe6_if.sop <= out_cnt == 0;
      o_mul_fe6_if.eop <= out_cnt == 5;
      o_mul_fe6_if.dat <= out[out_cnt/2][out_cnt%2];

      // Last beat is being sent - return all state to idle for the next
      // operation. These assignments override the out_cnt increment and any
      // get_next_*() scheduling made earlier in this cycle.
      if(out_cnt == 5) begin
        eq_val <= 0;
        eq_wait <= 0;
        rdy_l <= 0;
        a_a <= 0;
        b_b <= 0;
        c_c <= 0;
        t <= 0;
        out <= 0;
        // mnr_cnt is cleared here as well so this list matches the reset list
        {out_cnt, mul_cnt, add_cnt, sub_cnt, mnr_cnt} <= 0;
        {nxt_fe2_mul, nxt_fe_add, nxt_fe_sub, nxt_fe2_mnr} <= 0;
        {mul_en, add_en, sub_en, mnr_en} <= 0;
        {in_a, in_b} <= 0;
      end
    end

    // Latch input - each beat shifts one coefficient of a and of b in from the top
    if (i_mul_fe6_if.rdy && i_mul_fe6_if.val) begin
      in_a <= {i_mul_fe6_if.dat[0 +: $bits(FE_TYPE)], in_a[2:1], in_a[0][1]};
      in_b <= {i_mul_fe6_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)], in_b[2:1], in_b[0][1]};
      if (i_mul_fe6_if.eop) begin
        i_mul_fe6_if.rdy <= 0;
        rdy_l <= 1;
        // The caller's ctl is passed through to the result stream
        o_mul_fe6_if.ctl <= i_mul_fe6_if.ctl;
      end
    end

    // Check any results from multiplier
    // (eop is low on the first beat and high on the second, so it doubles as
    // the coefficient index; an unexpected tag flags err on the output)
    if (i_mul_fe2_if.val && i_mul_fe2_if.rdy) begin
      if (i_mul_fe2_if.eop) eq_val[i_mul_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1;
      case(i_mul_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside
        0: a_a[i_mul_fe2_if.eop] <= i_mul_fe2_if.dat;
        1: b_b[i_mul_fe2_if.eop] <= i_mul_fe2_if.dat;
        2: c_c[i_mul_fe2_if.eop] <= i_mul_fe2_if.dat;
        5: out[0][i_mul_fe2_if.eop] <= i_mul_fe2_if.dat;
        10: out[2][i_mul_fe2_if.eop] <= i_mul_fe2_if.dat;
        15: out[1][i_mul_fe2_if.eop] <= i_mul_fe2_if.dat;
        default: o_mul_fe6_if.err <= 1;
      endcase
    end

    // Check any results from mnr
    if (i_mnr_fe2_if.val && i_mnr_fe2_if.rdy) begin
      if(i_mnr_fe2_if.eop) eq_val[i_mnr_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1;
      case(i_mnr_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside
        18: out[0][i_mnr_fe2_if.eop] <= i_mnr_fe2_if.dat;
        21: c_c[i_mnr_fe2_if.eop] <= i_mnr_fe2_if.dat;
        default: o_mul_fe6_if.err <= 1;
      endcase
    end

    // Check any results from sub
    if (i_sub_fe_if.val && i_sub_fe_if.rdy) begin
      if(i_sub_fe_if.eop) eq_val[i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1;
      case(i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside
        6: out[0][i_sub_fe_if.eop] <= i_sub_fe_if.dat;
        7: out[0][i_sub_fe_if.eop] <= i_sub_fe_if.dat;
        11: out[2][i_sub_fe_if.eop] <= i_sub_fe_if.dat;
        16: out[1][i_sub_fe_if.eop] <= i_sub_fe_if.dat;
        17: out[1][i_sub_fe_if.eop] <= i_sub_fe_if.dat;
        20: out[2][i_sub_fe_if.eop] <= i_sub_fe_if.dat;
        default: o_mul_fe6_if.err <= 1;
      endcase
    end

    // Check any results from add
    if (i_add_fe_if.val && i_add_fe_if.rdy) begin
      if (i_add_fe_if.eop) eq_val[i_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1;
      case(i_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside
        3: out[0][i_add_fe_if.eop] <= i_add_fe_if.dat;
        4: t[i_add_fe_if.eop] <= i_add_fe_if.dat;
        8: out[2][i_add_fe_if.eop] <= i_add_fe_if.dat;
        9: t[i_add_fe_if.eop] <= i_add_fe_if.dat;
        12: out[2][i_add_fe_if.eop] <= i_add_fe_if.dat;
        13: out[1][i_add_fe_if.eop] <= i_add_fe_if.dat;
        14: t[i_add_fe_if.eop] <= i_add_fe_if.dat;
        19: out[0][i_add_fe_if.eop] <= i_add_fe_if.dat;
        22: out[1][i_add_fe_if.eop] <= i_add_fe_if.dat;
        default: o_mul_fe6_if.err <= 1;
      endcase
    end

    // Issue new multiplies
    if (mul_en)
      case(nxt_fe2_mul)
        0: fe2_multiply(0, in_a[0], in_b[0]);
        1: fe2_multiply(1, in_a[1], in_b[1]);
        2: fe2_multiply(2, in_a[2], in_b[2]);
        5: fe2_multiply(5, out[0], t);
        10: fe2_multiply(10, out[2], t);
        15: fe2_multiply(15, out[1], t);
      endcase

    // Issue new adds
    if (add_en)
      case(nxt_fe_add)
        3: fe2_addition(3, in_a[1], in_a[2]);
        4: fe2_addition(4, in_b[1], in_b[2]);
        8: fe2_addition(8, in_b[0], in_b[2]);
        9: fe2_addition(9, in_a[0], in_a[2]);
        12: fe2_addition(12, out[2], b_b);
        13: fe2_addition(13, in_b[0], in_b[1]);
        14: fe2_addition(14, in_a[0], in_a[1]);
        19: fe2_addition(19, out[0], a_a);
        22: fe2_addition(22, out[1], c_c);
      endcase

    // Issue new sub
    if (sub_en)
      case(nxt_fe_sub)
        6: fe2_subtraction(6, out[0], b_b);
        7: fe2_subtraction(7, out[0], c_c);
        11: fe2_subtraction(11, out[2], a_a);
        16: fe2_subtraction(16, out[1], a_a);
        17: fe2_subtraction(17, out[1], b_b);
        20: fe2_subtraction(20, out[2], c_c);
      endcase

    // Issue new mnr
    if (mnr_en)
      case(nxt_fe2_mnr)
        18: fe2_mnr(18, out[0]);
        21: fe2_mnr(21, c_c);
      endcase

  end
end

// Task for subtractions
// Sends one beat of (a - b) for equation ctl when the subtractor port is free:
// beat sub_cnt carries coefficient sub_cnt of a (low half) and b (high half).
// Marks the equation as issued and, on the second beat, picks the next one.
task fe2_subtraction(input int unsigned ctl, input FE2_TYPE a, b);
  if (~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy)) begin
    o_sub_fe_if.val <= 1;
    o_sub_fe_if.sop <= sub_cnt == 0;
    o_sub_fe_if.eop <= sub_cnt == 1;
    o_sub_fe_if.dat[0 +: $bits(FE_TYPE)] <= a[sub_cnt];
    o_sub_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= b[sub_cnt];
    o_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
    eq_wait[ctl] <= 1;
    if (sub_cnt == 1) begin
      get_next_sub();
    end
    sub_cnt <= sub_cnt + 1; // 1-bit counter, wraps back to 0 after the second beat
  end
endtask

// Task for addition
// Same beat protocol as fe2_subtraction, driving the adder port.
task fe2_addition(input int unsigned ctl, input FE2_TYPE a, b);
  if (~o_add_fe_if.val || (o_add_fe_if.val && o_add_fe_if.rdy)) begin
    o_add_fe_if.val <= 1;
    o_add_fe_if.sop <= add_cnt == 0;
    o_add_fe_if.eop <= add_cnt == 1;
    o_add_fe_if.dat[0 +: $bits(FE_TYPE)] <= a[add_cnt];
    o_add_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= b[add_cnt];
    o_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
    eq_wait[ctl] <= 1;
    if (add_cnt == 1) begin
      get_next_add();
    end
    add_cnt <= add_cnt + 1;
  end
endtask

// Task for using mult
// Same beat protocol as fe2_subtraction, driving the Fp2 multiplier port.
task fe2_multiply(input int unsigned ctl, input FE2_TYPE a, b);
  if (~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy)) begin
    o_mul_fe2_if.val <= 1;
    o_mul_fe2_if.sop <= mul_cnt == 0;
    o_mul_fe2_if.eop <= mul_cnt == 1;
    o_mul_fe2_if.dat[0 +: $bits(FE_TYPE)] <= a[mul_cnt];
    o_mul_fe2_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= b[mul_cnt];
    o_mul_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
    eq_wait[ctl] <= 1;
    if (mul_cnt == 1) begin
      get_next_fe2_mul();
    end
    mul_cnt <= mul_cnt + 1;
  end
endtask

// Task for using mnr
// Single-operand variant: each beat carries one coefficient of a.
task fe2_mnr(input int unsigned ctl, input FE2_TYPE a);
  if (~o_mnr_fe2_if.val || (o_mnr_fe2_if.val && o_mnr_fe2_if.rdy)) begin
    o_mnr_fe2_if.val <= 1;
    o_mnr_fe2_if.sop <= mnr_cnt == 0;
    o_mnr_fe2_if.eop <= mnr_cnt == 1;
    o_mnr_fe2_if.dat <= a[mnr_cnt];
    o_mnr_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
    eq_wait[ctl] <= 1;
    if (mnr_cnt == 1) begin
      get_next_fe2_mnr();
    end
    mnr_cnt <= mnr_cnt + 1;
  end
endtask

// Schedulers: each picks, in priority order, the first equation for its unit
// that has not been issued yet and whose inputs are available, or clears the
// unit's enable when nothing can run. Equations reading only in_a / in_b are
// gated on rdy_l (operands latched).
//
// NOTE(review): equations 9, 14 and 21 overwrite t / c_c and are released on
// eq_wait (the earlier reader has been *issued*) rather than eq_val (it has
// completed). eq_wait is set on the reader's first beat, so this presumably
// relies on the reader's second beat being sent before the new result
// returns - confirm against downstream latency and back-pressure.

task get_next_fe2_mul();
  mul_en <= 1;
  if(~eq_wait[0] && rdy_l)
    nxt_fe2_mul <= 0;
  else if(~eq_wait[1] && rdy_l)
    nxt_fe2_mul <= 1;
  else if(~eq_wait[2] && rdy_l)
    nxt_fe2_mul <= 2;
  else if(~eq_wait[5] && eq_val[3] && eq_val[4])
    nxt_fe2_mul <= 5;
  else if (~eq_wait[10] && eq_val[8] && eq_val[9])
    nxt_fe2_mul <= 10;
  else if (~eq_wait[15] && eq_val[13] && eq_val[14])
    nxt_fe2_mul <= 15;
  else
    mul_en <= 0;
endtask

task get_next_add();
  add_en <= 1;
  if(~eq_wait[3] && rdy_l)
    nxt_fe_add <= 3;
  else if(~eq_wait[4] && rdy_l)
    nxt_fe_add <= 4;
  else if(~eq_wait[8] && rdy_l)
    nxt_fe_add <= 8;
  else if(~eq_wait[9] && eq_wait[5] && rdy_l)     // t is reused: wait until eq 5 has been issued
    nxt_fe_add <= 9;
  else if (~eq_wait[12] && eq_val[11] && eq_val[1])
    nxt_fe_add <= 12;
  else if(~eq_wait[13] && rdy_l)
    nxt_fe_add <= 13;
  else if(~eq_wait[14] && eq_wait[10] && rdy_l)   // t is reused: wait until eq 10 has been issued
    nxt_fe_add <= 14;
  else if(~eq_wait[19] && eq_val[18] && eq_val[0])
    nxt_fe_add <= 19;
  else if(~eq_wait[22] && eq_val[17] && eq_val[21])
    nxt_fe_add <= 22;
  else
    add_en <= 0;
endtask

task get_next_sub();
  sub_en <= 1;
  if(~eq_wait[6] && eq_val[5] && eq_val[1])
    nxt_fe_sub <= 6;
  else if(~eq_wait[7] && eq_val[6] && eq_val[2])
    nxt_fe_sub <= 7;
  else if (~eq_wait[11] && eq_val[10] && eq_val[0])
    nxt_fe_sub <= 11;
  else if (~eq_wait[16] && eq_val[15] && eq_val[0])
    nxt_fe_sub <= 16;
  else if (~eq_wait[17] && eq_val[16] && eq_val[1])
    nxt_fe_sub <= 17;
  else if (~eq_wait[20] && eq_val[12] && eq_val[2])
    nxt_fe_sub <= 20;
  else
    sub_en <= 0;
endtask

task get_next_fe2_mnr();
  mnr_en <= 1;
  if(~eq_wait[18] && eq_val[7])
    nxt_fe2_mnr <= 18;
  else if(~eq_wait[21] && eq_wait[20])            // c_c is overwritten: wait until eq 20 has been issued
    nxt_fe2_mnr <= 21;
  else
    mnr_en <= 0;
endtask

endmodule

View File

@ -35,7 +35,7 @@ FE_TYPE [3:0] t;
always_comb begin
case (mnr_cnt) inside
0,1,2,3: i_mnr_fe6_if.rdy = 1;
4,5: i_mnr_fe6_if.rdy = ~o_mnr_fe6_if.val || (o_mnr_fe6_if.val && o_mnr_fe6_if.rdy);
4,5: i_mnr_fe6_if.rdy = ~o_mnr_fe2_if.val || (o_mnr_fe2_if.val && o_mnr_fe2_if.rdy);
default: i_mnr_fe6_if.rdy = 0;
endcase

View File

@ -193,8 +193,8 @@ resource_share # (
.DAT_BITS ( 2*$bits(FE_TYPE) ),
.CTL_BITS ( CTL_BITS ),
.OVR_WRT_BIT ( 8 ),
.PIPELINE_IN ( 3 ),
.PIPELINE_OUT ( 2 )
.PIPELINE_IN ( 1 ),
.PIPELINE_OUT ( 1 )
)
resource_share_fe_add (
.i_clk ( clk ),
@ -210,8 +210,8 @@ resource_share # (
.DAT_BITS ( 2*$bits(FE_TYPE) ),
.CTL_BITS ( CTL_BITS ),
.OVR_WRT_BIT ( 8 ),
.PIPELINE_IN ( 2 ),
.PIPELINE_OUT ( 4 )
.PIPELINE_IN ( 1 ),
.PIPELINE_OUT ( 1 )
)
resource_share_fe_sub (
.i_clk ( clk ),
@ -227,8 +227,8 @@ resource_share # (
.DAT_BITS ( 2*$bits(FE_TYPE) ),
.CTL_BITS ( CTL_BITS ),
.OVR_WRT_BIT ( 12 ),
.PIPELINE_IN ( 2 ),
.PIPELINE_OUT ( 2 )
.PIPELINE_IN ( 1 ),
.PIPELINE_OUT ( 1 )
)
resource_share_fe2_mnr (
.i_clk ( clk ),

View File

@ -37,6 +37,7 @@ logic [DAT_BITS-1:0] P_;
logic [LEVEL:0][DAT_BITS:0] result0, result1;
logic [LEVEL:0][DAT_BITS:0] a, b;
logic [LEVEL:0][1:0] sop_eop;
logic [LEVEL:0][CTL_BITS-1:0] ctl; // Top ctl bit we use to check if this needs a subtraction in P
logic [LEVEL:0] val, rdy;
logic [LEVEL:0] carry_neg;
@ -52,11 +53,15 @@ always_comb begin
a[0] = i_add.dat[0 +: BITS];
b[0] = 0;
b[0] = i_add.dat[BITS +: BITS];
sop_eop[0][0] = i_add.sop;
sop_eop[0][1] = i_add.eop;
result0[0] = 0;
result1[0] = 0;
rdy[LEVEL] = o_add.rdy;
i_add.rdy = rdy[0];
o_add.copy_if_comb(carry_neg[LEVEL] ? result0[LEVEL] : result1[LEVEL], val[LEVEL], 1, 1, 0, 0, ctl[LEVEL]);
o_add.sop = sop_eop[LEVEL][0];
o_add.eop = sop_eop[LEVEL][1];
end
generate
@ -94,12 +99,14 @@ genvar g;
b[g+1] <= 0;
ctl[g+1] <= 0;
carry_neg[g+1] <= 0;
sop_eop[g+1] <= 0;
end else begin
if (rdy[g]) begin
val[g+1] <= val[g];
ctl[g+1] <= ctl[g];
a[g+1] <= a[g];
b[g+1] <= b[g];
sop_eop[g+1] <= sop_eop[g];
result0[g+1] <= result0[g];
result0[g+1][g*BITS_LEVEL +: BITS_LEVEL + 1] <= add_res0;

View File

@ -38,6 +38,8 @@ module resource_share # (
if_axi_stream.source o_axi [NUM_IN-1:0]
);
if_axi_stream #(.DAT_BYTS(DAT_BYTS), .DAT_BITS(DAT_BITS), .CTL_BITS(CTL_BITS)) out_int (i_clk);
// Arbitration to the resource
packet_arb # (
.DAT_BITS ( DAT_BITS ),
@ -45,47 +47,61 @@ packet_arb # (
.CTL_BITS ( CTL_BITS ),
.NUM_IN ( NUM_IN ),
.OVR_WRT_BIT ( OVR_WRT_BIT ),
.PIPELINE ( PIPELINE_IN ),
.PIPELINE ( 0 ),
.PRIORITY_IN ( PRIORITY_IN )
)
packet_arb_mult (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_axi ( i_axi ),
.o_axi ( o_res )
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_axi ( i_axi ),
.o_axi ( out_int )
);
pipeline_if #(
.DAT_BITS ( DAT_BITS ),
.DAT_BYTS ( DAT_BYTS ),
.CTL_BITS ( CTL_BITS ),
.NUM_STAGES ( PIPELINE_OUT )
)
pipeline_if_out (
.i_rst ( i_rst ),
.i_if ( out_int ),
.o_if ( o_res )
);
// Demuxing
if_axi_stream #(.DAT_BYTS(DAT_BYTS), .DAT_BITS(DAT_BITS), .CTL_BITS(CTL_BITS)) int_axi [NUM_IN-1:0] (i_clk);
if_axi_stream #(.DAT_BYTS(DAT_BYTS), .DAT_BITS(DAT_BITS), .CTL_BITS(CTL_BITS)) in_int (i_clk);
genvar gen0;
logic [NUM_IN-1:0] rdy;
pipeline_if #(
.DAT_BITS ( DAT_BITS ),
.DAT_BYTS ( DAT_BYTS ),
.CTL_BITS ( CTL_BITS ),
.NUM_STAGES ( PIPELINE_IN )
)
pipeline_if_in (
.i_rst ( i_rst ),
.i_if ( i_res ),
.o_if ( in_int )
);
generate
for (gen0 = 0; gen0 < NUM_IN; gen0++) begin: GEN_DEMUX
always_comb begin
rdy[gen0] = int_axi[gen0].rdy;
int_axi[gen0].copy_if_comb(i_res.dat, i_res.val && i_res.ctl[OVR_WRT_BIT +: $clog2(NUM_IN)] == gen0,
i_res.sop, i_res.eop, i_res.err, i_res.mod, i_res.ctl);
int_axi[gen0].ctl[OVR_WRT_BIT +: $clog2(NUM_IN)] = 0;
rdy[gen0] = o_axi[gen0].rdy;
o_axi[gen0].copy_if_comb(in_int.dat, in_int.val && in_int.ctl[OVR_WRT_BIT +: $clog2(NUM_IN)] == gen0,
in_int.sop, in_int.eop, in_int.err, in_int.mod, in_int.ctl);
o_axi[gen0].ctl[OVR_WRT_BIT +: $clog2(NUM_IN)] = 0;
end
pipeline_if #(
.DAT_BITS ( DAT_BITS ),
.DAT_BYTS ( DAT_BYTS ),
.CTL_BITS ( CTL_BITS ),
.NUM_STAGES ( PIPELINE_OUT )
)
pipeline_if (
.i_rst ( i_rst ),
.i_if ( int_axi[gen0] ),
.o_if ( o_axi[gen0] )
);
end
endgenerate
always_comb begin
i_res.rdy = rdy[i_res.ctl[OVR_WRT_BIT +: $clog2(NUM_IN)]];
in_int.rdy = rdy[in_int.ctl[OVR_WRT_BIT +: $clog2(NUM_IN)]];
end
endmodule

View File

@ -38,6 +38,7 @@ logic [DAT_BITS-1:0] P_;
logic [LEVEL:0][DAT_BITS:0] result0, result1;
logic [LEVEL:0][DAT_BITS:0] a, b;
logic [LEVEL:0][CTL_BITS-1:0] ctl; // Top ctl bit we use to check if this needs a subtraction in P
logic [LEVEL:0][1:0] sop_eop;
logic [LEVEL:0] val, rdy;
logic [LEVEL:0] carry_neg0, carry_neg1;
@ -49,6 +50,8 @@ always_comb begin
carry_neg1[0] = 0;
val[0] = i_sub.val;
ctl[0] = i_sub.ctl;
sop_eop[0][0] = i_sub.sop;
sop_eop[0][1] = i_sub.eop;
a[0] = 0;
a[0] = i_sub.dat[0 +: BITS];
b[0] = 0;
@ -59,6 +62,8 @@ always_comb begin
i_sub.rdy = rdy[0];
o_sub.dat = carry_neg1[LEVEL] ? result0[LEVEL] : result1[LEVEL];
o_sub.copy_if_comb(carry_neg1[LEVEL] ? result0[LEVEL] : result1[LEVEL], val[LEVEL], 1, 1, 0, 0, ctl[LEVEL]);
o_sub.sop = sop_eop[LEVEL][0];
o_sub.eop = sop_eop[LEVEL][1];
end
generate
@ -102,12 +107,15 @@ genvar g;
a[g+1] <= 0;
b[g+1] <= 0;
ctl[g+1] <= 0;
sop_eop[g+1] <= 0;
carry_neg0[g+1] <= 0;
carry_neg1[g+1] <= 0;
end else begin
if (rdy[g]) begin
val[g+1] <= val[g];
ctl[g+1] <= ctl[g];
sop_eop[g+1] <= sop_eop[g];
a[g+1] <= a[g];
b[g+1] <= b[g];