Updates for streaming logic and test bench

This commit is contained in:
bsdevlin 2019-08-04 11:45:12 +08:00
parent f8371eba2e
commit d7163efef2
5 changed files with 658 additions and 10 deletions

View File

@ -1,7 +1,8 @@
/*
This provides the interface to perform Fp2 field element mul. Using karabusta algorithm.
This provides the interface to perform Fp2 field element mul.
Inputs must be interleaved starting at c0 (i.e. clock 0 = {b.c0, a.c0})
_s in the name represents the input is a stream starting at c0.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
@ -19,13 +20,13 @@
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
module ec_fe2_mul
module ec_fe2_mul_s
#(
parameter type FE_TYPE, // Base field element type
parameter CTL_BITS = 12
)(
input i_clk, i_rst,
// Interface to FE(P)_TYPE adder (mod P) 2*FE_TYPE data width
// Interface to FE2_TYPE mul (mod P) 2*FE_TYPE data width
if_axi_stream.source o_mul_fe2_if,
if_axi_stream.sink i_mul_fe2_if,
// Interface to FE_TYPE mul (mod P) 2*FE_TYPE data width
@ -46,7 +47,7 @@ logic out_cnt;
// Point additions are simple additions on each of the Fp elements
always_comb begin
i_mul_fe2_if.rdy = (mul_cnt == 0 || mul_cnt == 1) && (~o_mul_fe_if.val || (o_mul_fe_if.val && o_mul_fe_if.rdy));
i_mul_fe_if.rdy = (add_sub_cnt == 0 || add_sub_cnt == 1) ? ~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy) :
i_mul_fe_if.rdy = (add_sub_cnt == 0 || add_sub_cnt == 1) ? ~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy) :
~o_add_fe_if.val || (o_add_fe_if.val && o_add_fe_if.rdy);
i_add_fe_if.rdy = out_cnt == 1 && (~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy));
i_sub_fe_if.rdy = out_cnt == 0 && (~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy));
@ -76,14 +77,14 @@ always_ff @ (posedge i_clk) begin
o_mul_fe_if.dat <= i_mul_fe2_if.dat; // a0 * b0
o_mul_fe_if.val <= i_mul_fe2_if.val;
o_mul_fe_if.ctl <= i_mul_fe2_if.ctl;
{b, a} <= i_mul_fe2_if.dat;
{b, a} <= i_mul_fe2_if.dat;
if (i_mul_fe2_if.val) mul_cnt <= mul_cnt + 1;
end
end
1: begin
if (~o_mul_fe_if.val || (o_mul_fe_if.val && o_mul_fe_if.rdy)) begin
o_mul_fe_if.dat <= i_mul_fe2_if.dat; // a1 * b1
o_mul_fe_if.val <= i_mul_fe2_if.val;
o_mul_fe_if.val <= i_mul_fe2_if.val;
if (i_mul_fe2_if.val) mul_cnt <= mul_cnt + 1;
end
end
@ -108,7 +109,7 @@ always_ff @ (posedge i_clk) begin
case(add_sub_cnt)
0: begin
if (~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy)) begin
o_sub_fe_if.dat[0 +: $bits(FE_TYPE)] <= i_mul_fe_if.dat;
o_sub_fe_if.dat[0 +: $bits(FE_TYPE)] <= i_mul_fe_if.dat;
if (i_mul_fe_if.val) add_sub_cnt <= add_sub_cnt + 1;
end
end
@ -126,7 +127,7 @@ always_ff @ (posedge i_clk) begin
if (i_mul_fe_if.val) add_sub_cnt <= add_sub_cnt + 1;
end
end
3: begin
3: begin
o_add_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= i_mul_fe_if.dat;
o_add_fe_if.ctl <= i_mul_fe_if.ctl; // a1b0 + a0b1
if (i_mul_fe_if.val) begin
@ -135,7 +136,7 @@ always_ff @ (posedge i_clk) begin
end
end
endcase
case(out_cnt)
0: begin
if (~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy)) begin

View File

@ -0,0 +1,312 @@
/*
This provides the interface to perform Fp6 field element mul. Using the Karatsuba algorithm.
Because of the feedback path we can lock up if there are not enough pipeline stages for fe2_sub / fe2_add.
Inputs must be interleaved starting at c0 (i.e. clock 0 = {b.c0, a.c0})
_s in the name represents the input is a stream starting at c0.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Streaming Fp6 multiplier control block.
//
// Accepts a stream of {b, a} FE_TYPE pairs on i_mul_fe6_if and sequences the
// external Fp2 multiplier, Fp adder, Fp subtractor and Fp2
// multiply-by-non-residue blocks attached to the o_*/i_* interface pairs.
// The result is streamed out on o_mul_fe6_if, one FE_TYPE word per beat.
//
// Every request issued on an o_* interface is tagged in
// ctl[OVR_WRT_BIT +: NUM_OVR_WRT] with (counter / 2); the matching i_*
// interface is decoded on that same tag to decide where the result goes.
module ec_fe6_mul_s
#(
  parameter type FE_TYPE,      // Base field element type
  parameter type FE2_TYPE,     // Fp6 is towered over Fp2
  parameter type FE6_TYPE,     // Fp6 is towered over Fp2
  parameter CTL_BITS = 12,
  parameter OVR_WRT_BIT = 0    // LSB of the ctl field this block uses for its operation tag
)(
  input i_clk, i_rst,
  // Interface to FE2_TYPE multiplier (mod P), 2*FE_TYPE data width
  if_axi_stream.source o_mul_fe2_if,
  if_axi_stream.sink   i_mul_fe2_if,
  // Interface to FE_TYPE adder (mod P), 2*FE_TYPE data width
  if_axi_stream.source o_add_fe_if,
  if_axi_stream.sink   i_add_fe_if,
  // Interface to FE_TYPE subtractor (mod P), 2*FE_TYPE data width
  if_axi_stream.source o_sub_fe_if,
  if_axi_stream.sink   i_sub_fe_if,
  // Interface to FE2_TYPE multiply by non-residue, FE_TYPE data width
  if_axi_stream.source o_mnr_fe2_if,
  if_axi_stream.sink   i_mnr_fe2_if,
  // Interface to FE6_TYPE multiplier (mod P), 2*FE_TYPE data width
  if_axi_stream.source o_mul_fe6_if,
  if_axi_stream.sink   i_mul_fe6_if
);

localparam CNT_BITS = 5;
localparam NUM_OVR_WRT = $clog2((1 << CNT_BITS)/2); // Only need half the bits for control

// Multiplications are calculated using the formula in bls12_381.pkg::fe6_mul()
// Need storage to latch input stream
// a_a is a[2], b_b is a[1], c_c is a[0]
FE6_TYPE a, b;
FE_TYPE t;   // Holds one adder result so it can be paired with the next one
logic [CNT_BITS-1:0] add_cnt, sub_cnt, mul_cnt, mnr_cnt, out_cnt;
logic start; // Set once the last input beat is accepted, cleared when the output flow finishes

// Back-pressure: the ready of each returning interface is decoded from the
// operation tag carried in its ctl field.
always_comb begin
  // New input is only accepted while idle and the Fp2 multiplier request port is free
  i_mul_fe6_if.rdy = (start == 0) && (~o_mul_fe2_if.val || (o_mul_fe2_if.val & o_mul_fe2_if.rdy));

  case (i_mul_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT]) inside
    0,1,2: i_mul_fe2_if.rdy = 1; // Latched straight into 'a'
    3,4,5: i_mul_fe2_if.rdy = ~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy); // Forwarded to subtractor
    default: i_mul_fe2_if.rdy = 0;
  endcase

  case (i_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT]) inside
    0,1,2,3,4,5: i_add_fe_if.rdy = ~o_mul_fe2_if.val || (o_mul_fe2_if.val & o_mul_fe2_if.rdy); // Forwarded to Fp2 multiplier
    6: i_add_fe_if.rdy = 1; // Latched into b[0]
    7: i_add_fe_if.rdy = (~o_mul_fe6_if.val || (o_mul_fe6_if.val && o_mul_fe6_if.rdy)); // Final output
    8: i_add_fe_if.rdy = (~o_mul_fe6_if.val || (o_mul_fe6_if.val && o_mul_fe6_if.rdy)); // Final output
    default: i_add_fe_if.rdy = 0;
  endcase

  case (i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT]) inside
    0,1: i_sub_fe_if.rdy = (sub_cnt/2 > 2) && (~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy)); // Fed back into subtractor
    2: i_sub_fe_if.rdy = (~o_add_fe_if.val || (o_add_fe_if.val && o_add_fe_if.rdy));                      // Forwarded to adder
    3: i_sub_fe_if.rdy = (~o_mnr_fe2_if.val || (o_mnr_fe2_if.val && o_mnr_fe2_if.rdy));                   // Forwarded to mnr
    4: i_sub_fe_if.rdy = 1;                                                                               // Latched into b[1]
    5: i_sub_fe_if.rdy = (out_cnt/2 == 2) && (~o_mul_fe6_if.val || (o_mul_fe6_if.val && o_mul_fe6_if.rdy)); // Final output
    default: i_sub_fe_if.rdy = 0;
  endcase

  case (i_mnr_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT]) inside
    0: i_mnr_fe2_if.rdy = (add_cnt/2 == 7) && (~o_add_fe_if.val || (o_add_fe_if.val && o_add_fe_if.rdy));
    1: i_mnr_fe2_if.rdy = (add_cnt/2 == 8) && (~o_add_fe_if.val || (o_add_fe_if.val && o_add_fe_if.rdy));
    default: i_mnr_fe2_if.rdy = 0;
  endcase
end

always_ff @ (posedge i_clk) begin
  if (i_rst) begin
    o_mul_fe6_if.reset_source();
    o_mnr_fe2_if.reset_source();
    o_mul_fe2_if.reset_source();
    o_sub_fe_if.reset_source();
    o_add_fe_if.reset_source();
    add_cnt <= 0;
    sub_cnt <= 0;
    mul_cnt <= 0;
    mnr_cnt <= 0;
    out_cnt <= 0;
    a <= 0;
    b <= 0;
    t <= 0;
    start <= 0;
  end else begin

    // Default: drop valid once the downstream side has taken the beat.
    // Later assignments in this block override this when a new beat is issued.
    if (o_mul_fe6_if.rdy) o_mul_fe6_if.val <= 0;
    if (o_mul_fe2_if.rdy) o_mul_fe2_if.val <= 0;
    if (o_sub_fe_if.rdy) o_sub_fe_if.val <= 0;
    if (o_add_fe_if.rdy) o_add_fe_if.val <= 0;
    if (o_mnr_fe2_if.rdy) o_mnr_fe2_if.val <= 0;

    // Latch some results temp
    if (i_add_fe_if.val && i_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 6) begin
      b[0] <= {i_add_fe_if.dat, b[0][1]}; // Shift new word in at the top of b[0]
    end

    if (i_sub_fe_if.val && i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 4) begin
      b[1] <= {i_sub_fe_if.dat, b[1][1]}; // Shift new word in at the top of b[1]
    end

    // Latch multiplier results of a_a, b_b, c_c
    if (i_mul_fe2_if.val && i_mul_fe2_if.rdy && i_mul_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] < 3) begin
      a <= {i_mul_fe2_if.dat, {a[2], a[1], a[0][1]}};
    end

    // Whole input element has been received
    if (i_mul_fe6_if.rdy && i_mul_fe6_if.eop && i_mul_fe6_if.val)
      start <= 1;

    // Multiplier input flow
    case (mul_cnt) inside
      0,1,2,3,4,5: begin // Calculates a_a, b_b, c_c
        fe2_mul(i_mul_fe6_if.val, i_mul_fe6_if.dat[0 +: $bits(FE_TYPE)],
                i_mul_fe6_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)], mul_cnt);
        // Input words are also shifted into the a / b storage as they arrive
        if (i_mul_fe6_if.val && i_mul_fe6_if.rdy) begin
          a <= {i_mul_fe6_if.dat[0 +: $bits(FE_TYPE)], a[2], a[1], a[0][1]};
          b <= {i_mul_fe6_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)], b[2], b[1], b[0][1]};
        end
      end
      6,10,14: begin
        // Store result into multiplier and temp - calculates fe6_mul[0] / fe6_mul[1] / fe6_mul[2]
        if (i_add_fe_if.val && i_add_fe_if.rdy) begin
          o_mul_fe2_if.dat[0 +: $bits(FE_TYPE)] <= i_add_fe_if.dat;
          o_mul_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= o_mul_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] + 1;
          mul_cnt <= mul_cnt + 1;
        end
      end
      7,11,15: begin
        // Hold this adder result; it is paired with a later one in states 9,13,17
        if (i_add_fe_if.val && i_add_fe_if.rdy) begin
          t <= i_add_fe_if.dat;
          mul_cnt <= mul_cnt + 1;
        end
      end
      8,12,16: begin
        if (i_add_fe_if.val && i_add_fe_if.rdy) begin // .rdy takes into account the multiplier output state
          o_mul_fe2_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= i_add_fe_if.dat;
          o_mul_fe2_if.sop <= 1;
          o_mul_fe2_if.eop <= 0;
          o_mul_fe2_if.val <= 1;
          mul_cnt <= mul_cnt + 1;
        end
      end
      9,13,17: begin
        if (i_add_fe_if.val && i_add_fe_if.rdy) begin
          o_mul_fe2_if.dat <= {i_add_fe_if.dat, t};
          o_mul_fe2_if.sop <= 0;
          o_mul_fe2_if.eop <= 1;
          o_mul_fe2_if.val <= 1;
          mul_cnt <= mul_cnt + 1;
        end
      end
      default: if (start==0) mul_cnt <= 0;
    endcase

    // Adder input flow
    case (add_cnt) inside
      0,1: fe2_add(start, a[1][add_cnt%2], a[2][add_cnt%2], add_cnt);
      2,3: fe2_add(start, b[1][add_cnt%2], b[2][add_cnt%2], add_cnt);
      4,5: fe2_add(start, b[0][add_cnt%2], b[1][add_cnt%2], add_cnt);
      6,7: fe2_add(start, a[0][add_cnt%2], a[1][add_cnt%2], add_cnt);
      8,9: fe2_add(start, b[0][add_cnt%2], b[2][add_cnt%2], add_cnt);
      10,11: fe2_add(start, a[0][add_cnt%2], a[2][add_cnt%2], add_cnt);
      12,13: fe2_add(i_sub_fe_if.val && i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 2 , i_sub_fe_if.dat, a[1][add_cnt%2], add_cnt);
      14,15: fe2_add(i_mnr_fe2_if.val, i_mnr_fe2_if.dat, a[0][add_cnt%2], add_cnt);
      16,17: fe2_add(i_mnr_fe2_if.val, b[1][add_cnt%2], i_mnr_fe2_if.dat, add_cnt);
      default: if (start==0) add_cnt <= 0;
    endcase

    // Sub input flow
    case (sub_cnt) inside
      0,1: fe2_sub(i_mul_fe2_if.val && i_mul_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 3, i_mul_fe2_if.dat, a[1][sub_cnt%2], sub_cnt);
      2,3: fe2_sub(i_mul_fe2_if.val, i_mul_fe2_if.dat, a[1][sub_cnt%2], sub_cnt);
      4,5: fe2_sub(i_mul_fe2_if.val, i_mul_fe2_if.dat, a[0][sub_cnt%2], sub_cnt);
      6,7: fe2_sub(i_sub_fe_if.val, i_sub_fe_if.dat, a[2][sub_cnt%2], sub_cnt);
      8,9: fe2_sub(i_sub_fe_if.val, i_sub_fe_if.dat, a[0][sub_cnt%2], sub_cnt);
      10,11: fe2_sub(add_cnt >= 18, b[0][sub_cnt%2], a[2][sub_cnt%2], sub_cnt);
      default: if (start==0) sub_cnt <= 0;
    endcase

    // mnr flow
    case (mnr_cnt) inside
      0,1: fe2_mnr(i_sub_fe_if.val && i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 3, i_sub_fe_if.dat, mnr_cnt);
      2,3: fe2_mnr(1, a[2][mnr_cnt%2], mnr_cnt);
      default: if (start==0) mnr_cnt <= 0;
    endcase

    // Final output flow
    if (~o_mul_fe6_if.val || (o_mul_fe6_if.val && o_mul_fe6_if.rdy)) begin
      case (out_cnt) inside
        0,1: begin
          o_mul_fe6_if.dat <= i_add_fe_if.dat;
          o_mul_fe6_if.ctl <= i_add_fe_if.ctl;
          o_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= 0; // Clear this block's tag bits before handing ctl on
          o_mul_fe6_if.sop <= out_cnt == 0;
          o_mul_fe6_if.eop <= 0;
          if (i_add_fe_if.val && i_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 7) begin
            o_mul_fe6_if.val <= 1;
            out_cnt <= out_cnt + 1;
          end
        end
        2,3: begin
          o_mul_fe6_if.dat <= i_add_fe_if.dat;
          o_mul_fe6_if.sop <= 0;
          o_mul_fe6_if.eop <= 0;
          if (i_add_fe_if.val) begin
            o_mul_fe6_if.val <= 1;
            out_cnt <= out_cnt + 1;
          end
        end
        4,5: begin
          o_mul_fe6_if.dat <= i_sub_fe_if.dat;
          o_mul_fe6_if.sop <= 0;
          o_mul_fe6_if.eop <= out_cnt == 5;
          if (i_sub_fe_if.val) begin
            o_mul_fe6_if.val <= 1;
            out_cnt <= out_cnt + 1;
          end
        end
        default: begin
          // All six output words sent - return to idle so the other counters can reset
          out_cnt <= 0;
          start <= 0;
        end
      endcase
    end
  end
end

// Task for fe2_mul
// Issues one {b, a} beat to the Fp2 multiplier when its request port is free.
// sop/eop toggle on every valid beat, the tag is cnt/2, and cnt is
// incremented in place (blocking, via ref) when the beat is valid.
task automatic fe2_mul(input logic val, input logic [$bits(FE_TYPE)-1:0] a, b, ref [CNT_BITS-1:0] cnt);
  if (~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy)) begin
    o_mul_fe2_if.sop <= val ? ~o_mul_fe2_if.sop : o_mul_fe2_if.sop;
    o_mul_fe2_if.eop <= val ? o_mul_fe2_if.sop : o_mul_fe2_if.eop;
    o_mul_fe2_if.dat <= {b, a};
    o_mul_fe2_if.val <= val;
    o_mul_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= cnt / 2;
    if (val) cnt = cnt + 1;
  end
endtask

// Task for fe2_add
// Same handshake as fe2_mul, targeting the adder request port.
task automatic fe2_add(input logic val, input logic [$bits(FE_TYPE)-1:0] a, b, ref [CNT_BITS-1:0] cnt);
  if (~o_add_fe_if.val || (o_add_fe_if.val && o_add_fe_if.rdy)) begin
    o_add_fe_if.sop <= val ? ~o_add_fe_if.sop : o_add_fe_if.sop;
    o_add_fe_if.eop <= val ? o_add_fe_if.sop : o_add_fe_if.eop;
    o_add_fe_if.dat <= {b, a};
    o_add_fe_if.val <= val;
    o_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= cnt / 2;
    if (val) cnt = cnt + 1;
  end
endtask

// Task for fe2_sub
// Same handshake as fe2_mul, targeting the subtractor request port.
task automatic fe2_sub(input logic val, input logic [$bits(FE_TYPE)-1:0] a, b, ref [CNT_BITS-1:0] cnt);
  if (~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy)) begin
    o_sub_fe_if.sop <= val ? ~o_sub_fe_if.sop : o_sub_fe_if.sop;
    o_sub_fe_if.eop <= val ? o_sub_fe_if.sop : o_sub_fe_if.eop;
    o_sub_fe_if.dat <= {b, a};
    o_sub_fe_if.val <= val;
    o_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= cnt / 2;
    if (val) cnt = cnt + 1;
  end
endtask

// Task for fe2_mnr
// Same handshake as fe2_mul, targeting the multiply-by-non-residue request
// port. This port carries a single FE_TYPE operand per beat.
task automatic fe2_mnr(input logic val, input logic [$bits(FE_TYPE)-1:0] a, ref [CNT_BITS-1:0] cnt);
  if (~o_mnr_fe2_if.val || (o_mnr_fe2_if.val && o_mnr_fe2_if.rdy)) begin
    o_mnr_fe2_if.sop <= val ? ~o_mnr_fe2_if.sop : o_mnr_fe2_if.sop;
    o_mnr_fe2_if.eop <= val ? o_mnr_fe2_if.sop : o_mnr_fe2_if.eop;
    // Drive only the operand. This task has no 'b' argument, so the previous
    // {b, a} resolved 'b' to the module-level FE6_TYPE storage and relied on
    // truncation to leave 'a' in the low FE_TYPE bits.
    o_mnr_fe2_if.dat <= a;
    o_mnr_fe2_if.val <= val;
    o_mnr_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= cnt / 2;
    if (val) cnt = cnt + 1;
  end
endtask

endmodule

View File

@ -0,0 +1,89 @@
/*
Multiplies by non-residue for Fp2 towering.
_s in the name represents the input is a stream starting at c0.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Streaming multiply-by-non-residue for an Fp2 element.
//
// The two FE_TYPE words of the input element arrive on i_mnr_fe2_if (first
// word flagged with sop, last with eop). Both words are packed into one
// request that is sent to the external adder and subtractor at the same time.
// The subtractor result is streamed out first (sop), followed by the adder
// result (eop).
// NOTE(review): presumably this computes (c0 - c1) + (c0 + c1)u, i.e. a
// multiply by (1 + u) - confirm against the adder / subtractor operand order.
module fe2_mul_by_nonresidue_s
#(
  parameter type FE_TYPE
)(
  input i_clk, i_rst,
  if_axi_stream.source o_mnr_fe2_if,
  if_axi_stream.sink   i_mnr_fe2_if, // Input is multiplied by non residue
  if_axi_stream.source o_add_fe_if,
  if_axi_stream.sink   i_add_fe_if,
  if_axi_stream.source o_sub_fe_if,
  if_axi_stream.sink   i_sub_fe_if
);

// Selects which result is forwarded next: 0 = subtractor, 1 = adder
logic add_sub_cnt;

always_comb begin
  // Input is accepted only when both request ports are free (or being taken this cycle)
  i_mnr_fe2_if.rdy = (~o_add_fe_if.val || (o_add_fe_if.rdy && o_add_fe_if.val)) && (~o_sub_fe_if.val || (o_sub_fe_if.rdy && o_sub_fe_if.val));
  i_add_fe_if.rdy = add_sub_cnt == 1 && (~o_mnr_fe2_if.val || (o_mnr_fe2_if.val && o_mnr_fe2_if.rdy));
  i_sub_fe_if.rdy = add_sub_cnt == 0 && (~o_mnr_fe2_if.val || (o_mnr_fe2_if.val && o_mnr_fe2_if.rdy));
end

always_ff @ (posedge i_clk) begin
  if (i_rst) begin
    o_mnr_fe2_if.reset_source();
    o_add_fe_if.copy_if(0, 0, 1, 1, 0, 0, 0);
    o_sub_fe_if.copy_if(0, 0, 1, 1, 0, 0, 0);
    add_sub_cnt <= 0;
  end else begin

    if (o_mnr_fe2_if.val && o_mnr_fe2_if.rdy) o_mnr_fe2_if.val <= 0;

    // Retire each request as soon as its own downstream port accepts it.
    // Without this, when one port accepts while the other is stalled,
    // i_mnr_fe2_if.rdy stays low, the accepted port keeps val asserted and
    // the same operands are issued a second time.
    // When i_mnr_fe2_if.rdy is high the assignments below take precedence.
    if (o_add_fe_if.val && o_add_fe_if.rdy) o_add_fe_if.val <= 0;
    if (o_sub_fe_if.val && o_sub_fe_if.rdy) o_sub_fe_if.val <= 0;

    if (i_mnr_fe2_if.rdy) begin
      // First word goes in the low half, second word in the high half
      if (i_mnr_fe2_if.sop) begin
        o_add_fe_if.dat[0 +: $bits(FE_TYPE)] <= i_mnr_fe2_if.dat;
        o_sub_fe_if.dat[0 +: $bits(FE_TYPE)] <= i_mnr_fe2_if.dat;
      end else begin
        o_add_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= i_mnr_fe2_if.dat;
        o_sub_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= i_mnr_fe2_if.dat;
      end
      // Requests only become valid once the last input word has been received
      o_add_fe_if.val <= i_mnr_fe2_if.val && i_mnr_fe2_if.rdy && i_mnr_fe2_if.eop;
      o_add_fe_if.ctl <= i_mnr_fe2_if.ctl;
      o_sub_fe_if.val <= i_mnr_fe2_if.val && i_mnr_fe2_if.rdy && i_mnr_fe2_if.eop;
      o_sub_fe_if.ctl <= i_mnr_fe2_if.ctl;
    end

    // Result flow: forward the subtractor result, then the adder result
    if (~o_mnr_fe2_if.val || (o_mnr_fe2_if.val && o_mnr_fe2_if.rdy)) begin
      case(add_sub_cnt)
        0: begin
          o_mnr_fe2_if.dat <= i_sub_fe_if.dat;
          o_mnr_fe2_if.ctl <= i_sub_fe_if.ctl;
          o_mnr_fe2_if.val <= i_sub_fe_if.val;
          o_mnr_fe2_if.sop <= 1;
          o_mnr_fe2_if.eop <= 0;
          if (i_sub_fe_if.val)
            add_sub_cnt <= add_sub_cnt + 1;
        end
        1: begin
          o_mnr_fe2_if.dat <= i_add_fe_if.dat;
          o_mnr_fe2_if.ctl <= i_add_fe_if.ctl;
          o_mnr_fe2_if.val <= i_add_fe_if.val;
          o_mnr_fe2_if.sop <= 0;
          o_mnr_fe2_if.eop <= 1;
          if (i_add_fe_if.val)
            add_sub_cnt <= add_sub_cnt + 1; // 1-bit counter wraps back to 0
        end
      endcase
    end
  end
end

endmodule

View File

@ -0,0 +1,246 @@
/*
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
`timescale 1ps/1ps
// Test bench for ec_fe6_mul_s.
//
// Wires the Fp6 streaming multiplier to a streaming Fp2 multiplier, a
// multiply-by-non-residue block and a base-field multiplier. One adder and
// one subtractor are shared between the three blocks through resource_share
// arbiters. Random operands are streamed in and the streamed result is
// compared against bls12_381_pkg::fe6_mul().
module ec_fe6_mul_tb ();

import common_pkg::*;
import bls12_381_pkg::*;

parameter type FE_TYPE  = bls12_381_pkg::fe_t;
parameter type FE2_TYPE = bls12_381_pkg::fe2_t;
parameter type FE6_TYPE = bls12_381_pkg::fe6_t;
parameter P = bls12_381_pkg::P;

localparam CTL_BITS = 32;
localparam CLK_PERIOD = 100;

// Stream word sizes, rounded up to whole bytes. These are the same
// expressions used for DAT_BYTS on o_mul_fe6_if / i_mul_fe6_if below.
localparam FE_BYTS      = ($bits(FE_TYPE)+7)/8;    // One field element per output beat
localparam FE_PAIR_BYTS = (2*$bits(FE_TYPE)+7)/8;  // One {b, a} pair per input beat

logic clk, rst;

// Reset is low for 20 clocks, high for 20 clocks, then released
initial begin
  rst = 0;
  repeat(2) #(20*CLK_PERIOD) rst = ~rst;
end

initial begin
  clk = 0;
  forever #(CLK_PERIOD/2) clk = ~clk;
end

// Index [3] of the add / sub arrays is the shared arithmetic unit,
// indices [2:0] are the three requesters.
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_o_if (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS))   mul_fe_i_if (clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_o_if [3:0] (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS))   add_fe_i_if [3:0] (clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_o_if [3:0] (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS))   sub_fe_i_if [3:0] (clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_o_if (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS))   mul_fe2_i_if (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS))   mnr_fe2_o_if (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS))   mnr_fe2_i_if (clk);

if_axi_stream #(.DAT_BYTS(($bits(FE_TYPE)+7)/8), .CTL_BITS(CTL_BITS))   o_mul_fe6_if (clk);
if_axi_stream #(.DAT_BYTS((2*$bits(FE_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) i_mul_fe6_if (clk);

// Base field multiplier, driven by ec_fe2_mul_s
ec_fp_mult_mod #(
  .P             ( P        ),
  .KARATSUBA_LVL ( 3        ),
  .CTL_BITS      ( CTL_BITS )
)
ec_fp_mult_mod (
  .i_clk( clk ),
  .i_rst( rst ),
  .i_mul ( mul_fe_o_if ),
  .o_mul ( mul_fe_i_if )
);

// Shared modular adder
adder_pipe # (
  .BITS     ( bls12_381_pkg::DAT_BITS ),
  .P        ( P        ),
  .CTL_BITS ( CTL_BITS ),
  .LEVEL    ( 2        )
)
adder_pipe (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_add ( add_fe_o_if[3] ),
  .o_add ( add_fe_i_if[3] )
);

// Shared modular subtractor
subtractor_pipe # (
  .BITS     ( bls12_381_pkg::DAT_BITS ),
  .P        ( P        ),
  .CTL_BITS ( CTL_BITS ),
  .LEVEL    ( 2        )
)
subtractor_pipe (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_sub ( sub_fe_o_if[3] ),
  .o_sub ( sub_fe_i_if[3] )
);

// Requester 0: streaming Fp2 multiplier
ec_fe2_mul_s #(
  .FE_TYPE  ( FE_TYPE  ),
  .CTL_BITS ( CTL_BITS )
)
ec_fe2_mul_s (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .o_mul_fe2_if ( mul_fe2_i_if ),
  .i_mul_fe2_if ( mul_fe2_o_if ),
  .o_add_fe_if ( add_fe_o_if[0] ),
  .i_add_fe_if ( add_fe_i_if[0] ),
  .o_sub_fe_if ( sub_fe_o_if[0] ),
  .i_sub_fe_if ( sub_fe_i_if[0] ),
  .o_mul_fe_if ( mul_fe_o_if ),
  .i_mul_fe_if ( mul_fe_i_if )
);

// Requester 1: multiply by non-residue
fe2_mul_by_nonresidue_s #(
  .FE_TYPE ( FE_TYPE )
)
fe2_mul_by_nonresidue_s (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .o_mnr_fe2_if ( mnr_fe2_i_if ),
  .i_mnr_fe2_if ( mnr_fe2_o_if ),
  .o_add_fe_if ( add_fe_o_if[1] ),
  .i_add_fe_if ( add_fe_i_if[1] ),
  .o_sub_fe_if ( sub_fe_o_if[1] ),
  .i_sub_fe_if ( sub_fe_i_if[1] )
);

// Requester 2: device under test
ec_fe6_mul_s #(
  .FE_TYPE     ( FE_TYPE  ),
  .FE2_TYPE    ( FE2_TYPE ),
  .FE6_TYPE    ( FE6_TYPE ),
  .CTL_BITS    ( CTL_BITS ),
  .OVR_WRT_BIT ( 0        )
)
ec_fe6_mul_s (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .o_mul_fe2_if ( mul_fe2_o_if ),
  .i_mul_fe2_if ( mul_fe2_i_if ),
  .o_add_fe_if ( add_fe_o_if[2] ),
  .i_add_fe_if ( add_fe_i_if[2] ),
  .o_sub_fe_if ( sub_fe_o_if[2] ),
  .i_sub_fe_if ( sub_fe_i_if[2] ),
  .o_mnr_fe2_if ( mnr_fe2_o_if ),
  .i_mnr_fe2_if ( mnr_fe2_i_if),
  .o_mul_fe6_if ( o_mul_fe6_if ),
  .i_mul_fe6_if ( i_mul_fe6_if )
);

// Arbiter sharing the adder between the three requesters
resource_share # (
  .NUM_IN       ( 3                ),
  .DAT_BITS     ( 2*$bits(FE_TYPE) ),
  .CTL_BITS     ( CTL_BITS         ),
  .OVR_WRT_BIT  ( 8                ),
  .PIPELINE_IN  ( 2                ),
  .PIPELINE_OUT ( 2                )
)
resource_share_fe_add (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_axi ( add_fe_o_if[2:0] ),
  .o_res ( add_fe_o_if[3] ),
  .i_res ( add_fe_i_if[3] ),
  .o_axi ( add_fe_i_if[2:0] )
);

// Arbiter sharing the subtractor between the three requesters
resource_share # (
  .NUM_IN       ( 3                ),
  .DAT_BITS     ( 2*$bits(FE_TYPE) ),
  .CTL_BITS     ( CTL_BITS         ),
  .OVR_WRT_BIT  ( 8                ),
  .PIPELINE_IN  ( 2                ),
  .PIPELINE_OUT ( 2                )
)
resource_share_fe_sub (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_axi ( sub_fe_o_if[2:0] ),
  .o_res ( sub_fe_o_if[3] ),
  .i_res ( sub_fe_i_if[3] ),
  .o_axi ( sub_fe_i_if[2:0] )
);

// Runs 10 random Fp6 multiplications through the DUT and checks each result
// against the software model. Stops the simulation with $fatal on mismatch.
task test();
  fe6_t a, b, f_exp, f_out;
  integer signed get_len;
  integer start_time, finish_time;
  logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat, dat_in;
  $display("Running test ...");
  for (int i = 0; i < 10; i++) begin
    $display("Loop %d", i);
    dat_in = 0;
    // Build the input stream: one {b, a} coefficient pair per beat, starting at c0
    for (int j = 0; j < 3; j++)
      for (int k = 0; k < 2; k++) begin
        a[j][k] = random_vector(FE_BYTS) % P;
        b[j][k] = random_vector(FE_BYTS) % P;
        dat_in[(j*2+k)*8*FE_PAIR_BYTS +: 2*$bits(FE_TYPE)] = {b[j][k], a[j][k]};
      end
    f_exp = fe6_mul(a, b);
    start_time = $time;
    // Drive the input and collect the output concurrently
    fork
      i_mul_fe6_if.put_stream(dat_in, 6*FE_PAIR_BYTS);
      o_mul_fe6_if.get_stream(get_dat, get_len);
    join
    finish_time = $time;

    // Unpack the result: one coefficient per output beat
    for (int j = 0; j < 3; j++)
      for (int k = 0; k < 2; k++)
        f_out[j][k] = get_dat[(j*2+k)*8*FE_BYTS +: $bits(FE_TYPE)];

    if (f_exp != f_out) begin
      $display("Input a was:");
      print_fe6(a);
      $display("Input b was:");
      print_fe6(b);
      $display("Output was:");
      print_fe6(f_out);
      $display("Output Expected:");
      print_fe6(f_exp);
      $fatal(1, "%m %t ERROR: output was wrong", $time);
    end

    $display("test PASSED in %d clocks", (finish_time-start_time)/CLK_PERIOD);
  end
endtask

initial begin
  i_mul_fe6_if.reset_source();
  o_mul_fe6_if.rdy = 0;
  #10ns; // Wait until reset has been released before starting

  test();

  #50ns $finish();
end

endmodule

View File

@ -65,7 +65,7 @@ end
initial begin
clk = 0;
forever #CLK_PERIOD clk = ~clk;
forever #(CLK_PERIOD/2) clk = ~clk;
end
ec_fe2_arithmetic #(