Updates for streaming mode fp12 multiplication and testbench

This commit is contained in:
bsdevlin 2019-08-05 15:23:30 +08:00
parent d7163efef2
commit 82a1710564
7 changed files with 696 additions and 22 deletions

View File

@ -0,0 +1,261 @@
/*
This provides the interface to perform Fp12 field element multiplication, using the Karatsuba algorithm.
Inputs must be interleaved starting at c0 (i.e. clock 0 = {b.c0, a.c0})
_s in the name represents the input is a stream starting at c0.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Streaming Fp12 multiplier built from shared Fp6 / Fp arithmetic units.
//
// The two operands arrive interleaved on i_mul_fe12_if, one base-field element
// of each per beat (12 beats). Writing each operand as two halves of six
// elements (a = {a1, a0}, b = {b1, b0}), the module issues the following
// tagged operations; the tag is placed in ctl[OVR_WRT_BIT +: NUM_OVR_WRT] so
// that returning results can be told apart:
//
//   mul tag 0 : aa = a0 * b0            (input beats 0-5 forwarded directly)
//   mul tag 1 : bb = a1 * b1            (input beats 6-11 forwarded directly)
//   add tag 0 : a0 <= a1 + a0           (result overwrites the a0 storage)
//   add tag 1 : b0 <= b0 + b1           (result overwrites the b0 storage)
//   mul tag 2 : product of the two sums held in a0 and b0
//   mnr tag 0 : bb multiplied by the non-residue
//   add tag 2 : aa + mnr(bb)            -> output beats 0-5
//   sub tag 0 : mul tag 2 result paired with aa
//   sub tag 1 : sub tag 0 result paired with bb -> output beats 6-11
//
// Each o_* interface is a registered source; a new beat is only loaded when the
// register is empty or is being accepted in the current cycle.
module ec_fe12_mul_s
#(
  parameter type FE_TYPE,     // Base field element type
  parameter CTL_BITS = 12,    // Declared for callers; not referenced inside this module
  parameter OVR_WRT_BIT = 0   // Lowest control bit used for this module's operation tags
)(
  input i_clk, i_rst,
  // Interface to FE6_TYPE multiplier (mod P), 2*FE_TYPE data width
  if_axi_stream.source o_mul_fe6_if,
  if_axi_stream.sink   i_mul_fe6_if,
  // Interface to FE_TYPE adder (mod P), 2*FE_TYPE data width
  if_axi_stream.source o_add_fe_if,
  if_axi_stream.sink   i_add_fe_if,
  // Interface to FE_TYPE subtractor (mod P), 2*FE_TYPE data width
  if_axi_stream.source o_sub_fe_if,
  if_axi_stream.sink   i_sub_fe_if,
  // Interface to FE6_TYPE multiply by non-residue, FE_TYPE data width
  if_axi_stream.source o_mnr_fe6_if,
  if_axi_stream.sink   i_mnr_fe6_if,
  // Interface to FE12_TYPE multiplier (mod P), 2*FE_TYPE data width
  if_axi_stream.source o_mul_fe12_if,
  if_axi_stream.sink   i_mul_fe12_if
);

localparam CNT_BITS = 5;                            // Counters run 0..17, so 5 bits
localparam NUM_OVR_WRT = $clog2((1 << CNT_BITS)/2); // Only need half the bits for control

// Storage for the latched input stream; a0/b0/b1 are reused for intermediate
// results (sums in a0 and b0, bb in b1). aa holds the a0 * b0 product.
FE_TYPE [5:0] a0, a1, b0, b1, aa;
// One beat counter per shared resource plus one for the final output stream.
logic [CNT_BITS-1:0] add_cnt, sub_cnt, mul_cnt, mnr_cnt, out_cnt;
// start  : full input has been received, operation in flight
// bb_val : bb has been completely written into b1
// b0_val : sum results have started to arrive in b0
logic start, bb_val, b0_val;

// Ready generation for every sink interface, selected by the returning tag.
always_comb begin
  // Input beats are forwarded straight to the fe6 multiplier, so they can only
  // be accepted while idle and while the multiplier output register is free.
  i_mul_fe12_if.rdy = ~start && (~o_mul_fe6_if.val || (o_mul_fe6_if.val && o_mul_fe6_if.rdy));

  // mnr(bb) is consumed by the tag-2 add, which is only issued once the first
  // twelve adds have gone out and the adder output register is free.
  case (i_mnr_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT]) inside
    0: i_mnr_fe6_if.rdy = (add_cnt >= 12) && (~o_add_fe_if.val || (o_add_fe_if.val && o_add_fe_if.rdy));
    default: i_mnr_fe6_if.rdy = 0;
  endcase

  // Tags 0 and 1 are written to local storage and never stall; tag 2 goes to
  // the module output and follows its back-pressure.
  case (i_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT]) inside
    0: i_add_fe_if.rdy = 1;
    1: i_add_fe_if.rdy = 1;
    2: i_add_fe_if.rdy = (~o_mul_fe12_if.val || (o_mul_fe12_if.val && o_mul_fe12_if.rdy));
    default: i_add_fe_if.rdy = 0;
  endcase

  // Tag 0 is fed back into the subtractor; tag 1 is the second half of the
  // output and must wait until the first six output beats have been produced.
  case (i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT]) inside
    0: i_sub_fe_if.rdy = (~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy));
    1: i_sub_fe_if.rdy = (out_cnt >= 6) && (~o_mul_fe12_if.val || (o_mul_fe12_if.val && o_mul_fe12_if.rdy));
    default: i_sub_fe_if.rdy = 0;
  endcase

  // Tag 0 (aa) has dedicated storage; tag 1 (bb) overwrites b1 and therefore
  // waits until b1 has been read by the adds; tag 2 goes to the subtractor.
  case (i_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT]) inside
    0: i_mul_fe6_if.rdy = 1;
    1: i_mul_fe6_if.rdy = add_cnt >= 12;
    2: i_mul_fe6_if.rdy = (~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy));
    default: i_mul_fe6_if.rdy = 0;
  endcase
end

always_ff @ (posedge i_clk) begin
  if (i_rst) begin
    o_mul_fe12_if.reset_source();
    o_mnr_fe6_if.reset_source();
    o_mul_fe6_if.reset_source();
    o_sub_fe_if.reset_source();
    o_add_fe_if.reset_source();
    add_cnt <= 0;
    sub_cnt <= 0;
    mul_cnt <= 0;
    mnr_cnt <= 0;
    out_cnt <= 0;
    {a1, a0} <= 0;
    {b1, b0} <= 0;
    start <= 0;
    bb_val <= 0;
    b0_val <= 0;
  end else begin

    // Default: drop valid once the downstream has taken the beat. The flow
    // tasks further down may override this with a new beat in the same cycle.
    if (o_mul_fe6_if.rdy) o_mul_fe6_if.val <= 0;
    if (o_mul_fe12_if.rdy) o_mul_fe12_if.val <= 0;
    if (o_sub_fe_if.rdy) o_sub_fe_if.val <= 0;
    if (o_add_fe_if.rdy) o_add_fe_if.val <= 0;
    if (o_mnr_fe6_if.rdy) o_mnr_fe6_if.val <= 0;

    if (i_mul_fe12_if.val && i_mul_fe12_if.rdy) begin
      // Latch input: {a1, a0} and {b1, b0} each act as a 12-entry shift
      // register, so after 12 beats *0 holds beats 0-5 and *1 holds beats 6-11.
      {a1, a0} <= {i_mul_fe12_if.dat[0 +: $bits(FE_TYPE)], a1, a0[5:1]};
      {b1, b0} <= {i_mul_fe12_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)], b1, b0[5:1]};
    end

    // Latch multiplier result aa (tag 0). Ready for tag 0 is unconditional, so
    // every acknowledged beat has to be stored; additionally gating this latch
    // on add_cnt would silently discard beats that were already accepted.
    if (i_mul_fe6_if.val && i_mul_fe6_if.rdy && i_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 0) begin
      aa <= {i_mul_fe6_if.dat, aa[5:1]};
    end

    // Latch multiplier result bb (tag 1) into b1; complete once eop is seen.
    if (i_mul_fe6_if.val && i_mul_fe6_if.rdy && add_cnt >= 12 && i_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 1) begin
      b1 <= {i_mul_fe6_if.dat, b1[5:1]};
      if (i_mul_fe6_if.eop) bb_val <= 1;
    end

    // Adder results tag 0 replace the contents of a0.
    if (i_add_fe_if.val && i_add_fe_if.rdy && i_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 0) begin
      a0 <= {i_add_fe_if.dat, a0[5:1]};
    end

    // Adder results tag 1 replace the contents of b0.
    // NOTE(review): b0_val is raised on the first returned beat, whereas bb_val
    // waits for eop. Confirm that b0[0] already holds its final value when the
    // mul_cnt == 12 read below happens.
    if (i_add_fe_if.val && i_add_fe_if.rdy && i_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 1) begin
      b0 <= {i_add_fe_if.dat, b0[5:1]};
      b0_val <= 1;
    end

    if (i_mul_fe12_if.rdy && i_mul_fe12_if.val) begin
      // Last input beat marks the operation as in flight (blocks further input).
      if(i_mul_fe12_if.eop) start <= 1;
      // Caller's control word is captured on the first beat for the output.
      if(i_mul_fe12_if.sop) o_mul_fe12_if.ctl <= i_mul_fe12_if.ctl;
    end

    // Multiplier input flow: beats 0-11 come straight from the input stream,
    // beats 12-17 multiply the two stored sums.
    case (mul_cnt) inside
      0,1,2,3,4,5: fe6_mul(i_mul_fe12_if.val, i_mul_fe12_if.dat[0 +: $bits(FE_TYPE)], i_mul_fe12_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)], mul_cnt);
      6,7,8,9,10,11: fe6_mul(i_mul_fe12_if.val, i_mul_fe12_if.dat[0 +: $bits(FE_TYPE)], i_mul_fe12_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)], mul_cnt);
      12,13,14,15,16,17: fe6_mul(b0_val, b0[mul_cnt%6], a0[mul_cnt%6], mul_cnt);
    endcase

    // Adder input flow: the two operand-half sums first, then aa + mnr(bb).
    case (add_cnt) inside
      0,1,2,3,4,5: fe6_add(start, a1[add_cnt%6], a0[add_cnt%6], add_cnt);
      6,7,8,9,10,11: fe6_add(start, b0[add_cnt%6], b1[add_cnt%6], add_cnt);
      12,13,14,15,16,17: fe6_add(i_mnr_fe6_if.val, aa[add_cnt%6], i_mnr_fe6_if.dat, add_cnt);
    endcase

    // Sub input flow: tag-2 product paired with aa, then that result paired
    // with bb (held in b1).
    case (sub_cnt) inside
      0,1,2,3,4,5: fe6_sub(i_mul_fe6_if.val && i_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 2, i_mul_fe6_if.dat, aa[sub_cnt%6], sub_cnt);
      6,7,8,9,10,11: fe6_sub(i_sub_fe_if.val, i_sub_fe_if.dat, b1[sub_cnt%6], sub_cnt);
    endcase

    // mnr flow: stream bb out of b1 once it is complete.
    case (mnr_cnt) inside
      0,1,2,3,4,5: fe6_mnr(bb_val, b1[mnr_cnt%6], mnr_cnt);
    endcase

    // Final output flow: six beats from the adder (tag 2) followed by six
    // beats from the subtractor (tag 1).
    if (~o_mul_fe12_if.val || (o_mul_fe12_if.val && o_mul_fe12_if.rdy)) begin
      case (out_cnt) inside
        0,1,2,3,4,5: begin
          o_mul_fe12_if.dat <= i_add_fe_if.dat;
          o_mul_fe12_if.sop <= out_cnt == 0;
          o_mul_fe12_if.eop <= 0;
          if (i_add_fe_if.val && i_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 2) begin
            o_mul_fe12_if.val <= 1;
            out_cnt <= out_cnt + 1;
          end
        end
        6,7,8,9,10,11: begin
          o_mul_fe12_if.dat <= i_sub_fe_if.dat;
          o_mul_fe12_if.sop <= 0;
          o_mul_fe12_if.eop <= out_cnt == 11;
          if (i_sub_fe_if.val && i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 1) begin
            o_mul_fe12_if.val <= 1;
            out_cnt <= out_cnt + 1;
          end
        end
        default: begin
          // All twelve output beats have been sent: return to idle so a new
          // input stream can be accepted.
          out_cnt <= 0;
          bb_val <= 0;
          b0_val <= 0;
          mnr_cnt <= 0;
          mul_cnt <= 0;
          add_cnt <= 0;
          sub_cnt <= 0;
          start <= 0;
        end
      endcase
    end
  end
end

// Task for fe6_mul
// Loads one beat {b, a} into the fe6 multiplier output register when it is
// free. sop/eop and the operation tag are derived from cnt (three packets of
// six beats); cnt advances only when the beat is valid.
task automatic fe6_mul(input logic val, input logic [$bits(FE_TYPE)-1:0] a, b, ref [CNT_BITS-1:0] cnt);
  if (~o_mul_fe6_if.val || (o_mul_fe6_if.val && o_mul_fe6_if.rdy)) begin
    o_mul_fe6_if.sop <= cnt == 0 || cnt == 6 || cnt == 12;
    o_mul_fe6_if.eop <= cnt == 5 || cnt == 11 || cnt == 17;
    o_mul_fe6_if.dat <= {b, a};
    o_mul_fe6_if.val <= val;
    // The tag is only rewritten on the first beat of each packet.
    if (cnt == 0) o_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= 0;
    if (cnt == 6) o_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= 1;
    if (cnt == 12) o_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= 2;
    if (val) cnt = cnt + 1;
  end
endtask

// Task for fe6_add
// Same scheme as fe6_mul, targeting the adder output register.
task automatic fe6_add(input logic val, input logic [$bits(FE_TYPE)-1:0] a, b, ref [CNT_BITS-1:0] cnt);
  if (~o_add_fe_if.val || (o_add_fe_if.val && o_add_fe_if.rdy)) begin
    o_add_fe_if.sop <= cnt == 0 || cnt == 6 || cnt == 12;
    o_add_fe_if.eop <= cnt == 5 || cnt == 11 || cnt == 17;
    o_add_fe_if.dat <= {b, a};
    o_add_fe_if.val <= val;
    if (cnt == 0) o_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= 0;
    if (cnt == 6) o_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= 1;
    if (cnt == 12) o_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= 2;
    if (val) cnt = cnt + 1;
  end
endtask

// Task for fe6_sub
// Same scheme, two packets of six beats, targeting the subtractor. The data
// word is {b, a}; presumably the subtractor computes a - b - TODO confirm.
task automatic fe6_sub(input logic val, input logic [$bits(FE_TYPE)-1:0] a, b, ref [CNT_BITS-1:0] cnt);
  if (~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy)) begin
    o_sub_fe_if.sop <= cnt == 0 || cnt == 6;
    o_sub_fe_if.eop <= cnt == 5 || cnt == 11;
    o_sub_fe_if.dat <= {b, a};
    o_sub_fe_if.val <= val;
    if (cnt == 0) o_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= 0;
    if (cnt == 6) o_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= 1;
    if (val) cnt = cnt + 1;
  end
endtask

// Task for fe6_mnr
// Single packet of six single-element beats to the non-residue multiplier,
// always tagged 0.
task automatic fe6_mnr(input logic val, input logic [$bits(FE_TYPE)-1:0] a, ref [CNT_BITS-1:0] cnt);
  if (~o_mnr_fe6_if.val || (o_mnr_fe6_if.val && o_mnr_fe6_if.rdy)) begin
    o_mnr_fe6_if.sop <= cnt == 0;
    o_mnr_fe6_if.eop <= cnt == 5;
    o_mnr_fe6_if.dat <= a;
    o_mnr_fe6_if.val <= val;
    o_mnr_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= 0;
    if (val) cnt = cnt + 1;
  end
endtask

endmodule

View File

@ -136,8 +136,12 @@ always_ff @ (posedge i_clk) begin
a <= {i_mul_fe2_if.dat, {a[2], a[1], a[0][1]}};
end
if (i_mul_fe6_if.rdy && i_mul_fe6_if.eop && i_mul_fe6_if.val)
start <= 1;
if (i_mul_fe6_if.rdy && i_mul_fe6_if.val) begin
if(i_mul_fe6_if.eop) start <= 1;
if(i_mul_fe6_if.sop) o_mul_fe6_if.ctl <= i_mul_fe6_if.ctl;
end
// Multiplier input flow
case (mul_cnt) inside
@ -182,7 +186,6 @@ always_ff @ (posedge i_clk) begin
mul_cnt <= mul_cnt + 1;
end
end
default: if (start==0) mul_cnt <= 0;
endcase
// Adder input flow
@ -196,7 +199,6 @@ always_ff @ (posedge i_clk) begin
12,13: fe2_add(i_sub_fe_if.val && i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 2 , i_sub_fe_if.dat, a[1][add_cnt%2], add_cnt);
14,15: fe2_add(i_mnr_fe2_if.val, i_mnr_fe2_if.dat, a[0][add_cnt%2], add_cnt);
16,17: fe2_add(i_mnr_fe2_if.val, b[1][add_cnt%2], i_mnr_fe2_if.dat, add_cnt);
default: if (start==0) add_cnt <= 0;
endcase
// Sub input flow
@ -207,14 +209,12 @@ always_ff @ (posedge i_clk) begin
6,7: fe2_sub(i_sub_fe_if.val, i_sub_fe_if.dat, a[2][sub_cnt%2], sub_cnt);
8,9: fe2_sub(i_sub_fe_if.val, i_sub_fe_if.dat, a[0][sub_cnt%2], sub_cnt);
10,11: fe2_sub(add_cnt >= 18, b[0][sub_cnt%2], a[2][sub_cnt%2], sub_cnt);
default: if (start==0) sub_cnt <= 0;
endcase
// mnr flow
case (mnr_cnt) inside
0,1: fe2_mnr(i_sub_fe_if.val && i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 3, i_sub_fe_if.dat, mnr_cnt);
2,3: fe2_mnr(1, a[2][mnr_cnt%2], mnr_cnt);
default: if (start==0) mnr_cnt <= 0;
endcase
// Final output flow
@ -222,8 +222,6 @@ always_ff @ (posedge i_clk) begin
case (out_cnt) inside
0,1: begin
o_mul_fe6_if.dat <= i_add_fe_if.dat;
o_mul_fe6_if.ctl <= i_add_fe_if.ctl;
o_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= 0;
o_mul_fe6_if.sop <= out_cnt == 0;
o_mul_fe6_if.eop <= 0;
if (i_add_fe_if.val && i_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 7) begin
@ -251,6 +249,10 @@ always_ff @ (posedge i_clk) begin
end
default: begin
out_cnt <= 0;
mnr_cnt <= 0;
mul_cnt <= 0;
add_cnt <= 0;
sub_cnt <= 0;
start <= 0;
end
endcase

View File

@ -26,7 +26,8 @@ module ec_fe12_arithmetic
parameter type FE6_TYPE,
parameter type FE12_TYPE,
parameter CTL_BITS = 12,
parameter OVR_WRT_BIT = 8 // From this bit 4 bits are used for internal control, 2 bits for resource sharing - 6 total
parameter OVR_WRT_BIT = 8 // From this bit 4 bits are used for internal control,
// 2 bits for resource sharing, 1 bit for square control, 1 bit for sparse mult by c0, c1, c4
)(
input i_clk, i_rst,
// Interface to FE6_TYPE multiplier (mod P)
@ -53,6 +54,8 @@ module ec_fe12_arithmetic
);
localparam NUM_OVR_WRT_BIT = 4;
localparam SQR_BIT = OVR_WRT_BIT + 6;
if_axi_stream #(.DAT_BITS($bits(FE6_TYPE)), .CTL_BITS(CTL_BITS)) add_if_fe6_i [1:0] (i_clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE6_TYPE)), .CTL_BITS(CTL_BITS)) add_if_fe6_o [1:0] (i_clk);
@ -187,6 +190,7 @@ always_ff @ (posedge i_clk) begin
eq_val <= 0;
eq_wait <= 0;
rdy_l <= 0;
end else begin
i_mul_fe6_if.rdy <= 1;
@ -260,7 +264,7 @@ always_ff @ (posedge i_clk) begin
// Issue new multiplies
if (~eq_wait[0] && i_mul_fe12_if.val && eq_val[2] && eq_val[3]) begin // 0. aa = mul(a[0], b[0])
fe6_multiply(0, i_mul_fe12_if.dat[0 +: $bits(FE6_TYPE)],
i_mul_fe12_if.dat[$bits(FE12_TYPE) +: $bits(FE6_TYPE)]);
i_mul_fe12_if.dat[$bits(FE12_TYPE) +: $bits(FE6_TYPE)]);
end else
if (~eq_wait[1] && i_mul_fe12_if.val) begin // 1. bb = mul(a[1], b[1])
fe6_multiply(1, i_mul_fe12_if.dat[$bits(FE6_TYPE) +: $bits(FE6_TYPE)],
@ -356,13 +360,11 @@ task fe6_addition(input int unsigned ctl, input FE6_TYPE a, b);
endtask
// Task for using mult
task fe6_multiply(input int unsigned ctl, input FE6_TYPE a, b, input logic [1:0] en = 2'b11);
task fe6_multiply(input int unsigned ctl, input FE6_TYPE a, b);
if (~o_mul_fe6_if.val || (o_mul_fe6_if.val && o_mul_fe6_if.rdy)) begin
o_mul_fe6_if.val <= 1;
if (en[0])
o_mul_fe6_if.dat[0 +: $bits(FE6_TYPE)] <= a;
if (en[1])
o_mul_fe6_if.dat[$bits(FE6_TYPE) +: $bits(FE6_TYPE)] <= b;
o_mul_fe6_if.dat[0 +: $bits(FE6_TYPE)] <= a;
o_mul_fe6_if.dat[$bits(FE6_TYPE) +: $bits(FE6_TYPE)] <= b;
o_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
eq_wait[ctl] <= 1;
end
@ -379,4 +381,5 @@ task fe6_mnr(input int unsigned ctl, input FE6_TYPE a, input logic en = 1'b1);
end
endtask
endmodule

View File

@ -0,0 +1,107 @@
/*
Multiplies by non-residue for Fp6 towering.
_s in the name represents the input is a stream starting at c0.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Streaming multiply-by-non-residue for an Fp6 element.
//
// The input arrives as six base-field beats on i_mnr_fe6_if. Beats 0-3 are
// stored locally in t, beats 4-5 are forwarded to an external Fp2
// multiply-by-non-residue unit through o_mnr_fe2_if. The output stream on
// o_mnr_fe6_if consists of the two beats returned on i_mnr_fe2_if followed by
// the four stored beats in their original order.
//
// Control words are not forwarded by this module; only val/sop/eop/dat are
// driven on the two source interfaces.
module fe6_mul_by_nonresidue_s
#(
  parameter type FE_TYPE  // Base field element type
)(
  input i_clk, i_rst,
  if_axi_stream.source o_mnr_fe6_if,  // Result stream (6 beats)
  if_axi_stream.sink   i_mnr_fe6_if,  // Operand stream (6 beats)
  if_axi_stream.source o_mnr_fe2_if,  // Request to the Fp2 non-residue unit (2 beats)
  if_axi_stream.sink   i_mnr_fe2_if   // Result from the Fp2 non-residue unit (2 beats)
);

// mnr_cnt counts accepted input beats, out_cnt counts produced output beats.
logic [2:0] mnr_cnt, out_cnt;
// Holds input beats 0-3 until they are replayed as output beats 2-5.
FE_TYPE [3:0] t;

always_comb begin
  case (mnr_cnt) inside
    // Beats 0-3 go into local storage and can always be accepted.
    0,1,2,3: i_mnr_fe6_if.rdy = 1;
    // Beats 4-5 are forwarded into the o_mnr_fe2_if register, so ready has to
    // track that register. Deriving it from o_mnr_fe6_if (as before) lets the
    // handshake disagree with the logic that actually consumes the beat, which
    // can drop or duplicate a beat under back-pressure.
    4,5: i_mnr_fe6_if.rdy = ~o_mnr_fe2_if.val || (o_mnr_fe2_if.val && o_mnr_fe2_if.rdy);
    // All six beats received: stall until the output stream has finished.
    default: i_mnr_fe6_if.rdy = 0;
  endcase
  // Fp2 results are forwarded directly into the output register.
  i_mnr_fe2_if.rdy = (~o_mnr_fe6_if.val || (o_mnr_fe6_if.val && o_mnr_fe6_if.rdy));
end

always_ff @ (posedge i_clk) begin
  if (i_rst) begin
    o_mnr_fe2_if.reset_source();
    o_mnr_fe6_if.reset_source();
    mnr_cnt <= 0;
    out_cnt <= 0;
    t <= 0;
  end else begin

    // Default: clear valid once a beat has been taken; may be overridden below.
    if (o_mnr_fe6_if.val && o_mnr_fe6_if.rdy) o_mnr_fe6_if.val <= 0;
    if (o_mnr_fe2_if.val && o_mnr_fe2_if.rdy) o_mnr_fe2_if.val <= 0;

    // Input side.
    case (mnr_cnt) inside
      0,1,2,3: begin
        // Shift the first four beats into t (beat 0 ends up in t[0]).
        if (i_mnr_fe6_if.val && i_mnr_fe6_if.rdy) begin
          t <= {i_mnr_fe6_if.dat, t[3:1]};
          mnr_cnt <= mnr_cnt + 1;
        end
      end
      4,5: begin
        // Forward the last two beats to the Fp2 non-residue unit.
        if (~o_mnr_fe2_if.val || (o_mnr_fe2_if.val && o_mnr_fe2_if.rdy)) begin
          o_mnr_fe2_if.val <= i_mnr_fe6_if.val;
          o_mnr_fe2_if.sop <= mnr_cnt == 4;
          o_mnr_fe2_if.eop <= mnr_cnt == 5;
          o_mnr_fe2_if.dat <= i_mnr_fe6_if.dat;
          if (i_mnr_fe6_if.val) begin
            mnr_cnt <= mnr_cnt + 1;
          end
        end
      end
    endcase

    // Output side.
    case (out_cnt) inside
      0,1: begin
        // First two output beats are the Fp2 non-residue results.
        if (~o_mnr_fe6_if.val || (o_mnr_fe6_if.val && o_mnr_fe6_if.rdy)) begin
          o_mnr_fe6_if.val <= i_mnr_fe2_if.val;
          o_mnr_fe6_if.sop <= out_cnt == 0;
          o_mnr_fe6_if.eop <= 0;
          o_mnr_fe6_if.dat <= i_mnr_fe2_if.dat;
          if (i_mnr_fe2_if.val) begin
            out_cnt <= out_cnt + 1;
          end
        end
      end
      2,3,4,5: begin
        // Remaining four beats are replayed from local storage.
        if (~o_mnr_fe6_if.val || (o_mnr_fe6_if.val && o_mnr_fe6_if.rdy)) begin
          o_mnr_fe6_if.val <= 1;
          o_mnr_fe6_if.sop <= 0;
          o_mnr_fe6_if.eop <= out_cnt == 5;
          o_mnr_fe6_if.dat <= t[out_cnt-2];
          out_cnt <= out_cnt + 1;
        end
      end
      default: begin
        // Output complete: re-arm both counters for the next element.
        mnr_cnt <= 0;
        out_cnt <= 0;
      end
    endcase
  end
end
endmodule

View File

@ -0,0 +1,301 @@
/*
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
`timescale 1ps/1ps
// Testbench for the streaming Fp12 multiplier (ec_fe12_mul_s).
//
// Builds the full tower of streaming units (Fp multiplier, Fp adder and
// subtractor, Fp2 and Fp6 multipliers, Fp2 and Fp6 non-residue multipliers)
// and shares the single adder, subtractor and Fp2 non-residue unit between
// them with resource_share arbiters. Random operands are streamed in and the
// result is compared against bls12_381_pkg::fe12_mul().
module ec_fe12_mul_s_tb ();

import common_pkg::*;
import bls12_381_pkg::*;

parameter type FE_TYPE   = bls12_381_pkg::fe_t;
parameter type FE2_TYPE  = bls12_381_pkg::fe2_t;
parameter type FE6_TYPE  = bls12_381_pkg::fe6_t;
parameter type FE12_TYPE = bls12_381_pkg::fe12_t;
parameter P              = bls12_381_pkg::P;

localparam CTL_BITS = 32;
localparam CLK_PERIOD = 100;  // In timescale units (ps)

logic clk, rst;

// Reset is low for 20 clock periods, high for 20, then low for the rest of the run.
initial begin
  rst = 0;
  repeat(2) #(20*CLK_PERIOD) rst = ~rst;
end

initial begin
  clk = 0;
  forever #(CLK_PERIOD/2) clk = ~clk;
end

// *_o_if carry requests towards an arithmetic unit (two operands per beat),
// *_i_if carry its results back (one element per beat). For the arrays, the
// highest index is the shared unit and the lower indices are its clients.
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_o_if (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS))   mul_fe_i_if (clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_o_if [4:0] (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS))   add_fe_i_if [4:0] (clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_o_if [4:0] (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS))   sub_fe_i_if [4:0] (clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_o_if (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS))   mul_fe2_i_if (clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe6_o_if (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS))   mul_fe6_i_if (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS))   mnr_fe2_o_if [2:0] (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS))   mnr_fe2_i_if [2:0] (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS))   mnr_fe6_o_if (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS))   mnr_fe6_i_if (clk);

// Stimulus and response streams of the device under test.
if_axi_stream #(.DAT_BYTS(($bits(FE_TYPE)+7)/8), .CTL_BITS(CTL_BITS))   o_mul_fe12_if (clk);
if_axi_stream #(.DAT_BYTS((2*$bits(FE_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) i_mul_fe12_if (clk);

// Base-field modular multiplier.
ec_fp_mult_mod #(
  .P             ( P        ),
  .KARATSUBA_LVL ( 3        ),
  .CTL_BITS      ( CTL_BITS )
)
ec_fp_mult_mod (
  .i_clk( clk ),
  .i_rst( rst ),
  .i_mul ( mul_fe_o_if ),
  .o_mul ( mul_fe_i_if )
);

// Shared base-field modular adder.
adder_pipe # (
  .BITS     ( bls12_381_pkg::DAT_BITS ),
  .P        ( P                       ),
  .CTL_BITS ( CTL_BITS                ),
  .LEVEL    ( 2                       )
)
adder_pipe (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_add ( add_fe_o_if[4] ),
  .o_add ( add_fe_i_if[4] )
);

// Shared base-field modular subtractor.
subtractor_pipe # (
  .BITS     ( bls12_381_pkg::DAT_BITS ),
  .P        ( P                       ),
  .CTL_BITS ( CTL_BITS                ),
  .LEVEL    ( 2                       )
)
subtractor_pipe (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_sub ( sub_fe_o_if[4] ),
  .o_sub ( sub_fe_i_if[4] )
);

// Streaming Fp2 multiplier (adder/subtractor client 0).
ec_fe2_mul_s #(
  .FE_TYPE  ( FE_TYPE  ),
  .CTL_BITS ( CTL_BITS )
)
ec_fe2_mul_s (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .o_mul_fe2_if ( mul_fe2_i_if ),
  .i_mul_fe2_if ( mul_fe2_o_if ),
  .o_add_fe_if ( add_fe_o_if[0] ),
  .i_add_fe_if ( add_fe_i_if[0] ),
  .o_sub_fe_if ( sub_fe_o_if[0] ),
  .i_sub_fe_if ( sub_fe_i_if[0] ),
  .o_mul_fe_if ( mul_fe_o_if ),
  .i_mul_fe_if ( mul_fe_i_if )
);

// Shared streaming Fp2 multiply-by-non-residue (adder/subtractor client 1).
fe2_mul_by_nonresidue_s #(
  .FE_TYPE ( FE_TYPE )
)
fe2_mul_by_nonresidue_s (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .o_mnr_fe2_if ( mnr_fe2_i_if[2] ),
  .i_mnr_fe2_if ( mnr_fe2_o_if[2] ),
  .o_add_fe_if ( add_fe_o_if[1] ),
  .i_add_fe_if ( add_fe_i_if[1] ),
  .o_sub_fe_if ( sub_fe_o_if[1] ),
  .i_sub_fe_if ( sub_fe_i_if[1] )
);

// Streaming Fp6 multiplier (adder/subtractor client 2, Fp2 mnr client 0).
ec_fe6_mul_s #(
  .FE_TYPE     ( FE_TYPE  ),
  .FE2_TYPE    ( FE2_TYPE ),
  .FE6_TYPE    ( FE6_TYPE ),
  .CTL_BITS    ( CTL_BITS ),
  .OVR_WRT_BIT ( 0        )
)
ec_fe6_mul_s (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .o_mul_fe2_if ( mul_fe2_o_if ),
  .i_mul_fe2_if ( mul_fe2_i_if ),
  .o_add_fe_if ( add_fe_o_if[2] ),
  .i_add_fe_if ( add_fe_i_if[2] ),
  .o_sub_fe_if ( sub_fe_o_if[2] ),
  .i_sub_fe_if ( sub_fe_i_if[2] ),
  .o_mnr_fe2_if ( mnr_fe2_o_if[0] ),
  .i_mnr_fe2_if ( mnr_fe2_i_if[0] ),
  .o_mul_fe6_if ( mul_fe6_i_if ),
  .i_mul_fe6_if ( mul_fe6_o_if )
);

// Streaming Fp6 multiply-by-non-residue (Fp2 mnr client 1).
fe6_mul_by_nonresidue_s #(
  .FE_TYPE ( FE_TYPE )
)
fe6_mul_by_nonresidue_s (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .o_mnr_fe2_if ( mnr_fe2_o_if[1] ),
  .i_mnr_fe2_if ( mnr_fe2_i_if[1] ),
  .o_mnr_fe6_if ( mnr_fe6_i_if ),
  .i_mnr_fe6_if ( mnr_fe6_o_if )
);

// Device under test (adder/subtractor client 3). CTL_BITS is passed
// explicitly so the instance matches the 32-bit control bus used by every
// other unit in this bench instead of relying on the module default.
ec_fe12_mul_s #(
  .FE_TYPE     ( FE_TYPE  ),
  .CTL_BITS    ( CTL_BITS ),
  .OVR_WRT_BIT ( 16       )
)
ec_fe12_mul_s (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .o_mul_fe6_if ( mul_fe6_o_if ),
  .i_mul_fe6_if ( mul_fe6_i_if ),
  .o_add_fe_if ( add_fe_o_if[3] ),
  .i_add_fe_if ( add_fe_i_if[3] ),
  .o_sub_fe_if ( sub_fe_o_if[3] ),
  .i_sub_fe_if ( sub_fe_i_if[3] ),
  .o_mnr_fe6_if ( mnr_fe6_o_if ),
  .i_mnr_fe6_if ( mnr_fe6_i_if ),
  .o_mul_fe12_if ( o_mul_fe12_if ),
  .i_mul_fe12_if ( i_mul_fe12_if )
);

// Arbiter sharing the single adder between the four clients.
resource_share # (
  .NUM_IN       ( 4                ),
  .DAT_BITS     ( 2*$bits(FE_TYPE) ),
  .CTL_BITS     ( CTL_BITS         ),
  .OVR_WRT_BIT  ( 8                ),
  .PIPELINE_IN  ( 3                ),
  .PIPELINE_OUT ( 2                )
)
resource_share_fe_add (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_axi ( add_fe_o_if[3:0] ),
  .o_res ( add_fe_o_if[4] ),
  .i_res ( add_fe_i_if[4] ),
  .o_axi ( add_fe_i_if[3:0] )
);

// Arbiter sharing the single subtractor between the four clients.
resource_share # (
  .NUM_IN       ( 4                ),
  .DAT_BITS     ( 2*$bits(FE_TYPE) ),
  .CTL_BITS     ( CTL_BITS         ),
  .OVR_WRT_BIT  ( 8                ),
  .PIPELINE_IN  ( 2                ),
  .PIPELINE_OUT ( 4                )
)
resource_share_fe_sub (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_axi ( sub_fe_o_if[3:0] ),
  .o_res ( sub_fe_o_if[4] ),
  .i_res ( sub_fe_i_if[4] ),
  .o_axi ( sub_fe_i_if[3:0] )
);

// Arbiter sharing the Fp2 non-residue unit between the Fp6 multiplier and the
// Fp6 non-residue unit.
resource_share # (
  .NUM_IN       ( 2                ),
  .DAT_BITS     ( 2*$bits(FE_TYPE) ),
  .CTL_BITS     ( CTL_BITS         ),
  .OVR_WRT_BIT  ( 12               ),
  .PIPELINE_IN  ( 2                ),
  .PIPELINE_OUT ( 2                )
)
resource_share_fe2_mnr (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_axi ( mnr_fe2_o_if[1:0] ),
  .o_res ( mnr_fe2_o_if[2] ),
  .i_res ( mnr_fe2_i_if[2] ),
  .o_axi ( mnr_fe2_i_if[1:0] )
);

// Runs ten random multiplications and aborts the simulation on the first
// mismatch against the software reference.
task test();
  fe12_t a, b, f_exp, f_out;
  integer signed get_len;
  integer start_time, finish_time;
  logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat, dat_in;
  $display("Running test ...");

  for (int lp = 0; lp < 10; lp++) begin
    $display("Loop %d", lp);
    dat_in = 0;
    // Build random operands and pack them interleaved: each 768-bit slot holds
    // one element of b (upper part) and one of a (lower part), in c0-first order.
    for (int i = 0; i < 2; i++)
      for (int j = 0; j < 3; j++)
        for (int k = 0; k < 2; k++) begin
          a[i][j][k] = random_vector(384/8) % P;
          b[i][j][k] = random_vector(384/8) % P;
          dat_in[(i*6+j*2+k)*768 +: 2*$bits(FE_TYPE)] = {b[i][j][k], a[i][j][k]};
        end

    f_exp = fe12_mul(a, b);

    // Drive the stimulus and collect the response concurrently.
    start_time = $time;
    fork
      i_mul_fe12_if.put_stream(dat_in, 12*768/8);
      o_mul_fe12_if.get_stream(get_dat, get_len);
    join
    finish_time = $time;

    // Unpack the result: one element per 384-bit slot.
    for (int i = 0; i < 2; i++)
      for (int j = 0; j < 3; j++)
        for (int k = 0; k < 2; k++)
          f_out[i][j][k] = get_dat[(i*6+j*2+k)*384 +: $bits(FE_TYPE)];

    if (f_exp != f_out) begin
      $display("Input a was:");
      print_fe12(a);
      $display("Input b was:");
      print_fe12(b);
      $display("Output was:");
      print_fe12(f_out);
      $display("Output Expected:");
      print_fe12(f_exp);
      $fatal(1, "%m %t ERROR: output was wrong", $time);
    end

    $display("test PASSED in %d clocks", (finish_time-start_time)/CLK_PERIOD);
  end
endtask

initial begin
  i_mul_fe12_if.reset_source();
  o_mul_fe12_if.rdy = 0;
  // Wait until the reset pulse generated above has finished.
  #10ns;

  test();

  #50ns $finish();
end

endmodule

View File

@ -16,7 +16,7 @@
*/
`timescale 1ps/1ps
module ec_fe6_mul_tb ();
module ec_fe6_mul_s_tb ();
import common_pkg::*;
import bls12_381_pkg::*;
@ -200,20 +200,20 @@ task test();
b[j][k] = random_vector(384/8) % P;
dat_in[(j*2+k)*768 +: 2*$bits(FE_TYPE)] = {b[j][k], a[j][k]};
end
f_exp = fe6_mul(a, b);
start_time = $time;
fork
i_mul_fe6_if.put_stream(dat_in, 6*768/8);
o_mul_fe6_if.get_stream(get_dat, get_len);
join
finish_time = $time;
for (int j = 0; j < 3; j++)
for (int k = 0; k < 2; k++)
f_out[j][k] = get_dat[(j*2+k)*384 +: $bits(FE_TYPE)];
if (f_exp != f_out) begin
$display("Input a was:");
print_fe6(a);
@ -225,7 +225,7 @@ task test();
print_fe6(f_exp);
$fatal(1, "%m %t ERROR: output was wrong", $time);
end
$display("test PASSED in %d clocks", (finish_time-start_time)/CLK_PERIOD);
end

View File

@ -71,7 +71,7 @@ end
initial begin
clk = 0;
forever #CLK_PERIOD clk = ~clk;
forever #(CLK_PERIOD/2) clk = ~clk;
end
ec_fe2_arithmetic #(