diff --git a/ip_cores/ec/src/rtl/ec_fe12_mul_s.sv b/ip_cores/ec/src/rtl/ec_fe12_mul_s.sv
new file mode 100644
index 0000000..ae55b2f
--- /dev/null
+++ b/ip_cores/ec/src/rtl/ec_fe12_mul_s.sv
@@ -0,0 +1,261 @@
+/*
+  This provides the interface to perform Fp12 field element multiplication, using the Karatsuba algorithm.
+
+  Inputs must be interleaved starting at c0 (i.e. clock 0 = {b.c0, a.c0}).
+  The _s suffix in the name indicates that the input is a stream starting at c0.
+
+  Copyright (C) 2019  Benjamin Devlin and Zcash Foundation
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+*/
+
+module ec_fe12_mul_s
+#(
+  parameter type FE_TYPE,                // Base field element type
+  parameter      CTL_BITS    = 12,       // Not referenced inside this module
+  parameter      OVR_WRT_BIT = 0         // LSB of the ctl field used to tag internal mul/add/sub/mnr requests
+)(
+  input i_clk, i_rst,
+  // Interface to FE6_TYPE multiplier (mod P), 2*FE_TYPE data width
+  if_axi_stream.source o_mul_fe6_if,
+  if_axi_stream.sink   i_mul_fe6_if,
+  // Interface to FE_TYPE adder (mod P), 2*FE_TYPE data width
+  if_axi_stream.source o_add_fe_if,
+  if_axi_stream.sink   i_add_fe_if,
+  // Interface to FE_TYPE subtractor (mod P), 2*FE_TYPE data width
+  if_axi_stream.source o_sub_fe_if,
+  if_axi_stream.sink   i_sub_fe_if,
+  // Interface to FE6_TYPE multiply by non-residue, FE_TYPE data width
+  if_axi_stream.source o_mnr_fe6_if,
+  if_axi_stream.sink   i_mnr_fe6_if,
+  // Interface to FE12_TYPE multiplier (mod P), 2*FE_TYPE data width
+  if_axi_stream.source o_mul_fe12_if,
+  if_axi_stream.sink   i_mul_fe12_if
+);
+
+localparam CNT_BITS = 5;
+localparam NUM_OVR_WRT = $clog2((1 << CNT_BITS)/2); // Only need half the bits for control
+
+// c0 = aa + mnr(bb), c1 = (a0+a1)*(b0+b1) - aa - bb, with aa = a0*b0, bb = a1*b1 (cf. bls12_381_pkg::fe12_mul())
+// Need storage to latch input stream, also used for temp storage (a0 <- a0+a1, b0 <- b0+b1, b1 <- bb)
+FE_TYPE [5:0] a0, a1, b0, b1, aa;
+logic [CNT_BITS-1:0] add_cnt, sub_cnt, mul_cnt, mnr_cnt, out_cnt;
+logic start, bb_val, b0_val;
+logic [2:0] b0_cnt; // Counts the (b0 + b1) sums written back into b0
+
+always_comb begin
+  i_mul_fe12_if.rdy = ~start && (~o_mul_fe6_if.val || (o_mul_fe6_if.val && o_mul_fe6_if.rdy)); // Input words go straight into the fe6 multiplier request register
+
+  case (i_mnr_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT]) inside
+    0: i_mnr_fe6_if.rdy = (add_cnt >= 12) && (~o_add_fe_if.val || (o_add_fe_if.val && o_add_fe_if.rdy)); // mnr(bb) feeds adder requests 12-17
+    default: i_mnr_fe6_if.rdy = 0;
+  endcase
+
+  case (i_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT]) inside
+    0: i_add_fe_if.rdy = 1; // (a0 + a1) sums, latched into a0
+    1: i_add_fe_if.rdy = 1; // (b0 + b1) sums, latched into b0
+    2: i_add_fe_if.rdy = (~o_mul_fe12_if.val || (o_mul_fe12_if.val && o_mul_fe12_if.rdy)); // aa + mnr(bb), forwarded as output words 0-5
+    default: i_add_fe_if.rdy = 0;
+  endcase
+
+  case (i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT]) inside
+    0: i_sub_fe_if.rdy = (~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy)); // NOTE(review): only consumed while sub_cnt is 6-11 - confirm results cannot return earlier
+    1: i_sub_fe_if.rdy = (out_cnt >= 6) && (~o_mul_fe12_if.val || (o_mul_fe12_if.val && o_mul_fe12_if.rdy)); // Forwarded as output words 6-11
+    default: i_sub_fe_if.rdy = 0;
+  endcase
+
+  case (i_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT]) inside
+    0: i_mul_fe6_if.rdy = 1; // aa, latched into its own storage
+    1: i_mul_fe6_if.rdy = add_cnt >= 12; // bb overwrites b1, so wait until b1 has been issued to the adder
+    2: i_mul_fe6_if.rdy = (~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy)); // Third product goes straight to the subtractor
+    default: i_mul_fe6_if.rdy = 0;
+  endcase
+
+end
+
+always_ff @ (posedge i_clk) begin
+  if (i_rst) begin
+    o_mul_fe12_if.reset_source();
+    o_mnr_fe6_if.reset_source();
+    o_mul_fe6_if.reset_source();
+    o_sub_fe_if.reset_source();
+    o_add_fe_if.reset_source();
+
+    add_cnt <= 0;
+    sub_cnt <= 0;
+    mul_cnt <= 0;
+    mnr_cnt <= 0;
+    out_cnt <= 0;
+
+    {a1, a0} <= 0;
+    {b1, b0} <= 0;
+
+    start <= 0;
+    bb_val <= 0;
+    b0_val <= 0;
+    b0_cnt <= 0;
+  end else begin
+    if (o_mul_fe6_if.rdy) o_mul_fe6_if.val <= 0;
+    if (o_mul_fe12_if.rdy) o_mul_fe12_if.val <= 0;
+    if (o_sub_fe_if.rdy) o_sub_fe_if.val <= 0;
+    if (o_add_fe_if.rdy) o_add_fe_if.val <= 0;
+    if (o_mnr_fe6_if.rdy) o_mnr_fe6_if.val <= 0;
+
+    if (i_mul_fe12_if.val && i_mul_fe12_if.rdy) begin
+      // Latch input (12 shifts leave the first six words in a0/b0 and the last six in a1/b1)
+      {a1, a0} <= {i_mul_fe12_if.dat[0 +: $bits(FE_TYPE)], a1, a0[5:1]};
+      {b1, b0} <= {i_mul_fe12_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)], b1, b0[5:1]};
+    end
+
+    // Latch multiplier results of aa, bb (aa on every handshake: its ready is unconditional, so no extra gating here)
+    if (i_mul_fe6_if.val && i_mul_fe6_if.rdy && i_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 0) begin
+      aa <= {i_mul_fe6_if.dat, aa[5:1]};
+    end
+
+    if (i_mul_fe6_if.val && i_mul_fe6_if.rdy && add_cnt >= 12 && i_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 1) begin
+      b1 <= {i_mul_fe6_if.dat, b1[5:1]};
+      if (i_mul_fe6_if.eop) bb_val <= 1;
+    end
+
+    if (i_add_fe_if.val && i_add_fe_if.rdy && i_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 0) begin
+      a0 <= {i_add_fe_if.dat, a0[5:1]};
+    end
+
+    if (i_add_fe_if.val && i_add_fe_if.rdy && i_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 1) begin
+      b0 <= {i_add_fe_if.dat, b0[5:1]};
+      b0_cnt <= b0_cnt + 1;
+      if (b0_cnt == 5) b0_val <= 1; // b0[0] only holds the first sum after all six have been shifted in
+    end
+
+    if (i_mul_fe12_if.rdy && i_mul_fe12_if.val) begin
+      if(i_mul_fe12_if.eop) start <= 1;
+      if(i_mul_fe12_if.sop) o_mul_fe12_if.ctl <= i_mul_fe12_if.ctl;
+    end
+
+    // Multiplier input flow (0-5: aa, 6-11: bb, both straight from the input stream; 12-17: (b0+b1) * (a0+a1))
+    case (mul_cnt) inside
+      0,1,2,3,4,5: fe6_mul(i_mul_fe12_if.val, i_mul_fe12_if.dat[0 +: $bits(FE_TYPE)], i_mul_fe12_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)], mul_cnt);
+      6,7,8,9,10,11: fe6_mul(i_mul_fe12_if.val, i_mul_fe12_if.dat[0 +: $bits(FE_TYPE)], i_mul_fe12_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)], mul_cnt);
+      12,13,14,15,16,17: fe6_mul(b0_val, b0[mul_cnt%6], a0[mul_cnt%6], mul_cnt);
+    endcase
+
+    // Adder input flow (0-5: a0 + a1, 6-11: b0 + b1, 12-17: aa + mnr(bb))
+    case (add_cnt) inside
+      0,1,2,3,4,5: fe6_add(start, a1[add_cnt%6], a0[add_cnt%6], add_cnt);
+      6,7,8,9,10,11: fe6_add(start, b0[add_cnt%6], b1[add_cnt%6], add_cnt);
+      12,13,14,15,16,17: fe6_add(i_mnr_fe6_if.val, aa[add_cnt%6], i_mnr_fe6_if.dat, add_cnt);
+    endcase
+
+    // Sub input flow (0-5: third product - aa, 6-11: that difference - bb, with bb held in b1)
+    case (sub_cnt) inside
+      0,1,2,3,4,5: fe6_sub(i_mul_fe6_if.val && i_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 2, i_mul_fe6_if.dat, aa[sub_cnt%6], sub_cnt);
+      6,7,8,9,10,11: fe6_sub(i_sub_fe_if.val, i_sub_fe_if.dat, b1[sub_cnt%6], sub_cnt);
+    endcase
+
+    // mnr flow (bb is sent to the non-residue multiplier once it is complete in b1)
+    case (mnr_cnt) inside
+      0,1,2,3,4,5: fe6_mnr(bb_val, b1[mnr_cnt%6], mnr_cnt);
+    endcase
+
+    // Final output flow (words 0-5 from the adder, words 6-11 from the subtractor, then re-arm for the next operation)
+    if (~o_mul_fe12_if.val || (o_mul_fe12_if.val && o_mul_fe12_if.rdy)) begin
+      case (out_cnt) inside
+        0,1,2,3,4,5: begin
+          o_mul_fe12_if.dat <= i_add_fe_if.dat;
+          o_mul_fe12_if.sop <= out_cnt == 0;
+          o_mul_fe12_if.eop <= 0;
+          if (i_add_fe_if.val && i_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 2) begin
+            o_mul_fe12_if.val <= 1;
+            out_cnt <= out_cnt + 1;
+          end
+        end
+        6,7,8,9,10,11: begin
+          o_mul_fe12_if.dat <= i_sub_fe_if.dat;
+          o_mul_fe12_if.sop <= 0;
+          o_mul_fe12_if.eop <= out_cnt == 11;
+          if (i_sub_fe_if.val && i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 1) begin
+            o_mul_fe12_if.val <= 1;
+            out_cnt <= out_cnt + 1;
+          end
+        end
+        default: begin
+          out_cnt <= 0;
+          bb_val <= 0;
+          b0_val <= 0;
+          b0_cnt <= 0;
+          mnr_cnt <= 0;
+          mul_cnt <= 0;
+          add_cnt <= 0;
+          sub_cnt <= 0;
+          start <= 0;
+        end
+      endcase
+    end
+  end
+end
+
+// Task for fe6_mul: loads {b, a} into the fe6 multiplier request register when it is free; cnt advances only if val is set
+task automatic fe6_mul(input logic val, input logic [$bits(FE_TYPE)-1:0] a, b, ref [CNT_BITS-1:0] cnt);
+  if (~o_mul_fe6_if.val || (o_mul_fe6_if.val && o_mul_fe6_if.rdy)) begin
+    o_mul_fe6_if.sop <= cnt == 0 || cnt == 6 || cnt == 12;
+    o_mul_fe6_if.eop <= cnt == 5 || cnt == 11 || cnt == 17;
+    o_mul_fe6_if.dat <= {b, a};
+    o_mul_fe6_if.val <= val;
+    if (cnt == 0) o_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= 0;
+    if (cnt == 6) o_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= 1;
+    if (cnt == 12) o_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= 2;
+    if (val) cnt = cnt + 1;
+  end
+endtask
+
+// Task for fe6_add: same pattern on the adder request register; the ctl tag (0/1/2) is set on the first word of each group of six
+task automatic fe6_add(input logic val, input logic [$bits(FE_TYPE)-1:0] a, b, ref [CNT_BITS-1:0] cnt);
+  if (~o_add_fe_if.val || (o_add_fe_if.val && o_add_fe_if.rdy)) begin
+    o_add_fe_if.sop <= cnt == 0 || cnt == 6 || cnt == 12;
+    o_add_fe_if.eop <= cnt == 5 || cnt == 11 || cnt == 17;
+    o_add_fe_if.dat <= {b, a};
+    o_add_fe_if.val <= val;
+    if (cnt == 0) o_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= 0;
+    if (cnt == 6) o_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= 1;
+    if (cnt == 12) o_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= 2;
+    if (val) cnt = cnt + 1;
+  end
+endtask
+
+// Task for fe6_sub: same pattern on the subtractor request register (two groups of six, tags 0 and 1)
+task automatic fe6_sub(input logic val, input logic [$bits(FE_TYPE)-1:0] a, b, ref [CNT_BITS-1:0] cnt);
+  if (~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy)) begin
+    o_sub_fe_if.sop <= cnt == 0 || cnt == 6;
+    o_sub_fe_if.eop <= cnt == 5 || cnt == 11;
+    o_sub_fe_if.dat <= {b, a};
+    o_sub_fe_if.val <= val;
+    if (cnt == 0) o_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= 0;
+    if (cnt == 6) o_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= 1;
+    if (val) cnt = cnt + 1;
+  end
+endtask
+
+// Task for fe6_mnr: streams one six-word element to the non-residue multiplier, always tagged 0
+task automatic fe6_mnr(input logic val, input logic [$bits(FE_TYPE)-1:0] a, ref [CNT_BITS-1:0] cnt);
+  if (~o_mnr_fe6_if.val || (o_mnr_fe6_if.val && o_mnr_fe6_if.rdy)) begin
+    o_mnr_fe6_if.sop <= cnt == 0;
+    o_mnr_fe6_if.eop <= cnt == 5;
+    o_mnr_fe6_if.dat <= a;
+    o_mnr_fe6_if.val <= val;
+    o_mnr_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= 0;
+    if (val) cnt = cnt + 1;
+  end
+endtask
+
+endmodule
\ No newline at end of file
diff --git a/ip_cores/ec/src/rtl/ec_fe6_mul_s.sv b/ip_cores/ec/src/rtl/ec_fe6_mul_s.sv
index 6afccb6..a810594 100644
--- a/ip_cores/ec/src/rtl/ec_fe6_mul_s.sv
+++ b/ip_cores/ec/src/rtl/ec_fe6_mul_s.sv
@@ -136,8 +136,12 @@ always_ff @ (posedge i_clk) begin
       a <= {i_mul_fe2_if.dat, {a[2], a[1], a[0][1]}};
     end
 
-    if (i_mul_fe6_if.rdy && i_mul_fe6_if.eop && i_mul_fe6_if.val)
-      start <= 1;
+    if (i_mul_fe6_if.rdy && i_mul_fe6_if.val) begin
+      if(i_mul_fe6_if.eop) start <= 1;
+      if(i_mul_fe6_if.sop) o_mul_fe6_if.ctl <= i_mul_fe6_if.ctl;
+    end
+
+
 
     // Multiplier input flow
     case (mul_cnt) inside
@@ -182,7 +186,6 @@ always_ff @ (posedge i_clk) begin
           mul_cnt <= mul_cnt + 1;
         end
       end
-      default: if (start==0) mul_cnt <= 0;
     endcase
 
     // Adder input flow
@@ -196,7 +199,6 @@ always_ff @ (posedge i_clk) begin
       12,13: fe2_add(i_sub_fe_if.val && i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 2 , i_sub_fe_if.dat, a[1][add_cnt%2], add_cnt);
       14,15: fe2_add(i_mnr_fe2_if.val, i_mnr_fe2_if.dat, a[0][add_cnt%2], add_cnt);
       16,17: fe2_add(i_mnr_fe2_if.val, b[1][add_cnt%2], i_mnr_fe2_if.dat, add_cnt);
-      default: if (start==0) add_cnt <= 0;
     endcase
 
     // Sub input flow
@@ -207,14 +209,12 @@ always_ff @ (posedge i_clk) begin
       6,7: fe2_sub(i_sub_fe_if.val, i_sub_fe_if.dat, a[2][sub_cnt%2], sub_cnt);
       8,9: fe2_sub(i_sub_fe_if.val, i_sub_fe_if.dat, a[0][sub_cnt%2], sub_cnt);
       10,11: fe2_sub(add_cnt >= 18, b[0][sub_cnt%2], a[2][sub_cnt%2], sub_cnt);
-      default: if (start==0) sub_cnt <= 0;
     endcase
 
     // mnr flow
     case (mnr_cnt) inside
       0,1: fe2_mnr(i_sub_fe_if.val && i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 3, i_sub_fe_if.dat, mnr_cnt);
       2,3: fe2_mnr(1, a[2][mnr_cnt%2], mnr_cnt);
-      default: if (start==0) mnr_cnt <= 0;
     endcase
 
     // Final output
flow @@ -222,8 +222,6 @@ always_ff @ (posedge i_clk) begin case (out_cnt) inside 0,1: begin o_mul_fe6_if.dat <= i_add_fe_if.dat; - o_mul_fe6_if.ctl <= i_add_fe_if.ctl; - o_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= 0; o_mul_fe6_if.sop <= out_cnt == 0; o_mul_fe6_if.eop <= 0; if (i_add_fe_if.val && i_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 7) begin @@ -251,6 +249,10 @@ always_ff @ (posedge i_clk) begin end default: begin out_cnt <= 0; + mnr_cnt <= 0; + mul_cnt <= 0; + add_cnt <= 0; + sub_cnt <= 0; start <= 0; end endcase diff --git a/ip_cores/ec/src/rtl/ec_fp12_arithmetic.sv b/ip_cores/ec/src/rtl/ec_fp12_arithmetic.sv index e54a1cb..1dfb879 100644 --- a/ip_cores/ec/src/rtl/ec_fp12_arithmetic.sv +++ b/ip_cores/ec/src/rtl/ec_fp12_arithmetic.sv @@ -26,7 +26,8 @@ module ec_fe12_arithmetic parameter type FE6_TYPE, parameter type FE12_TYPE, parameter CTL_BITS = 12, - parameter OVR_WRT_BIT = 8 // From this bit 4 bits are used for internal control, 2 bits for resource sharing - 6 total + parameter OVR_WRT_BIT = 8 // From this bit 4 bits are used for internal control, + // 2 bits for resource sharing, 1 bit for square control, 1 bit for sparse mult by c0, c1, c4 )( input i_clk, i_rst, // Interface to FE6_TYPE multiplier (mod P) @@ -53,6 +54,8 @@ module ec_fe12_arithmetic ); localparam NUM_OVR_WRT_BIT = 4; +localparam SQR_BIT = OVR_WRT_BIT + 6; + if_axi_stream #(.DAT_BITS($bits(FE6_TYPE)), .CTL_BITS(CTL_BITS)) add_if_fe6_i [1:0] (i_clk); if_axi_stream #(.DAT_BITS(2*$bits(FE6_TYPE)), .CTL_BITS(CTL_BITS)) add_if_fe6_o [1:0] (i_clk); @@ -187,6 +190,7 @@ always_ff @ (posedge i_clk) begin eq_val <= 0; eq_wait <= 0; rdy_l <= 0; + end else begin i_mul_fe6_if.rdy <= 1; @@ -260,7 +264,7 @@ always_ff @ (posedge i_clk) begin // Issue new multiplies if (~eq_wait[0] && i_mul_fe12_if.val && eq_val[2] && eq_val[3]) begin // 0. 
aa = mul(a[0], b[0])
       fe6_multiply(0, i_mul_fe12_if.dat[0 +: $bits(FE6_TYPE)],
-                     i_mul_fe12_if.dat[$bits(FE12_TYPE) +: $bits(FE6_TYPE)]);
+                      i_mul_fe12_if.dat[$bits(FE12_TYPE) +: $bits(FE6_TYPE)]);
     end else if (~eq_wait[1] && i_mul_fe12_if.val) begin // 1. bb = mul(a[1], b[1])
       fe6_multiply(1, i_mul_fe12_if.dat[$bits(FE6_TYPE) +: $bits(FE6_TYPE)],
@@ -356,13 +360,11 @@ task fe6_addition(input int unsigned ctl, input FE6_TYPE a, b);
 endtask
 
 // Task for using mult
-task fe6_multiply(input int unsigned ctl, input FE6_TYPE a, b, input logic [1:0] en = 2'b11);
+task fe6_multiply(input int unsigned ctl, input FE6_TYPE a, b);
   if (~o_mul_fe6_if.val || (o_mul_fe6_if.val && o_mul_fe6_if.rdy)) begin
     o_mul_fe6_if.val <= 1;
-    if (en[0])
-      o_mul_fe6_if.dat[0 +: $bits(FE6_TYPE)] <= a;
-    if (en[1])
-      o_mul_fe6_if.dat[$bits(FE6_TYPE) +: $bits(FE6_TYPE)] <= b;
+    o_mul_fe6_if.dat[0 +: $bits(FE6_TYPE)] <= a;
+    o_mul_fe6_if.dat[$bits(FE6_TYPE) +: $bits(FE6_TYPE)] <= b;
     o_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
     eq_wait[ctl] <= 1;
   end
@@ -379,4 +381,5 @@ task fe6_mnr(input int unsigned ctl, input FE6_TYPE a, input logic en = 1'b1);
   end
 endtask
 
+
 endmodule
\ No newline at end of file
diff --git a/ip_cores/ec/src/rtl/fe6_mul_by_nonresidue_s.sv b/ip_cores/ec/src/rtl/fe6_mul_by_nonresidue_s.sv
new file mode 100644
index 0000000..3914737
--- /dev/null
+++ b/ip_cores/ec/src/rtl/fe6_mul_by_nonresidue_s.sv
@@ -0,0 +1,107 @@
+/*
+  Multiplies by non-residue for Fp6 towering: input words 4,5 go out on o_mnr_fe2_if and their result is emitted first, followed by input words 0-3.
+  The _s suffix in the name indicates that the input is a stream starting at c0.
+
+  Copyright (C) 2019  Benjamin Devlin and Zcash Foundation
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+*/
+
+module fe6_mul_by_nonresidue_s
+#(
+  parameter type FE_TYPE
+)(
+  input i_clk, i_rst,
+  if_axi_stream.source o_mnr_fe6_if,   // Six-word result stream
+  if_axi_stream.sink   i_mnr_fe6_if ,  // Six-word operand stream
+  if_axi_stream.source o_mnr_fe2_if,   // Request to the fe2 non-residue block (operand words 4,5)
+  if_axi_stream.sink   i_mnr_fe2_if    // Its two-word result
+);
+
+logic [2:0] mnr_cnt, out_cnt; // Words consumed from i_mnr_fe6_if / words emitted on o_mnr_fe6_if
+
+FE_TYPE [3:0] t; // Holds operand words 0-3 until they are emitted as output words 2-5
+always_comb begin
+  case (mnr_cnt) inside
+    0,1,2,3: i_mnr_fe6_if.rdy = 1;
+    4,5: i_mnr_fe6_if.rdy = ~o_mnr_fe2_if.val || (o_mnr_fe2_if.val && o_mnr_fe2_if.rdy); // Words 4,5 land in the o_mnr_fe2_if register, so ready must track that register
+    default: i_mnr_fe6_if.rdy = 0;
+  endcase
+
+  i_mnr_fe2_if.rdy = (~o_mnr_fe6_if.val || (o_mnr_fe6_if.val && o_mnr_fe6_if.rdy));
+end
+
+always_ff @ (posedge i_clk) begin
+  if (i_rst) begin
+    o_mnr_fe2_if.reset_source();
+    o_mnr_fe6_if.reset_source();
+    mnr_cnt <= 0;
+    out_cnt <= 0;
+    t <= 0;
+  end else begin
+
+    if (o_mnr_fe6_if.val && o_mnr_fe6_if.rdy) o_mnr_fe6_if.val <= 0;
+    if (o_mnr_fe2_if.val && o_mnr_fe2_if.rdy) o_mnr_fe2_if.val <= 0;
+
+    case (mnr_cnt) inside
+      0,1,2,3: begin
+        if (i_mnr_fe6_if.val && i_mnr_fe6_if.rdy) begin
+          t <= {i_mnr_fe6_if.dat, t[3:1]};
+          mnr_cnt <= mnr_cnt + 1;
+        end
+      end
+      4,5: begin
+        if (~o_mnr_fe2_if.val || (o_mnr_fe2_if.val && o_mnr_fe2_if.rdy)) begin // Same condition as i_mnr_fe6_if.rdy in this state
+          o_mnr_fe2_if.val <= i_mnr_fe6_if.val;
+          o_mnr_fe2_if.sop <= mnr_cnt == 4;
+          o_mnr_fe2_if.eop <= mnr_cnt == 5;
+          o_mnr_fe2_if.dat <= i_mnr_fe6_if.dat;
+          if (i_mnr_fe6_if.val) begin
+            mnr_cnt <= mnr_cnt + 1;
+          end
+        end
+      end
+    endcase
+
+
+    case (out_cnt) inside
+      0,1: begin
+        if (~o_mnr_fe6_if.val || (o_mnr_fe6_if.val && o_mnr_fe6_if.rdy)) begin
+          o_mnr_fe6_if.val <= i_mnr_fe2_if.val;
+          o_mnr_fe6_if.sop <= out_cnt == 0;
+          o_mnr_fe6_if.eop <= 0;
+          o_mnr_fe6_if.dat <= i_mnr_fe2_if.dat;
+          if (i_mnr_fe2_if.val) begin
+            out_cnt <= out_cnt + 1;
+          end
+        end
+      end
+      2,3,4,5: begin
+        if (~o_mnr_fe6_if.val || (o_mnr_fe6_if.val && o_mnr_fe6_if.rdy)) begin
+          o_mnr_fe6_if.val <= 1;
+          o_mnr_fe6_if.sop <= 0;
+          o_mnr_fe6_if.eop <= out_cnt == 5;
+          o_mnr_fe6_if.dat <= t[out_cnt-2];
+          out_cnt <= out_cnt + 1;
+        end
+      end
+      default: begin // All six words emitted: re-arm both counters for the next element
+        mnr_cnt <= 0;
+        out_cnt <= 0;
+      end
+    endcase
+
+  end
+end
+endmodule
\ No newline at end of file
diff --git a/ip_cores/ec/src/tb/ec_fe12_mul_s_tb.sv b/ip_cores/ec/src/tb/ec_fe12_mul_s_tb.sv
new file mode 100644
index 0000000..06bff0c
--- /dev/null
+++ b/ip_cores/ec/src/tb/ec_fe12_mul_s_tb.sv
@@ -0,0 +1,301 @@
+/*
+  Copyright (C) 2019  Benjamin Devlin and Zcash Foundation
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+*/ +`timescale 1ps/1ps + +module ec_fe12_mul_s_tb (); + +import common_pkg::*; +import bls12_381_pkg::*; + +parameter type FE_TYPE = bls12_381_pkg::fe_t; +parameter type FE2_TYPE = bls12_381_pkg::fe2_t; +parameter type FE6_TYPE = bls12_381_pkg::fe6_t; +parameter type FE12_TYPE = bls12_381_pkg::fe12_t; +parameter P = bls12_381_pkg::P; + + +localparam CTL_BITS = 32; + +localparam CLK_PERIOD = 100; + +logic clk, rst; + +initial begin + rst = 0; + repeat(2) #(20*CLK_PERIOD) rst = ~rst; +end + +initial begin + clk = 0; + forever #(CLK_PERIOD/2) clk = ~clk; +end + +if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_o_if (clk); +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_i_if (clk); +if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_o_if [4:0] (clk); +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_i_if [4:0] (clk); +if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_o_if [4:0] (clk); +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_i_if [4:0] (clk); + +if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_o_if (clk); +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_i_if (clk); + +if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe6_o_if (clk); +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe6_i_if (clk); + +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mnr_fe2_o_if [2:0] (clk); +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mnr_fe2_i_if [2:0] (clk); +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mnr_fe6_o_if (clk); +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mnr_fe6_i_if (clk); + +if_axi_stream #(.DAT_BYTS(($bits(FE_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) o_mul_fe12_if (clk); +if_axi_stream #(.DAT_BYTS((2*$bits(FE_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) i_mul_fe12_if (clk); + + 
+ec_fp_mult_mod #( + .P ( P ), + .KARATSUBA_LVL ( 3 ), + .CTL_BITS ( CTL_BITS ) +) +ec_fp_mult_mod ( + .i_clk( clk ), + .i_rst( rst ), + .i_mul ( mul_fe_o_if ), + .o_mul ( mul_fe_i_if ) +); + +adder_pipe # ( + .BITS ( bls12_381_pkg::DAT_BITS ), + .P ( P ), + .CTL_BITS ( CTL_BITS ), + .LEVEL ( 2 ) +) +adder_pipe ( + .i_clk ( clk ), + .i_rst ( rst ), + .i_add ( add_fe_o_if[4] ), + .o_add ( add_fe_i_if[4] ) +); + +subtractor_pipe # ( + .BITS ( bls12_381_pkg::DAT_BITS ), + .P ( P ), + .CTL_BITS ( CTL_BITS ), + .LEVEL ( 2 ) +) +subtractor_pipe ( + .i_clk ( clk ), + .i_rst ( rst ), + .i_sub ( sub_fe_o_if[4] ), + .o_sub ( sub_fe_i_if[4] ) +); + +ec_fe2_mul_s #( + .FE_TYPE ( FE_TYPE ), + .CTL_BITS ( CTL_BITS ) +) +ec_fe2_mul_s ( + .i_clk ( clk ), + .i_rst ( rst ), + .o_mul_fe2_if ( mul_fe2_i_if ), + .i_mul_fe2_if ( mul_fe2_o_if ), + .o_add_fe_if ( add_fe_o_if[0] ), + .i_add_fe_if ( add_fe_i_if[0] ), + .o_sub_fe_if ( sub_fe_o_if[0] ), + .i_sub_fe_if ( sub_fe_i_if[0] ), + .o_mul_fe_if ( mul_fe_o_if ), + .i_mul_fe_if ( mul_fe_i_if ) +); + +fe2_mul_by_nonresidue_s #( + .FE_TYPE ( FE_TYPE ) +) +fe2_mul_by_nonresidue_s ( + .i_clk ( clk ), + .i_rst ( rst ), + .o_mnr_fe2_if ( mnr_fe2_i_if[2] ), + .i_mnr_fe2_if ( mnr_fe2_o_if[2] ), + .o_add_fe_if ( add_fe_o_if[1] ), + .i_add_fe_if ( add_fe_i_if[1] ), + .o_sub_fe_if ( sub_fe_o_if[1] ), + .i_sub_fe_if ( sub_fe_i_if[1] ) +); + +ec_fe6_mul_s #( + .FE_TYPE ( FE_TYPE ), + .FE2_TYPE ( FE2_TYPE ), + .FE6_TYPE ( FE6_TYPE ), + .CTL_BITS ( CTL_BITS ), + .OVR_WRT_BIT ( 0 ) +) +ec_fe6_mul_s ( + .i_clk ( clk ), + .i_rst ( rst ), + .o_mul_fe2_if ( mul_fe2_o_if ), + .i_mul_fe2_if ( mul_fe2_i_if ), + .o_add_fe_if ( add_fe_o_if[2] ), + .i_add_fe_if ( add_fe_i_if[2] ), + .o_sub_fe_if ( sub_fe_o_if[2] ), + .i_sub_fe_if ( sub_fe_i_if[2] ), + .o_mnr_fe2_if ( mnr_fe2_o_if[0] ), + .i_mnr_fe2_if ( mnr_fe2_i_if[0] ), + .o_mul_fe6_if ( mul_fe6_i_if ), + .i_mul_fe6_if ( mul_fe6_o_if ) +); + +fe6_mul_by_nonresidue_s #( + .FE_TYPE ( FE_TYPE ) +) 
+fe6_mul_by_nonresidue_s ( + .i_clk ( clk ), + .i_rst ( rst ), + .o_mnr_fe2_if ( mnr_fe2_o_if[1] ), + .i_mnr_fe2_if ( mnr_fe2_i_if[1] ), + .o_mnr_fe6_if ( mnr_fe6_i_if ), + .i_mnr_fe6_if ( mnr_fe6_o_if ) +); + +ec_fe12_mul_s #( + .FE_TYPE ( FE_TYPE ), + .OVR_WRT_BIT ( 16 ) +) +ec_fe12_mul_s ( + .i_clk ( clk ), + .i_rst ( rst ), + .o_mul_fe6_if ( mul_fe6_o_if ), + .i_mul_fe6_if ( mul_fe6_i_if ), + .o_add_fe_if ( add_fe_o_if[3] ), + .i_add_fe_if ( add_fe_i_if[3] ), + .o_sub_fe_if ( sub_fe_o_if[3] ), + .i_sub_fe_if ( sub_fe_i_if[3] ), + .o_mnr_fe6_if ( mnr_fe6_o_if ), + .i_mnr_fe6_if ( mnr_fe6_i_if ), + .o_mul_fe12_if ( o_mul_fe12_if ), + .i_mul_fe12_if ( i_mul_fe12_if ) +); + +resource_share # ( + .NUM_IN ( 4 ), + .DAT_BITS ( 2*$bits(FE_TYPE) ), + .CTL_BITS ( CTL_BITS ), + .OVR_WRT_BIT ( 8 ), + .PIPELINE_IN ( 3 ), + .PIPELINE_OUT ( 2 ) +) +resource_share_fe_add ( + .i_clk ( clk ), + .i_rst ( rst ), + .i_axi ( add_fe_o_if[3:0] ), + .o_res ( add_fe_o_if[4] ), + .i_res ( add_fe_i_if[4] ), + .o_axi ( add_fe_i_if[3:0] ) +); + +resource_share # ( + .NUM_IN ( 4 ), + .DAT_BITS ( 2*$bits(FE_TYPE) ), + .CTL_BITS ( CTL_BITS ), + .OVR_WRT_BIT ( 8 ), + .PIPELINE_IN ( 2 ), + .PIPELINE_OUT ( 4 ) +) +resource_share_fe_sub ( + .i_clk ( clk ), + .i_rst ( rst ), + .i_axi ( sub_fe_o_if[3:0] ), + .o_res ( sub_fe_o_if[4] ), + .i_res ( sub_fe_i_if[4] ), + .o_axi ( sub_fe_i_if[3:0] ) +); + +resource_share # ( + .NUM_IN ( 2 ), + .DAT_BITS ( 2*$bits(FE_TYPE) ), + .CTL_BITS ( CTL_BITS ), + .OVR_WRT_BIT ( 12 ), + .PIPELINE_IN ( 2 ), + .PIPELINE_OUT ( 2 ) +) +resource_share_fe2_mnr ( + .i_clk ( clk ), + .i_rst ( rst ), + .i_axi ( mnr_fe2_o_if[1:0] ), + .o_res ( mnr_fe2_o_if[2] ), + .i_res ( mnr_fe2_i_if[2] ), + .o_axi ( mnr_fe2_i_if[1:0] ) +); + +task test(); + fe12_t a, b, f_exp, f_exp2, f_out; + integer signed get_len; + integer start_time, finish_time; + logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat, dat_in; + + $display("Running test ..."); + for (int lp = 0; lp < 10; lp++) begin + 
$display("Loop %d", lp); + dat_in = 0; + for (int i = 0; i < 2; i++) + for (int j = 0; j < 3; j++) + for (int k = 0; k < 2; k++) begin + a[i][j][k] = random_vector(384/8) % P; + b[i][j][k] = random_vector(384/8) % P; + dat_in[(i*6+j*2+k)*768 +: 2*$bits(FE_TYPE)] = {b[i][j][k], a[i][j][k]}; + end + + f_exp = fe12_mul(a, b); + + start_time = $time; + fork + i_mul_fe12_if.put_stream(dat_in, 12*768/8); + o_mul_fe12_if.get_stream(get_dat, get_len); + join + finish_time = $time; + + for (int i = 0; i < 2; i++) + for (int j = 0; j < 3; j++) + for (int k = 0; k < 2; k++) + f_out[i][j][k] = get_dat[(i*6+j*2+k)*384 +: $bits(FE_TYPE)]; + + if (f_exp != f_out) begin + $display("Input a was:"); + print_fe12(a); + $display("Input b was:"); + print_fe12(b); + $display("Output was:"); + print_fe12(f_out); + $display("Output Expected:"); + print_fe12(f_exp); + $fatal(1, "%m %t ERROR: output was wrong", $time); + end + + $display("test PASSED in %d clocks", (finish_time-start_time)/CLK_PERIOD); + end + +endtask + +initial begin + i_mul_fe12_if.reset_source(); + o_mul_fe12_if.rdy = 0; + #10ns; + + test(); + + #50ns $finish(); +end + +endmodule \ No newline at end of file diff --git a/ip_cores/ec/src/tb/ec_fe6_mul_s_tb.sv b/ip_cores/ec/src/tb/ec_fe6_mul_s_tb.sv index b3f565d..d1c802c 100644 --- a/ip_cores/ec/src/tb/ec_fe6_mul_s_tb.sv +++ b/ip_cores/ec/src/tb/ec_fe6_mul_s_tb.sv @@ -16,7 +16,7 @@ */ `timescale 1ps/1ps -module ec_fe6_mul_tb (); +module ec_fe6_mul_s_tb (); import common_pkg::*; import bls12_381_pkg::*; @@ -200,20 +200,20 @@ task test(); b[j][k] = random_vector(384/8) % P; dat_in[(j*2+k)*768 +: 2*$bits(FE_TYPE)] = {b[j][k], a[j][k]}; end - + f_exp = fe6_mul(a, b); - + start_time = $time; fork i_mul_fe6_if.put_stream(dat_in, 6*768/8); o_mul_fe6_if.get_stream(get_dat, get_len); join finish_time = $time; - + for (int j = 0; j < 3; j++) for (int k = 0; k < 2; k++) f_out[j][k] = get_dat[(j*2+k)*384 +: $bits(FE_TYPE)]; - + if (f_exp != f_out) begin $display("Input a was:"); 
print_fe6(a); @@ -225,7 +225,7 @@ task test(); print_fe6(f_exp); $fatal(1, "%m %t ERROR: output was wrong", $time); end - + $display("test PASSED in %d clocks", (finish_time-start_time)/CLK_PERIOD); end diff --git a/ip_cores/ec/src/tb/ec_fp12_arithmetic_tb.sv b/ip_cores/ec/src/tb/ec_fp12_arithmetic_tb.sv index e7abac1..c65532b 100644 --- a/ip_cores/ec/src/tb/ec_fp12_arithmetic_tb.sv +++ b/ip_cores/ec/src/tb/ec_fp12_arithmetic_tb.sv @@ -71,7 +71,7 @@ end initial begin clk = 0; - forever #CLK_PERIOD clk = ~clk; + forever #(CLK_PERIOD/2) clk = ~clk; end ec_fe2_arithmetic #(