diff --git a/ip_cores/ec/src/rtl/ec_fe2_mul.sv b/ip_cores/ec/src/rtl/ec_fe2_mul.sv new file mode 100644 index 0000000..cfb51f0 --- /dev/null +++ b/ip_cores/ec/src/rtl/ec_fe2_mul.sv @@ -0,0 +1,164 @@ +/* + This provides the interface to perform Fp2 field element mul. Uses the schoolbook method (four Fp multiplications, one subtraction and one addition), not Karatsuba. + + Inputs must be interleaved starting at c0 (i.e. clock 0 = {b.c0, a.c0}) + + Copyright (C) 2019 Benjamin Devlin and Zcash Foundation + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. 
+*/
+
+module ec_fe2_mul
+#(
+  parameter type FE_TYPE, // Base field element type
+  parameter CTL_BITS = 12 // Declared ctl width; not referenced inside this module
+)(
+  input i_clk, i_rst,
+  // Fp2 multiply interface: operand beats {b.cN, a.cN} arrive on i_mul_fe2_if, result leaves on o_mul_fe2_if (c0 then c1)
+  if_axi_stream.source o_mul_fe2_if,
+  if_axi_stream.sink   i_mul_fe2_if,
+  // Interface to FE_TYPE adder (mod P) 2*FE_TYPE data width
+  if_axi_stream.source o_add_fe_if,
+  if_axi_stream.sink   i_add_fe_if,
+  // Interface to FE_TYPE subtractor (mod P) 2*FE_TYPE data width
+  if_axi_stream.source o_sub_fe_if,
+  if_axi_stream.sink   i_sub_fe_if,
+  // Interface to FE_TYPE mul (mod P) 2*FE_TYPE data width
+  if_axi_stream.source o_mul_fe_if,
+  if_axi_stream.sink   i_mul_fe_if
+);
+
+FE_TYPE a, b; // Temp storage (a0 / b0 first, b later holds b1)
+logic [1:0] mul_cnt, add_sub_cnt; // Sequencers for issuing products and for pairing returned products
+logic out_cnt; // 0 = next output word comes from the subtractor, 1 = from the adder
+
+// Flow control: an input is accepted only in the state that consumes it and when its target output register is free
+always_comb begin
+  i_mul_fe2_if.rdy = (mul_cnt == 0 || mul_cnt == 1) && (~o_mul_fe_if.val || (o_mul_fe_if.val && o_mul_fe_if.rdy));
+  i_mul_fe_if.rdy = (add_sub_cnt == 0 || add_sub_cnt == 1) ? ~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy) :
+                                                              ~o_add_fe_if.val || (o_add_fe_if.val && o_add_fe_if.rdy);
+  i_add_fe_if.rdy = out_cnt == 1 && (~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy));
+  i_sub_fe_if.rdy = out_cnt == 0 && (~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy));
+end
+
+always_ff @ (posedge i_clk) begin
+  if (i_rst) begin
+    o_mul_fe2_if.reset_source();
+    o_add_fe_if.copy_if(0, 0, 1, 1, 0, 0, 0);
+    o_sub_fe_if.copy_if(0, 0, 1, 1, 0, 0, 0);
+    o_mul_fe_if.copy_if(0, 0, 1, 1, 0, 0, 0);
+    a <= 0;
+    b <= 0;
+    mul_cnt <= 0;
+    add_sub_cnt <= 0;
+    out_cnt <= 0;
+  end else begin
+    // Default: drop val once a transfer completes; assignments further down in this block override it
+    if (o_add_fe_if.val && o_add_fe_if.rdy) o_add_fe_if.val <= 0;
+    if (o_sub_fe_if.val && o_sub_fe_if.rdy) o_sub_fe_if.val <= 0;
+    if (o_mul_fe_if.val && o_mul_fe_if.rdy) o_mul_fe_if.val <= 0;
+    if (o_mul_fe2_if.val && o_mul_fe2_if.rdy) o_mul_fe2_if.val <= 0;
+    // Issue the four Fp products to the multiplier: a0*b0, a1*b1, a1*b0, b1*a0
+    case(mul_cnt)
+      0: begin
+        if (~o_mul_fe_if.val || (o_mul_fe_if.val && o_mul_fe_if.rdy)) begin
+          o_mul_fe_if.dat <= i_mul_fe2_if.dat;  // a0 * b0
+          o_mul_fe_if.val <= i_mul_fe2_if.val;
+          o_mul_fe_if.ctl <= i_mul_fe2_if.ctl;
+          {b, a} <= i_mul_fe2_if.dat; // keep b0 and a0 for the cross products
+          if (i_mul_fe2_if.val) mul_cnt <= mul_cnt + 1;
+        end
+      end
+      1: begin
+        if (~o_mul_fe_if.val || (o_mul_fe_if.val && o_mul_fe_if.rdy)) begin
+          o_mul_fe_if.dat <= i_mul_fe2_if.dat;  // a1 * b1
+          o_mul_fe_if.val <= i_mul_fe2_if.val;
+          if (i_mul_fe2_if.val) mul_cnt <= mul_cnt + 1;
+        end
+      end
+      2: begin
+        if (~o_mul_fe_if.val || (o_mul_fe_if.val && o_mul_fe_if.rdy)) begin
+          o_mul_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= b;  // a1 * b0
+          o_mul_fe_if.val <= 1;
+          mul_cnt <= mul_cnt + 1;
+          b <= o_mul_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)]; // save b1 (upper half of the previous request)
+        end
+      end
+      3: begin
+        if (~o_mul_fe_if.val || (o_mul_fe_if.val && o_mul_fe_if.rdy)) begin
+          o_mul_fe_if.dat <= {a, b};  // b1 * a0
+          o_mul_fe_if.val <= 1;
+          mul_cnt <= 0;
+        end
+      end
+    endcase
+
+    // Pair up returning products: the first two feed the subtractor, the last two feed the adder
+    case(add_sub_cnt)
+      0: begin
+        if (~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy)) begin
+          o_sub_fe_if.dat[0 +: $bits(FE_TYPE)] <= i_mul_fe_if.dat;
+          if (i_mul_fe_if.val) add_sub_cnt <= add_sub_cnt + 1;
+        end
+      end
+      1: begin
+        o_sub_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= i_mul_fe_if.dat;
+        o_sub_fe_if.ctl <= i_mul_fe_if.ctl;  // a0b0 - a1b1
+        if (i_mul_fe_if.val) begin
+          o_sub_fe_if.val <= 1;
+          add_sub_cnt <= add_sub_cnt + 1;
+        end
+      end
+      2: begin
+        if (~o_add_fe_if.val || (o_add_fe_if.val && o_add_fe_if.rdy)) begin
+          o_add_fe_if.dat[0 +: $bits(FE_TYPE)] <= i_mul_fe_if.dat;
+          if (i_mul_fe_if.val) add_sub_cnt <= add_sub_cnt + 1;
+        end
+      end
+      3: begin
+        o_add_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= i_mul_fe_if.dat;
+        o_add_fe_if.ctl <= i_mul_fe_if.ctl;  // a1b0 + a0b1
+        if (i_mul_fe_if.val) begin
+          o_add_fe_if.val <= 1;
+          add_sub_cnt <= add_sub_cnt + 1; // 2-bit counter wraps back to 0
+        end
+      end
+    endcase
+    // Forward results: subtractor output first (sop), then adder output (eop)
+    case(out_cnt)
+      0: begin
+        if (~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy)) begin
+          o_mul_fe2_if.dat <= i_sub_fe_if.dat;
+          o_mul_fe2_if.sop <= 1;
+          o_mul_fe2_if.eop <= 0;
+          o_mul_fe2_if.ctl <= i_sub_fe_if.ctl;
+          o_mul_fe2_if.val <= i_sub_fe_if.val;
+          if (i_sub_fe_if.val) out_cnt <= out_cnt + 1;
+        end
+      end
+      1: begin
+        if (~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy)) begin
+          o_mul_fe2_if.dat <= i_add_fe_if.dat;
+          o_mul_fe2_if.sop <= 0;
+          o_mul_fe2_if.eop <= 1;
+          o_mul_fe2_if.ctl <= i_add_fe_if.ctl;
+          o_mul_fe2_if.val <= i_add_fe_if.val;
+          if (i_add_fe_if.val) out_cnt <= out_cnt + 1; // 1-bit counter wraps back to 0
+        end
+      end
+    endcase
+
+  end
+end
+endmodule
\ No newline at end of file
diff --git a/zcash_fpga/src/rtl/bls12_381/bls12_381_fe12_fmap.sv b/zcash_fpga/src/rtl/bls12_381/bls12_381_fe12_fmap.sv
new file mode 100644
index 0000000..f35c145
--- /dev/null
+++ b/zcash_fpga/src/rtl/bls12_381/bls12_381_fe12_fmap.sv
@@ -0,0 +1,100 @@
+/*
+  This does the Frobenius map calculation required in the final
+  exponentiation in the ate pairing on a Fp^12 element.
+
+  Input is expected to be streamed in with Fp .c0 in the first clock cycle
+
+  Copyright (C) 2019  Benjamin Devlin and Zcash Foundation
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+*/
+
+module bls12_381_fe12_fmap
+  import bls12_381_pkg::*;
+#(
+  parameter type FE_TYPE = fe_t,   // Base field element type
+  parameter      CTL_BITS    = 12,
+  parameter      CTL_BIT_POW = 8   // This is where we encode the power value with 2 bits - only 0,1,2,3 are supported
+)(
+  input i_clk, i_rst,
+  // Input/Output interfaces for fmap result, FE_TYPE data width
+  if_axi_stream.source o_fmap_fe12_if,
+  if_axi_stream.sink   i_fmap_fe12_if,
+  // Interface to FE6_TYPE fmap block, FE_TYPE data width
+  if_axi_stream.source o_fmap_fe6_if,
+  if_axi_stream.sink   i_fmap_fe6_if,
+  // Interface to FE2_TYPE mul (mod P), 2*FE_TYPE data width
+  if_axi_stream.source o_mul_fe2_if,
+  if_axi_stream.sink   i_mul_fe2_if
+);
+
+logic [4:0] out_cnt, out_cnt1, out_cnt2; // out_cnt: multiplier request word, out_cnt1: result word, out_cnt2: word within the Fp6 packet
+// Each stage accepts input only when its output register is empty or being read this cycle
+always_comb begin
+  i_fmap_fe12_if.rdy = ~o_fmap_fe6_if.val || (o_fmap_fe6_if.val && o_fmap_fe6_if.rdy);
+  i_fmap_fe6_if.rdy = ~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy);
+  i_mul_fe2_if.rdy = ~o_fmap_fe12_if.val || (o_fmap_fe12_if.val && o_fmap_fe12_if.rdy);
+end
+
+always_ff @ (posedge i_clk) begin
+  if (i_rst) begin
+    o_fmap_fe12_if.reset_source();
+    o_fmap_fe6_if.reset_source();
+    o_mul_fe2_if.reset_source();
+    out_cnt <= 0;
+    out_cnt1 <= 0;
+    out_cnt2 <= 0;
+  end else begin
+    // Default: drop val once a transfer completes; the stage logic below overrides it when new data is loaded
+    if (o_fmap_fe12_if.val && o_fmap_fe12_if.rdy) o_fmap_fe12_if.val <= 0;
+    if (o_fmap_fe6_if.val && o_fmap_fe6_if.rdy) o_fmap_fe6_if.val <= 0;
+    if (o_mul_fe2_if.val && o_mul_fe2_if.rdy) o_mul_fe2_if.val <= 0;
+    // Stage 1: pass the input words to the Fp6 fmap block, framing every 6 words as one packet
+    if (~o_fmap_fe6_if.val || (o_fmap_fe6_if.val && o_fmap_fe6_if.rdy)) begin
+      o_fmap_fe6_if.val <= i_fmap_fe12_if.val;
+      o_fmap_fe6_if.sop <= out_cnt2 == 0;
+      o_fmap_fe6_if.eop <= out_cnt2 == 5;
+      o_fmap_fe6_if.ctl <= i_fmap_fe12_if.ctl;
+      o_fmap_fe6_if.dat <= i_fmap_fe12_if.dat;
+      out_cnt2 <= i_fmap_fe12_if.val ? out_cnt2 == 5 ? 0 : out_cnt2 + 1 : out_cnt2;
+    end
+    // Stage 2: send the Fp6 fmap output to the Fp2 multiplier, one Fp word per beat
+    if (~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy)) begin
+      o_mul_fe2_if.val <= i_fmap_fe6_if.val;
+      o_mul_fe2_if.sop <= out_cnt % 2 == 0;
+      o_mul_fe2_if.eop <= out_cnt % 2 == 1;
+      o_mul_fe2_if.ctl <= i_fmap_fe6_if.ctl;
+      case (out_cnt) inside
+        0,1,2,3,4,5: begin // first six words: second operand is 1 on the even beat and 0 on the odd beat
+          o_mul_fe2_if.dat[0 +: $bits(FE_TYPE)] <= i_fmap_fe6_if.dat;
+          o_mul_fe2_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= out_cnt % 2 == 0 ? 1 : 0;
+        end
+        6,7,8,9,10,11: o_mul_fe2_if.dat <= {FROBENIUS_COEFF_FQ12_C1[i_fmap_fe6_if.ctl[CTL_BIT_POW +: 2]][out_cnt % 2], i_fmap_fe6_if.dat}; // last six words: second operand is the coefficient selected by the power field
+      endcase
+      out_cnt <= i_fmap_fe6_if.val ? out_cnt == 11 ? 0 : out_cnt + 1 : out_cnt;
+    end
+
+    // Stage 3: forward the multiplier results, framing 12 words as one output packet
+    if (~o_fmap_fe12_if.val || (o_fmap_fe12_if.val && o_fmap_fe12_if.rdy)) begin
+      o_fmap_fe12_if.val <= i_mul_fe2_if.val;
+      o_fmap_fe12_if.sop <= out_cnt1 == 0;
+      o_fmap_fe12_if.eop <= out_cnt1 == 11;
+      o_fmap_fe12_if.ctl <= i_mul_fe2_if.ctl;
+      o_fmap_fe12_if.dat <= i_mul_fe2_if.dat;
+      out_cnt1 <= i_mul_fe2_if.val ? out_cnt1 == 11 ? 0 : out_cnt1 + 1 : out_cnt1;
+    end
+  end
+end
+
+endmodule
\ No newline at end of file
diff --git a/zcash_fpga/src/rtl/bls12_381/bls12_381_fe12_fmap_wrapper.sv b/zcash_fpga/src/rtl/bls12_381/bls12_381_fe12_fmap_wrapper.sv
new file mode 100644
index 0000000..1b66173
--- /dev/null
+++ b/zcash_fpga/src/rtl/bls12_381/bls12_381_fe12_fmap_wrapper.sv
@@ -0,0 +1,114 @@
+/*
+  Wrapper for the Frobenius map calculation required in the final
+  exponentiation in the ate pairing on a Fp^12 element.
+
+  Input is expected to be streamed in with Fp .c0 in the first clock cycle
+
+  Copyright (C) 2019  Benjamin Devlin and Zcash Foundation
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+*/
+
+module bls12_381_fe12_fmap_wrapper
+  import bls12_381_pkg::*;
+#(
+  parameter type FE_TYPE = fe_t,
+  parameter      CTL_BITS    = 12,
+  parameter      CTL_BIT_POW = 8       // This is where we encode the power value with 2 bits - only 0,1,2,3 are supported - 1 extra bit required after this for control
+)(
+  input i_clk, i_rst,
+  // Input/Output interfaces for fmap result, FE_TYPE data width
+  if_axi_stream.source o_fmap_fe12_if,
+  if_axi_stream.sink   i_fmap_fe12_if,
+  // Interface to FE2_TYPE mul (mod P), 2*FE_TYPE data width
+  if_axi_stream.source o_mul_fe2_if,
+  if_axi_stream.sink   i_mul_fe2_if,
+  // Interface to FE_TYPE mul (mod P), 2*FE_TYPE data width
+  if_axi_stream.source o_mul_fe_if,
+  if_axi_stream.sink   i_mul_fe_if
+);
+// Fp2 multiplier request/result streams: index 0 is used by the Fp6 block, index 1 by the Fp12 block
+if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_if_o [1:0] (i_clk);
+if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS))   mul_fe2_if_i [1:0] (i_clk);
+// Streams between the Fp12 block and the Fp6 block
+if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS))   fmap_fe6_if_o (i_clk);
+if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS))   fmap_fe6_if_i (i_clk);
+// Streams between the Fp6 block and the Fp2 block
+if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS))   fmap_fe2_if_o (i_clk);
+if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS))   fmap_fe2_if_i (i_clk);
+// Fp2 Frobenius map; drives the external Fp multiplier port directly
+bls12_381_fe2_fmap #(
+  .FE_TYPE     ( FE_TYPE     ),
+  .CTL_BITS    ( CTL_BITS    ),
+  .CTL_BIT_POW ( CTL_BIT_POW )
+)
+bls12_381_fe2_fmap (
+  .i_clk ( i_clk ),
+  .i_rst ( i_rst ),
+  .o_fmap_fe2_if ( fmap_fe2_if_i ),
+  .i_fmap_fe2_if ( fmap_fe2_if_o ),
+  .o_mul_fe_if ( o_mul_fe_if ),
+  .i_mul_fe_if ( i_mul_fe_if )
+);
+// Fp6 Frobenius map; uses the Fp2 block above and port 0 of the shared Fp2 multiplier
+bls12_381_fe6_fmap #(
+  .FE_TYPE     ( FE_TYPE     ),
+  .CTL_BITS    ( CTL_BITS    ),
+  .CTL_BIT_POW ( CTL_BIT_POW )
+)
+bls12_381_fe6_fmap (
+  .i_clk ( i_clk ),
+  .i_rst ( i_rst ),
+  .o_fmap_fe6_if ( fmap_fe6_if_i ),
+  .i_fmap_fe6_if ( fmap_fe6_if_o ),
+  .o_fmap_fe2_if ( fmap_fe2_if_o ),
+  .i_fmap_fe2_if ( fmap_fe2_if_i ),
+  .o_mul_fe2_if ( mul_fe2_if_o[0] ),
+  .i_mul_fe2_if ( mul_fe2_if_i[0] )
+);
+// Fp12 Frobenius map; uses the Fp6 block above and port 1 of the shared Fp2 multiplier
+bls12_381_fe12_fmap #(
+  .FE_TYPE     ( FE_TYPE     ),
+  .CTL_BITS    ( CTL_BITS    ),
+  .CTL_BIT_POW ( CTL_BIT_POW )
+)
+bls12_381_fe12_fmap (
+  .i_clk ( i_clk ),
+  .i_rst ( i_rst ),
+  .o_fmap_fe12_if ( o_fmap_fe12_if ),
+  .i_fmap_fe12_if ( i_fmap_fe12_if ),
+  .o_fmap_fe6_if ( fmap_fe6_if_o ),
+  .i_fmap_fe6_if ( fmap_fe6_if_i ),
+  .o_mul_fe2_if ( mul_fe2_if_o[1] ),
+  .i_mul_fe2_if ( mul_fe2_if_i[1] )
+);
+// Shares the single external Fp2 multiplier port between the Fp6 and Fp12 blocks
+resource_share # (
+  .NUM_IN       ( 2                ),
+  .DAT_BITS     ( 2*$bits(FE_TYPE) ),
+  .CTL_BITS     ( CTL_BITS         ),
+  .OVR_WRT_BIT  ( CTL_BIT_POW+2    ),
+  .PIPELINE_IN  ( 0                ),
+  .PIPELINE_OUT ( 0                )
+)
+resource_share_fe2_mul (
+  .i_clk ( i_clk ),
+  .i_rst ( i_rst ),
+  .i_axi ( mul_fe2_if_o[1:0] ),
+  .o_res ( o_mul_fe2_if      ),
+  .i_res ( i_mul_fe2_if      ),
+  .o_axi ( mul_fe2_if_i[1:0] )
+);
+
+endmodule
\ No newline at end of file
diff --git a/zcash_fpga/src/rtl/bls12_381/bls12_381_fe2_fmap.sv b/zcash_fpga/src/rtl/bls12_381/bls12_381_fe2_fmap.sv
new file mode 100644
index 0000000..24d6be7
--- /dev/null
+++ b/zcash_fpga/src/rtl/bls12_381/bls12_381_fe2_fmap.sv
@@ -0,0 +1,84 @@
+/*
+  This does the Frobenius map calculation required in the final
+  exponentiation in the ate pairing on a Fp^2 element.
+
+  Input is expected to be streamed in with Fp .c0 in the first clock cycle
+
+  Copyright (C) 2019  Benjamin Devlin and Zcash Foundation
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+*/
+
+module bls12_381_fe2_fmap
+  import bls12_381_pkg::*;
+#(
+  parameter type FE_TYPE = fe_t,   // Base field element type
+  parameter      CTL_BITS    = 12,
+  parameter      CTL_BIT_POW = 8   // This is where we encode the power value with 2 bits - only 0,1,2,3 are supported
+)(
+  input i_clk, i_rst,
+  // Input/Output interfaces for fmap result, FE_TYPE data width
+  if_axi_stream.source o_fmap_fe2_if,
+  if_axi_stream.sink   i_fmap_fe2_if,
+  // Interface to FE_TYPE mul (mod P), 2*FE_TYPE data width
+  if_axi_stream.source o_mul_fe_if,
+  if_axi_stream.sink   i_mul_fe_if
+);
+
+// Each stage accepts input only when its output register is empty or being read this cycle
+always_comb begin
+  i_fmap_fe2_if.rdy = ~o_mul_fe_if.val || (o_mul_fe_if.val && o_mul_fe_if.rdy);
+  i_mul_fe_if.rdy = ~o_fmap_fe2_if.val || (o_fmap_fe2_if.val && o_fmap_fe2_if.rdy);
+end
+
+logic mul_cnt; // 0 = next input word is c0, 1 = next input word is c1
+
+always_ff @ (posedge i_clk) begin
+  if (i_rst) begin
+    o_fmap_fe2_if.reset_source();
+    o_mul_fe_if.reset_source();
+    mul_cnt <= 0;
+  end else begin
+    // Default: drop val once a transfer completes; the stage logic below overrides it when new data is loaded
+    if (o_mul_fe_if.val && o_mul_fe_if.rdy) o_mul_fe_if.val <= 0;
+    if (o_fmap_fe2_if.val && o_fmap_fe2_if.rdy) o_fmap_fe2_if.val <= 0;
+    // Build the multiplier request: c0 is multiplied by 1, c1 by the Frobenius coefficient
+    if (~o_mul_fe_if.val || (o_mul_fe_if.val && o_mul_fe_if.rdy)) begin
+      case(mul_cnt)
+        0: begin
+          o_mul_fe_if.dat[0 +: $bits(FE_TYPE)] <= i_fmap_fe2_if.dat;
+          o_mul_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= 1;
+        end
+        1: begin
+          // FROBENIUS_COEFF_FQ2_C1 only has entries [1:0], so index with the LSB of the power field (pow % 2, as in bls12_381_pkg::fe2_fmap); the full 2-bit field would read out of range for pow = 2,3
+          o_mul_fe_if.dat <= {i_fmap_fe2_if.dat, FROBENIUS_COEFF_FQ2_C1[i_fmap_fe2_if.ctl[CTL_BIT_POW]]};
+        end
+      endcase
+      o_mul_fe_if.val <= i_fmap_fe2_if.val;
+      o_mul_fe_if.ctl <= i_fmap_fe2_if.ctl;
+      o_mul_fe_if.sop <= 1;
+      o_mul_fe_if.eop <= 1;
+      mul_cnt <= i_fmap_fe2_if.val ? mul_cnt + 1 : mul_cnt;
+    end
+    // Forward multiplier results; sop and eop toggle on every valid word to frame two-word packets
+    if (~o_fmap_fe2_if.val || (o_fmap_fe2_if.val && o_fmap_fe2_if.rdy)) begin
+      o_fmap_fe2_if.val <= i_mul_fe_if.val;
+      o_fmap_fe2_if.eop <= i_mul_fe_if.val ? o_fmap_fe2_if.sop : o_fmap_fe2_if.eop;
+      o_fmap_fe2_if.sop <= i_mul_fe_if.val ? ~o_fmap_fe2_if.sop : o_fmap_fe2_if.sop;
+      o_fmap_fe2_if.dat <= i_mul_fe_if.dat;
+      o_fmap_fe2_if.ctl <= i_mul_fe_if.ctl;
+    end
+  end
+end
+
+endmodule
\ No newline at end of file
diff --git a/zcash_fpga/src/rtl/bls12_381/bls12_381_fe6_fmap.sv b/zcash_fpga/src/rtl/bls12_381/bls12_381_fe6_fmap.sv
new file mode 100644
index 0000000..89c8efa
--- /dev/null
+++ b/zcash_fpga/src/rtl/bls12_381/bls12_381_fe6_fmap.sv
@@ -0,0 +1,99 @@
+/*
+  This does the Frobenius map calculation required in the final
+  exponentiation in the ate pairing on a Fp^6 element.
+
+  Input is expected to be streamed in with Fp .c0 in the first clock cycle
+
+  Copyright (C) 2019  Benjamin Devlin and Zcash Foundation
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program.  If not, see <https://www.gnu.org/licenses/>.
+*/ + +module bls12_381_fe6_fmap + import bls12_381_pkg::*; +#( + parameter type FE_TYPE = fe_t, // Base field element type + parameter CTL_BITS = 12, + parameter CTL_BIT_POW = 8 // This is where we encode the power value with 2 bits - only 0,1,2,3 are supported +)( + input i_clk, i_rst, + // Input/Output intefaces for fmap result, FE_TYPE data width + if_axi_stream.source o_fmap_fe6_if, + if_axi_stream.sink i_fmap_fe6_if, + // Interface to FE2_TYPE fmap block, FE_TYPE data width + if_axi_stream.source o_fmap_fe2_if, + if_axi_stream.sink i_fmap_fe2_if, + // Interface to FE_TYPE mul (mod P), 2*FE_TYPE data width + if_axi_stream.source o_mul_fe2_if, + if_axi_stream.sink i_mul_fe2_if +); + +logic [2:0] out_cnt, out_cnt1; + +always_comb begin + i_fmap_fe6_if.rdy = ~o_fmap_fe2_if.val || (o_fmap_fe2_if.val && o_fmap_fe2_if.rdy); + i_fmap_fe2_if.rdy = ~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy); + i_mul_fe2_if.rdy = ~o_fmap_fe6_if.val || (o_fmap_fe6_if.val && o_fmap_fe6_if.rdy); +end + +always_ff @ (posedge i_clk) begin + if (i_rst) begin + o_fmap_fe6_if.reset_source(); + o_fmap_fe2_if.reset_source(); + o_mul_fe2_if.reset_source(); + out_cnt <= 0; + out_cnt1 <= 0; + end else begin + + if (o_fmap_fe6_if.val && o_fmap_fe6_if.rdy) o_fmap_fe6_if.val <= 0; + if (o_fmap_fe2_if.val && o_fmap_fe2_if.rdy) o_fmap_fe2_if.val <= 0; + if (o_mul_fe2_if.val && o_mul_fe2_if.rdy) o_mul_fe2_if.val <= 0; + + if (~o_fmap_fe2_if.val || (o_fmap_fe2_if.val && o_fmap_fe2_if.rdy)) begin + o_fmap_fe2_if.val <= i_fmap_fe6_if.val; + o_fmap_fe2_if.dat <= i_fmap_fe6_if.dat; + o_fmap_fe2_if.sop <= i_fmap_fe6_if.val ? ~o_fmap_fe2_if.sop : o_fmap_fe2_if.sop; + o_fmap_fe2_if.eop <= i_fmap_fe6_if.val ? 
o_fmap_fe2_if.sop : o_fmap_fe2_if.eop; + o_fmap_fe2_if.ctl <= i_fmap_fe6_if.ctl; + end + + if (~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy)) begin + o_mul_fe2_if.val <= i_fmap_fe2_if.val; + o_mul_fe2_if.sop <= out_cnt % 2 == 0; + o_mul_fe2_if.eop <= out_cnt % 2 == 1; + o_mul_fe2_if.ctl <= i_fmap_fe2_if.ctl; + case (out_cnt) inside + 0,1: begin + o_mul_fe2_if.dat[0 +: $bits(FE_TYPE)] <= i_fmap_fe2_if.dat; + o_mul_fe2_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= out_cnt == 0 ? 1 : 0; + end + 2,3: o_mul_fe2_if.dat <= {FROBENIUS_COEFF_FQ6_C1[i_fmap_fe2_if.ctl[CTL_BIT_POW +: 2]][out_cnt % 2], i_fmap_fe2_if.dat}; + 4,5: o_mul_fe2_if.dat <= {FROBENIUS_COEFF_FQ6_C2[i_fmap_fe2_if.ctl[CTL_BIT_POW +: 2]][out_cnt % 2], i_fmap_fe2_if.dat}; + endcase + out_cnt <= i_fmap_fe2_if.val ? out_cnt == 5 ? 0 : out_cnt + 1 : out_cnt; + end + + + if (~o_fmap_fe6_if.val || (o_fmap_fe6_if.val && o_fmap_fe6_if.rdy)) begin + o_fmap_fe6_if.val <= i_mul_fe2_if.val; + o_fmap_fe6_if.sop <= out_cnt1 == 0; + o_fmap_fe6_if.eop <= out_cnt1 == 5; + o_fmap_fe6_if.ctl <= i_mul_fe2_if.ctl; + o_fmap_fe6_if.dat <= i_mul_fe2_if.dat; + out_cnt1 <= i_mul_fe2_if.val ? out_cnt1 == 5 ? 
0 : out_cnt1 + 1 : out_cnt1; + end + end +end + +endmodule \ No newline at end of file diff --git a/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv b/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv index b39f53b..45a4cff 100644 --- a/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv +++ b/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv @@ -26,7 +26,7 @@ package bls12_381_pkg; fe_t Gx = 381'h17F1D3A73197D7942695638C4FA9AC0FC3688C4F9774B905A14E3A3F171BAC586C55E83FF97A1AEFFB3AF00ADB22C6BB; fe_t Gy = 381'h08B3F481E3AAA0F1A09E30ED741D8AE4FCF5E095D5D00AF600DB18CB2C04B3EDD03CC744A2888AE40CAA232946C5E7E1; - + localparam [63:0] ATE_X = 64'hd201000000010000; localparam ATE_X_START = 63; @@ -71,7 +71,47 @@ package bls12_381_pkg; typedef fe_t [1:0] fe2_t; typedef fe2_t [2:0] fe6_t; typedef fe6_t [1:0] fe12_t; + + // These are used in the final exponentiation of the pairing. + // We only list coeff needed for powers of 0,1,2,3 + parameter fe2_t FROBENIUS_COEFF_FQ12_C1 [3:0] = { + {381'h06af0e0437ff400b6831e36d6bd17ffe48395dabc2d3435e77f76e17009241c5ee67992f72ec05f4c81084fbede3cc09, + 381'h135203e60180a68ee2e9c448d77a2cd91c3dedd930b1cf60ef396489f61eb45e304466cf3e67fa0af1ee7b04121bdea2}, + {381'h0, + 381'h00000000000000005f19672fdf76ce51ba69c6076a0f77eaddb3a93be6f89688de17d813620a00022e01fffffffeffff}, + {381'h00fc3e2b36c4e03288e9e902231f9fb854a14787b6c7b36fec0c8ec971f63c5f282d5ac14d6c7ec22cf78a126ddc4af3, + 381'h1904d3bf02bb0667c231beb4202c0d1f0fd603fd3cbd5f4f7b2443d784bab9c4f67ea53d63e7813d8d0775ed92235fb8}, + {381'h0, + 381'h1}}; + + parameter fe2_t FROBENIUS_COEFF_FQ6_C1 [3:0] = { + {381'h1, + 381'h0}, + {381'h0, + 381'h00000000000000005f19672fdf76ce51ba69c6076a0f77eaddb3a93be6f89688de17d813620a00022e01fffffffefffe}, + {381'h1a0111ea397fe699ec02408663d4de85aa0d857d89759ad4897d29650fb85f9b409427eb4f49fffd8bfd00000000aaac, + 381'h0}, + {381'h0, + 381'h1}}; + + parameter fe2_t FROBENIUS_COEFF_FQ6_C2 [3:0] = { + {381'h0, + 
381'h1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaaa}, + {381'h0, + 381'h1a0111ea397fe699ec02408663d4de85aa0d857d89759ad4897d29650fb85f9b409427eb4f49fffd8bfd00000000aaac}, + {381'h0, + 381'h1a0111ea397fe699ec02408663d4de85aa0d857d89759ad4897d29650fb85f9b409427eb4f49fffd8bfd00000000aaad}, + {381'h0, + 381'h1}}; + + parameter fe_t FROBENIUS_COEFF_FQ2_C1 [1:0] = { + 381'h1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaaa, + 381'h000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001}; + + + + // Generator points for G2 fe2_t G2x = {381'h13e02b6052719f607dacd3a088274f65596bd0d09920b61ab5da61bbdc7f5049334cf11213945d57e5ac7d055d042b7e, 381'h024aa2b2f08f0a91260805272dc51051c6e47ad4fa403b02b4510b647ae3d1770bac0326a805bbefd48056c8c121bdb8}; @@ -326,7 +366,7 @@ package bls12_381_pkg; endfunction - function jb_point_t point_mult(logic [DAT_BITS-1:0] c, jb_point_t p); + function jb_point_t point_mult(input logic [DAT_BITS-1:0] c, jb_point_t p); jb_point_t result, addend; result = 0; addend = p; @@ -340,7 +380,7 @@ package bls12_381_pkg; return result; endfunction - function fp2_jb_point_t fp2_point_mult(logic [DAT_BITS-1:0] c, fp2_jb_point_t p); + function fp2_jb_point_t fp2_point_mult(input logic [DAT_BITS-1:0] c, fp2_jb_point_t p); fp2_jb_point_t result, addend; result = 0; addend = p; @@ -425,6 +465,50 @@ package bls12_381_pkg; fe6_mul_by_nonresidue[2] = a[1]; fe6_mul_by_nonresidue[0] = fe2_mul_by_nonresidue(a[2]); endfunction + + function fe6_t fe6_inv(fe6_t a); + fe2_t add_i0, add_i1, sub_i0, mul_i0; + fe6_inv[0] = fe2_mul_by_nonresidue(a[2]); + fe6_inv[0] = fe2_mul(fe6_inv[0], a[1]); + fe6_inv[0] = fe2_sub(0, fe6_inv[0]); + add_i0 = fe2_mul(a[0], a[0]); + fe6_inv[0] = fe2_add(add_i0, fe6_inv[0]); + + fe6_inv[1] = fe2_mul(a[2], a[2]); + fe6_inv[1] = fe2_mul_by_nonresidue(fe6_inv[1]); + sub_i0 = fe2_mul(a[0], a[1]); + fe6_inv[1] = 
fe2_sub(fe6_inv[1], sub_i0);
+
+    fe6_inv[2] = fe2_mul(a[1], a[1]);
+    sub_i0 = fe2_mul(a[2], a[0]);
+    fe6_inv[2] = fe2_sub(fe6_inv[2], sub_i0);
+
+    add_i0 = fe2_mul(a[2], fe6_inv[1]);
+    add_i1 = fe2_mul(a[1], fe6_inv[2]);
+    add_i1 = fe2_add(add_i0, add_i1);
+    add_i1 = fe2_mul_by_nonresidue(add_i1);
+    add_i0 = fe2_mul(a[0], fe6_inv[0]);
+    add_i1 = fe2_add(add_i1, add_i0);
+
+    mul_i0 = fe2_inv(add_i1);
+
+    fe6_inv[0] = fe2_mul(fe6_inv[0], mul_i0);
+    fe6_inv[1] = fe2_mul(fe6_inv[1], mul_i0);
+    fe6_inv[2] = fe2_mul(fe6_inv[2], mul_i0);
+
+  endfunction
+  // Inverse in Fp12: t = (a0^2 - v*a1^2)^-1, result = {-(a1*t), a0*t}
+  function fe12_t fe12_inv(fe12_t a);
+    fe6_t sub_i0, sub_i1;                          // Fp6 temporaries (were declared fe12_t, with an unused mul_i0)
+    sub_i0 = fe6_mul(a[0], a[0]);                  // a0^2
+    sub_i1 = fe6_mul(a[1], a[1]);                  // a1^2
+    sub_i1 = fe6_mul_by_nonresidue(sub_i1);        // v * a1^2
+    sub_i0 = fe6_sub(sub_i0,sub_i1);               // a0^2 - v * a1^2
+    sub_i0 = fe6_inv(sub_i0);                      // t
+    fe12_inv[0] = fe6_mul(a[0], sub_i0);
+    fe12_inv[1] = fe6_mul(a[1], sub_i0);
+    fe12_inv[1] = fe6_sub(0, fe12_inv[1]);         // negate the c1 coefficient
+  endfunction
 
   function fe6_t fe6_add(fe6_t a, b);
     for(int i = 0; i < 3; i++)
@@ -495,6 +579,20 @@ package bls12_381_pkg;
     bb = fe6_mul_by_nonresidue(bb);                // 7. bb = mnr(bb) [6]
     fe12_mul[0] = fe6_add(bb, aa);                 // 8. fe6_mul[0] = add(add_i0, bb) [0, 1, 7]
   endfunction
+  // Square in Fp12 (complex squaring): c0 = (a0 + a1)*(a0 + v*a1) - a0*a1 - v*a0*a1, c1 = 2*a0*a1
+  function fe12_t fe12_sqr(fe12_t a);
+    fe6_t sub_i1, mul_i0, mul_i1;
+    sub_i1 = fe6_mul(a[0], a[1]);                  // 0. ab = a0 * a1
+    mul_i0 = fe6_add(a[0], a[1]);                  // 1. a0 + a1
+    mul_i1 = fe6_mul_by_nonresidue(a[1]);          // v * a1
+    mul_i1 = fe6_add(mul_i1, a[0]);                // a0 + v * a1 (was mul_i1 + mul_i1, which gave 2 * v * a1)
+    fe12_sqr[0] = fe6_mul(mul_i1, mul_i0);         // (a0 + v * a1) * (a0 + a1)
+    fe12_sqr[0] = fe6_sub(fe12_sqr[0], sub_i1);    // ... - ab
+    fe12_sqr[1] = fe6_add(sub_i1, sub_i1);         // c1 = 2 * ab (was fe2_add, which only handles an Fp2 operand)
+    sub_i1 = fe6_mul_by_nonresidue(sub_i1);        // v * ab
+    fe12_sqr[0] = fe6_sub(fe12_sqr[0], sub_i1);    // c0 = a0^2 + v * a1^2
+
+  endfunction
 
 
   // This performs the miller loop
@@ -520,7 +618,12 @@ package bls12_381_pkg;
     end
 
   endtask
-  
+
+  task automatic ate_pairing(input af_point_t P, input fp2_af_point_t Q, ref fe12_t f);
+    miller_loop(P, Q, f);
+    final_exponent(f);
+  endtask;
+
   // This performs both the line evaluation and the doubling
   // Returns a sparse f12 element
   task automatic miller_double_step(ref fp2_jb_point_t R, input af_point_t P, ref fe12_t f);
@@ -656,30 +759,101 @@ package bls12_381_pkg;
     f = {{FE2_zero, t10, FE2_zero}, {FE2_zero, t1, t9}};
 
   endtask
-  
-  // Calculates the final exponent used in ate pairing
-  /*task automatic final_exponent(ref fe12_t f);
-    f = fe12_sub(0, f); // TODO can remove this?
-    
-    
-  endtask*/
-  
-  // Sparse multiplication by coefficients 0,1,4
-  function fe12_t f12_sparse_mul_014(fe12_t f, fe2_t c0, c1, c4);
-    fe6_t aa, bb;
-    fe2_t t;
-    aa = fe6_mul(f[0], {FE2_zero, c1, c0}); // TODO implement sparse fp6
-    bb = fe6_mul(f[1], {FE2_zero, c4, FE2_zero}); // TODO implement sparse fp6
-    t = fe2_add(c1, c4);
-    f[1] = fe6_add(f[1], f[0]);
-    f[1] = fe6_mul(f[1], {FE2_zero, t, c0});
-    f[1] = fe6_sub(f[1], aa);
-    f[1] = fe6_sub(f[1], bb);
-    f[0] = fe6_mul_by_nonresidue(bb);
-    f[0] = fe6_add(f[0], aa);
-    
-    return f;
+
+  function fe2_t fe2_fmap(input fe2_t a, input int pow);
+    fe2_fmap[0] = a[0];
+    fe2_fmap[1] = fe_mul(a[1], FROBENIUS_COEFF_FQ2_C1[pow % 2]);
   endfunction
+
+  function fe6_t fe6_fmap(input fe6_t a, input int pow);
+    fe6_fmap[0] = fe2_fmap(a[0], pow);
+    fe6_fmap[1] = fe2_fmap(a[1], pow);
+    fe6_fmap[2] = fe2_fmap(a[2], pow);
+    fe6_fmap[1] = fe2_mul(fe6_fmap[1], FROBENIUS_COEFF_FQ6_C1[pow % 6]);
+    fe6_fmap[2] = fe2_mul(fe6_fmap[2], FROBENIUS_COEFF_FQ6_C2[pow
% 6]); + endfunction + + + function fe12_t fe12_fmap(input fe12_t a, input int pow); + fe12_fmap[0] = fe6_fmap(a[0], pow); + fe12_fmap[1] = fe6_fmap(a[1], pow); + fe12_fmap[1][0] = fe2_mul(fe12_fmap[1][0], FROBENIUS_COEFF_FQ12_C1[pow % 12]); + fe12_fmap[1][1] = fe2_mul(fe12_fmap[1][1], FROBENIUS_COEFF_FQ12_C1[pow % 12]); + fe12_fmap[1][2] = fe2_mul(fe12_fmap[1][2], FROBENIUS_COEFF_FQ12_C1[pow % 12]); + endfunction + + // Max size is 1024 bit number + function fe12_t fe12_pow(input fe12_t a, input logic [1023:0] pow); + fe12_pow = FE12_one; + + while (pow != 0) begin + if (pow[0]) + fe12_pow = fe12_mul(fe12_pow, a); + a = fe12_mul(a, a); + pow = pow >> 1; + end + + fe12_pow[1] = fe6_sub(0, fe12_pow[1]); + endfunction + + // Calculates the final exponent used in ate pairing + task automatic final_exponent(ref fe12_t f); + fe12_t mul_i1, y0, y1, y2, y3, r, r_inv; + logic [63:0] bls_x; + bls_x = ATE_X; + + r = f; + r[1] = fe6_sub(0, r[1]); + r_inv = fe12_inv(r); + r = fe12_mul(f, r_inv); + + mul_i1 = fe12_fmap(r, 2); + + r = fe12_mul(mul_i1, r); + + y0 = fe12_mul(r, r); + + + y1 = fe12_pow(y0, bls_x); + + + bls_x = bls_x >> 1; + y2 = fe12_pow(y1, bls_x); + bls_x = bls_x << 1; + + y3 = r; + y3[1] = fe6_sub(0, y3[1]); + y1 = fe12_mul(y1, y3); + y1[1] = fe6_sub(0, y1[1]); + y1 = fe12_mul(y1, y2); + + y2 = fe12_pow(y1, bls_x); + y3 = fe12_pow(y2, bls_x); + + + + y1[1] = fe6_sub(0, y1[1]); + y3 = fe12_mul(y3, y1); + + y1[1] = fe6_sub(0, y1[1]); + y1 = fe12_fmap(y1, 3); + + y2 = fe12_fmap(y2, 2); + y1 = fe12_mul(y1, y2); + + y2 = fe12_pow(y3, bls_x); + + y2 = fe12_mul(y2, y0); + + y2 = fe12_mul(y2, r); + y1 = fe12_mul(y1, y2); + + y2 = fe12_fmap(y3, 1); + y1 = fe12_mul(y1, y2); + + f = y1; + endtask + function af_point_t to_affine(jb_point_t p); fe_t z_; diff --git a/zcash_fpga/src/tb/bls12_381_fmap_tb.sv b/zcash_fpga/src/tb/bls12_381_fmap_tb.sv new file mode 100644 index 0000000..9f7aafa --- /dev/null +++ b/zcash_fpga/src/tb/bls12_381_fmap_tb.sv @@ -0,0 +1,201 @@ +/* + 
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ +`timescale 1ps/1ps + +module bls12_381_fmap_tb (); + +import common_pkg::*; +import bls12_381_pkg::*; + +parameter type FE_TYPE = bls12_381_pkg::fe_t; +parameter P = bls12_381_pkg::P; + + +localparam CTL_BITS = 32; + +localparam CLK_PERIOD = 100; + +logic clk, rst; + +initial begin + rst = 0; + repeat(2) #(20*CLK_PERIOD) rst = ~rst; +end + +initial begin + clk = 0; + forever #CLK_PERIOD clk = ~clk; +end + +if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_o_if [2:0] (clk); +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_i_if [2:0] (clk); +if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_o_if (clk); +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_i_if (clk); +if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_o_if (clk); +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_i_if (clk); + +if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_o_if (clk); +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_i_if (clk); + +if_axi_stream #(.DAT_BYTS(($bits(FE_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) fmap_fe12_o_if (clk); +if_axi_stream #(.DAT_BYTS(($bits(FE_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) fmap_fe12_i_if (clk); + +ec_fp_mult_mod #( + .P ( P ), + 
.KARATSUBA_LVL ( 3 ), + .CTL_BITS ( CTL_BITS ) +) +ec_fp_mult_mod ( + .i_clk( clk ), + .i_rst( rst ), + .i_mul ( mul_fe_o_if[2] ), + .o_mul ( mul_fe_i_if[2] ) +); + +adder_pipe # ( + .BITS ( bls12_381_pkg::DAT_BITS ), + .P ( P ), + .CTL_BITS ( CTL_BITS ), + .LEVEL ( 2 ) +) +adder_pipe ( + .i_clk ( clk ), + .i_rst ( rst ), + .i_add ( add_fe_o_if ), + .o_add ( add_fe_i_if ) +); + +subtractor_pipe # ( + .BITS ( bls12_381_pkg::DAT_BITS ), + .P ( P ), + .CTL_BITS ( CTL_BITS ), + .LEVEL ( 2 ) +) +subtractor_pipe ( + .i_clk ( clk ), + .i_rst ( rst ), + .i_sub ( sub_fe_o_if ), + .o_sub ( sub_fe_i_if ) +); + +ec_fe2_mul #( + .FE_TYPE ( FE_TYPE ), + .CTL_BITS ( CTL_BITS ) +) +ec_fe2_mul ( + .i_clk ( clk ), + .i_rst ( rst ), + .o_mul_fe2_if ( mul_fe2_i_if ), + .i_mul_fe2_if ( mul_fe2_o_if ), + .o_add_fe_if ( add_fe_o_if ), + .i_add_fe_if ( add_fe_i_if ), + .o_sub_fe_if ( sub_fe_o_if), + .i_sub_fe_if ( sub_fe_i_if ), + .o_mul_fe_if ( mul_fe_o_if[1] ), + .i_mul_fe_if ( mul_fe_i_if[1] ) +); + +resource_share # ( + .NUM_IN ( 2 ), + .DAT_BITS ( 2*$bits(FE_TYPE) ), + .CTL_BITS ( CTL_BITS ), + .OVR_WRT_BIT ( 4 ), + .PIPELINE_IN ( 0 ), + .PIPELINE_OUT ( 0 ) +) +resource_share_fe_mul ( + .i_clk ( clk ), + .i_rst ( rst ), + .i_axi ( mul_fe_o_if[1:0] ), + .o_res ( mul_fe_o_if[2] ), + .i_res ( mul_fe_i_if[2] ), + .o_axi ( mul_fe_i_if[1:0] ) +); + +bls12_381_fe12_fmap_wrapper #( + .FE_TYPE ( FE_TYPE ), + .CTL_BITS ( CTL_BITS ), + .CTL_BIT_POW ( 0 ) +) +bls12_381_fe12_fmap_wrapper ( + .i_clk ( clk ), + .i_rst ( rst ), + .o_fmap_fe12_if ( fmap_fe12_o_if ), + .i_fmap_fe12_if ( fmap_fe12_i_if ), + .o_mul_fe2_if ( mul_fe2_o_if ), + .i_mul_fe2_if ( mul_fe2_i_if ), + .o_mul_fe_if ( mul_fe_o_if[0] ), + .i_mul_fe_if ( mul_fe_i_if[0] ) +); + +task test(); + fe12_t f, f_exp, f_out; + integer signed get_len; + integer pow; + logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat, dat_in; + + $display("Running test ..."); + dat_in = 0; + + for (int pow = 0; pow < 4; pow++) begin + for (int i = 0; i < 2; i++) 
+ for (int j = 0; j < 3; j++) + for (int k = 0; k < 2; k++) begin + dat_in[(i*6+j*2+k)*384 +: $bits(FE_TYPE)] = random_vector(384/8) % P; + f[i][j][k] = dat_in[(i*6+j*2+k)*384 +: $bits(FE_TYPE)]; + end + + f_exp = fe12_fmap(f, pow); + + fork + fmap_fe12_i_if.put_stream(dat_in, 12*384/8, pow); + fmap_fe12_o_if.get_stream(get_dat, get_len, 0); + join + + for (int i = 0; i < 2; i++) + for (int j = 0; j < 3; j++) + for (int k = 0; k < 2; k++) + f_out[i][j][k] = get_dat[(i*6+j*2+k)*384 +: $bits(FE_TYPE)]; + + if (f_exp != f_out) begin + $display("Input was:"); + print_fe12(f); + $display("Output was:"); + print_fe12(f_out); + $display("Output Expected:"); + print_fe12(f_exp); + $fatal(1, "%m %t ERROR: output was wrong", $time); + end + $display("test OK with pow=%d", pow); + end + $display("test PASSED"); + +endtask + + + +initial begin + fmap_fe12_i_if.reset_source(); + fmap_fe12_o_if.rdy = 0; + #10ns; + + test(); + + #50ns $finish(); +end + +endmodule \ No newline at end of file