Updates for bls12-381 Fp12 inversion and wrapper files.

This commit is contained in:
bsdevlin 2019-08-10 23:31:58 +08:00
parent 3faee7b780
commit 891dd214dc
10 changed files with 1769 additions and 69 deletions

View File

@ -0,0 +1,318 @@
/*
This provides the interface to perform
Fp^12 inverse
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Streaming Fp12 inversion scheduler.
//
// The 12 FE_TYPE coefficients of the operand are received one per beat on
// i_inv_fe12_if and stored as two 6-coefficient halves a[0] (first six beats)
// and a[1] (last six beats). The module then issues the following sequence of
// operations to the external arithmetic units, tagging every request with an
// equation index in ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] so the returning
// result can be routed to the right temporary:
//
//   eq 0: t[0] = a[0] * a[0]        (mul)
//   eq 1: t[1] = a[1] * a[1]        (mul)
//   eq 2: t[1] = mnr(t[1])          (multiply by non-residue)
//   eq 3: t[0] = t[0] - t[1]        (sub)
//   eq 4: t[0] = inv(t[0])          (6-coefficient inverse)
//   eq 5: t[1] = a[0] * t[0]        (mul)
//   eq 6: t[0] = a[1] * t[0]        (mul)
//   eq 7: t[0] = 0 - t[0]           (sub)
//
// The result is streamed out on o_inv_fe12_if as t[1] (6 beats) followed by
// t[0] (6 beats).
//
// eq_wait[n] is set when equation n has been issued, eq_val[n] when its last
// result beat has been received. Subtractor operand order is assumed to be
// (low half) - (high half) of dat -- TODO confirm against the subtractor used.
module ec_fe12_inv_s
#(
  parameter type FE_TYPE,
  parameter OVR_WRT_BIT = 8 // Starting at this bit, NUM_OVR_WRT_BIT (3) ctl bits carry the internal equation index
)(
  input i_clk, i_rst,
  // Interface to multiplier (mod P): 6 beats per operation, dat = {b[i], a[i]}
  if_axi_stream.source o_mul_fe6_if,
  if_axi_stream.sink   i_mul_fe6_if,
  // Interface to subtractor (mod P): 6 beats per operation, dat = {b[i], a[i]}
  if_axi_stream.source o_sub_fe_if,
  if_axi_stream.sink   i_sub_fe_if,
  // Interface to multiply by non-residue: 6 beats per operation
  if_axi_stream.source o_mnr_fe6_if,
  if_axi_stream.sink   i_mnr_fe6_if,
  // Interface to 6-coefficient inverse (mod P): 6 beats per operation
  if_axi_stream.source o_inv_fe6_if,
  if_axi_stream.sink   i_inv_fe6_if,
  // Fp12 inverse: operand arrives on i_inv_fe12_if, result leaves on o_inv_fe12_if (12 beats each)
  if_axi_stream.source o_inv_fe12_if,
  if_axi_stream.sink   i_inv_fe12_if
);

localparam NUM_OVR_WRT_BIT = 3;

// See the equation table in the module header.
// NOTE(review): the original comment pointed at bls12_381.pkg::fe6_inv();
// presumably the matching software model is fe12_inv() -- confirm.
FE_TYPE [1:0][5:0] t;      // Temporaries t[0], t[1]
FE_TYPE [1:0][5:0] a;      // Latched operand halves
logic [7:0] eq_val, eq_wait;
logic [2:0] mul_cnt, sub_cnt, mnr_cnt, inv_cnt; // Beat counters for outgoing requests
logic mul_en, sub_en, mnr_en, inv_en;
logic [2:0] nxt_mul, nxt_mnr, nxt_sub, nxt_inv; // nxt_inv is unused (only one inverse equation exists)
logic [3:0] out_cnt;       // Beat counter for the 12-beat result
logic rdy_l;               // Set once a full operand has been latched

always_ff @ (posedge i_clk) begin
  if (i_rst) begin
    o_inv_fe12_if.reset_source();
    o_mnr_fe6_if.reset_source();
    o_mul_fe6_if.reset_source();
    o_inv_fe6_if.reset_source();
    o_sub_fe_if.reset_source();
    i_inv_fe12_if.rdy <= 0;
    i_mul_fe6_if.rdy <= 0;
    i_sub_fe_if.rdy <= 0;
    i_mnr_fe6_if.rdy <= 0;
    i_inv_fe6_if.rdy <= 0;
    eq_val <= 0;
    eq_wait <= 0;
    rdy_l <= 0;
    t <= 0;
    a <= 0;
    {out_cnt, mul_cnt, sub_cnt, mnr_cnt, inv_cnt} <= 0;
    {nxt_mul, nxt_mnr, nxt_sub, nxt_inv} <= 0;
    {mul_en, sub_en, mnr_en, inv_en} <= 0;
  end else begin

    // Results are always accepted
    i_mul_fe6_if.rdy <= 1;
    i_inv_fe6_if.rdy <= 1;
    i_sub_fe_if.rdy <= 1;
    i_mnr_fe6_if.rdy <= 1;

    // Default: drop val once the beat was taken; the issue logic below may set it again
    if (o_inv_fe12_if.rdy) o_inv_fe12_if.val <= 0;
    if (o_mul_fe6_if.rdy) o_mul_fe6_if.val <= 0;
    if (o_sub_fe_if.rdy) o_sub_fe_if.val <= 0;
    if (o_mnr_fe6_if.rdy) o_mnr_fe6_if.val <= 0;
    if (o_inv_fe6_if.rdy) o_inv_fe6_if.val <= 0;

    // Idle units look for the next equation whose dependencies are met
    if (~sub_en) get_next_sub();
    if (~mul_en) get_next_mul();
    if (~mnr_en) get_next_mnr();
    if (~inv_en) get_next_inv();

    if (rdy_l == 0) i_inv_fe12_if.rdy <= 1;

    // Stream out the result: t[1] on beats 0..5 (after eq 5), t[0] on beats 6..11 (after eq 7)
    if (~o_inv_fe12_if.val || (o_inv_fe12_if.val && o_inv_fe12_if.rdy)) begin
      o_inv_fe12_if.sop <= out_cnt == 0;
      o_inv_fe12_if.eop <= out_cnt == 11;
      if (eq_val[5] && out_cnt < 6) begin
        o_inv_fe12_if.val <= 1;
        out_cnt <= out_cnt + 1;
        o_inv_fe12_if.dat <= t[1][out_cnt%6];
      end else
      if (eq_val[7] && out_cnt >= 6) begin
        o_inv_fe12_if.val <= 1;
        out_cnt <= out_cnt + 1;
        o_inv_fe12_if.dat <= t[0][out_cnt%6];
      end

      // Last beat is being loaded - return to the idle state for the next operand.
      // This block comes after the out_cnt increment above so the clear wins.
      if (out_cnt == 11) begin
        eq_val <= 0;
        eq_wait <= 0;
        rdy_l <= 0;
        t <= 0;
        a <= 0;
        // mnr_cnt added for consistency with the reset branch
        {out_cnt, mul_cnt, sub_cnt, mnr_cnt, inv_cnt} <= 0;
        {nxt_mul, nxt_mnr, nxt_sub, nxt_inv} <= 0;
        {mul_en, sub_en, mnr_en, inv_en} <= 0;
      end
    end

    // Latch input (shifted in so the first beat ends up in a[0][0])
    if (i_inv_fe12_if.rdy && i_inv_fe12_if.val) begin
      a <= {i_inv_fe12_if.dat, a[1], a[0][5:1]};
      if (i_inv_fe12_if.eop) begin
        i_inv_fe12_if.rdy <= 0;
        rdy_l <= 1;
        // Fix: carry the request's ctl through to the result stream. Previously
        // only o_inv_fe6_if.ctl was written here, so o_inv_fe12_if.ctl stayed at
        // its reset value.
        o_inv_fe12_if.ctl <= i_inv_fe12_if.ctl;
        // Kept from the original so requests to the sub-inverter carry the same
        // upper ctl bits as before; fe6_inv() overwrites the equation-index field.
        o_inv_fe6_if.ctl <= i_inv_fe12_if.ctl;
      end
    end

    // Check any results from multiplier
    if (i_mul_fe6_if.val && i_mul_fe6_if.rdy) begin
      if (i_mul_fe6_if.eop) eq_val[i_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1;
      case(i_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside
        0: t[0] <= {i_mul_fe6_if.dat, t[0][5:1]};
        1: t[1] <= {i_mul_fe6_if.dat, t[1][5:1]};
        5: t[1] <= {i_mul_fe6_if.dat, t[1][5:1]};
        6: t[0] <= {i_mul_fe6_if.dat, t[0][5:1]};
        default: o_inv_fe12_if.err <= 1;
      endcase
    end

    // Check any results from mnr
    if (i_mnr_fe6_if.val && i_mnr_fe6_if.rdy) begin
      if(i_mnr_fe6_if.eop) eq_val[i_mnr_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1;
      case(i_mnr_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside
        2: t[1] <= {i_mnr_fe6_if.dat, t[1][5:1]};
        default: o_inv_fe12_if.err <= 1;
      endcase
    end

    // Check any results from sub
    if (i_sub_fe_if.val && i_sub_fe_if.rdy) begin
      if(i_sub_fe_if.eop) eq_val[i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1;
      case(i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside
        3: t[0] <= {i_sub_fe_if.dat, t[0][5:1]};
        7: t[0] <= {i_sub_fe_if.dat, t[0][5:1]};
        default: o_inv_fe12_if.err <= 1;
      endcase
    end

    // Check any results from the 6-coefficient inverse
    if (i_inv_fe6_if.val && i_inv_fe6_if.rdy) begin
      if (i_inv_fe6_if.eop) eq_val[i_inv_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1;
      case(i_inv_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside
        4: t[0] <= {i_inv_fe6_if.dat, t[0][5:1]};
        default: o_inv_fe12_if.err <= 1;
      endcase
    end

    // Issue new multiplies
    if (mul_en)
      case(nxt_mul)
        0: fe6_multiply(0, a[0], a[0]);
        1: fe6_multiply(1, a[1], a[1]);
        5: fe6_multiply(5, a[0], t[0]);
        6: fe6_multiply(6, a[1], t[0]);
      endcase

    // Issue new sub
    if (sub_en)
      case(nxt_sub)
        3: fe6_subtraction(3, t[0], t[1]);
        7: fe6_subtraction(7, 0, t[0]);
      endcase

    // Issue new mnr
    if (mnr_en)
      case(nxt_mnr)
        2: fe6_mnr(2, t[1]);
      endcase

    // Issue new inv
    if (inv_en)
      fe6_inv(4, t[0]);

  end
end

// Task for subtractions: sends one beat {b[i], a[i]} per call while the port is free
task fe6_subtraction(input int unsigned ctl, input FE_TYPE [5:0] a, b);
  if (~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy)) begin
    o_sub_fe_if.val <= 1;
    o_sub_fe_if.sop <= sub_cnt == 0;
    o_sub_fe_if.eop <= sub_cnt == 5;
    o_sub_fe_if.dat[0 +: $bits(FE_TYPE)] <= a[sub_cnt];
    o_sub_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= b[sub_cnt];
    o_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
    eq_wait[ctl] <= 1;
    sub_cnt <= sub_cnt + 1;
    if (sub_cnt == 5) begin
      sub_cnt <= 0;
      get_next_sub();
    end
  end
endtask

// Task for using mult: sends one beat {b[i], a[i]} per call while the port is free
task fe6_multiply(input int unsigned ctl, input FE_TYPE [5:0] a, b);
  if (~o_mul_fe6_if.val || (o_mul_fe6_if.val && o_mul_fe6_if.rdy)) begin
    o_mul_fe6_if.val <= 1;
    o_mul_fe6_if.sop <= mul_cnt == 0;
    o_mul_fe6_if.eop <= mul_cnt == 5;
    o_mul_fe6_if.dat[0 +: $bits(FE_TYPE)] <= a[mul_cnt];
    o_mul_fe6_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= b[mul_cnt];
    o_mul_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
    eq_wait[ctl] <= 1;
    mul_cnt <= mul_cnt + 1;
    if (mul_cnt == 5) begin
      mul_cnt <= 0;
      get_next_mul();
    end
  end
endtask

// Task for using mnr: sends one beat a[i] per call while the port is free
task fe6_mnr(input int unsigned ctl, input FE_TYPE [5:0] a);
  if (~o_mnr_fe6_if.val || (o_mnr_fe6_if.val && o_mnr_fe6_if.rdy)) begin
    o_mnr_fe6_if.val <= 1;
    o_mnr_fe6_if.sop <= mnr_cnt == 0;
    o_mnr_fe6_if.eop <= mnr_cnt == 5;
    o_mnr_fe6_if.dat <= a[mnr_cnt];
    o_mnr_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
    eq_wait[ctl] <= 1;
    mnr_cnt <= mnr_cnt + 1;
    if (mnr_cnt == 5) begin
      mnr_cnt <= 0;
      get_next_mnr();
    end
  end
endtask

// Task for using inv: sends one beat a[i] per call while the port is free
task fe6_inv(input int unsigned ctl, input FE_TYPE [5:0] a);
  if (~o_inv_fe6_if.val || (o_inv_fe6_if.val && o_inv_fe6_if.rdy)) begin
    o_inv_fe6_if.val <= 1;
    o_inv_fe6_if.sop <= inv_cnt == 0;
    o_inv_fe6_if.eop <= inv_cnt == 5;
    o_inv_fe6_if.dat <= a[inv_cnt];
    o_inv_fe6_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
    eq_wait[ctl] <= 1;
    inv_cnt <= inv_cnt + 1;
    if (inv_cnt == 5) begin
      inv_cnt <= 0;
      get_next_inv();
    end
  end
endtask

// Select the next multiply whose inputs are available; mul_en is cleared when none is
task get_next_mul();
  mul_en <= 1;
  if(~eq_wait[0] && rdy_l)
    nxt_mul <= 0;
  else if(~eq_wait[1] && rdy_l)
    nxt_mul <= 1;
  else if(~eq_wait[5] && eq_val[4])
    nxt_mul <= 5;
  else if(~eq_wait[6] && eq_val[4] && eq_wait[5]) // eq 5 must have been issued first
    nxt_mul <= 6;
  else
    mul_en <= 0;
endtask

// Select the next subtraction whose inputs are available
task get_next_sub();
  sub_en <= 1;
  if(~eq_wait[3] && eq_val[0] && eq_val[2])
    nxt_sub <= 3;
  else if(~eq_wait[7] && eq_val[6])
    nxt_sub <= 7;
  else
    sub_en <= 0;
endtask

// Select the next mnr whose inputs are available
task get_next_mnr();
  mnr_en <= 1;
  if(~eq_wait[2] && eq_val[1])
    nxt_mnr <= 2;
  else
    mnr_en <= 0;
endtask

// Enable the single inverse (eq 4) once eq 3 has completed and eq 4 was not issued yet
task get_next_inv();
  if(~eq_wait[4] && eq_val[3])
    inv_en <= 1;
  else
    inv_en <= 0;
endtask

endmodule

View File

@ -0,0 +1,231 @@
/*
This provides the interface to perform Fp2 inversion
Input is streamed one Fp coefficient per clock starting at c0 (i.e. clock 0 = a.c0, clock 1 = a.c1)
_s in the name represents the input is a stream starting at c0.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Streaming Fp2 inversion.
//
// The two coefficients of the operand are received on i_inv_fe2_if (first
// beat ends up in t[0], second beat in t[1]). The module then issues, using
// the external base-field units:
//
//   mul 0: t[2] = t[0] * t[0]
//   mul 1: t[3] = t[1] * t[1]
//   add  : t[2] + t[3]            (result is forwarded straight to the inverter)
//   inv  : t[2] = inv(add result)
//   sub  : t[1] = 0 - t[1]        (issued once the add has been issued)
//   mul 2: t[0] * t[2]            -> first output beat
//   mul 3: t[1] * t[2]            -> second output beat
//
// ctl[OVR_WRT_BIT +: NUM_OVR_WRT] of each request carries the per-unit
// operation counter so multiplier results can be told apart.
module ec_fe2_inv_s
#(
  parameter type FE_TYPE,     // Base field element type
  parameter OVR_WRT_BIT = 8   // NUM_OVR_WRT (2) ctl bits are used starting at this bit
)(
  input i_clk, i_rst,
  // Fp2 inverse: result out / operand in, FE_TYPE data width
  if_axi_stream.source o_inv_fe2_if,
  if_axi_stream.sink   i_inv_fe2_if,
  // Interface to FE_TYPE inverter (mod P) FE_TYPE data width
  if_axi_stream.source o_inv_fe_if,
  if_axi_stream.sink   i_inv_fe_if,
  // Interface to FE_TYPE mul (mod P) 2*FE_TYPE data width
  if_axi_stream.source o_mul_fe_if,
  if_axi_stream.sink   i_mul_fe_if,
  // Interface to FE_TYPE add (mod P) 2*FE_TYPE data width
  if_axi_stream.source o_add_fe_if,
  if_axi_stream.sink   i_add_fe_if,
  // Interface to FE_TYPE sub (mod P) 2*FE_TYPE data width
  if_axi_stream.source o_sub_fe_if,
  if_axi_stream.sink   i_sub_fe_if
);

localparam NUM_OVR_WRT = 2;

FE_TYPE [3:0] t; // Temp storage
logic [2:0] add_cnt, sub_cnt, inv_cnt, mul_cnt, out_cnt;
logic start, t_val, t1_sub_val;

// Back-pressure on the result ports. Results that are forwarded directly to
// another port (add -> inverter, mul 2/3 -> output) are only accepted when that
// port can take a new beat.
always_comb begin
  i_inv_fe2_if.rdy = ~start;
  i_inv_fe_if.rdy = start;
  i_add_fe_if.rdy = (~o_inv_fe_if.val || (o_inv_fe_if.val && o_inv_fe_if.rdy));
  i_sub_fe_if.rdy = 1;
  case (i_mul_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT]) inside
    0,1: i_mul_fe_if.rdy = 1;
    2,3: i_mul_fe_if.rdy = (~o_inv_fe2_if.val || (o_inv_fe2_if.val && o_inv_fe2_if.rdy));
    default: i_mul_fe_if.rdy = 0;
  endcase
end

always_ff @ (posedge i_clk) begin
  if (i_rst) begin
    o_inv_fe2_if.reset_source();
    o_add_fe_if.reset_source();
    o_sub_fe_if.reset_source();
    o_mul_fe_if.reset_source();
    o_inv_fe_if.reset_source();
    t <= 0;
    t_val <= 0;
    t1_sub_val <= 0;
    // Fix: out_cnt was missing from the reset list although it drives sop/eop
    // and the output case statement below.
    {add_cnt, sub_cnt, inv_cnt, mul_cnt, out_cnt} <= 0;
    start <= 0;
  end else begin

    // Default: drop val once the beat was taken; the issue tasks may set it again
    if (o_inv_fe2_if.rdy) o_inv_fe2_if.val <= 0;
    if (o_add_fe_if.rdy) o_add_fe_if.val <= 0;
    if (o_sub_fe_if.rdy) o_sub_fe_if.val <= 0;
    if (o_mul_fe_if.rdy) o_mul_fe_if.val <= 0;
    if (o_inv_fe_if.rdy) o_inv_fe_if.val <= 0;

    if (i_inv_fe2_if.val && i_inv_fe2_if.rdy) begin
      if(i_inv_fe2_if.eop) start <= 1;
      if(i_inv_fe2_if.sop) begin
        o_inv_fe2_if.ctl <= i_inv_fe2_if.ctl;
      end
      t[1:0] <= {i_inv_fe2_if.dat, t[1]}; // Latch input
    end

    // Latch t0 and t1 (the two squares)
    if (i_mul_fe_if.val && i_mul_fe_if.rdy && i_mul_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 0) begin
      t[2] <= i_mul_fe_if.dat;
    end

    if (i_mul_fe_if.val && i_mul_fe_if.rdy && i_mul_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 1) begin
      t[3] <= i_mul_fe_if.dat;
      t_val <= 1;
    end

    // Inverse of the sum replaces t[2]; t_val is reused to flag it
    if (i_inv_fe_if.val && i_inv_fe_if.rdy) begin
      t[2] <= i_inv_fe_if.dat;
      t_val <= 1;
    end

    // Negated second coefficient replaces t[1]
    if (i_sub_fe_if.val && i_sub_fe_if.rdy) begin
      t[1] <= i_sub_fe_if.dat;
      t1_sub_val <= 1;
    end

    // Issue new operations
    case (mul_cnt) inside
      0: fe_mul(start, t[0], t[0]);
      1: fe_mul(1, t[1], t[1]);
      2: fe_mul(inv_cnt >= 1 && t_val, t[0], t[2]);
      3: fe_mul(t1_sub_val, t[1], t[2]);
    endcase

    case (add_cnt) inside
      0: begin
        fe_add(t_val, t[2], t[3]);
        if (t_val) t_val <= 0; // Cleared so it can flag the inverse result later
      end
    endcase

    case (inv_cnt) inside
      0: begin
        fe_inv(i_add_fe_if.val, i_add_fe_if.dat);
      end
    endcase

    case (sub_cnt) inside
      0: begin
        fe_sub(add_cnt >= 1, 0, t[1]);
      end
    endcase

    // Final output flow: the mul 2 and mul 3 results are forwarded as output beats 0 and 1
    if (~o_inv_fe2_if.val || (o_inv_fe2_if.val && o_inv_fe2_if.rdy)) begin
      o_inv_fe2_if.sop <= out_cnt == 0;
      o_inv_fe2_if.eop <= out_cnt == 1;
      case (out_cnt) inside
        0: begin
          o_inv_fe2_if.dat <= i_mul_fe_if.dat;
          if (i_mul_fe_if.val && i_mul_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] == 2) begin
            o_inv_fe2_if.val <= 1;
            out_cnt <= out_cnt + 1;
          end
        end
        1: begin
          o_inv_fe2_if.dat <= i_mul_fe_if.dat;
          if (i_mul_fe_if.val) begin
            o_inv_fe2_if.val <= 1;
            out_cnt <= out_cnt + 1;
          end
        end
        // Both beats sent - return to idle for the next operand
        default: begin
          t <= 0;
          inv_cnt <= 0;
          mul_cnt <= 0;
          add_cnt <= 0;
          sub_cnt <= 0;
          out_cnt <= 0;
          start <= 0;
          t_val <= 0;
          t1_sub_val <= 0;
        end
      endcase
    end
  end
end

// Task for fe_mul: single-beat request {b, a}, tagged with the current mul_cnt
task fe_mul(input logic val, input logic [$bits(FE_TYPE)-1:0] a, b);
  if (~o_mul_fe_if.val || (o_mul_fe_if.val && o_mul_fe_if.rdy)) begin
    o_mul_fe_if.sop <= 1;
    o_mul_fe_if.eop <= 1;
    o_mul_fe_if.dat <= {b, a};
    o_mul_fe_if.val <= val;
    o_mul_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= mul_cnt;
    if(val) mul_cnt <= mul_cnt + 1;
  end
endtask

// Task for fe_add: single-beat request {b, a}, tagged with the current add_cnt
task fe_add(input logic val, input logic [$bits(FE_TYPE)-1:0] a, b);
  if (~o_add_fe_if.val || (o_add_fe_if.val && o_add_fe_if.rdy)) begin
    o_add_fe_if.sop <= 1;
    o_add_fe_if.eop <= 1;
    o_add_fe_if.dat <= {b, a};
    o_add_fe_if.val <= val;
    o_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= add_cnt;
    if(val) add_cnt <= add_cnt + 1;
  end
endtask

// Task for fe_sub: single-beat request {b, a}, tagged with the current sub_cnt
task fe_sub(input logic val, input logic [$bits(FE_TYPE)-1:0] a, b);
  if (~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy)) begin
    o_sub_fe_if.sop <= 1;
    o_sub_fe_if.eop <= 1;
    o_sub_fe_if.dat <= {b, a};
    o_sub_fe_if.val <= val;
    o_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= sub_cnt;
    if(val) sub_cnt <= sub_cnt + 1;
  end
endtask

// Task for fe_inv: single-beat request, tagged with the current inv_cnt
task fe_inv(input logic val, input logic [$bits(FE_TYPE)-1:0] a);
  if (~o_inv_fe_if.val || (o_inv_fe_if.val && o_inv_fe_if.rdy)) begin
    o_inv_fe_if.sop <= 1;
    o_inv_fe_if.eop <= 1;
    o_inv_fe_if.dat <= a;
    o_inv_fe_if.val <= val;
    o_inv_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT] <= inv_cnt;
    if(val) inv_cnt <= inv_cnt + 1;
  end
endtask

endmodule

View File

@ -0,0 +1,418 @@
/*
This provides the interface to perform
Fp^6 inverse
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Streaming Fp6 inversion scheduler.
//
// The operand arrives on i_inv_fe6_if as 6 FE_TYPE beats and is stored as three
// FE2_TYPE values a[0], a[1], a[2] (first two beats end up in a[0]). Each
// operation below is issued to an external unit as a 2-beat request tagged with
// its equation index in ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]; the returning
// result is written to the listed temporary:
//
//   eq  0: t[3] = mnr(a[2])          eq 11: t[5] = t[5] - t[2]
//   eq  1: t[3] = t[3] * a[1]        eq 12: t[0] = a[2] * t[4]
//   eq  2: t[3] = 0 - t[3]           eq 13: t[1] = a[1] * t[5]
//   eq  3: t[0] = a[0] * a[0]        eq 14: t[1] = t[0] + t[1]
//   eq  4: t[3] = t[0] + t[3]        eq 15: t[1] = mnr(t[1])
//   eq  5: t[4] = a[2] * a[2]        eq 16: t[0] = a[0] * t[3]
//   eq  6: t[4] = mnr(t[4])          eq 17: t[1] = t[1] + t[0]
//   eq  7: t[2] = a[0] * a[1]        eq 18: t[1] = inv(t[1])
//   eq  8: t[4] = t[4] - t[2]        eq 19: t[3] = t[3] * t[1]
//   eq  9: t[5] = a[1] * a[1]        eq 20: t[4] = t[4] * t[1]
//   eq 10: t[2] = a[2] * a[0]        eq 21: t[5] = t[5] * t[1]
//
// The result is streamed out as t[3], t[4], t[5] (two beats each).
// eq_wait[n] is set when equation n has been issued, eq_val[n] when its last
// result beat has been received.
// Subtractor operand order is assumed to be (low half) - (high half) of dat
// -- TODO confirm against the subtractor used.
module ec_fe6_inv_s
#(
parameter type FE_TYPE,
parameter type FE2_TYPE,
parameter OVR_WRT_BIT = 8 // From this bit 5 bits are used for internal control
)(
input i_clk, i_rst,
// Interface to FE2_TYPE multiplier (mod P)
if_axi_stream.source o_mul_fe2_if,
if_axi_stream.sink i_mul_fe2_if,
// Interface to FE2_TYPE adder (mod P)
if_axi_stream.source o_add_fe_if,
if_axi_stream.sink i_add_fe_if,
// Interface to FE2_TYPE subtractor (mod P)
if_axi_stream.source o_sub_fe_if,
if_axi_stream.sink i_sub_fe_if,
// Interface to FE2_TYPE multiply by non-residue
if_axi_stream.source o_mnr_fe2_if,
if_axi_stream.sink i_mnr_fe2_if,
// Interface to FE2_TYPE inverse (mod P)
if_axi_stream.source o_inv_fe2_if,
if_axi_stream.sink i_inv_fe2_if,
// Interface to FE6_TYPE inverse (mod P): result out / operand in
if_axi_stream.source o_inv_fe6_if,
if_axi_stream.sink i_inv_fe6_if
);
localparam NUM_OVR_WRT_BIT = 5;
// Operations follow the equation table in the module header
// (original comment: formula in bls12_381.pkg::fe6_inv())
FE2_TYPE [5:0] t; // Temporaries
FE2_TYPE [2:0] a; // Latched operand
logic [21:0] eq_val, eq_wait;
// 1-bit beat counters: every request is two beats long
logic mul_cnt, add_cnt, sub_cnt, mnr_cnt, inv_cnt;
logic mul_en, add_en, sub_en, mnr_en, inv_en;
// nxt_fe2_inv is never assigned outside of reset (only one inverse equation exists)
logic [4:0] nxt_fe2_mul, nxt_fe2_mnr, nxt_fe_add, nxt_fe_sub, nxt_fe2_inv;
logic [2:0] out_cnt; // Beat counter for the 6-beat result
logic rdy_l; // Set once a full operand has been latched
always_ff @ (posedge i_clk) begin
if (i_rst) begin
o_inv_fe6_if.reset_source();
o_mnr_fe2_if.reset_source();
o_mul_fe2_if.reset_source();
o_inv_fe2_if.reset_source();
o_sub_fe_if.reset_source();
o_add_fe_if.reset_source();
i_inv_fe6_if.rdy <= 0;
i_mul_fe2_if.rdy <= 0;
i_sub_fe_if.rdy <= 0;
i_add_fe_if.rdy <= 0;
i_mnr_fe2_if.rdy <= 0;
i_inv_fe2_if.rdy <= 0;
eq_val <= 0;
eq_wait <= 0;
rdy_l <= 0;
t <= 0;
a <= 0;
{out_cnt, mul_cnt, add_cnt, sub_cnt, mnr_cnt, inv_cnt} <= 0;
{nxt_fe2_mul, nxt_fe2_mnr, nxt_fe_add, nxt_fe_sub, nxt_fe2_inv} <= 0;
{mul_en, add_en, sub_en, mnr_en, inv_en} <= 0;
end else begin
// Results are always accepted
i_mul_fe2_if.rdy <= 1;
i_inv_fe2_if.rdy <= 1;
i_sub_fe_if.rdy <= 1;
i_add_fe_if.rdy <= 1;
i_mnr_fe2_if.rdy <= 1;
// Default: drop val once the beat was taken; the issue tasks below may set it again
if (o_inv_fe6_if.rdy) o_inv_fe6_if.val <= 0;
if (o_mul_fe2_if.rdy) o_mul_fe2_if.val <= 0;
if (o_sub_fe_if.rdy) o_sub_fe_if.val <= 0;
if (o_add_fe_if.rdy) o_add_fe_if.val <= 0;
if (o_mnr_fe2_if.rdy) o_mnr_fe2_if.val <= 0;
if (o_inv_fe2_if.rdy) o_inv_fe2_if.val <= 0;
// Idle units look for the next equation whose dependencies are met
if (~sub_en) get_next_sub();
if (~add_en) get_next_add();
if (~mul_en) get_next_fe2_mul();
if (~mnr_en) get_next_fe2_mnr();
if (~inv_en) get_next_fe2_inv();
if (rdy_l == 0) i_inv_fe6_if.rdy <= 1;
// Stream out the result: t[3] (eq 19), t[4] (eq 20), t[5] (eq 21), two beats each.
// The three conditions are mutually exclusive through out_cnt/2.
if (~o_inv_fe6_if.val || (o_inv_fe6_if.val && o_inv_fe6_if.rdy)) begin
o_inv_fe6_if.sop <= out_cnt == 0;
o_inv_fe6_if.eop <= out_cnt == 5;
if (eq_val[19] && out_cnt/2 == 0) begin
o_inv_fe6_if.val <= 1;
out_cnt <= out_cnt + 1;
o_inv_fe6_if.dat <= t[3][out_cnt%2];
end
if (eq_val[20] && out_cnt/2 == 1) begin
o_inv_fe6_if.val <= 1;
out_cnt <= out_cnt + 1;
o_inv_fe6_if.dat <= t[4][out_cnt%2];
end
if (eq_val[21] && out_cnt/2 == 2) begin
o_inv_fe6_if.val <= 1;
out_cnt <= out_cnt + 1;
o_inv_fe6_if.dat <= t[5][out_cnt%2];
end
// Last beat is being loaded - return to idle. Placed after the increments
// above so these assignments win.
// NOTE(review): mnr_cnt is not in this list although it is in the reset
// list; it is a 1-bit counter that has wrapped to 0 after every complete
// 2-beat request, so this looks harmless -- confirm.
if (out_cnt == 5) begin
eq_val <= 0;
eq_wait <= 0;
rdy_l <= 0;
t <= 0;
a <= 0;
{out_cnt, mul_cnt, add_cnt, sub_cnt, inv_cnt} <= 0;
{nxt_fe2_mul, nxt_fe_add, nxt_fe_sub, nxt_fe2_mnr, nxt_fe2_inv} <= 0;
{mul_en, add_en, sub_en, mnr_en, inv_en} <= 0;
end
end
// Latch input (shifted in one FE_TYPE per beat so the first beat ends up in a[0][0])
if (i_inv_fe6_if.rdy && i_inv_fe6_if.val) begin
a <= {i_inv_fe6_if.dat, a[2:1], a[0][1]};
if (i_inv_fe6_if.eop) begin
i_inv_fe6_if.rdy <= 0;
rdy_l <= 1;
// Carry the request's ctl through to the result stream
o_inv_fe6_if.ctl <= i_inv_fe6_if.ctl;
end
end
// Check any results from multiplier
// (eop is used as the coefficient index: first beat -> [0], last beat -> [1])
if (i_mul_fe2_if.val && i_mul_fe2_if.rdy) begin
if (i_mul_fe2_if.eop) eq_val[i_mul_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1;
case(i_mul_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside
1: t[3][i_mul_fe2_if.eop] <= i_mul_fe2_if.dat;
3: t[0][i_mul_fe2_if.eop] <= i_mul_fe2_if.dat;
5: t[4][i_mul_fe2_if.eop] <= i_mul_fe2_if.dat;
7: t[2][i_mul_fe2_if.eop] <= i_mul_fe2_if.dat;
9: t[5][i_mul_fe2_if.eop] <= i_mul_fe2_if.dat;
10: t[2][i_mul_fe2_if.eop] <= i_mul_fe2_if.dat;
12: t[0][i_mul_fe2_if.eop] <= i_mul_fe2_if.dat;
13: t[1][i_mul_fe2_if.eop] <= i_mul_fe2_if.dat;
16: t[0][i_mul_fe2_if.eop] <= i_mul_fe2_if.dat;
19: t[3][i_mul_fe2_if.eop] <= i_mul_fe2_if.dat;
20: t[4][i_mul_fe2_if.eop] <= i_mul_fe2_if.dat;
21: t[5][i_mul_fe2_if.eop] <= i_mul_fe2_if.dat;
default: o_inv_fe6_if.err <= 1;
endcase
end
// Check any results from mnr
if (i_mnr_fe2_if.val && i_mnr_fe2_if.rdy) begin
if(i_mnr_fe2_if.eop) eq_val[i_mnr_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1;
case(i_mnr_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside
0: t[3][i_mnr_fe2_if.eop] <= i_mnr_fe2_if.dat;
6: t[4][i_mnr_fe2_if.eop] <= i_mnr_fe2_if.dat;
15: t[1][i_mnr_fe2_if.eop] <= i_mnr_fe2_if.dat;
default: o_inv_fe6_if.err <= 1;
endcase
end
// Check any results from sub
if (i_sub_fe_if.val && i_sub_fe_if.rdy) begin
if(i_sub_fe_if.eop) eq_val[i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1;
case(i_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside
2: t[3][i_sub_fe_if.eop] <= i_sub_fe_if.dat;
8: t[4][i_sub_fe_if.eop] <= i_sub_fe_if.dat;
11: t[5][i_sub_fe_if.eop] <= i_sub_fe_if.dat;
default: o_inv_fe6_if.err <= 1;
endcase
end
// Check any results from add
if (i_add_fe_if.val && i_add_fe_if.rdy) begin
if (i_add_fe_if.eop) eq_val[i_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1;
case(i_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside
4: t[3][i_add_fe_if.eop] <= i_add_fe_if.dat;
14: t[1][i_add_fe_if.eop] <= i_add_fe_if.dat;
17: t[1][i_add_fe_if.eop] <= i_add_fe_if.dat;
default: o_inv_fe6_if.err <= 1;
endcase
end
// Check any results from inv_fe2
if (i_inv_fe2_if.val && i_inv_fe2_if.rdy) begin
if (i_inv_fe2_if.eop) eq_val[i_inv_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1;
case(i_inv_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside
18: t[1][i_inv_fe2_if.eop] <= i_inv_fe2_if.dat;
default: o_inv_fe6_if.err <= 1;
endcase
end
// Issue new multiplies
if (mul_en)
case(nxt_fe2_mul)
1: fe2_multiply(1, t[3], a[1]);
3: fe2_multiply(3, a[0], a[0]);
5: fe2_multiply(5, a[2], a[2]);
7: fe2_multiply(7, a[0], a[1]);
9: fe2_multiply(9, a[1], a[1]);
10: fe2_multiply(10, a[2], a[0]);
12: fe2_multiply(12, a[2], t[4]);
13: fe2_multiply(13, a[1], t[5]);
16: fe2_multiply(16, a[0], t[3]);
19: fe2_multiply(19, t[3], t[1]);
20: fe2_multiply(20, t[4], t[1]);
21: fe2_multiply(21, t[5], t[1]);
endcase
// Issue new adds
if (add_en)
case(nxt_fe_add)
4: fe2_addition(4, t[0], t[3]);
14: fe2_addition(14, t[0], t[1]);
17: fe2_addition(17, t[1], t[0]);
endcase
// Issue new sub
if (sub_en)
case(nxt_fe_sub)
2: fe2_subtraction(2, 0, t[3]);
8: fe2_subtraction(8, t[4], t[2]);
11: fe2_subtraction(11, t[5], t[2]);
endcase
// Issue new mnr
if (mnr_en)
case(nxt_fe2_mnr)
0: fe2_mnr(0, a[2]);
6: fe2_mnr(6, t[4]);
15: fe2_mnr(15, t[1]);
endcase
// Issue new inv
if (inv_en)
fe2_inv(18, t[1]);
end
end
// Task for subtractions: sends one beat {b[i], a[i]} per call while the port is free.
// After the second beat the next subtraction is selected.
task fe2_subtraction(input int unsigned ctl, input FE2_TYPE a, b);
if (~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy)) begin
o_sub_fe_if.val <= 1;
o_sub_fe_if.sop <= sub_cnt == 0;
o_sub_fe_if.eop <= sub_cnt == 1;
o_sub_fe_if.dat[0 +: $bits(FE_TYPE)] <= a[sub_cnt];
o_sub_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= b[sub_cnt];
o_sub_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
eq_wait[ctl] <= 1;
if (sub_cnt == 1) begin
get_next_sub();
end
sub_cnt <= sub_cnt + 1;
end
endtask
// Task for addition: sends one beat {b[i], a[i]} per call while the port is free
task fe2_addition(input int unsigned ctl, input FE2_TYPE a, b);
if (~o_add_fe_if.val || (o_add_fe_if.val && o_add_fe_if.rdy)) begin
o_add_fe_if.val <= 1;
o_add_fe_if.sop <= add_cnt == 0;
o_add_fe_if.eop <= add_cnt == 1;
o_add_fe_if.dat[0 +: $bits(FE_TYPE)] <= a[add_cnt];
o_add_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= b[add_cnt];
o_add_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
eq_wait[ctl] <= 1;
if (add_cnt == 1) begin
get_next_add();
end
add_cnt <= add_cnt + 1;
end
endtask
// Task for using mult: sends one beat {b[i], a[i]} per call while the port is free
task fe2_multiply(input int unsigned ctl, input FE2_TYPE a, b);
if (~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy)) begin
o_mul_fe2_if.val <= 1;
o_mul_fe2_if.sop <= mul_cnt == 0;
o_mul_fe2_if.eop <= mul_cnt == 1;
o_mul_fe2_if.dat[0 +: $bits(FE_TYPE)] <= a[mul_cnt];
o_mul_fe2_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= b[mul_cnt];
o_mul_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
eq_wait[ctl] <= 1;
if (mul_cnt == 1) begin
get_next_fe2_mul();
end
mul_cnt <= mul_cnt + 1;
end
endtask
// Task for using mnr: sends one beat a[i] per call while the port is free
task fe2_mnr(input int unsigned ctl, input FE2_TYPE a);
if (~o_mnr_fe2_if.val || (o_mnr_fe2_if.val && o_mnr_fe2_if.rdy)) begin
o_mnr_fe2_if.val <= 1;
o_mnr_fe2_if.sop <= mnr_cnt == 0;
o_mnr_fe2_if.eop <= mnr_cnt == 1;
o_mnr_fe2_if.dat <= a[mnr_cnt];
o_mnr_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
eq_wait[ctl] <= 1;
if (mnr_cnt == 1) begin
get_next_fe2_mnr();
end
mnr_cnt <= mnr_cnt + 1;
end
endtask
// Task for using inv: sends one beat a[i] per call while the port is free
task fe2_inv(input int unsigned ctl, input FE2_TYPE a);
if (~o_inv_fe2_if.val || (o_inv_fe2_if.val && o_inv_fe2_if.rdy)) begin
o_inv_fe2_if.val <= 1;
o_inv_fe2_if.sop <= inv_cnt == 0;
o_inv_fe2_if.eop <= inv_cnt == 1;
o_inv_fe2_if.dat <= a[inv_cnt];
o_inv_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
eq_wait[ctl] <= 1;
if (inv_cnt == 1) begin
get_next_fe2_inv();
end
inv_cnt <= inv_cnt + 1;
end
endtask
// Select the next multiply whose inputs are available; mul_en is cleared when
// none is. An eq_wait[] term on another equation means "that equation has
// already been issued", which guards a temporary that is about to be overwritten.
task get_next_fe2_mul();
mul_en <= 1;
if(~eq_wait[1] && eq_val[0])
nxt_fe2_mul <= 1;
else if(~eq_wait[3] && rdy_l)
nxt_fe2_mul <= 3;
else if(~eq_wait[5] && rdy_l)
nxt_fe2_mul <= 5;
else if(~eq_wait[7] && rdy_l)
nxt_fe2_mul <= 7;
else if(~eq_wait[9] && rdy_l)
nxt_fe2_mul <= 9;
// eq 10 overwrites t[2], which eq 8 reads
else if(~eq_wait[10] && eq_wait[8] && rdy_l)
nxt_fe2_mul <= 10;
// eq 12 overwrites t[0], which eq 4 reads
else if(~eq_wait[12] && eq_val[8] && eq_wait[4])
nxt_fe2_mul <= 12;
else if(~eq_wait[13] && eq_val[11])
nxt_fe2_mul <= 13;
// eq 16 overwrites t[0], which eq 14 reads
else if(~eq_wait[16] && eq_val[4] && eq_wait[14])
nxt_fe2_mul <= 16;
else if(~eq_wait[19] && eq_val[4] && eq_val[18])
nxt_fe2_mul <= 19;
else if(~eq_wait[20] && eq_val[8] && eq_val[18])
nxt_fe2_mul <= 20;
else if(~eq_wait[21] && eq_val[11] && eq_val[18])
nxt_fe2_mul <= 21;
else
mul_en <= 0;
endtask
// Select the next addition whose inputs are available
task get_next_add();
add_en <= 1;
if(~eq_wait[4] && eq_val[2] && eq_val[3])
nxt_fe_add <= 4;
else if(~eq_wait[14] && eq_val[12] && eq_val[13])
nxt_fe_add <= 14;
else if(~eq_wait[17] && eq_val[16] && eq_val[15])
nxt_fe_add <= 17;
else
add_en <= 0;
endtask
// Select the next subtraction whose inputs are available
task get_next_sub();
sub_en <= 1;
if(~eq_wait[2] && eq_val[1])
nxt_fe_sub <= 2;
else if(~eq_wait[8] && eq_val[6] && eq_val[7])
nxt_fe_sub <= 8;
else if(~eq_wait[11] && eq_val[9] && eq_val[10])
nxt_fe_sub <= 11;
else
sub_en <= 0;
endtask
// Select the next mnr whose inputs are available
task get_next_fe2_mnr();
mnr_en <= 1;
if(~eq_wait[0] && rdy_l)
nxt_fe2_mnr <= 0;
else if(~eq_wait[6] && eq_val[5])
nxt_fe2_mnr <= 6;
else if(~eq_wait[15] && eq_val[14])
nxt_fe2_mnr <= 15;
else
mnr_en <= 0;
endtask
// Enable the single inverse (eq 18) once eq 17 has completed and eq 18 was not
// issued yet. The first assignment is overridden by the if/else below.
task get_next_fe2_inv();
inv_en <= 1;
if(~eq_wait[18] && eq_val[17])
inv_en <= 1;
else
inv_en <= 0;
endtask
endmodule

View File

@ -23,7 +23,6 @@ module ec_fe6_mul_s
parameter type FE_TYPE,
parameter type FE2_TYPE,
parameter type FE6_TYPE,
parameter CTL_BITS = 14,
parameter OVR_WRT_BIT = 8 // From this bit 4 bits are used for internal control, 2 bits for resource sharing - 6 total
)(
input i_clk, i_rst,

View File

@ -0,0 +1,268 @@
/*
Calculates inversion mod p using binary gcd algorithm.
Streaming version with internal adder and sub module to improve
critical path.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Modular inversion (mod P) using the binary GCD algorithm, with the
// additions and subtractions done in separate pipelined adder_pipe /
// subtractor_pipe instances.
//
// One operand is accepted on i_dat_if in IDLE; u is loaded with the operand,
// v with P, x1 with 1 and x2 with 0. The state machine then halves u (U_STATE)
// or v (V_STATE) while they are even, and subtracts the smaller from the
// larger (UPDATE_X1 when u >= v, UPDATE_X2 otherwise) until u or v equals 1.
// The result (x1 if u == 1, otherwise x2) is presented on o_dat_if in
// FINISHED as a single-beat packet, together with the ctl of the request.
//
// NOTE(review): an operand of 0 keeps u at 0 (always even), so the state
// machine would stay in U_STATE and never produce a result -- confirm callers
// never pass 0.
module bin_inv_s #(
parameter P,
parameter BITS = $clog2(P),
parameter LEVEL = 1 // Pipelines when adding / subtracting / comparing
)(
input i_clk,
input i_rst,
if_axi_stream.source o_dat_if,
if_axi_stream.sink i_dat_if
);
// Working registers are one bit wider than BITS
logic [BITS:0] x1, x2, u, v;
logic wait_add; // An addition x + P is in flight
logic [1:0] wait_sub; // Number of subtractions issued in the current UPDATE state
logic sub_out; // Selects which subtraction result is expected next
// Adder / subtractor request streams carry two (BITS+1)-bit operands
if_axi_stream #(.DAT_BYTS(2*(BITS+8)/8), .DAT_BITS(2*(BITS+1)), .CTL_BITS(1)) add_i_if (i_clk);
if_axi_stream #(.DAT_BYTS((BITS+8)/8), .DAT_BITS(BITS+1), .CTL_BITS(1)) add_o_if (i_clk);
if_axi_stream #(.DAT_BYTS(2*(BITS+8)/8), .DAT_BITS(2*(BITS+1)), .CTL_BITS(1)) sub_i_if (i_clk);
if_axi_stream #(.DAT_BYTS((BITS+8)/8), .DAT_BITS(BITS+1), .CTL_BITS(1)) sub_o_if (i_clk);
enum {IDLE,
U_STATE,
V_STATE,
UPDATE_X1,
UPDATE_X2,
FINISHED} state;
// The adder always computes P + x1 (in U_STATE) or P + x2 (any other state);
// only its val is registered below.
always_comb begin
add_i_if.dat = 0;
add_i_if.dat[BITS+1 +: BITS+1] = P;
add_i_if.dat[0 +: BITS+1] = (state == U_STATE) ? x1 : x2;
add_i_if.sop = 0;
add_i_if.eop = 0;
add_i_if.err = 0;
add_i_if.mod = 0;
add_i_if.ctl = 0;
o_dat_if.sop = 1;
o_dat_if.eop = 1;
o_dat_if.err = 0;
o_dat_if.mod = 0;
end
always_ff @ (posedge i_clk) begin
if (i_rst) begin
x1 <= 0;
x2 <= 0;
u <= 0;
v <= 0;
i_dat_if.rdy <= 0;
o_dat_if.val <= 0;
o_dat_if.dat <= 0;
o_dat_if.ctl <= 0;
state <= IDLE;
add_i_if.val <= 0;
add_o_if.rdy <= 0;
sub_i_if.reset_source();
sub_o_if.rdy <= 0;
wait_add <= 0;
wait_sub <= 0;
sub_out <= 0;
end else begin
// Default: drop val once the beat was taken; the states below may set it again
if (o_dat_if.rdy) o_dat_if.val <= 0;
if (add_i_if.rdy) add_i_if.val <= 0;
if (sub_i_if.rdy) sub_i_if.val <= 0;
// Adder / subtractor results are always accepted
add_o_if.rdy <= 1;
sub_o_if.rdy <= 1;
case(state)
// Wait for an operand and initialise the working registers
IDLE: begin
i_dat_if.rdy <= 1;
if (i_dat_if.val && i_dat_if.rdy) begin
i_dat_if.rdy <= 0;
u <= i_dat_if.dat;
o_dat_if.ctl <= i_dat_if.ctl;
v <= P;
x1 <= 1;
x2 <= 0;
state <= U_STATE;
end
end
// While u is even: u = u/2 and x1 = x1/2 (x1 even) or (x1 + P)/2 (x1 odd, via the adder)
U_STATE: begin
if (~wait_add) begin
if (u % 2 == 1) begin
state <= (v % 2 == 1) ? (u >= v) ? UPDATE_X1 : UPDATE_X2 : V_STATE;
end else begin
u <= u/2;
if (x1 % 2 == 0) begin
x1 <= x1/2;
// Look ahead on the halved u so no cycle is spent re-entering this state
if ((u/2) % 2 == 1) state <= (v % 2 == 1) ? (u/2 >= v) ? UPDATE_X1 : UPDATE_X2 : V_STATE;
end else begin
wait_add <= 1;
add_i_if.val <= 1;
end
end
end else begin
if (add_o_if.val && add_o_if.rdy) begin
x1 <= add_o_if.dat/2;
wait_add <= 0;
// u already holds the halved value here
if (u % 2 == 1) state <= (v % 2 == 1) ? (u >= v) ? UPDATE_X1 : UPDATE_X2 : V_STATE;
end
end
end
// While v is even: v = v/2 and x2 = x2/2 (x2 even) or (x2 + P)/2 (x2 odd, via the adder)
V_STATE: begin
if (~wait_add) begin
if (v % 2 == 1) begin
state <= (u >= v) ? UPDATE_X1 : UPDATE_X2;
end else begin
v <= v/2;
if (x2 % 2 == 0) begin
x2 <= x2/2;
if ((v/2) % 2 == 1) state <= (u >= v/2) ? UPDATE_X1 : UPDATE_X2;
end else begin
wait_add <= 1;
add_i_if.val <= 1;
end
end
end else begin
if (add_o_if.val && add_o_if.rdy) begin
x2 <= add_o_if.dat/2;
wait_add <= 0;
if (v % 2 == 1) state <= (u >= v) ? UPDATE_X1 : UPDATE_X2;
end
end
end
// u >= v: u = u - v, then x1 = x1 - x2 (both through the subtractor).
// NOTE(review): the second request is loaded on the cycle after the first
// without checking sub_i_if.rdy -- this assumes the subtractor accepts a
// request every cycle; confirm.
UPDATE_X1: begin
case(wait_sub)
0: begin //u <= u - v;
sub_i_if.dat[0 +: BITS+1] <= u;
sub_i_if.dat[BITS+1 +: BITS+1] <= v;
sub_i_if.val <= 1;
wait_sub <= wait_sub + 1;
end
1: begin // x1 <= x1 - x2;
sub_i_if.dat[0 +: BITS+1] <= x1;
sub_i_if.dat[BITS+1 +: BITS+1] <= x2;
sub_i_if.val <= 1;
wait_sub <= wait_sub + 1;
end
2: begin
// Wait
end
endcase
if (sub_o_if.val && sub_o_if.rdy) begin
sub_out <= sub_out + 1;
case(sub_out)
0: begin
u <= sub_o_if.dat;
end
1: begin
x1 <= sub_o_if.dat;
wait_sub <= 0;
// u was updated by the first result, so these tests see the new u
if (u == 1 || v == 1)
state <= FINISHED;
else
state <= (u % 2 == 1) ? (v % 2 == 1) ? (u >= v) ? UPDATE_X1 : UPDATE_X2 : V_STATE : U_STATE;
end
endcase
end
end
// u < v: v = v - u, then x2 = x2 - x1 (both through the subtractor).
// The same sub_i_if.rdy assumption as in UPDATE_X1 applies.
UPDATE_X2: begin
case(wait_sub)
0: begin // v <= v - u;
sub_i_if.dat[0 +: BITS+1] <= v;
sub_i_if.dat[BITS+1 +: BITS+1] <= u;
sub_i_if.val <= 1;
wait_sub <= wait_sub + 1;
end
1: begin // x2 <= x2 - x1;
sub_i_if.dat[0 +: BITS+1] <= x2;
sub_i_if.dat[BITS+1 +: BITS+1] <= x1;
sub_i_if.val <= 1;
wait_sub <= wait_sub + 1;
end
2: begin
// Wait
end
endcase
if (sub_o_if.val && sub_o_if.rdy) begin
sub_out <= sub_out + 1;
case(sub_out)
0: begin
v <= sub_o_if.dat;
end
1: begin
wait_sub <= 0;
x2 <= sub_o_if.dat;
// v was updated by the first result, so these tests see the new v
if (u == 1 || v == 1)
state <= FINISHED;
else
state <= (u % 2 == 1) ? (v % 2 == 1) ? (u >= v) ? UPDATE_X1 : UPDATE_X2 : V_STATE : U_STATE;
end
endcase
end
end
// Present the result until it is accepted, then return to IDLE
FINISHED: begin
o_dat_if.val <= 1;
o_dat_if.dat <= (u == 1) ? x1 : x2;
if (o_dat_if.val && o_dat_if.rdy) begin
o_dat_if.val <= 0;
i_dat_if.rdy <= 1;
state <= IDLE;
end
end
endcase
end
end
// Adder does not use modulus
adder_pipe # (
.P ( 0 ),
.BITS ( BITS+1 ),
.CTL_BITS ( 1 ),
.LEVEL ( LEVEL )
)
adder_pipe (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_add ( add_i_if ),
.o_add ( add_o_if )
);
// Subtractor is instantiated with the modulus P
subtractor_pipe # (
.P ( P ),
.BITS ( BITS+1 ),
.CTL_BITS ( 1 ),
.LEVEL ( LEVEL )
)
subtractor_pipe (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_sub ( sub_i_if ),
.o_sub ( sub_o_if )
);
endmodule

View File

@ -0,0 +1,313 @@
/*
This does the Fp12 inversion required in the final exponentiation.
Input is expected to be streamed in with Fp .c0 in the first clock cycle
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Wrapper that bundles ec_fe12_inv_s with every lower-level arithmetic block it
// needs (Fp6 mul / inverse / mul-by-nonresidue, Fp2 mul / inverse /
// mul-by-nonresidue, Fp inverse, Fp adder and Fp subtractor). A single instance
// of each arithmetic block is instantiated and shared between its users through
// resource_share arbiters. Only the Fp multiplier is left external and is
// reached through o_mul_fe_if / i_mul_fe_if.
//
// Naming convention used for the internal interfaces below:
//   <op>_o_if : request leaving a user, travelling towards the block doing <op>
//   <op>_i_if : result returning from that block to the user
// For the arrayed interfaces the highest index is the shared side (the port
// that faces the single arithmetic block) and the lower indices are the users.
module bls12_381_fe12_inv_wrapper
import bls12_381_pkg::*;
#(
parameter type FE_TYPE = fe_t,
parameter type FE2_TYPE = fe2_t,
parameter type FE6_TYPE = fe6_t,
// NOTE(review): offsets up to OVR_WRT_BIT + 24 are handed to sub-blocks below,
// so the default CTL_BITS of 12 looks too small for the default OVR_WRT_BIT of 8
// (the trailing comment on OVR_WRT_BIT asks for 32 bits). The testbench in this
// commit overrides these with CTL_BITS = 64 and OVR_WRT_BIT = 0 - confirm that
// every instantiation overrides the defaults.
parameter CTL_BITS = 12,
parameter OVR_WRT_BIT = 8 // Need 32 bits for control
)(
input i_clk, i_rst,
// Input/Output interfaces for inversion result, FE_TYPE data width
if_axi_stream.source o_inv_fe12_if,
if_axi_stream.sink i_inv_fe12_if,
// Interface to FE_TYPE mul (mod P), 2*FE_TYPE data width
if_axi_stream.source o_mul_fe_if,
if_axi_stream.sink i_mul_fe_if
);
// Fp multiply users: [0] ec_fe2_inv_s, [1] ec_fe2_mul_s (shared side is the external o_mul_fe_if / i_mul_fe_if)
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_o_if [1:0] (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_i_if [1:0] (i_clk);
// Fp add users: [0] ec_fe2_inv_s, [1] ec_fe2_mul_s, [2] fe2_mul_by_nonresidue_s,
// [3] ec_fe6_inv_s, [4] ec_fe6_mul_s; [5] is the shared adder_pipe
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_o_if [5:0] (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_i_if [5:0] (i_clk);
// Fp subtract users: [0] ec_fe2_inv_s, [1] ec_fe2_mul_s, [2] fe2_mul_by_nonresidue_s,
// [3] ec_fe6_inv_s, [4] ec_fe6_mul_s, [5] ec_fe12_inv_s; [6] is the shared subtractor_pipe
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_o_if [6:0] (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_i_if [6:0] (i_clk);
// Fp2 multiply users: [0] ec_fe6_inv_s, [1] ec_fe6_mul_s; [2] is the shared ec_fe2_mul_s
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_o_if [2:0] (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_i_if [2:0] (i_clk);
// Fp2 mul-by-nonresidue users: [0] ec_fe6_inv_s, [1] ec_fe6_mul_s, [2] fe6_mul_by_nonresidue_s;
// [3] is the shared fe2_mul_by_nonresidue_s
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mnr_fe2_o_if [3:0] (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mnr_fe2_i_if [3:0] (i_clk);
// Point-to-point links (single user, no arbitration): ec_fe12_inv_s <-> ec_fe6_mul_s
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe6_o_if (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe6_i_if (i_clk);
// ec_fe12_inv_s <-> fe6_mul_by_nonresidue_s
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mnr_fe6_o_if (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mnr_fe6_i_if (i_clk);
// ec_fe2_inv_s <-> bin_inv_s
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) inv_fe_o_if (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) inv_fe_i_if (i_clk);
// ec_fe6_inv_s <-> ec_fe2_inv_s
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) inv_fe2_o_if (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) inv_fe2_i_if (i_clk);
// ec_fe12_inv_s <-> ec_fe6_inv_s
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) inv_fe6_o_if (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) inv_fe6_i_if (i_clk);
// Fp inversion unit (modulus bls12_381_pkg::P); its only user is ec_fe2_inv_s
bin_inv_s #(
.P ( bls12_381_pkg::P ),
.LEVEL ( 2 )
)
bin_inv_s (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.o_dat_if ( inv_fe_i_if ),
.i_dat_if ( inv_fe_o_if )
);
// Fp2 inversion: serves ec_fe6_inv_s, uses bin_inv_s plus user slot [0] of the
// shared Fp mul / add / sub. Given control-bit offset OVR_WRT_BIT + 0.
ec_fe2_inv_s #(
.FE_TYPE ( FE_TYPE ),
.OVR_WRT_BIT ( OVR_WRT_BIT )
)
ec_fe2_inv_s(
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.o_inv_fe2_if ( inv_fe2_i_if ),
.i_inv_fe2_if ( inv_fe2_o_if ),
.o_inv_fe_if ( inv_fe_o_if ),
.i_inv_fe_if ( inv_fe_i_if ), //
.o_mul_fe_if ( mul_fe_o_if[0] ),
.i_mul_fe_if ( mul_fe_i_if[0] ),
.o_add_fe_if ( add_fe_o_if[0] ),
.i_add_fe_if ( add_fe_i_if[0] ),
.o_sub_fe_if ( sub_fe_o_if[0] ),
.i_sub_fe_if ( sub_fe_i_if[0] )
);
// Shared Fp2 multiplier: sits on the arbitrated side (index [2]) of the
// mul_fe2 interfaces and uses user slot [1] of the shared Fp mul / add / sub.
ec_fe2_mul_s #(
.FE_TYPE ( FE_TYPE ),
.CTL_BITS ( CTL_BITS )
)
ec_fe2_mul_s (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.o_mul_fe2_if ( mul_fe2_i_if[2] ),
.i_mul_fe2_if ( mul_fe2_o_if[2] ),
.o_add_fe_if ( add_fe_o_if[1] ),
.i_add_fe_if ( add_fe_i_if[1] ),
.o_sub_fe_if ( sub_fe_o_if[1] ),
.i_sub_fe_if ( sub_fe_i_if[1] ),
.o_mul_fe_if ( mul_fe_o_if[1] ),
.i_mul_fe_if ( mul_fe_i_if[1] )
);
// Shared Fp2 multiply-by-nonresidue: arbitrated side (index [3]) of the
// mnr_fe2 interfaces, uses user slot [2] of the shared Fp add / sub.
fe2_mul_by_nonresidue_s #(
.FE_TYPE ( FE_TYPE )
)
fe2_mul_by_nonresidue_s (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.o_mnr_fe2_if ( mnr_fe2_i_if[3] ),
.i_mnr_fe2_if ( mnr_fe2_o_if[3] ),
.o_add_fe_if ( add_fe_o_if[2] ),
.i_add_fe_if ( add_fe_i_if[2] ),
.o_sub_fe_if ( sub_fe_o_if[2] ),
.i_sub_fe_if ( sub_fe_i_if[2] )
);
// Fp6 inversion: serves ec_fe12_inv_s; user slot [0] of the Fp2 mul / mnr
// shares and slot [3] of the Fp add / sub shares. Control-bit offset OVR_WRT_BIT + 2.
ec_fe6_inv_s
#(
.FE_TYPE ( FE_TYPE ),
.FE2_TYPE ( FE2_TYPE ),
.OVR_WRT_BIT ( OVR_WRT_BIT + 2 )
)
ec_fe6_inv_s (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.o_mul_fe2_if ( mul_fe2_o_if[0] ),
.i_mul_fe2_if ( mul_fe2_i_if[0] ),
.o_add_fe_if ( add_fe_o_if[3] ),
.i_add_fe_if ( add_fe_i_if[3] ),
.o_sub_fe_if ( sub_fe_o_if[3] ),
.i_sub_fe_if ( sub_fe_i_if[3] ),
.o_mnr_fe2_if ( mnr_fe2_o_if[0] ),
.i_mnr_fe2_if ( mnr_fe2_i_if[0] ),
.o_inv_fe2_if ( inv_fe2_o_if ),
.i_inv_fe2_if ( inv_fe2_i_if ),
.o_inv_fe6_if ( inv_fe6_i_if ),
.i_inv_fe6_if ( inv_fe6_o_if )
);
// Fp6 multiplier: serves ec_fe12_inv_s; user slot [1] of the Fp2 mul / mnr
// shares and slot [4] of the Fp add / sub shares. Control-bit offset OVR_WRT_BIT + 7.
ec_fe6_mul_s #(
.FE_TYPE ( FE_TYPE ),
.FE2_TYPE ( FE2_TYPE ),
.FE6_TYPE ( FE6_TYPE ),
.OVR_WRT_BIT ( OVR_WRT_BIT + 7 )
)
ec_fe6_mul_s (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.o_mul_fe2_if ( mul_fe2_o_if[1] ),
.i_mul_fe2_if ( mul_fe2_i_if[1] ),
.o_add_fe_if ( add_fe_o_if[4] ),
.i_add_fe_if ( add_fe_i_if[4] ),
.o_sub_fe_if ( sub_fe_o_if[4] ),
.i_sub_fe_if ( sub_fe_i_if[4] ),
.o_mnr_fe2_if ( mnr_fe2_o_if[1] ),
.i_mnr_fe2_if ( mnr_fe2_i_if[1] ),
.o_mul_fe6_if ( mul_fe6_i_if ),
.i_mul_fe6_if ( mul_fe6_o_if )
);
// Fp6 multiply-by-nonresidue: serves ec_fe12_inv_s; user slot [2] of the Fp2 mnr share.
fe6_mul_by_nonresidue_s #(
.FE_TYPE ( FE_TYPE )
)
fe6_mul_by_nonresidue_s (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.o_mnr_fe2_if ( mnr_fe2_o_if[2] ),
.i_mnr_fe2_if ( mnr_fe2_i_if[2] ),
.o_mnr_fe6_if ( mnr_fe6_i_if ),
.i_mnr_fe6_if ( mnr_fe6_o_if )
);
// Top-level Fp12 inversion: connects straight to the wrapper's
// o_inv_fe12_if / i_inv_fe12_if ports and drives the Fp6 mul / mnr / inverse
// blocks above, plus user slot [5] of the Fp subtract share.
// Control-bit offset OVR_WRT_BIT + 14.
ec_fe12_inv_s #(
.FE_TYPE ( FE_TYPE ),
.OVR_WRT_BIT ( OVR_WRT_BIT + 14 )
)
ec_fe12_inv_s (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.o_mul_fe6_if ( mul_fe6_o_if ),
.i_mul_fe6_if ( mul_fe6_i_if ),
.o_sub_fe_if ( sub_fe_o_if[5] ),
.i_sub_fe_if ( sub_fe_i_if[5] ),
.o_mnr_fe6_if ( mnr_fe6_o_if ),
.i_mnr_fe6_if ( mnr_fe6_i_if ),
.o_inv_fe6_if ( inv_fe6_o_if ),
.i_inv_fe6_if ( inv_fe6_i_if ),
.o_inv_fe12_if ( o_inv_fe12_if ),
.i_inv_fe12_if ( i_inv_fe12_if )
);
// The one physical Fp adder, fed by resource_share_fe_add through index [5]
adder_pipe # (
.BITS ( bls12_381_pkg::DAT_BITS ),
.P ( bls12_381_pkg::P ),
.CTL_BITS ( CTL_BITS ),
.LEVEL ( 2 )
)
adder_pipe (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_add ( add_fe_o_if[5] ),
.o_add ( add_fe_i_if[5] )
);
// The one physical Fp subtractor, fed by resource_share_fe_sub through index [6]
subtractor_pipe # (
.BITS ( bls12_381_pkg::DAT_BITS ),
.P ( bls12_381_pkg::P ),
.CTL_BITS ( CTL_BITS ),
.LEVEL ( 2 )
)
subtractor_pipe (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_sub ( sub_fe_o_if[6] ),
.o_sub ( sub_fe_i_if[6] )
);
// Arbiter: 5 Fp-add users [4:0] -> adder_pipe on [5].
// NOTE(review): presumably resource_share tags each request in the control
// word starting at its OVR_WRT_BIT so the result can be routed back - confirm
// against resource_share. The three Fp-level shares all use offset +18.
resource_share # (
.NUM_IN ( 5 ),
.DAT_BITS ( 2*$bits(FE_TYPE) ),
.CTL_BITS ( CTL_BITS ),
.OVR_WRT_BIT ( OVR_WRT_BIT + 18 ),
.PIPELINE_IN ( 1 ),
.PIPELINE_OUT ( 1 )
)
resource_share_fe_add (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_axi ( add_fe_o_if[4:0] ),
.o_res ( add_fe_o_if[5] ),
.i_res ( add_fe_i_if[5] ),
.o_axi ( add_fe_i_if[4:0] )
);
// Arbiter: 6 Fp-subtract users [5:0] -> subtractor_pipe on [6]
resource_share # (
.NUM_IN ( 6 ),
.DAT_BITS ( 2*$bits(FE_TYPE) ),
.CTL_BITS ( CTL_BITS ),
.OVR_WRT_BIT ( OVR_WRT_BIT + 18 ),
.PIPELINE_IN ( 1 ),
.PIPELINE_OUT ( 1 )
)
resource_share_fe_sub (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_axi ( sub_fe_o_if[5:0] ),
.o_res ( sub_fe_o_if[6] ),
.i_res ( sub_fe_i_if[6] ),
.o_axi ( sub_fe_i_if[5:0] )
);
// Arbiter: 2 Fp-multiply users [1:0] -> external multiplier via the module ports
resource_share # (
.NUM_IN ( 2 ),
.DAT_BITS ( 2*$bits(FE_TYPE) ),
.CTL_BITS ( CTL_BITS ),
.OVR_WRT_BIT ( OVR_WRT_BIT + 18 ),
.PIPELINE_IN ( 1 ),
.PIPELINE_OUT ( 1 )
)
resource_share_fe_mul (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_axi ( mul_fe_o_if[1:0] ),
.o_res ( o_mul_fe_if ),
.i_res ( i_mul_fe_if ),
.o_axi ( mul_fe_i_if[1:0] )
);
// Arbiter: 3 Fp2 mul-by-nonresidue users [2:0] -> fe2_mul_by_nonresidue_s on [3].
// NOTE(review): DAT_BITS here is 2*$bits(FE_TYPE) while the mnr_fe2 interfaces
// are declared with $bits(FE_TYPE) - confirm the width difference is intended.
resource_share # (
.NUM_IN ( 3 ),
.DAT_BITS ( 2*$bits(FE_TYPE) ),
.CTL_BITS ( CTL_BITS ),
.OVR_WRT_BIT ( OVR_WRT_BIT + 24 ),
.PIPELINE_IN ( 1 ),
.PIPELINE_OUT ( 1 )
)
resource_share_fe2_mnr (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_axi ( mnr_fe2_o_if[2:0] ),
.o_res ( mnr_fe2_o_if[3] ),
.i_res ( mnr_fe2_i_if[3] ),
.o_axi ( mnr_fe2_i_if[2:0] )
);
// Arbiter: 2 Fp2 multiply users [1:0] -> ec_fe2_mul_s on [2]
resource_share # (
.NUM_IN ( 2 ),
.DAT_BITS ( 2*$bits(FE_TYPE) ),
.CTL_BITS ( CTL_BITS ),
.OVR_WRT_BIT ( OVR_WRT_BIT + 24 ),
.PIPELINE_IN ( 1 ),
.PIPELINE_OUT ( 1 )
)
resource_share_fe2_mul (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_axi ( mul_fe2_o_if[1:0] ),
.o_res ( mul_fe2_o_if[2] ),
.i_res ( mul_fe2_i_if[2] ),
.o_axi ( mul_fe2_i_if[1:0] )
);
endmodule

View File

@ -39,21 +39,15 @@ module bls12_381_pairing_wrapper
if_axi_stream.source o_fe12_if,
// Interface to FE_TYPE multiplier (mod P)
if_axi_stream.source o_mul_fe_if,
if_axi_stream.sink i_mul_fe_if,
// Interface to FE_TYPE adder (mod P)
if_axi_stream.source o_add_fe_if,
if_axi_stream.sink i_add_fe_if,
// Interface to FE_TYPE subtractor (mod P)
if_axi_stream.source o_sub_fe_if,
if_axi_stream.sink i_sub_fe_if
if_axi_stream.sink i_mul_fe_if
);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_o_if [1:0] (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_i_if [1:0] (i_clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_o_if [4:0] (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_i_if [4:0] (i_clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_o_if [4:0] (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_i_if [4:0] (i_clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_o_if [5:0] (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_i_if [5:0] (i_clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_o_if [5:0] (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_i_if [5:0] (i_clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_o_if [2:0] (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_i_if [2:0] (i_clk);
@ -75,7 +69,7 @@ bls12_381_pairing #(
.FE12_TYPE ( FE12_TYPE ),
.CTL_BITS ( CTL_BITS ),
.OVR_WRT_BIT ( OVR_WRT_BIT + 0 ),// 0 to 15
.SQ_BIT ( OVR_WRT_BIT + 2 )
.SQ_BIT ( OVR_WRT_BIT + 2 )
)
bls12_381_pairing (
.i_clk ( i_clk ),
@ -165,7 +159,7 @@ fe6_mul_by_nonresidue_s (
ec_fe12_mul_s #(
.FE_TYPE ( FE_TYPE ),
.OVR_WRT_BIT ( OVR_WRT_BIT + 20 ), // 20 to 23
.SQ_BIT ( OVR_WRT_BIT + 2 )
.SQ_BIT ( OVR_WRT_BIT + 2 )
)
ec_fe12_mul_s (
.i_clk ( i_clk ),
@ -182,6 +176,32 @@ ec_fe12_mul_s (
.i_mul_fe12_if ( mul_fe12_o_if )
);
adder_pipe # (
.BITS ( bls12_381_pkg::DAT_BITS ),
.P ( bls12_381_pkg::P ),
.CTL_BITS ( CTL_BITS ),
.LEVEL ( 2 )
)
adder_pipe (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_add ( add_fe_o_if[5] ),
.o_add ( add_fe_i_if[5] )
);
subtractor_pipe # (
.BITS ( bls12_381_pkg::DAT_BITS ),
.P ( bls12_381_pkg::P ),
.CTL_BITS ( CTL_BITS ),
.LEVEL ( 2 )
)
subtractor_pipe (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_sub ( sub_fe_o_if[5] ),
.o_sub ( sub_fe_i_if[5] )
);
resource_share # (
.NUM_IN ( 5 ),
.DAT_BITS ( 2*$bits(FE_TYPE) ),
@ -194,8 +214,8 @@ resource_share_fe_add (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_axi ( add_fe_o_if[4:0] ),
.o_res ( o_add_fe_if ),
.i_res ( i_add_fe_if ),
.o_res ( add_fe_o_if[5] ),
.i_res ( add_fe_i_if[5] ),
.o_axi ( add_fe_i_if[4:0] )
);
@ -211,8 +231,8 @@ resource_share_fe_sub (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_axi ( sub_fe_o_if[4:0] ),
.o_res ( o_sub_fe_if ),
.i_res ( i_sub_fe_if ),
.o_res ( sub_fe_o_if[5] ),
.i_res ( sub_fe_i_if[5] ),
.o_axi ( sub_fe_i_if[4:0] )
);

View File

@ -467,47 +467,47 @@ package bls12_381_pkg;
endfunction
function fe6_t fe6_inv(fe6_t a);
fe2_t add_i0, add_i1, sub_i0, mul_i0;
fe6_inv[0] = fe2_mul_by_nonresidue(a[2]);
fe6_inv[0] = fe2_mul(fe6_inv[0], a[1]);
fe6_inv[0] = fe2_sub(0, fe6_inv[0]);
add_i0 = fe2_mul(a[0], a[0]);
fe6_inv[0] = fe2_add(add_i0, fe6_inv[0]);
fe2_t t0, t1, t2, t3, t4, t5;
fe6_inv[1] = fe2_mul(a[2], a[2]);
fe6_inv[1] = fe2_mul_by_nonresidue(fe6_inv[1]);
sub_i0 = fe2_mul(a[0], a[1]);
fe6_inv[1] = fe2_sub(fe6_inv[1], sub_i0);
fe6_inv[2] = fe2_mul(a[1], a[1]);
sub_i0 = fe2_mul(a[2], a[0]);
fe6_inv[2] = fe2_sub(fe6_inv[2], sub_i0);
add_i0 = fe2_mul(a[2], fe6_inv[1]);
add_i1 = fe2_mul(a[1], fe6_inv[2]);
add_i1 = fe2_add(add_i0, add_i1);
add_i1 = fe2_mul_by_nonresidue(add_i1);
add_i0 = fe2_mul(a[0], fe6_inv[0]);
add_i1 = fe2_add(add_i1, add_i0);
mul_i0 = fe2_inv(add_i1);
fe6_inv[0] = fe2_mul(fe6_inv[0], mul_i0);
fe6_inv[1] = fe2_mul(fe6_inv[1], mul_i0);
fe6_inv[2] = fe2_mul(fe6_inv[2], mul_i0);
t3 = fe2_mul_by_nonresidue(a[2]); // 0. [a]
t3 = fe2_mul(t3, a[1]); // 1. [0]
t3 = fe2_sub(0, t3); // 2. [1]
t0 = fe2_mul(a[0], a[0]); // 3. [a]
t3 = fe2_add(t0, t3); // 4. [2,3]
t4 = fe2_mul(a[2], a[2]); // 5. [a]
t4 = fe2_mul_by_nonresidue(t4); // 6. [5]
t2 = fe2_mul(a[0], a[1]); // 7. [a]
t4 = fe2_sub(t4, t2); // 8. [6,7]
t5 = fe2_mul(a[1], a[1]); // 9. [a]
t2 = fe2_mul(a[2], a[0]); // 10. [a, wait 8]
t5 = fe2_sub(t5, t2); // 11. [9, 10]
t0 = fe2_mul(a[2], t4); // 12. [8, wait 4]
t1 = fe2_mul(a[1], t5); // 13. [11]
t1 = fe2_add(t0, t1); // 14. [13, 12]
t1 = fe2_mul_by_nonresidue(t1); // 15. [14]
t0 = fe2_mul(a[0], t3); // 16. [4, wait 14]
t1 = fe2_add(t1, t0); // 17. [16, 15]
t1 = fe2_inv(t1); // 18. [17]
t3 = fe2_mul(t3, t1); // 19. [18, 4]
t4 = fe2_mul(t4, t1); // 20. [18, 8]
t5 = fe2_mul(t5, t1); // 21. [18, 11]
fe6_inv = {t5, t4, t3};
endfunction
function fe12_t fe12_inv(fe12_t a);
fe12_t sub_i0, sub_i1, mul_i0;
sub_i0 = fe6_mul(a[0], a[0]);
sub_i1 = fe6_mul(a[1], a[1]);
sub_i1 = fe6_mul_by_nonresidue(sub_i1);
sub_i0 = fe6_sub(sub_i0,sub_i1);
sub_i0 = fe6_inv(sub_i0);
fe12_inv[0] = fe6_mul(a[0], sub_i0);
fe12_inv[1] = fe6_mul(a[1], sub_i0);
fe12_inv[1] = fe6_sub(0, fe12_inv[1]);
fe6_t t0, t1;
t0 = fe6_mul(a[0], a[0]); // 0. [a]
t1 = fe6_mul(a[1], a[1]); // 1. [a]
t1 = fe6_mul_by_nonresidue(t1); // 2. [1]
t0 = fe6_sub(t0, t1); // 3. [0, 2]
t0 = fe6_inv(t0); // 4. [3]
t1 = fe6_mul(a[0], t0); // 5. [4]
t0 = fe6_mul(a[1], t0); // 6. [4, wait 5]
t0 = fe6_sub(0, t0); // 7. [6]
fe12_inv[0] = t1;
fe12_inv[1] = t0;
endfunction
function fe6_t fe6_add(fe6_t a, b);
@ -571,34 +571,34 @@ package bls12_381_pkg;
fe6_t aa, bb;
aa = fe6_mul(a[0], b[0]); // 0. add_i0 = mul(a[0], b[0])
bb = fe6_mul(a[1], b[1]); // 1. bb = mul(a[1], b[1])
fe12_mul[1] = fe6_add(a[1], a[0]); // 2. fe6_mul[1] = add(a[1], a[0])
fe12_mul[0] = fe6_add(b[0], b[1]); // 3. fe6_mul[0] = add(b[0], b[1])
fe12_mul[1] = fe6_mul(fe12_mul[1], fe12_mul[0]); // 4. fe6_mul[1] = mul(fe6_mul[1], fe6_mul[0]) [2, 3]
fe12_mul[1] = fe6_sub(fe12_mul[1], aa); // 5. fe6_mul[1] = sub(fe6_mul[1], add_i0) [4, 0]
fe12_mul[1] = fe6_sub(fe12_mul[1], bb); // 6. fe6_mul[1] = sub(fe6_mul[1], bb) [5, 1]
bb = fe6_mul_by_nonresidue(bb); // 7. bb = mnr(bb) [6]
fe12_mul[0] = fe6_add(bb, aa); // 8. fe6_mul[0] = add(add_i0, bb) [0, 1, 7]
endfunction
function fe12_t fe12_sqr(fe12_t a);
fe6_t ab, c0c1;
ab = fe6_mul(a[0], a[1]); // 0.
c0c1 = fe6_add(a[0], a[1]); // 1. (wait eq0)
fe12_sqr[0] = fe6_mul_by_nonresidue(a[1]);
fe12_sqr[0] = fe6_add(fe12_sqr[0], a[0]);
fe12_sqr[0] = fe6_mul(fe12_sqr[0], c0c1);
fe12_sqr[0] = fe6_sub(fe12_sqr[0], ab);
fe12_sqr[1] = fe6_add(ab, ab);
ab = fe6_mul_by_nonresidue(ab);
fe12_sqr[0] = fe6_sub(fe12_sqr[0], ab);
endfunction
@ -821,10 +821,8 @@ package bls12_381_pkg;
y0 = fe12_mul(r, r);
y1 = fe12_pow(y0, bls_x);
bls_x = bls_x >> 1;
y2 = fe12_pow(y1, bls_x);
bls_x = bls_x << 1;
@ -838,8 +836,6 @@ package bls12_381_pkg;
y2 = fe12_pow(y1, bls_x);
y3 = fe12_pow(y2, bls_x);
y1[1] = fe6_sub(0, y1[1]);
y3 = fe12_mul(y3, y1);

View File

@ -0,0 +1,137 @@
/*
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
`timescale 1ps/1ps
// Testbench for bls12_381_fe12_inv_wrapper.
//
// Streams ten random Fp12 elements into the wrapper, one Fp coefficient per
// stream word, and checks each result two ways:
//   1. against the software model bls12_381_pkg::fe12_inv(), and
//   2. by multiplying the result with the input and expecting FE12_one.
// The only arithmetic block instantiated here is the Fp multiplier; everything
// else is inside the wrapper.
module bls12_381_fe12_inv_tb ();
  import common_pkg::*;
  import bls12_381_pkg::*;

  parameter type FE_TYPE   = bls12_381_pkg::fe_t;
  parameter type FE2_TYPE  = bls12_381_pkg::fe2_t;
  parameter type FE6_TYPE  = bls12_381_pkg::fe6_t;
  parameter type FE12_TYPE = bls12_381_pkg::fe12_t;
  parameter P = bls12_381_pkg::P;

  localparam CTL_BITS   = 64;
  localparam CLK_PERIOD = 100;

  // One Fp coefficient occupies a whole number of bytes on the stream
  // (this is the same expression used for DAT_BYTS below).
  localparam FE_BYTS = ($bits(FE_TYPE)+7)/8;
  // Bit stride between consecutive coefficients in the flat stream buffers.
  localparam FE_STRIDE = 8*FE_BYTS;
  // Fp coefficients in one Fp12 element: 2 (Fp12) x 3 (Fp6) x 2 (Fp2).
  localparam NUM_FE = 2*3*2;

  logic clk, rst;

  // Reset is low for 20 clock periods, high for 20, then low for the rest of the run.
  initial begin
    rst = 0;
    repeat(2) #(20*CLK_PERIOD) rst = ~rst;
  end

  initial begin
    clk = 0;
    forever #(CLK_PERIOD/2) clk = ~clk;
  end

  // Names are from the testbench's point of view: o_* is driven by the
  // testbench into the DUT, i_* is driven by the DUT back to the testbench.
  if_axi_stream #(.DAT_BYTS(FE_BYTS), .CTL_BITS(CTL_BITS)) i_inv_fe12_if(clk);
  if_axi_stream #(.DAT_BYTS(FE_BYTS), .CTL_BITS(CTL_BITS)) o_inv_fe12_if(clk);

  if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_o_if(clk);
  if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_i_if(clk);

  // Fp multiplier (mod P) that the wrapper expects to be provided externally.
  ec_fp_mult_mod #(
    .P             ( P        ),
    .KARATSUBA_LVL ( 3        ),
    .CTL_BITS      ( CTL_BITS )
  )
  ec_fp_mult_mod (
    .i_clk ( clk         ),
    .i_rst ( rst         ),
    .i_mul ( mul_fe_o_if ),
    .o_mul ( mul_fe_i_if )
  );

  // Device under test.
  bls12_381_fe12_inv_wrapper #(
    .FE_TYPE     ( FE_TYPE  ),
    .CTL_BITS    ( CTL_BITS ),
    .OVR_WRT_BIT ( 0        )
  )
  bls12_381_fe12_inv_wrapper (
    .i_clk         ( clk           ),
    .i_rst         ( rst           ),
    .o_inv_fe12_if ( i_inv_fe12_if ),
    .i_inv_fe12_if ( o_inv_fe12_if ),
    .o_mul_fe_if   ( mul_fe_o_if   ),
    .i_mul_fe_if   ( mul_fe_i_if   )
  );

  // Runs ten random inversions and aborts the simulation with $fatal on the
  // first mismatch.
  task test();
  begin
    integer signed get_len;
    logic [common_pkg::MAX_SIM_BYTS*8-1:0] dat_in, get_dat;
    integer start_time, finish_time;
    FE12_TYPE f_in, f_out, f_exp;
    $display("Running test ...");

    for (int lp = 0; lp < 10; lp++) begin
      $display("Loop %d", lp);
      dat_in = 0;
      // Build a random Fp12 element and pack it coefficient by coefficient,
      // lowest coefficient in the lowest bits of the stream buffer.
      for (int i = 0; i < 2; i++)
        for (int j = 0; j < 3; j++)
          for (int k = 0; k < 2; k++) begin
            f_in[i][j][k] = random_vector(FE_STRIDE/8) % P;
            dat_in[(i*6+j*2+k)*FE_STRIDE +: $bits(FE_TYPE)] = {f_in[i][j][k]};
          end

      f_exp = fe12_inv(f_in);

      start_time = $time;
      // Drive the input and collect the output concurrently.
      fork
        o_inv_fe12_if.put_stream(dat_in, NUM_FE*FE_STRIDE/8);
        i_inv_fe12_if.get_stream(get_dat, get_len);
      join
      finish_time = $time;

      // Unpack the result using the same layout as the input.
      for (int i = 0; i < 2; i++)
        for (int j = 0; j < 3; j++)
          for (int k = 0; k < 2; k++)
            f_out[i][j][k] = get_dat[(i*6+j*2+k)*FE_STRIDE +: $bits(FE_TYPE)];

      $display("test finished in %d clocks", (finish_time-start_time)/(CLK_PERIOD));

      // Case inequality (!==) is required here: with the logical operator (!=)
      // any X/Z bit in the DUT output makes the comparison evaluate to X, the
      // if-branch is then not taken and a broken DUT would silently pass.
      if (f_exp !== f_out) begin
        $fatal(1, "%m %t ERROR: output was wrong", $time);
      end

      if (fe12_mul(f_out, f_in) !== FE12_one) begin
        $fatal(1, "%m %t ERROR: output did not reduce to one", $time);
      end
    end

    $display("all tests PASSED");
  end
  endtask

  initial begin
    o_inv_fe12_if.reset_source();
    i_inv_fe12_if.rdy = 0;
    // Start well after the reset pulse generated above has finished.
    #100ns;

    test();

    #1us $finish();
  end
endmodule

View File

@ -92,11 +92,11 @@ subtractor_pipe (
.o_sub ( sub_fe_i_if )
);
ec_fe2_mul #(
ec_fe2_mul_s #(
.FE_TYPE ( FE_TYPE ),
.CTL_BITS ( CTL_BITS )
)
ec_fe2_mul (
ec_fe2_mul_s (
.i_clk ( clk ),
.i_rst ( rst ),
.o_mul_fe2_if ( mul_fe2_i_if ),