Add files for calculating the Frobenius map used in ate pairing.

This commit is contained in:
bsdevlin 2019-08-01 20:24:46 +08:00
parent 3b4693d407
commit f8371eba2e
7 changed files with 963 additions and 27 deletions

View File

@ -0,0 +1,164 @@
/*
This provides the interface to perform Fp2 field element multiplication. It uses four base-field multiplications (a0*b0, a1*b1, a1*b0, a0*b1) followed by one subtraction and one addition.
Inputs must be interleaved starting at c0 (i.e. clock 0 = {b.c0, a.c0})
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Control logic for an Fp2 multiplication built from external base-field
// multiplier, adder and subtractor units reached over AXI-stream style interfaces.
// Each Fp2 operand pair arrives as two beats ({b.c0, a.c0} then {b.c1, a.c1});
// four products are requested (a0*b0, a1*b1, a1*b0, a0*b1), the first two are
// sent to the subtractor and the last two to the adder, and the two results are
// emitted as a two-beat output (subtractor result first, adder result second).
module ec_fe2_mul
#(
parameter type FE_TYPE, // Base field element type
parameter CTL_BITS = 12
)(
input i_clk, i_rst,
// Fp2 multiplication result out (one FE_TYPE per beat) and operands in (2*FE_TYPE per beat, {b, a})
if_axi_stream.source o_mul_fe2_if,
if_axi_stream.sink i_mul_fe2_if,
// Interface to FE_TYPE adder (mod P) 2*FE_TYPE data width
if_axi_stream.source o_add_fe_if,
if_axi_stream.sink i_add_fe_if,
// Interface to FE_TYPE subtractor (mod P) 2*FE_TYPE data width
if_axi_stream.source o_sub_fe_if,
if_axi_stream.sink i_sub_fe_if,
// Interface to FE_TYPE mul (mod P) 2*FE_TYPE data width
if_axi_stream.source o_mul_fe_if,
if_axi_stream.sink i_mul_fe_if
);
FE_TYPE a, b; // Temp storage: operand halves captured on the first input beat, reused for the cross products
// mul_cnt     - which of the four products is being issued to the multiplier
// add_sub_cnt - which of the four returned products is being collected
// out_cnt     - which of the two output beats is being produced
logic [1:0] mul_cnt, add_sub_cnt;
logic out_cnt;
// Ready generation: each sink is ready only in the states that consume it and
// only when the downstream output register is empty or being accepted this cycle
always_comb begin
// New operands are only consumed while issuing the first two products
i_mul_fe2_if.rdy = (mul_cnt == 0 || mul_cnt == 1) && (~o_mul_fe_if.val || (o_mul_fe_if.val && o_mul_fe_if.rdy));
// Products 0 and 1 are destined for the subtractor, products 2 and 3 for the adder
i_mul_fe_if.rdy = (add_sub_cnt == 0 || add_sub_cnt == 1) ? ~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy) :
~o_add_fe_if.val || (o_add_fe_if.val && o_add_fe_if.rdy);
// Output beat 0 comes from the subtractor, output beat 1 from the adder
i_add_fe_if.rdy = out_cnt == 1 && (~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy));
i_sub_fe_if.rdy = out_cnt == 0 && (~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy));
end
always_ff @ (posedge i_clk) begin
if (i_rst) begin
o_mul_fe2_if.reset_source();
o_add_fe_if.copy_if(0, 0, 1, 1, 0, 0, 0);
o_sub_fe_if.copy_if(0, 0, 1, 1, 0, 0, 0);
o_mul_fe_if.copy_if(0, 0, 1, 1, 0, 0, 0);
a <= 0;
b <= 0;
mul_cnt <= 0;
add_sub_cnt <= 0;
out_cnt <= 0;
end else begin
// Default: drop valid once a beat has been accepted. The state machines below
// may overwrite val in the same cycle (the later nonblocking assignment wins).
if (o_add_fe_if.val && o_add_fe_if.rdy) o_add_fe_if.val <= 0;
if (o_sub_fe_if.val && o_sub_fe_if.rdy) o_sub_fe_if.val <= 0;
if (o_mul_fe_if.val && o_mul_fe_if.rdy) o_mul_fe_if.val <= 0;
if (o_mul_fe2_if.val && o_mul_fe2_if.rdy) o_mul_fe2_if.val <= 0;
// Multiplier request sequencing
case(mul_cnt)
0: begin
if (~o_mul_fe_if.val || (o_mul_fe_if.val && o_mul_fe_if.rdy)) begin
o_mul_fe_if.dat <= i_mul_fe2_if.dat; // a0 * b0
o_mul_fe_if.val <= i_mul_fe2_if.val;
o_mul_fe_if.ctl <= i_mul_fe2_if.ctl;
{b, a} <= i_mul_fe2_if.dat; // Remember b0 (upper half) and a0 (lower half) for the cross products
if (i_mul_fe2_if.val) mul_cnt <= mul_cnt + 1;
end
end
1: begin
if (~o_mul_fe_if.val || (o_mul_fe_if.val && o_mul_fe_if.rdy)) begin
o_mul_fe_if.dat <= i_mul_fe2_if.dat; // a1 * b1
o_mul_fe_if.val <= i_mul_fe2_if.val;
if (i_mul_fe2_if.val) mul_cnt <= mul_cnt + 1;
end
end
2: begin
if (~o_mul_fe_if.val || (o_mul_fe_if.val && o_mul_fe_if.rdy)) begin
// Lower half still holds a1 from the previous request; only the upper half is replaced with b0
o_mul_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= b; // a1 * b0
o_mul_fe_if.val <= 1;
mul_cnt <= mul_cnt + 1;
// Save b1 (upper half of the previous request) for the last product
b <= o_mul_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)];
end
end
3: begin
if (~o_mul_fe_if.val || (o_mul_fe_if.val && o_mul_fe_if.rdy)) begin
o_mul_fe_if.dat <= {a, b}; // b1 * a0
o_mul_fe_if.val <= 1;
mul_cnt <= 0;
end
end
endcase
// Collect the four products and forward them pairwise to the subtractor and adder
case(add_sub_cnt)
0: begin
if (~o_sub_fe_if.val || (o_sub_fe_if.val && o_sub_fe_if.rdy)) begin
o_sub_fe_if.dat[0 +: $bits(FE_TYPE)] <= i_mul_fe_if.dat;
if (i_mul_fe_if.val) add_sub_cnt <= add_sub_cnt + 1;
end
end
1: begin
o_sub_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= i_mul_fe_if.dat;
o_sub_fe_if.ctl <= i_mul_fe_if.ctl; // a0b0 - a1b1
if (i_mul_fe_if.val) begin
o_sub_fe_if.val <= 1;
add_sub_cnt <= add_sub_cnt + 1;
end
end
2: begin
if (~o_add_fe_if.val || (o_add_fe_if.val && o_add_fe_if.rdy)) begin
o_add_fe_if.dat[0 +: $bits(FE_TYPE)] <= i_mul_fe_if.dat;
if (i_mul_fe_if.val) add_sub_cnt <= add_sub_cnt + 1;
end
end
3: begin
o_add_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= i_mul_fe_if.dat;
o_add_fe_if.ctl <= i_mul_fe_if.ctl; // a1b0 + a0b1
if (i_mul_fe_if.val) begin
o_add_fe_if.val <= 1;
add_sub_cnt <= add_sub_cnt + 1; // 2-bit counter wraps back to 0 here
end
end
endcase
// Output framing: beat 0 (sop) is the subtractor result, beat 1 (eop) is the adder result
case(out_cnt)
0: begin
if (~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy)) begin
o_mul_fe2_if.dat <= i_sub_fe_if.dat;
o_mul_fe2_if.sop <= 1;
o_mul_fe2_if.eop <= 0;
o_mul_fe2_if.ctl <= i_sub_fe_if.ctl;
o_mul_fe2_if.val <= i_sub_fe_if.val;
if (i_sub_fe_if.val) out_cnt <= out_cnt + 1;
end
end
1: begin
if (~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy)) begin
o_mul_fe2_if.dat <= i_add_fe_if.dat;
o_mul_fe2_if.sop <= 0;
o_mul_fe2_if.eop <= 1;
o_mul_fe2_if.ctl <= i_add_fe_if.ctl;
o_mul_fe2_if.val <= i_add_fe_if.val;
if (i_add_fe_if.val) out_cnt <= out_cnt + 1; // 1-bit counter wraps back to 0 here
end
end
endcase
end
end
endmodule

View File

@ -0,0 +1,100 @@
/*
This does the Frobenius map calculation required in the final
exponentiation of the ate pairing on a Fp^12 element.
Input is expected to be streamed in with Fp .c0 in the first clock cycle
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Frobenius map sequencing for a 12-beat (Fp12) stream. Three independent
// register stages are chained through external blocks:
//   1. input beats are forwarded to the Fp6 fmap block, framed as 6-beat packets
//   2. beats returned by the Fp6 fmap block are paired with a constant and sent
//      to the Fp2 multiplier
//   3. multiplier results are forwarded to the output, framed as one 12-beat packet
module bls12_381_fe12_fmap
import bls12_381_pkg::*;
#(
parameter type FE_TYPE = fe_t, // Base field element type
parameter CTL_BITS = 12,
parameter CTL_BIT_POW = 8 // This is where we encode the power value with 2 bits - only 0,1,2,3 are supported
)(
input i_clk, i_rst,
// Input/Output interfaces for fmap result, FE_TYPE data width
if_axi_stream.source o_fmap_fe12_if,
if_axi_stream.sink i_fmap_fe12_if,
// Interface to FE6_TYPE fmap block, FE_TYPE data width
if_axi_stream.source o_fmap_fe6_if,
if_axi_stream.sink i_fmap_fe6_if,
// Interface to FE2_TYPE mul (mod P), 2*FE_TYPE data width on the request side
if_axi_stream.source o_mul_fe2_if,
if_axi_stream.sink i_mul_fe2_if
);
// out_cnt2 - beat index (0..5) within the packet sent to the Fp6 fmap block
// out_cnt  - beat index (0..11) of data returned by the Fp6 fmap block
// out_cnt1 - beat index (0..11) of the final output packet
logic [4:0] out_cnt, out_cnt1, out_cnt2;
// Each sink is ready when the register stage it feeds is empty or is being accepted this cycle
always_comb begin
i_fmap_fe12_if.rdy = ~o_fmap_fe6_if.val || (o_fmap_fe6_if.val && o_fmap_fe6_if.rdy);
i_fmap_fe6_if.rdy = ~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy);
i_mul_fe2_if.rdy = ~o_fmap_fe12_if.val || (o_fmap_fe12_if.val && o_fmap_fe12_if.rdy);
end
always_ff @ (posedge i_clk) begin
if (i_rst) begin
o_fmap_fe12_if.reset_source();
o_fmap_fe6_if.reset_source();
o_mul_fe2_if.reset_source();
out_cnt <= 0;
out_cnt1 <= 0;
out_cnt2 <= 0;
end else begin
// Default: clear valid after a beat is accepted; the stage logic below may
// set it again in the same cycle (the later nonblocking assignment wins)
if (o_fmap_fe12_if.val && o_fmap_fe12_if.rdy) o_fmap_fe12_if.val <= 0;
if (o_fmap_fe6_if.val && o_fmap_fe6_if.rdy) o_fmap_fe6_if.val <= 0;
if (o_mul_fe2_if.val && o_mul_fe2_if.rdy) o_mul_fe2_if.val <= 0;
// Stage 1: pass input beats to the Fp6 fmap block, splitting the 12 beats into two 6-beat packets
if (~o_fmap_fe6_if.val || (o_fmap_fe6_if.val && o_fmap_fe6_if.rdy)) begin
o_fmap_fe6_if.val <= i_fmap_fe12_if.val;
o_fmap_fe6_if.sop <= out_cnt2 == 0;
o_fmap_fe6_if.eop <= out_cnt2 == 5;
o_fmap_fe6_if.ctl <= i_fmap_fe12_if.ctl;
o_fmap_fe6_if.dat <= i_fmap_fe12_if.dat;
out_cnt2 <= i_fmap_fe12_if.val ? out_cnt2 == 5 ? 0 : out_cnt2 + 1 : out_cnt2;
end
// Stage 2: pair each returned beat with a constant and issue it to the Fp2 multiplier (two beats per Fp2 value)
if (~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy)) begin
o_mul_fe2_if.val <= i_fmap_fe6_if.val;
o_mul_fe2_if.sop <= out_cnt % 2 == 0;
o_mul_fe2_if.eop <= out_cnt % 2 == 1;
o_mul_fe2_if.ctl <= i_fmap_fe6_if.ctl;
case (out_cnt) inside
// First six beats: the constant is 1 on even beats and 0 on odd beats
// (presumably the Fp2 identity so the data passes through the multiplier
// unchanged and stays in order with the later beats)
0,1,2,3,4,5: begin
o_mul_fe2_if.dat[0 +: $bits(FE_TYPE)] <= i_fmap_fe6_if.dat;
o_mul_fe2_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= out_cnt % 2 == 0 ? 1 : 0;
end
// Last six beats: the constant is the Fp12 Frobenius coefficient selected by the
// 2-bit power field in ctl; even beats take element [0], odd beats element [1]
6,7,8,9,10,11: o_mul_fe2_if.dat <= {FROBENIUS_COEFF_FQ12_C1[i_fmap_fe6_if.ctl[CTL_BIT_POW +: 2]][out_cnt % 2], i_fmap_fe6_if.dat};
endcase
out_cnt <= i_fmap_fe6_if.val ? out_cnt == 11 ? 0 : out_cnt + 1 : out_cnt;
end
// Stage 3: forward multiplier results as a single 12-beat output packet
if (~o_fmap_fe12_if.val || (o_fmap_fe12_if.val && o_fmap_fe12_if.rdy)) begin
o_fmap_fe12_if.val <= i_mul_fe2_if.val;
o_fmap_fe12_if.sop <= out_cnt1 == 0;
o_fmap_fe12_if.eop <= out_cnt1 == 11;
o_fmap_fe12_if.ctl <= i_mul_fe2_if.ctl;
o_fmap_fe12_if.dat <= i_mul_fe2_if.dat;
out_cnt1 <= i_mul_fe2_if.val ? out_cnt1 == 11 ? 0 : out_cnt1 + 1 : out_cnt1;
end
end
end
endmodule

View File

@ -0,0 +1,114 @@
/*
This wraps the Fp^2, Fp^6 and Fp^12 Frobenius map blocks used for the final
exponentiation in the ate pairing, and shares one Fp^2 multiplier interface
between the Fp^6 and Fp^12 blocks.
Input is expected to be streamed in with Fp .c0 in the first clock cycle
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Structural wrapper: chains the Fp12 -> Fp6 -> Fp2 Frobenius map blocks and
// arbitrates the single external Fp2 multiplier port between the Fp6 and Fp12
// blocks through a two-input resource_share.
module bls12_381_fe12_fmap_wrapper
  import bls12_381_pkg::*;
#(
  parameter type FE_TYPE = fe_t,
  parameter CTL_BITS = 12,
  parameter CTL_BIT_POW = 8 // This is where we encode the power value with 2 bits - only 0,1,2,3 are supported - 1 extra bit required after this for control
)(
  input i_clk, i_rst,
  // Input/Output interfaces for fmap result, FE_TYPE data width
  if_axi_stream.source o_fmap_fe12_if,
  if_axi_stream.sink   i_fmap_fe12_if,
  // Interface to FE2_TYPE mul (mod P), 2*FE_TYPE data width
  if_axi_stream.source o_mul_fe2_if,
  if_axi_stream.sink   i_mul_fe2_if,
  // Interface to FE_TYPE mul (mod P), 2*FE_TYPE data width
  if_axi_stream.source o_mul_fe_if,
  if_axi_stream.sink   i_mul_fe_if
);

  // Links between the Fp12 block and the Fp6 block (_o = request, _i = response)
  if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) fmap_fe6_if_o (i_clk);
  if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) fmap_fe6_if_i (i_clk);

  // Links between the Fp6 block and the Fp2 block (_o = request, _i = response)
  if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) fmap_fe2_if_o (i_clk);
  if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) fmap_fe2_if_i (i_clk);

  // Fp2 multiplier request/response ports: index 0 = Fp6 block, index 1 = Fp12 block
  if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_if_o [1:0] (i_clk);
  if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_if_i [1:0] (i_clk);

  // Top level of the chain: Fp12 Frobenius map
  bls12_381_fe12_fmap #(
    .FE_TYPE     ( FE_TYPE     ),
    .CTL_BITS    ( CTL_BITS    ),
    .CTL_BIT_POW ( CTL_BIT_POW )
  )
  bls12_381_fe12_fmap (
    .i_clk          ( i_clk           ),
    .i_rst          ( i_rst           ),
    .o_fmap_fe12_if ( o_fmap_fe12_if  ),
    .i_fmap_fe12_if ( i_fmap_fe12_if  ),
    .o_fmap_fe6_if  ( fmap_fe6_if_o   ),
    .i_fmap_fe6_if  ( fmap_fe6_if_i   ),
    .o_mul_fe2_if   ( mul_fe2_if_o[1] ),
    .i_mul_fe2_if   ( mul_fe2_if_i[1] )
  );

  // Middle of the chain: Fp6 Frobenius map
  bls12_381_fe6_fmap #(
    .FE_TYPE     ( FE_TYPE     ),
    .CTL_BITS    ( CTL_BITS    ),
    .CTL_BIT_POW ( CTL_BIT_POW )
  )
  bls12_381_fe6_fmap (
    .i_clk         ( i_clk           ),
    .i_rst         ( i_rst           ),
    .o_fmap_fe6_if ( fmap_fe6_if_i   ),
    .i_fmap_fe6_if ( fmap_fe6_if_o   ),
    .o_fmap_fe2_if ( fmap_fe2_if_o   ),
    .i_fmap_fe2_if ( fmap_fe2_if_i   ),
    .o_mul_fe2_if  ( mul_fe2_if_o[0] ),
    .i_mul_fe2_if  ( mul_fe2_if_i[0] )
  );

  // Bottom of the chain: Fp2 Frobenius map, which drives the external Fp multiplier directly
  bls12_381_fe2_fmap #(
    .FE_TYPE     ( FE_TYPE     ),
    .CTL_BITS    ( CTL_BITS    ),
    .CTL_BIT_POW ( CTL_BIT_POW )
  )
  bls12_381_fe2_fmap (
    .i_clk         ( i_clk         ),
    .i_rst         ( i_rst         ),
    .o_fmap_fe2_if ( fmap_fe2_if_i ),
    .i_fmap_fe2_if ( fmap_fe2_if_o ),
    .o_mul_fe_if   ( o_mul_fe_if   ),
    .i_mul_fe_if   ( i_mul_fe_if   )
  );

  // Arbiter for the external Fp2 multiplier; uses the ctl bit just above the 2-bit power field
  resource_share # (
    .NUM_IN       ( 2                ),
    .DAT_BITS     ( 2*$bits(FE_TYPE) ),
    .CTL_BITS     ( CTL_BITS         ),
    .OVR_WRT_BIT  ( CTL_BIT_POW+2    ),
    .PIPELINE_IN  ( 0                ),
    .PIPELINE_OUT ( 0                )
  )
  resource_share_fe2_mul (
    .i_clk ( i_clk             ),
    .i_rst ( i_rst             ),
    .i_axi ( mul_fe2_if_o[1:0] ),
    .o_res ( o_mul_fe2_if      ),
    .i_res ( i_mul_fe2_if      ),
    .o_axi ( mul_fe2_if_i[1:0] )
  );

endmodule

View File

@ -0,0 +1,84 @@
/*
This does the Frobenius map calculation required in the final
exponentiation of the ate pairing on a Fp^2 element.
Input is expected to be streamed in with Fp .c0 in the first clock cycle
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Frobenius map sequencing for a 2-beat (Fp2) stream using an external
// base-field multiplier. The first beat of each pair is multiplied by 1, the
// second beat by FROBENIUS_COEFF_FQ2_C1[pow % 2], where pow is the 2-bit power
// field found at ctl[CTL_BIT_POW +: 2].
module bls12_381_fe2_fmap
  import bls12_381_pkg::*;
#(
  parameter type FE_TYPE = fe_t,        // Base field element type
  parameter CTL_BITS = 12,
  parameter CTL_BIT_POW = 8             // This is where we encode the power value with 2 bits - only 0,1,2,3 are supported
)(
  input i_clk, i_rst,
  // Input/Output interfaces for fmap result, FE_TYPE data width
  if_axi_stream.source o_fmap_fe2_if,
  if_axi_stream.sink   i_fmap_fe2_if,
  // Interface to FE_TYPE mul (mod P), 2*FE_TYPE data width
  if_axi_stream.source o_mul_fe_if,
  if_axi_stream.sink   i_mul_fe_if
);

// Selects which beat of the Fp2 pair is being issued to the multiplier (0 = first, 1 = second)
logic mul_cnt;

// Each sink is ready when the register stage it feeds is empty or is being accepted this cycle
always_comb begin
  i_fmap_fe2_if.rdy = ~o_mul_fe_if.val || (o_mul_fe_if.val && o_mul_fe_if.rdy);
  i_mul_fe_if.rdy = ~o_fmap_fe2_if.val || (o_fmap_fe2_if.val && o_fmap_fe2_if.rdy);
end

always_ff @ (posedge i_clk) begin
  if (i_rst) begin
    o_fmap_fe2_if.reset_source();
    o_mul_fe_if.reset_source();
    mul_cnt <= 0;
  end else begin

    // Default: clear valid after a beat is accepted; the stage logic below may
    // set it again in the same cycle (the later nonblocking assignment wins)
    if (o_mul_fe_if.val && o_mul_fe_if.rdy) o_mul_fe_if.val <= 0;
    if (o_fmap_fe2_if.val && o_fmap_fe2_if.rdy) o_fmap_fe2_if.val <= 0;

    // Stage 1: issue each input beat to the multiplier together with its constant
    if (~o_mul_fe_if.val || (o_mul_fe_if.val && o_mul_fe_if.rdy)) begin
      case(mul_cnt)
        0: begin
          // First beat is multiplied by 1 so it takes the same path as the second beat
          o_mul_fe_if.dat[0 +: $bits(FE_TYPE)] <= i_fmap_fe2_if.dat;
          o_mul_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= 1;
        end
        1: begin
          // The coefficient table only has two entries (index = pow % 2), so it is
          // indexed with the least significant bit of the power field. Indexing with
          // the full 2-bit field reads out of range (X) for powers 2 and 3.
          o_mul_fe_if.dat <= {i_fmap_fe2_if.dat, FROBENIUS_COEFF_FQ2_C1[i_fmap_fe2_if.ctl[CTL_BIT_POW]]};
        end
      endcase
      o_mul_fe_if.val <= i_fmap_fe2_if.val;
      o_mul_fe_if.ctl <= i_fmap_fe2_if.ctl;
      o_mul_fe_if.sop <= 1;
      o_mul_fe_if.eop <= 1;
      mul_cnt <= i_fmap_fe2_if.val ? mul_cnt + 1 : mul_cnt;
    end

    // Stage 2: forward multiplier results, toggling sop/eop so every two beats form one packet
    if (~o_fmap_fe2_if.val || (o_fmap_fe2_if.val && o_fmap_fe2_if.rdy)) begin
      o_fmap_fe2_if.val <= i_mul_fe_if.val;
      o_fmap_fe2_if.eop <= i_mul_fe_if.val ? o_fmap_fe2_if.sop : o_fmap_fe2_if.eop;
      o_fmap_fe2_if.sop <= i_mul_fe_if.val ? ~o_fmap_fe2_if.sop : o_fmap_fe2_if.sop;
      o_fmap_fe2_if.dat <= i_mul_fe_if.dat;
      o_fmap_fe2_if.ctl <= i_mul_fe_if.ctl;
    end
  end
end
endmodule

View File

@ -0,0 +1,99 @@
/*
This does the Frobenius map calculation required in the final
exponentiation of the ate pairing on a Fp^6 element.
Input is expected to be streamed in with Fp .c0 in the first clock cycle
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Frobenius map sequencing for a 6-beat (Fp6) stream. Three independent
// register stages are chained through external blocks:
//   1. input beats are forwarded to the Fp2 fmap block, framed as 2-beat packets
//   2. beats returned by the Fp2 fmap block are paired with a constant and sent
//      to the Fp2 multiplier
//   3. multiplier results are forwarded to the output, framed as one 6-beat packet
module bls12_381_fe6_fmap
import bls12_381_pkg::*;
#(
parameter type FE_TYPE = fe_t, // Base field element type
parameter CTL_BITS = 12,
parameter CTL_BIT_POW = 8 // This is where we encode the power value with 2 bits - only 0,1,2,3 are supported
)(
input i_clk, i_rst,
// Input/Output interfaces for fmap result, FE_TYPE data width
if_axi_stream.source o_fmap_fe6_if,
if_axi_stream.sink i_fmap_fe6_if,
// Interface to FE2_TYPE fmap block, FE_TYPE data width
if_axi_stream.source o_fmap_fe2_if,
if_axi_stream.sink i_fmap_fe2_if,
// Interface to FE2_TYPE mul (mod P), 2*FE_TYPE data width on the request side
if_axi_stream.source o_mul_fe2_if,
if_axi_stream.sink i_mul_fe2_if
);
// out_cnt  - beat index (0..5) of data returned by the Fp2 fmap block
// out_cnt1 - beat index (0..5) of the final output packet
logic [2:0] out_cnt, out_cnt1;
// Each sink is ready when the register stage it feeds is empty or is being accepted this cycle
always_comb begin
i_fmap_fe6_if.rdy = ~o_fmap_fe2_if.val || (o_fmap_fe2_if.val && o_fmap_fe2_if.rdy);
i_fmap_fe2_if.rdy = ~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy);
i_mul_fe2_if.rdy = ~o_fmap_fe6_if.val || (o_fmap_fe6_if.val && o_fmap_fe6_if.rdy);
end
always_ff @ (posedge i_clk) begin
if (i_rst) begin
o_fmap_fe6_if.reset_source();
o_fmap_fe2_if.reset_source();
o_mul_fe2_if.reset_source();
out_cnt <= 0;
out_cnt1 <= 0;
end else begin
// Default: clear valid after a beat is accepted; the stage logic below may
// set it again in the same cycle (the later nonblocking assignment wins)
if (o_fmap_fe6_if.val && o_fmap_fe6_if.rdy) o_fmap_fe6_if.val <= 0;
if (o_fmap_fe2_if.val && o_fmap_fe2_if.rdy) o_fmap_fe2_if.val <= 0;
if (o_mul_fe2_if.val && o_mul_fe2_if.rdy) o_mul_fe2_if.val <= 0;
// Stage 1: pass input beats to the Fp2 fmap block, toggling sop/eop so every two beats form one packet
if (~o_fmap_fe2_if.val || (o_fmap_fe2_if.val && o_fmap_fe2_if.rdy)) begin
o_fmap_fe2_if.val <= i_fmap_fe6_if.val;
o_fmap_fe2_if.dat <= i_fmap_fe6_if.dat;
o_fmap_fe2_if.sop <= i_fmap_fe6_if.val ? ~o_fmap_fe2_if.sop : o_fmap_fe2_if.sop;
o_fmap_fe2_if.eop <= i_fmap_fe6_if.val ? o_fmap_fe2_if.sop : o_fmap_fe2_if.eop;
o_fmap_fe2_if.ctl <= i_fmap_fe6_if.ctl;
end
// Stage 2: pair each returned beat with a constant and issue it to the Fp2 multiplier (two beats per Fp2 value)
if (~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy)) begin
o_mul_fe2_if.val <= i_fmap_fe2_if.val;
o_mul_fe2_if.sop <= out_cnt % 2 == 0;
o_mul_fe2_if.eop <= out_cnt % 2 == 1;
o_mul_fe2_if.ctl <= i_fmap_fe2_if.ctl;
case (out_cnt) inside
// Beats 0,1: the constant is 1 then 0 (presumably the Fp2 identity so the data
// passes through the multiplier unchanged and stays in order with the later beats)
0,1: begin
o_mul_fe2_if.dat[0 +: $bits(FE_TYPE)] <= i_fmap_fe2_if.dat;
o_mul_fe2_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= out_cnt == 0 ? 1 : 0;
end
// Beats 2,3 use the C1 coefficient table and beats 4,5 the C2 table, both selected by the
// 2-bit power field in ctl; even beats take element [0], odd beats element [1]
2,3: o_mul_fe2_if.dat <= {FROBENIUS_COEFF_FQ6_C1[i_fmap_fe2_if.ctl[CTL_BIT_POW +: 2]][out_cnt % 2], i_fmap_fe2_if.dat};
4,5: o_mul_fe2_if.dat <= {FROBENIUS_COEFF_FQ6_C2[i_fmap_fe2_if.ctl[CTL_BIT_POW +: 2]][out_cnt % 2], i_fmap_fe2_if.dat};
endcase
out_cnt <= i_fmap_fe2_if.val ? out_cnt == 5 ? 0 : out_cnt + 1 : out_cnt;
end
// Stage 3: forward multiplier results as a single 6-beat output packet
if (~o_fmap_fe6_if.val || (o_fmap_fe6_if.val && o_fmap_fe6_if.rdy)) begin
o_fmap_fe6_if.val <= i_mul_fe2_if.val;
o_fmap_fe6_if.sop <= out_cnt1 == 0;
o_fmap_fe6_if.eop <= out_cnt1 == 5;
o_fmap_fe6_if.ctl <= i_mul_fe2_if.ctl;
o_fmap_fe6_if.dat <= i_mul_fe2_if.dat;
out_cnt1 <= i_mul_fe2_if.val ? out_cnt1 == 5 ? 0 : out_cnt1 + 1 : out_cnt1;
end
end
end
endmodule

View File

@ -26,7 +26,7 @@ package bls12_381_pkg;
fe_t Gx = 381'h17F1D3A73197D7942695638C4FA9AC0FC3688C4F9774B905A14E3A3F171BAC586C55E83FF97A1AEFFB3AF00ADB22C6BB;
fe_t Gy = 381'h08B3F481E3AAA0F1A09E30ED741D8AE4FCF5E095D5D00AF600DB18CB2C04B3EDD03CC744A2888AE40CAA232946C5E7E1;
localparam [63:0] ATE_X = 64'hd201000000010000;
localparam ATE_X_START = 63;
@ -71,7 +71,47 @@ package bls12_381_pkg;
typedef fe_t [1:0] fe2_t;
typedef fe2_t [2:0] fe6_t;
typedef fe6_t [1:0] fe12_t;
// These are used in the final exponentiation of the pairing.
// We only list coeff needed for powers of 0,1,2,3
// Ordering note: the arrays are declared [3:0] (or [1:0]) and initialised with a
// concatenation, so the FIRST listed entry is the highest index and the LAST
// listed entry is index 0. Likewise each fe2_t entry is written {element [1], element [0]}.
// The index-0 entry of every table is the multiplicative identity.
// Fp12 coefficient, indexed by power 0..3
parameter fe2_t FROBENIUS_COEFF_FQ12_C1 [3:0] = {
{381'h06af0e0437ff400b6831e36d6bd17ffe48395dabc2d3435e77f76e17009241c5ee67992f72ec05f4c81084fbede3cc09,
381'h135203e60180a68ee2e9c448d77a2cd91c3dedd930b1cf60ef396489f61eb45e304466cf3e67fa0af1ee7b04121bdea2},
{381'h0,
381'h00000000000000005f19672fdf76ce51ba69c6076a0f77eaddb3a93be6f89688de17d813620a00022e01fffffffeffff},
{381'h00fc3e2b36c4e03288e9e902231f9fb854a14787b6c7b36fec0c8ec971f63c5f282d5ac14d6c7ec22cf78a126ddc4af3,
381'h1904d3bf02bb0667c231beb4202c0d1f0fd603fd3cbd5f4f7b2443d784bab9c4f67ea53d63e7813d8d0775ed92235fb8},
{381'h0,
381'h1}};
// Fp6 coefficient applied to element [1] of an fe6_t, indexed by power 0..3
parameter fe2_t FROBENIUS_COEFF_FQ6_C1 [3:0] = {
{381'h1,
381'h0},
{381'h0,
381'h00000000000000005f19672fdf76ce51ba69c6076a0f77eaddb3a93be6f89688de17d813620a00022e01fffffffefffe},
{381'h1a0111ea397fe699ec02408663d4de85aa0d857d89759ad4897d29650fb85f9b409427eb4f49fffd8bfd00000000aaac,
381'h0},
{381'h0,
381'h1}};
// Fp6 coefficient applied to element [2] of an fe6_t, indexed by power 0..3
parameter fe2_t FROBENIUS_COEFF_FQ6_C2 [3:0] = {
{381'h0,
381'h1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaaa},
{381'h0,
381'h1a0111ea397fe699ec02408663d4de85aa0d857d89759ad4897d29650fb85f9b409427eb4f49fffd8bfd00000000aaac},
{381'h0,
381'h1a0111ea397fe699ec02408663d4de85aa0d857d89759ad4897d29650fb85f9b409427eb4f49fffd8bfd00000000aaad},
{381'h0,
381'h1}};
// Fp2 coefficient applied to element [1] of an fe2_t. Only two entries: index with (power % 2).
parameter fe_t FROBENIUS_COEFF_FQ2_C1 [1:0] = {
381'h1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaaa,
381'h000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001};
// Generator points for G2
fe2_t G2x = {381'h13e02b6052719f607dacd3a088274f65596bd0d09920b61ab5da61bbdc7f5049334cf11213945d57e5ac7d055d042b7e,
381'h024aa2b2f08f0a91260805272dc51051c6e47ad4fa403b02b4510b647ae3d1770bac0326a805bbefd48056c8c121bdb8};
@ -326,7 +366,7 @@ package bls12_381_pkg;
endfunction
function jb_point_t point_mult(logic [DAT_BITS-1:0] c, jb_point_t p);
function jb_point_t point_mult(input logic [DAT_BITS-1:0] c, jb_point_t p);
jb_point_t result, addend;
result = 0;
addend = p;
@ -340,7 +380,7 @@ package bls12_381_pkg;
return result;
endfunction
function fp2_jb_point_t fp2_point_mult(logic [DAT_BITS-1:0] c, fp2_jb_point_t p);
function fp2_jb_point_t fp2_point_mult(input logic [DAT_BITS-1:0] c, fp2_jb_point_t p);
fp2_jb_point_t result, addend;
result = 0;
addend = p;
@ -425,6 +465,50 @@ package bls12_381_pkg;
fe6_mul_by_nonresidue[2] = a[1];
fe6_mul_by_nonresidue[0] = fe2_mul_by_nonresidue(a[2]);
endfunction
// Inverse of an Fp6 element. Builds three cofactor terms c0, c1, c2 from the
// input, combines them into a single Fp2 value, inverts that with fe2_inv and
// scales each cofactor by the result.
function fe6_t fe6_inv(fe6_t a);
  fe2_t c0, c1, c2, prod, acc, scale;

  // c0 = a0*a0 - nr(a2)*a1
  c0   = fe2_mul_by_nonresidue(a[2]);
  c0   = fe2_mul(c0, a[1]);
  c0   = fe2_sub(0, c0);
  prod = fe2_mul(a[0], a[0]);
  c0   = fe2_add(prod, c0);

  // c1 = nr(a2*a2) - a0*a1
  c1   = fe2_mul(a[2], a[2]);
  c1   = fe2_mul_by_nonresidue(c1);
  prod = fe2_mul(a[0], a[1]);
  c1   = fe2_sub(c1, prod);

  // c2 = a1*a1 - a2*a0
  c2   = fe2_mul(a[1], a[1]);
  prod = fe2_mul(a[2], a[0]);
  c2   = fe2_sub(c2, prod);

  // acc = nr(a2*c1 + a1*c2) + a0*c0, the value that gets inverted in Fp2
  prod = fe2_mul(a[2], c1);
  acc  = fe2_mul(a[1], c2);
  acc  = fe2_add(prod, acc);
  acc  = fe2_mul_by_nonresidue(acc);
  prod = fe2_mul(a[0], c0);
  acc  = fe2_add(acc, prod);

  scale = fe2_inv(acc);

  fe6_inv[0] = fe2_mul(c0, scale);
  fe6_inv[1] = fe2_mul(c1, scale);
  fe6_inv[2] = fe2_mul(c2, scale);
endfunction
// Inverse of an Fp12 element a = (a[1], a[0]):
//   t        = (a0*a0 - nr(a1*a1))^-1   (computed in Fp6)
//   result0  = a0 * t
//   result1  = -(a1 * t)
function fe12_t fe12_inv(fe12_t a);
  // Intermediates are Fp6 values, so they are declared as fe6_t (they were
  // previously fe12_t and relied on implicit truncation/extension)
  fe6_t sub_i0, sub_i1;
  sub_i0 = fe6_mul(a[0], a[0]);
  sub_i1 = fe6_mul(a[1], a[1]);
  sub_i1 = fe6_mul_by_nonresidue(sub_i1);
  sub_i0 = fe6_sub(sub_i0, sub_i1);
  sub_i0 = fe6_inv(sub_i0);
  fe12_inv[0] = fe6_mul(a[0], sub_i0);
  fe12_inv[1] = fe6_mul(a[1], sub_i0);
  fe12_inv[1] = fe6_sub(0, fe12_inv[1]);
endfunction
function fe6_t fe6_add(fe6_t a, b);
for(int i = 0; i < 3; i++)
@ -495,6 +579,20 @@ package bls12_381_pkg;
bb = fe6_mul_by_nonresidue(bb); // 7. bb = mnr(bb) [6]
fe12_mul[0] = fe6_add(bb, aa); // 8. fe6_mul[0] = add(add_i0, bb) [0, 1, 7]
endfunction
// Square of an Fp12 element a = (a[1], a[0]) using one shared product:
//   ab      = a0 * a1
//   result0 = (a0 + a1) * (a0 + nr(a1)) - ab - nr(ab)   = a0^2 + nr(a1^2)
//   result1 = 2 * ab
function fe12_t fe12_sqr(fe12_t a);
  fe6_t sub_i1, mul_i0, mul_i1;
  sub_i1 = fe6_mul(a[0], a[1]);                 // ab
  mul_i0 = fe6_add(a[0], a[1]);                 // a0 + a1
  mul_i1 = fe6_mul_by_nonresidue(a[1]);
  mul_i1 = fe6_add(mul_i1, a[0]);               // a0 + nr(a1) (was incorrectly doubling nr(a1))
  fe12_sqr[0] = fe6_mul(mul_i1, mul_i0);
  fe12_sqr[0] = fe6_sub(fe12_sqr[0], sub_i1);
  fe12_sqr[1] = fe6_add(sub_i1, sub_i1);        // Fp6 addition (fe2_add would truncate the operands)
  sub_i1 = fe6_mul_by_nonresidue(sub_i1);
  fe12_sqr[0] = fe6_sub(fe12_sqr[0], sub_i1);
endfunction
// This performs the miller loop
@ -520,7 +618,12 @@ package bls12_381_pkg;
end
endtask
// Full ate pairing: runs the Miller loop on (P, Q) into f, then applies the
// final exponentiation to f in place.
task automatic ate_pairing(input af_point_t P, input fp2_af_point_t Q, ref fe12_t f);
  miller_loop(P, Q, f);
  final_exponent(f);
endtask : ate_pairing
// This performs both the line evaluation and the doubling
// Returns a sparse f12 element
task automatic miller_double_step(ref fp2_jb_point_t R, input af_point_t P, ref fe12_t f);
@ -656,30 +759,101 @@ package bls12_381_pkg;
f = {{FE2_zero, t10, FE2_zero}, {FE2_zero, t1, t9}};
endtask
// Calculates the final exponent used in ate pairing
/*task automatic final_exponent(ref fe12_t f);
f = fe12_sub(0, f); // TODO can remove this?
endtask*/
// Sparse multiplication by coefficients 0,1,4
function fe12_t f12_sparse_mul_014(fe12_t f, fe2_t c0, c1, c4);
fe6_t aa, bb;
fe2_t t;
aa = fe6_mul(f[0], {FE2_zero, c1, c0}); // TODO implement sparse fp6
bb = fe6_mul(f[1], {FE2_zero, c4, FE2_zero}); // TODO implement sparse fp6
t = fe2_add(c1, c4);
f[1] = fe6_add(f[1], f[0]);
f[1] = fe6_mul(f[1], {FE2_zero, t, c0});
f[1] = fe6_sub(f[1], aa);
f[1] = fe6_sub(f[1], bb);
f[0] = fe6_mul_by_nonresidue(bb);
f[0] = fe6_add(f[0], aa);
return f;
// Frobenius map on an Fp2 element: element [0] is unchanged and element [1] is
// multiplied by the coefficient selected by (pow % 2).
function fe2_t fe2_fmap(input fe2_t a, input int pow);
  fe2_t mapped;
  mapped[1] = fe_mul(a[1], FROBENIUS_COEFF_FQ2_C1[pow % 2]);
  mapped[0] = a[0];
  return mapped;
endfunction
// Frobenius map on an Fp6 element: each Fp2 element is mapped with fe2_fmap,
// then elements [1] and [2] are multiplied by the C1 and C2 coefficients
// selected by (pow % 6). Only coefficients 0..3 are tabulated.
function fe6_t fe6_fmap(input fe6_t a, input int pow);
  // The coefficient tables have 4 entries; report instead of silently reading out of range
  if ((pow % 6) < 0 || (pow % 6) > 3)
    $error("fe6_fmap: no Frobenius coefficient tabulated for pow=%0d", pow);
  fe6_fmap[0] = fe2_fmap(a[0], pow);
  fe6_fmap[1] = fe2_fmap(a[1], pow);
  fe6_fmap[2] = fe2_fmap(a[2], pow);
  fe6_fmap[1] = fe2_mul(fe6_fmap[1], FROBENIUS_COEFF_FQ6_C1[pow % 6]);
  fe6_fmap[2] = fe2_mul(fe6_fmap[2], FROBENIUS_COEFF_FQ6_C2[pow % 6]);
endfunction
// Frobenius map on an Fp12 element: both Fp6 halves are mapped with fe6_fmap,
// then every Fp2 element of half [1] is multiplied by the coefficient selected
// by (pow % 12). Only coefficients 0..3 are tabulated.
function fe12_t fe12_fmap(input fe12_t a, input int pow);
  // The coefficient table has 4 entries; report instead of silently reading out of range
  if ((pow % 12) < 0 || (pow % 12) > 3)
    $error("fe12_fmap: no Frobenius coefficient tabulated for pow=%0d", pow);
  fe12_fmap[0] = fe6_fmap(a[0], pow);
  fe12_fmap[1] = fe6_fmap(a[1], pow);
  fe12_fmap[1][0] = fe2_mul(fe12_fmap[1][0], FROBENIUS_COEFF_FQ12_C1[pow % 12]);
  fe12_fmap[1][1] = fe2_mul(fe12_fmap[1][1], FROBENIUS_COEFF_FQ12_C1[pow % 12]);
  fe12_fmap[1][2] = fe2_mul(fe12_fmap[1][2], FROBENIUS_COEFF_FQ12_C1[pow % 12]);
endfunction
// Square-and-multiply exponentiation of an Fp12 element, scanning the exponent
// from its least significant bit. Exponent can be up to 1024 bits.
// Note the result is NOT plain a^pow: half [1] of the result is negated before
// returning (NOTE(review): presumably to account for a negative ATE_X - confirm).
function fe12_t fe12_pow(input fe12_t a, input logic [1023:0] pow);
  fe12_t acc, base;
  logic [1023:0] remaining;

  acc = FE12_one;
  base = a;
  remaining = pow;

  while (remaining != 0) begin
    if (remaining[0])
      acc = fe12_mul(acc, base);
    base = fe12_mul(base, base);
    remaining = remaining >> 1;
  end

  acc[1] = fe6_sub(0, acc[1]);
  return acc;
endfunction
// Calculates the final exponent used in ate pairing. f is updated in place.
// "Negating half [1]" below is written as fe6_sub(0, x[1]).
// NOTE(review): the first step forms f * inverse(f with half [1] negated);
// whether that orientation is intended depends on what miller_loop returns - confirm.
task automatic final_exponent(ref fe12_t f);
  fe12_t frob, y0, y1, y2, y3, r, neg_inv;
  logic [63:0] x_full, x_half, x_even;

  // Exponents used below: ATE_X, ATE_X >> 1, and (ATE_X >> 1) << 1
  x_full = ATE_X;
  x_half = x_full >> 1;
  x_even = x_half << 1;

  // r = f * inverse(f with half [1] negated), then r = fmap(r, 2) * r
  r = f;
  r[1] = fe6_sub(0, r[1]);
  neg_inv = fe12_inv(r);
  r = fe12_mul(f, neg_inv);
  frob = fe12_fmap(r, 2);
  r = fe12_mul(frob, r);

  // Remaining chain of fe12_pow / fe12_mul / fe12_fmap steps
  y0 = fe12_mul(r, r);
  y1 = fe12_pow(y0, x_full);
  y2 = fe12_pow(y1, x_half);

  y3 = r;
  y3[1] = fe6_sub(0, y3[1]);
  y1 = fe12_mul(y1, y3);
  y1[1] = fe6_sub(0, y1[1]);
  y1 = fe12_mul(y1, y2);

  y2 = fe12_pow(y1, x_even);
  y3 = fe12_pow(y2, x_even);

  y1[1] = fe6_sub(0, y1[1]);
  y3 = fe12_mul(y3, y1);
  y1[1] = fe6_sub(0, y1[1]);

  y1 = fe12_fmap(y1, 3);
  y2 = fe12_fmap(y2, 2);
  y1 = fe12_mul(y1, y2);

  y2 = fe12_pow(y3, x_even);
  y2 = fe12_mul(y2, y0);
  y2 = fe12_mul(y2, r);
  y1 = fe12_mul(y1, y2);

  y2 = fe12_fmap(y3, 1);
  f = fe12_mul(y1, y2);
endtask
function af_point_t to_affine(jb_point_t p);
fe_t z_;

View File

@ -0,0 +1,201 @@
/*
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
`timescale 1ps/1ps
// Testbench for bls12_381_fe12_fmap_wrapper. Builds the supporting arithmetic
// (base-field multiplier, adder, subtractor, Fp2 multiplier, multiplier
// arbiter), streams random Fp12 elements through the DUT for powers 0..3 and
// compares the result against the fe12_fmap software model.
module bls12_381_fmap_tb ();

import common_pkg::*;
import bls12_381_pkg::*;

parameter type FE_TYPE = bls12_381_pkg::fe_t;
parameter P = bls12_381_pkg::P;

localparam CTL_BITS = 32;
localparam CLK_PERIOD = 100;

logic clk, rst;

// Reset pulse: asserted after 20 clock periods, released 20 periods later
initial begin
  rst = 0;
  repeat(2) #(20*CLK_PERIOD) rst = ~rst;
end

initial begin
  clk = 0;
  forever #CLK_PERIOD clk = ~clk;
end

// Base-field multiplier ports: [0] = fmap wrapper, [1] = Fp2 multiplier, [2] = shared multiplier
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_o_if [2:0] (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_i_if [2:0] (clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_o_if (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_i_if (clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_o_if (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_i_if (clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_o_if (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_i_if (clk);

// DUT input/output streams, byte-aligned width (one field element per beat)
if_axi_stream #(.DAT_BYTS(($bits(FE_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) fmap_fe12_o_if (clk);
if_axi_stream #(.DAT_BYTS(($bits(FE_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) fmap_fe12_i_if (clk);

ec_fp_mult_mod #(
  .P             ( P        ),
  .KARATSUBA_LVL ( 3        ),
  .CTL_BITS      ( CTL_BITS )
)
ec_fp_mult_mod (
  .i_clk( clk ),
  .i_rst( rst ),
  .i_mul ( mul_fe_o_if[2] ),
  .o_mul ( mul_fe_i_if[2] )
);

adder_pipe # (
  .BITS     ( bls12_381_pkg::DAT_BITS ),
  .P        ( P                       ),
  .CTL_BITS ( CTL_BITS                ),
  .LEVEL    ( 2                       )
)
adder_pipe (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_add ( add_fe_o_if ),
  .o_add ( add_fe_i_if )
);

subtractor_pipe # (
  .BITS     ( bls12_381_pkg::DAT_BITS ),
  .P        ( P                       ),
  .CTL_BITS ( CTL_BITS                ),
  .LEVEL    ( 2                       )
)
subtractor_pipe (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_sub ( sub_fe_o_if ),
  .o_sub ( sub_fe_i_if )
);

ec_fe2_mul #(
  .FE_TYPE  ( FE_TYPE  ),
  .CTL_BITS ( CTL_BITS )
)
ec_fe2_mul (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .o_mul_fe2_if ( mul_fe2_i_if ),
  .i_mul_fe2_if ( mul_fe2_o_if ),
  .o_add_fe_if ( add_fe_o_if ),
  .i_add_fe_if ( add_fe_i_if ),
  .o_sub_fe_if ( sub_fe_o_if),
  .i_sub_fe_if ( sub_fe_i_if ),
  .o_mul_fe_if ( mul_fe_o_if[1] ),
  .i_mul_fe_if ( mul_fe_i_if[1] )
);

// Shares the single base-field multiplier between the fmap wrapper and the Fp2 multiplier
resource_share # (
  .NUM_IN       ( 2                ),
  .DAT_BITS     ( 2*$bits(FE_TYPE) ),
  .CTL_BITS     ( CTL_BITS         ),
  .OVR_WRT_BIT  ( 4                ),
  .PIPELINE_IN  ( 0                ),
  .PIPELINE_OUT ( 0                )
)
resource_share_fe_mul (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_axi ( mul_fe_o_if[1:0] ),
  .o_res ( mul_fe_o_if[2] ),
  .i_res ( mul_fe_i_if[2] ),
  .o_axi ( mul_fe_i_if[1:0] )
);

// DUT. The power value is carried in ctl bits [1:0] (CTL_BIT_POW = 0)
bls12_381_fe12_fmap_wrapper #(
  .FE_TYPE     ( FE_TYPE  ),
  .CTL_BITS    ( CTL_BITS ),
  .CTL_BIT_POW ( 0        )
)
bls12_381_fe12_fmap_wrapper (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .o_fmap_fe12_if ( fmap_fe12_o_if ),
  .i_fmap_fe12_if ( fmap_fe12_i_if ),
  .o_mul_fe2_if ( mul_fe2_o_if ),
  .i_mul_fe2_if ( mul_fe2_i_if ),
  .o_mul_fe_if ( mul_fe_o_if[0] ),
  .i_mul_fe_if ( mul_fe_i_if[0] )
);

// For each power 0..3: generate a random Fp12 element, stream it through the
// DUT and check the output against the software model
task test();
  fe12_t f, f_exp, f_out;
  integer signed get_len;
  logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat, dat_in;

  $display("Running test ...");
  dat_in = 0;
  for (int pow = 0; pow < 4; pow++) begin
    // Each field element occupies a 384-bit slot in the stream
    for (int i = 0; i < 2; i++)
      for (int j = 0; j < 3; j++)
        for (int k = 0; k < 2; k++) begin
          dat_in[(i*6+j*2+k)*384 +: $bits(FE_TYPE)] = random_vector(384/8) % P;
          f[i][j][k] = dat_in[(i*6+j*2+k)*384 +: $bits(FE_TYPE)];
        end

    f_exp = fe12_fmap(f, pow);

    fork
      fmap_fe12_i_if.put_stream(dat_in, 12*384/8, pow);
      fmap_fe12_o_if.get_stream(get_dat, get_len, 0);
    join

    for (int i = 0; i < 2; i++)
      for (int j = 0; j < 3; j++)
        for (int k = 0; k < 2; k++)
          f_out[i][j][k] = get_dat[(i*6+j*2+k)*384 +: $bits(FE_TYPE)];

    // Case inequality so that X/Z bits in the DUT output count as a mismatch
    // (with != the comparison evaluates to X, which the if treats as false)
    if (f_exp !== f_out) begin
      $display("Input was:");
      print_fe12(f);
      $display("Output was:");
      print_fe12(f_out);
      $display("Output Expected:");
      print_fe12(f_exp);
      $fatal(1, "%m %t ERROR: output was wrong", $time);
    end
    $display("test OK with pow=%d", pow);
  end
  $display("test PASSED");
endtask

initial begin
  fmap_fe12_i_if.reset_source();
  fmap_fe12_o_if.rdy = 0;
  #10ns;
  test();
  #50ns $finish();
end

endmodule