First cut of pairing engine for bls12-381

This commit is contained in:
bsdevlin 2019-07-24 21:37:37 +08:00
parent 73037d3612
commit 2044d52db1
5 changed files with 1380 additions and 12 deletions

View File

@ -0,0 +1,295 @@
/*
This is the top level for the bls12-381 pairing engine.
It performs both the miller loop and final exponentiation required for ate pairing (G2 x G1).
Inputs are points in G1 and G2 (affine coordinates)
Output is a Fp12 element.
TODO: Replace multiplications in fe12 with sparse versions.
TODO: Implement squaring functions.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Top level of the bls12-381 pairing engine.
//
// Accepts one G1 point and one G2 point (both affine) on the i_val / o_rdy
// handshake and is intended to return a Fp12 element on o_val / i_rdy.
// Internally it instantiates the Miller doubling step and the Miller addition
// step and shares one FE multiplier, one FE2 multiplier, one FE2 adder and one
// FE2 subtractor between them through resource_share arbiters.
//
// Internal interface array convention (per arithmetic unit):
//   *_i_if[0] / *_o_if[0] : request / response of the doubling step
//   *_i_if[1] / *_o_if[1] : request / response of the addition step
//   *_i_if[2] / *_o_if[2] : arbitrated request / response of the shared unit
//
// NOTE(review): index [2] of the internal arrays is not yet connected to the
// module level o_*_if / i_*_if ports, and the MILLER_LOOP / FINAL_EXP states
// are still stubs (wait_dbl / wait_add are never set, o_val is never raised).
module bls12_381_pairing
  import bls12_381_pkg::*;
#(
  parameter type FE_TYPE = fe_t,
  parameter type FE2_TYPE = fe2_t,
  parameter type FE12_TYPE = fe12_t,
  parameter type G1_FP_AF_TYPE = af_point_t,
  parameter type G2_FP_AF_TYPE = fp2_af_point_t,
  parameter type G2_FP_JB_TYPE = fp2_jb_point_t,
  parameter CTL_BITS = 32,
  parameter OVR_WRT_BIT = 8 // We override 16 bits from here
)(
  input i_clk, i_rst,
  // Inputs
  input i_val,
  output logic o_rdy,
  input G1_FP_AF_TYPE i_g1_af,
  input G2_FP_AF_TYPE i_g2_af,
  // Outputs
  output logic o_val,
  input i_rdy,
  output FE12_TYPE o_fe12,
  // Interface to FE_TYPE multiplier (mod P)
  if_axi_stream.source o_mul_fe_if,
  if_axi_stream.sink i_mul_fe_if,
  // Interface to FE2_TYPE multiplier (mod P)
  if_axi_stream.source o_mul_fe2_if,
  if_axi_stream.sink i_mul_fe2_if,
  // Interface to FE2_TYPE adder (mod P)
  if_axi_stream.source o_add_fe2_if,
  if_axi_stream.sink i_add_fe2_if,
  // Interface to FE2_TYPE subtractor (mod P)
  if_axi_stream.source o_sub_fe2_if,
  if_axi_stream.sink i_sub_fe2_if,
  // Interface to FE12_TYPE multiplier (mod P)
  if_axi_stream.source o_mul_fe12_if,
  if_axi_stream.sink i_mul_fe12_if
);

  // Request (*_i_if, two operands wide) and response (*_o_if, one operand wide)
  // streams. They are clocked from i_clk; the previous code referenced an
  // undeclared identifier "clk", which elaborates to an undriven implicit net.
  if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_i_if [2:0] (i_clk);
  if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_o_if [2:0] (i_clk);
  if_axi_stream #(.DAT_BITS(2*$bits(FE2_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_i_if [2:0] (i_clk);
  if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_o_if [2:0] (i_clk);
  if_axi_stream #(.DAT_BITS(2*$bits(FE2_TYPE)), .CTL_BITS(CTL_BITS)) add_fe2_i_if [2:0] (i_clk);
  if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(CTL_BITS)) add_fe2_o_if [2:0] (i_clk);
  if_axi_stream #(.DAT_BITS(2*$bits(FE2_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe2_i_if [2:0] (i_clk);
  if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe2_o_if [2:0] (i_clk);

  // Handshake signals towards the doubling (dbl_*) and addition (add_*) steps
  logic dbl_i_val, dbl_o_rdy, dbl_o_val, dbl_i_rdy, dbl_o_err;
  logic add_i_val, add_o_rdy, add_o_val, add_i_rdy, add_o_err;
  logic wait_dbl, wait_add;

  G1_FP_AF_TYPE g1_af_i;                       // Latched G1 input point
  G2_FP_JB_TYPE g2_r_jb_i, add_g2_o, dbl_g2_o; // Running point R and step results
  G2_FP_AF_TYPE g2_af_i;                       // Latched G2 input point (Q)
  FE12_TYPE add_f12_o, dbl_f12_o;              // Line evaluation results

  logic [$clog2(ATE_X_START)-1:0] ate_loop_cnt; // Bit index into ATE_X

  enum {IDLE, MILLER_LOOP, FINAL_EXP} pair_state;

  always_ff @ (posedge i_clk) begin
    if (i_rst) begin
      o_fe12 <= 0;
      o_val <= 0;          // Was left un-reset (X after reset in simulation)
      g1_af_i <= 0;
      g2_af_i <= 0;        // Was left un-reset
      g2_r_jb_i <= 0;
      i_mul_fe12_if.rdy <= 0;
      o_mul_fe12_if.copy_if(0, 0, 1, 1, 0, 0, 0);
      pair_state <= IDLE;
      add_i_val <= 0;
      dbl_i_val <= 0;
      add_i_rdy <= 0;
      dbl_i_rdy <= 0;
      o_rdy <= 0;
      wait_dbl <= 0;
      wait_add <= 0;
      ate_loop_cnt <= ATE_X_START;
    end else begin

      // Drop each valid once the matching ready has been seen
      if (i_rdy && o_val) o_val <= 0;
      if (add_i_val && add_o_rdy) add_i_val <= 0;
      if (dbl_i_val && dbl_o_rdy) dbl_i_val <= 0;
      if (o_mul_fe12_if.val && o_mul_fe12_if.rdy) o_mul_fe12_if.val <= 0;

      i_mul_fe12_if.rdy <= 1;

      case(pair_state)
        IDLE: begin
          ate_loop_cnt <= ATE_X_START;
          o_fe12 <= 0;
          o_rdy <= 1;
          add_i_val <= 0;
          dbl_i_val <= 0;
          add_i_rdy <= 0;
          dbl_i_rdy <= 0;
          wait_dbl <= 0;
          wait_add <= 0;
          if (i_val && o_rdy) begin
            // Latch the inputs; R starts as Q lifted to Jacobian with z = 1
            pair_state <= MILLER_LOOP;
            o_rdy <= 0;
            g1_af_i <= i_g1_af;
            g2_af_i <= i_g2_af;
            g2_r_jb_i.x <= i_g2_af.x;
            g2_r_jb_i.y <= i_g2_af.y;
            g2_r_jb_i.z <= 1;
          end
        end
        MILLER_LOOP: begin
          if (~wait_dbl) begin
            dbl_i_val <= 1;
          end
          if (ATE_X[ate_loop_cnt] == 1) begin
            // Do add step in here as well
          end
          // Also three multiplications
          add_i_rdy <= 0;
          dbl_i_rdy <= 0;
        end
        FINAL_EXP: begin
        end
      endcase
    end
  end

  bls12_381_pairing_miller_dbl #(
    .FE_TYPE       ( FE_TYPE       ),
    .FE2_TYPE      ( FE2_TYPE      ),
    .FE12_TYPE     ( FE12_TYPE     ),
    .G1_FP_AF_TYPE ( G1_FP_AF_TYPE ),
    .G2_FP_JB_TYPE ( G2_FP_JB_TYPE ),
    .OVR_WRT_BIT   ( OVR_WRT_BIT   )
  )
  bls12_381_pairing_miller_dbl (
    .i_clk ( i_clk ),
    .i_rst ( i_rst ),
    .i_val ( dbl_i_val ),
    .o_rdy ( dbl_o_rdy ),
    .i_g1_af ( g1_af_i ),
    .i_g2_jb ( g2_r_jb_i ),
    .o_val ( dbl_o_val ),
    .i_rdy ( dbl_i_rdy ),
    .o_err ( dbl_o_err ),
    .o_res_fe12 ( dbl_f12_o ),
    .o_g2_jb ( dbl_g2_o ),
    .o_mul_fe2_if ( mul_fe2_i_if[0] ),
    .i_mul_fe2_if ( mul_fe2_o_if[0] ),
    .o_add_fe2_if ( add_fe2_i_if[0] ),
    .i_add_fe2_if ( add_fe2_o_if[0] ),
    .o_sub_fe2_if ( sub_fe2_i_if[0] ),
    .i_sub_fe2_if ( sub_fe2_o_if[0] ), // Response stream (was looped back to sub_fe2_i_if[0])
    .o_mul_fe_if ( mul_fe_i_if[0] ),
    .i_mul_fe_if ( mul_fe_o_if[0] )    // Response stream (was looped back to mul_fe_i_if[0])
  );

  bls12_381_pairing_miller_add #(
    .FE_TYPE       ( FE_TYPE       ),
    .FE2_TYPE      ( FE2_TYPE      ),
    .FE12_TYPE     ( FE12_TYPE     ),
    .G1_FP_AF_TYPE ( G1_FP_AF_TYPE ),
    .G2_FP_JB_TYPE ( G2_FP_JB_TYPE ),
    .G2_FP_AF_TYPE ( G2_FP_AF_TYPE ),
    .OVR_WRT_BIT   ( OVR_WRT_BIT   )
  )
  bls12_381_pairing_miller_add (
    .i_clk ( i_clk ),
    .i_rst ( i_rst ),
    .i_val ( add_i_val ),
    .o_rdy ( add_o_rdy ),
    .i_g1_af ( g1_af_i ),
    .i_g2_jb ( g2_r_jb_i ),
    .i_g2_q_af ( g2_af_i ),
    .o_val ( add_o_val ),
    .i_rdy ( add_i_rdy ),
    .o_err ( add_o_err ),
    .o_res_fe12 ( add_f12_o ),
    .o_g2_jb ( add_g2_o ),
    .o_mul_fe2_if ( mul_fe2_i_if[1] ),
    .i_mul_fe2_if ( mul_fe2_o_if[1] ),
    .o_add_fe2_if ( add_fe2_i_if[1] ),
    .i_add_fe2_if ( add_fe2_o_if[1] ),
    .o_sub_fe2_if ( sub_fe2_i_if[1] ),
    .i_sub_fe2_if ( sub_fe2_o_if[1] ), // Response stream (was looped back to sub_fe2_i_if[1])
    .o_mul_fe_if ( mul_fe_i_if[1] ),
    .i_mul_fe_if ( mul_fe_o_if[1] )    // Response stream (was looped back to mul_fe_i_if[1])
  );

  // Arbiters: two requesters ([1:0]) share one arithmetic unit ([2]).
  // OVR_WRT_BIT + 8 places the arbiter's routing bits above the step modules'
  // own control field, which starts at OVR_WRT_BIT.
  resource_share # (
    .NUM_IN       ( 2                  ),
    .DAT_BITS     ( 2*$bits(FE_TYPE)   ),
    .CTL_BITS     ( CTL_BITS           ),
    .OVR_WRT_BIT  ( OVR_WRT_BIT + 8    ),
    .PIPELINE_IN  ( 0                  ),
    .PIPELINE_OUT ( 0                  )
  )
  resource_share_fe_mul (
    .i_clk ( i_clk ),
    .i_rst ( i_rst ),
    .i_axi ( mul_fe_i_if[1:0] ),
    .o_res ( mul_fe_i_if[2] ),
    .i_res ( mul_fe_o_if[2] ),
    .o_axi ( mul_fe_o_if[1:0] )
  );

  resource_share # (
    .NUM_IN       ( 2                  ),
    .DAT_BITS     ( 2*$bits(FE2_TYPE)  ),
    .CTL_BITS     ( CTL_BITS           ),
    .OVR_WRT_BIT  ( OVR_WRT_BIT + 8    ),
    .PIPELINE_IN  ( 0                  ),
    .PIPELINE_OUT ( 0                  )
  )
  resource_share_fe2_mul (
    .i_clk ( i_clk ),
    .i_rst ( i_rst ),
    .i_axi ( mul_fe2_i_if[1:0] ),
    .o_res ( mul_fe2_i_if[2] ),
    .i_res ( mul_fe2_o_if[2] ),
    .o_axi ( mul_fe2_o_if[1:0] )
  );

  resource_share # (
    .NUM_IN       ( 2                  ),
    .DAT_BITS     ( 2*$bits(FE2_TYPE)  ),
    .CTL_BITS     ( CTL_BITS           ),
    .OVR_WRT_BIT  ( OVR_WRT_BIT + 8    ),
    .PIPELINE_IN  ( 0                  ),
    .PIPELINE_OUT ( 0                  )
  )
  resource_share_fe2_add (
    .i_clk ( i_clk ),
    .i_rst ( i_rst ),
    .i_axi ( add_fe2_i_if[1:0] ),
    .o_res ( add_fe2_i_if[2] ),
    .i_res ( add_fe2_o_if[2] ),
    .o_axi ( add_fe2_o_if[1:0] )
  );

  resource_share # (
    .NUM_IN       ( 2                  ),
    .DAT_BITS     ( 2*$bits(FE2_TYPE)  ),
    .CTL_BITS     ( CTL_BITS           ),
    .OVR_WRT_BIT  ( OVR_WRT_BIT + 8    ),
    .PIPELINE_IN  ( 0                  ),
    .PIPELINE_OUT ( 0                  )
  )
  resource_share_fe2_sub (
    .i_clk ( i_clk ),
    .i_rst ( i_rst ),
    .i_axi ( sub_fe2_i_if[1:0] ),
    .o_res ( sub_fe2_i_if[2] ),
    .i_res ( sub_fe2_o_if[2] ),
    .o_axi ( sub_fe2_o_if[1:0] )
  );

endmodule

View File

@ -0,0 +1,382 @@
/*
This performs the line evaluation and add required for the miller loop
in the ate pairing.
Inputs are points in G1 (Fp affine), G2 (Fp2 jacobian), G2_Q (Fp2 affine)
The output is a sparse Fe12.
Equations are mapped to bls12_381_pkg::miller_add_step()
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Miller loop addition step: line evaluation plus point addition.
//
// Every arithmetic operation of the step is given an index 0..42. An operation
// is sent to one of the external arithmetic units (FE2 multiplier, FE2 adder,
// FE2 subtractor, FE multiplier) with its index placed in
// ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]; the unit is expected to return that
// field unchanged with the result so the result can be routed to a register.
//   eq_wait[k] - operation k has been issued (its operands were captured)
//   eq_val[k]  - the result of operation k has been received
// Per unit, at most one operation is issued per clock, in the fixed priority
// order of the if / else-if chains below.
module bls12_381_pairing_miller_add
#(
parameter type FE_TYPE,
parameter type FE2_TYPE,
parameter type FE12_TYPE,
parameter type G1_FP_AF_TYPE,
parameter type G2_FP_JB_TYPE,
parameter type G2_FP_AF_TYPE,
parameter OVR_WRT_BIT = 8 // Require 6 bits from this for control
)(
input i_clk, i_rst,
// Inputs
input i_val,
output logic o_rdy,
input G1_FP_AF_TYPE i_g1_af,
input G2_FP_JB_TYPE i_g2_jb,
input G2_FP_AF_TYPE i_g2_q_af,
// Result is sparse Fe12 and added G2 point
output logic o_val,
input i_rdy,
output logic o_err,
output FE12_TYPE o_res_fe12,
output G2_FP_JB_TYPE o_g2_jb,
// Interface to FE2_TYPE multiplier (mod P)
if_axi_stream.source o_mul_fe2_if,
if_axi_stream.sink i_mul_fe2_if,
// Interface to FE2_TYPE adder (mod P)
if_axi_stream.source o_add_fe2_if,
if_axi_stream.sink i_add_fe2_if,
// Interface to FE2_TYPE subtractor (mod P)
if_axi_stream.source o_sub_fe2_if,
if_axi_stream.sink i_sub_fe2_if,
// Interface to FE_TYPE multiplier (mod P)
if_axi_stream.source o_mul_fe_if,
if_axi_stream.sink i_mul_fe_if
);
// Width of the operation index carried in the control field (indices 0..42)
localparam NUM_OVR_WRT_BIT = 6;
// One bit per operation index, see the module header
logic [42:0] eq_val, eq_wait;
// Working registers; which intermediate value lives in which register is
// defined by the result-routing case statements below
FE2_TYPE zsquared, ysquared;
FE2_TYPE [10:0] t;
// Set once o_rdy has been raised for the current job so it is raised only once
logic o_rdy_l;
always_comb begin
// Sparse Fp12 result: only three of the six FE2 slots are populated
o_res_fe12 = {$bits(FE2_TYPE)'(0), t[10], $bits(FE2_TYPE)'(0), $bits(FE2_TYPE)'(0), t[1], t[9]};
// Done when the four FE multiplies (39..42) and addition 36 have returned
// NOTE(review): t[9] is last written by subtraction 35, which is not part of
// this condition - confirm 35 is guaranteed to have returned by then
o_val = eq_val[39] && eq_val[40] && eq_val[41] && eq_val[36] && eq_val[42];
end
always_ff @ (posedge i_clk) begin
if (i_rst) begin
o_mul_fe2_if.copy_if(0, 0, 1, 1, 0, 0, 0);
o_add_fe2_if.copy_if(0, 0, 1, 1, 0, 0, 0);
o_sub_fe2_if.copy_if(0, 0, 1, 1, 0, 0, 0);
o_mul_fe_if.copy_if(0, 0, 1, 1, 0, 0, 0);
o_g2_jb <= 0;
t <= 0;
zsquared <= 0;
ysquared <= 0;
i_mul_fe2_if.rdy <= 0;
i_add_fe2_if.rdy <= 0;
i_sub_fe2_if.rdy <= 0;
i_mul_fe_if.rdy <= 0;
eq_val <= 0;
eq_wait <= 0;
o_rdy <= 0;
o_rdy_l <= 0;
o_err <= 0;
end else begin
// Results are always accepted
i_mul_fe2_if.rdy <= 1;
i_add_fe2_if.rdy <= 1;
i_sub_fe2_if.rdy <= 1;
i_mul_fe_if.rdy <= 1;
// Clear request valids when the unit is ready; a new request issued later in
// this same clock overrides the clear (last non-blocking assignment wins)
if (o_mul_fe2_if.rdy) o_mul_fe2_if.val <= 0;
if (o_add_fe2_if.rdy) o_add_fe2_if.val <= 0;
if (o_sub_fe2_if.rdy) o_sub_fe2_if.val <= 0;
if (o_mul_fe_if.rdy) o_mul_fe_if.val <= 0;
if (i_val && o_rdy) o_rdy <= 0;
// Result consumed downstream: clear all bookkeeping for the next job
if (o_val && i_rdy) begin
eq_val <= 0;
eq_wait <= 0;
t <= 0;
zsquared <= 0;
ysquared <= 0;
o_rdy_l <= 0;
end
// All four final FE multiplies have been issued: raise o_rdy once
if (eq_wait[39] && eq_wait[40] && eq_wait[41] && eq_wait[42] && ~o_rdy_l) begin
o_rdy <= 1;
o_rdy_l <= 1;
end
// Check any results from multiplier
if (i_mul_fe2_if.val && i_mul_fe2_if.rdy) begin
eq_val[i_mul_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1;
// An index that does not belong to this unit flags o_err (sticky until reset)
case(i_mul_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside
0: zsquared <= i_mul_fe2_if.dat;
1: ysquared <= i_mul_fe2_if.dat;
2: t[0] <= i_mul_fe2_if.dat;
4: t[1] <= i_mul_fe2_if.dat;
7: t[1] <= i_mul_fe2_if.dat;
9: t[3] <= i_mul_fe2_if.dat;
10: t[4] <= i_mul_fe2_if.dat;
11: t[5] <= i_mul_fe2_if.dat;
14: t[9] <= i_mul_fe2_if.dat;
15: t[7] <= i_mul_fe2_if.dat;
16: o_g2_jb.x <= i_mul_fe2_if.dat;
21: o_g2_jb.z <= i_mul_fe2_if.dat;
24: zsquared <= i_mul_fe2_if.dat;
27: t[8] <= i_mul_fe2_if.dat;
28: t[0] <= i_mul_fe2_if.dat;
31: t[10] <= i_mul_fe2_if.dat;
default: o_err <= 1;
endcase
end
// Check any results from sub
if (i_sub_fe2_if.val && i_sub_fe2_if.rdy) begin
eq_val[i_sub_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1;
case(i_sub_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside
5: t[1] <= i_sub_fe2_if.dat;
6: t[1] <= i_sub_fe2_if.dat;
8: t[2] <= i_sub_fe2_if.dat;
12: t[6] <= i_sub_fe2_if.dat;
13: t[6] <= i_sub_fe2_if.dat;
17: o_g2_jb.x <= i_sub_fe2_if.dat;
18: o_g2_jb.x <= i_sub_fe2_if.dat;
19: o_g2_jb.x <= i_sub_fe2_if.dat;
22: o_g2_jb.z <= i_sub_fe2_if.dat;
23: o_g2_jb.z <= i_sub_fe2_if.dat;
26: t[8] <= i_sub_fe2_if.dat;
30: o_g2_jb.y <= i_sub_fe2_if.dat;
32: t[10] <= i_sub_fe2_if.dat;
33: t[10] <= i_sub_fe2_if.dat;
35: t[9] <= i_sub_fe2_if.dat;
37: t[6] <= i_sub_fe2_if.dat;
default: o_err <= 1;
endcase
end
// Check any results from add
if (i_add_fe2_if.val && i_add_fe2_if.rdy) begin
eq_val[i_add_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1;
case(i_add_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside
3: t[1] <= i_add_fe2_if.dat;
20: o_g2_jb.z <= i_add_fe2_if.dat;
25: t[10] <= i_add_fe2_if.dat;
29: t[0] <= i_add_fe2_if.dat;
34: t[9] <= i_add_fe2_if.dat;
36: t[10] <= i_add_fe2_if.dat;
38: t[1] <= i_add_fe2_if.dat;
default: o_err <= 1;
endcase
end
// Check any results from fe multiplier
if (i_mul_fe_if.val && i_mul_fe_if.rdy) begin
eq_val[i_mul_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1;
// Each FE result replaces one of the two FE halves of an FE2 register
case(i_mul_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside
39: t[10][0] <= i_mul_fe_if.dat;
40: t[10][1] <= i_mul_fe_if.dat;
41: t[1][0] <= i_mul_fe_if.dat;
42: t[1][1] <= i_mul_fe_if.dat;
default: o_err <= 1;
endcase
end
// Issue new multiplies
// Pattern for every branch: ~eq_wait[k] (not issued yet) && the eq_val /
// eq_wait / i_val terms that gate operation k
// NOTE(review): several gates only test an earlier write to a register that
// is written more than once (e.g. operation 12 reads t[1] gated on eq_val[3]
// while 4..7 also write t[1]) - confirm against
// bls12_381_pkg::miller_add_step(), which is not fully visible here
if (~eq_wait[0] && i_val) begin
fe2_multiply(0, i_g2_jb.z, i_g2_jb.z);
end else
if (~eq_wait[1] && i_val) begin
fe2_multiply(1, i_g2_q_af.y, i_g2_q_af.y);
end else
if (~eq_wait[2] && eq_val[0]) begin
fe2_multiply(2, zsquared, i_g2_q_af.x);
end else
if (~eq_wait[4] && eq_val[3]) begin
fe2_multiply(4, t[1], t[1]);
end else
if (~eq_wait[7] && eq_val[6]) begin
fe2_multiply(7, t[1], zsquared);
end else
if (~eq_wait[9] && eq_val[8]) begin
fe2_multiply(9, t[2], t[2]);
end else
if (~eq_wait[10] && eq_val[9]) begin
// NOTE(review): the constant operand 10 equals the operation index - looks
// like a possible copy/paste slip; confirm the intended scale factor
fe2_multiply(10, t[3], 10);
end else
if (~eq_wait[11] && eq_val[8] && eq_val[10]) begin
fe2_multiply(11, t[2], t[4]);
end else
if (~eq_wait[14] && eq_val[13]) begin
fe2_multiply(14, t[6], i_g2_q_af.x);
end else
if (~eq_wait[15] && eq_val[10]) begin
fe2_multiply(15, t[4], i_g2_jb.x);
end else
if (~eq_wait[16] && eq_val[13]) begin
fe2_multiply(16, t[6], t[6]);
end else
if (~eq_wait[21] && eq_val[20]) begin
fe2_multiply(21, o_g2_jb.z, o_g2_jb.z);
end else
if (~eq_wait[24] && eq_val[23]) begin
fe2_multiply(24, o_g2_jb.z, o_g2_jb.z);
end else
if (~eq_wait[27] && eq_val[26] && eq_val[13]) begin
fe2_multiply(27, t[8], t[6]);
end else
if (~eq_wait[28] && eq_val[11]) begin
fe2_multiply(28, i_g2_jb.y, t[5]);
end else
if (~eq_wait[31] && eq_val[23]) begin
// NOTE(review): t[10] is written by addition 25, but the gate here is
// eq_val[23] - confirm 25 is guaranteed to have returned first
fe2_multiply(31, t[10], t[10]);
end
// Issue new adds
if (~eq_wait[3] && i_val) begin
fe2_addition(3, i_g2_jb.z, i_g2_q_af.y);
end else
if (~eq_wait[20] && eq_val[8]) begin
fe2_addition(20, i_g2_jb.z, t[2]);
end else
if (~eq_wait[25] && eq_val[23]) begin
fe2_addition(25, o_g2_jb.z, i_g2_q_af.y);
end else
if (~eq_wait[29] && eq_val[28]) begin
fe2_addition(29, t[0], t[0]);
end else
if (~eq_wait[34] && eq_val[14]) begin
fe2_addition(34, t[9], t[9]);
end else
if (~eq_wait[36] && eq_val[23] && eq_wait[35]) begin
// Gated on 35 having been issued because this result overwrites t[10],
// which 35 reads as an operand
fe2_addition(36, o_g2_jb.z, o_g2_jb.z);
end else
if (~eq_wait[38] && eq_val[37]) begin
fe2_addition(38, t[6], t[6]);
end
// Issue new sub
if (~eq_wait[5] && eq_val[4] && eq_val[1]) begin
fe2_subtraction(5, t[1], ysquared);
end else
if (~eq_wait[6] && eq_val[5] && eq_val[0]) begin
fe2_subtraction(6, t[1], zsquared);
end else
if (~eq_wait[8] && eq_val[2] && i_val) begin
fe2_subtraction(8, t[0], i_g2_jb.x);
end else
if (~eq_wait[12] && eq_val[3]) begin
fe2_subtraction(12, t[1], i_g2_jb.y);
end else
if (~eq_wait[13] && eq_val[12]) begin
fe2_subtraction(13, t[6], i_g2_jb.y);
end else
if (~eq_wait[17] && eq_val[11] && eq_val[16]) begin
fe2_subtraction(17, o_g2_jb.x, t[5]);
end else
if (~eq_wait[18] && eq_val[17] && eq_val[10]) begin
fe2_subtraction(18, o_g2_jb.x, t[7]);
end else
if (~eq_wait[19] && eq_val[18] && eq_val[15]) begin
fe2_subtraction(19, o_g2_jb.x, t[7]);
end else
if (~eq_wait[22] && eq_val[21] && eq_val[0]) begin
fe2_subtraction(22, o_g2_jb.z, zsquared);
end else
if (~eq_wait[23] && eq_val[22] && eq_val[9]) begin
fe2_subtraction(23, o_g2_jb.z, t[3]);
end else
if (~eq_wait[26] && eq_val[19] && eq_val[15]) begin
fe2_subtraction(26, t[7], o_g2_jb.x);
end else
if (~eq_wait[30] && eq_val[29] && eq_val[27]) begin
fe2_subtraction(30, t[8], t[0]);
end else
if (~eq_wait[32] && eq_val[31] && eq_val[1]) begin
fe2_subtraction(32, t[10], ysquared);
end else
if (~eq_wait[33] && eq_val[32] && eq_val[24]) begin
fe2_subtraction(33, t[10], zsquared);
end else
if (~eq_wait[35] && eq_val[34] && eq_val[33]) begin
fe2_subtraction(35, t[9], t[10]);
end else
if (~eq_wait[37] && eq_wait[27]) begin
// 0 - t[6]; gated on 27 having been issued because 27 reads t[6]
fe2_subtraction(37, 0, t[6]);
end
// Issue final fe multiplications
// Both FE halves of t[10] are multiplied by i_g1_af.y, both halves of t[1]
// by i_g1_af.x
if (~eq_wait[39] && eq_val[36]) begin
fe_multiply(39, t[10][0], i_g1_af.y);
end else
if (~eq_wait[40] && eq_val[36]) begin
fe_multiply(40, t[10][1], i_g1_af.y);
end else
if (~eq_wait[41] && eq_val[38]) begin
fe_multiply(41, t[1][0], i_g1_af.x);
end else
if (~eq_wait[42] && eq_val[38]) begin
fe_multiply(42, t[1][1], i_g1_af.x);
end
end
end
// The four tasks below share one shape: if the request stream is free (no
// pending valid, or the pending valid is being accepted this clock) drive
// operand a into the low half of dat and b into the high half, tag the request
// with ctl, and mark the operation as issued. If the stream is busy nothing
// happens and the caller retries on a later clock because eq_wait[ctl] is
// still 0.
// Task for subtractions
task fe2_subtraction(input int unsigned ctl, input FE2_TYPE a, b);
if (~o_sub_fe2_if.val || (o_sub_fe2_if.val && o_sub_fe2_if.rdy)) begin
o_sub_fe2_if.val <= 1;
o_sub_fe2_if.dat[0 +: $bits(FE2_TYPE)] <= a;
o_sub_fe2_if.dat[$bits(FE2_TYPE) +: $bits(FE2_TYPE)] <= b;
o_sub_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
eq_wait[ctl] <= 1;
end
endtask
// Task for addition
task fe2_addition(input int unsigned ctl, input FE2_TYPE a, b);
if (~o_add_fe2_if.val || (o_add_fe2_if.val && o_add_fe2_if.rdy)) begin
o_add_fe2_if.val <= 1;
o_add_fe2_if.dat[0 +: $bits(FE2_TYPE)] <= a;
o_add_fe2_if.dat[$bits(FE2_TYPE) +: $bits(FE2_TYPE)] <= b;
o_add_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
eq_wait[ctl] <= 1;
end
endtask
// Task for using mult
task fe2_multiply(input int unsigned ctl, input FE2_TYPE a, b);
if (~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy)) begin
o_mul_fe2_if.val <= 1;
o_mul_fe2_if.dat[0 +: $bits(FE2_TYPE)] <= a;
o_mul_fe2_if.dat[$bits(FE2_TYPE) +: $bits(FE2_TYPE)] <= b;
o_mul_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
eq_wait[ctl] <= 1;
end
endtask
// Task for using mult (fe)
task fe_multiply(input int unsigned ctl, input FE_TYPE a, b);
if (~o_mul_fe_if.val || (o_mul_fe_if.val && o_mul_fe_if.rdy)) begin
o_mul_fe_if.val <= 1;
o_mul_fe_if.dat[0 +: $bits(FE_TYPE)] <= a;
o_mul_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= b;
o_mul_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
eq_wait[ctl] <= 1;
end
endtask
endmodule

View File

@ -0,0 +1,355 @@
/*
This performs the line evaluation and doubling required for the miller loop
in the ate pairing.
Inputs are points in G1 (Fp affine), G2 (Fp2 jacobian)
The output is a sparse Fe12.
Equations are mapped to bls12_381_pkg::miller_double_step()
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Miller loop doubling step: line evaluation plus point doubling.
//
// The operation indices 0..36 used below are the step numbers in the comments
// of bls12_381_pkg::miller_double_step(). An operation is sent to one of the
// external arithmetic units with its index placed in
// ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]; the unit is expected to return that
// field unchanged with the result so the result can be routed to a register.
//   eq_wait[k] - operation k has been issued (its operands were captured)
//   eq_val[k]  - the result of operation k has been received
// Per unit, at most one operation is issued per clock, in the fixed priority
// order of the if / else-if chains below.
//
// Register allocation relative to the package task:
//   t[0]..t[6] hold t0..t6, zsquared holds zsquared, o_g2_jb holds R.
//
// Changes relative to the first cut:
//   * o_rdy now waits until all four FE multiplies (33..36) have been issued;
//     it previously tested eq_wait[33] four times.
//   * t4 (steps 2, 3) lives in t[4]. It previously shared t[5] with t5, so the
//     result of step 12 overwrote t4 before steps 20 and 23 read it.
//   * Step 17 waits for the square of step 16 instead of the sum of step 15.
//   * Step 23 waits until step 19 has captured t3, because 23 overwrites t[3].
//   * Step 31 waits for the final R.z (step 18) and until steps 8 and 27 have
//     captured t0, because 31 overwrites t[0].
//   * Steps 33 / 34 wait for the doubling of step 32 instead of step 31.
module bls12_381_pairing_miller_dbl
#(
  parameter type FE_TYPE,
  parameter type FE2_TYPE,
  parameter type FE12_TYPE,
  parameter type G1_FP_AF_TYPE,
  parameter type G2_FP_JB_TYPE,
  parameter OVR_WRT_BIT = 8 // Require 6 bits from this for control
)(
  input i_clk, i_rst,
  // Inputs
  input i_val,
  output logic o_rdy,
  input G1_FP_AF_TYPE i_g1_af,
  input G2_FP_JB_TYPE i_g2_jb,
  // Result is sparse Fe12 and doubled G2 point
  output logic o_val,
  input i_rdy,
  output logic o_err,
  output FE12_TYPE o_res_fe12,
  output G2_FP_JB_TYPE o_g2_jb,
  // Interface to FE2_TYPE multiplier (mod P)
  if_axi_stream.source o_mul_fe2_if,
  if_axi_stream.sink i_mul_fe2_if,
  // Interface to FE2_TYPE adder (mod P)
  if_axi_stream.source o_add_fe2_if,
  if_axi_stream.sink i_add_fe2_if,
  // Interface to FE2_TYPE subtractor (mod P)
  if_axi_stream.source o_sub_fe2_if,
  if_axi_stream.sink i_sub_fe2_if,
  // Interface to FE_TYPE multiplier (mod P)
  if_axi_stream.source o_mul_fe_if,
  if_axi_stream.sink i_mul_fe_if
);

  // Width of the operation index carried in the control field (indices 0..36)
  localparam NUM_OVR_WRT_BIT = 6;

  logic [36:0] eq_val, eq_wait;   // One bit per operation index
  FE2_TYPE zsquared;
  FE2_TYPE [6:0] t;
  logic o_rdy_l;                  // o_rdy has already been raised for this job

  always_comb begin
    // Sparse Fp12 result, same slot layout as the f assignment in the package task
    o_res_fe12 = {$bits(FE2_TYPE)'(0), t[0], $bits(FE2_TYPE)'(0), $bits(FE2_TYPE)'(0), t[3], t[6]};
    // Done when the four FE multiplies and the last t6 subtraction have returned
    o_val = eq_val[33] && eq_val[34] && eq_val[35] && eq_val[36] && eq_val[30];
  end

  always_ff @ (posedge i_clk) begin
    if (i_rst) begin
      o_mul_fe2_if.copy_if(0, 0, 1, 1, 0, 0, 0);
      o_add_fe2_if.copy_if(0, 0, 1, 1, 0, 0, 0);
      o_sub_fe2_if.copy_if(0, 0, 1, 1, 0, 0, 0);
      o_mul_fe_if.copy_if(0, 0, 1, 1, 0, 0, 0);
      o_g2_jb <= 0;
      o_err <= 0;
      t <= 0;
      zsquared <= 0;
      i_mul_fe2_if.rdy <= 0;
      i_add_fe2_if.rdy <= 0;
      i_sub_fe2_if.rdy <= 0;
      i_mul_fe_if.rdy <= 0;
      eq_val <= 0;
      eq_wait <= 0;
      o_rdy <= 0;
      o_rdy_l <= 0;
    end else begin

      // Results are always accepted
      i_mul_fe2_if.rdy <= 1;
      i_add_fe2_if.rdy <= 1;
      i_sub_fe2_if.rdy <= 1;
      i_mul_fe_if.rdy <= 1;

      // Clear request valids when the unit is ready; a new request issued later
      // in this same clock overrides the clear (last non-blocking assignment wins)
      if (o_mul_fe2_if.rdy) o_mul_fe2_if.val <= 0;
      if (o_add_fe2_if.rdy) o_add_fe2_if.val <= 0;
      if (o_sub_fe2_if.rdy) o_sub_fe2_if.val <= 0;
      if (o_mul_fe_if.rdy) o_mul_fe_if.val <= 0;

      if (i_val && o_rdy) o_rdy <= 0;

      // Result consumed downstream: clear all bookkeeping for the next job
      if (o_val && i_rdy) begin
        eq_val <= 0;
        eq_wait <= 0;
        t <= 0;
        zsquared <= 0;
        o_rdy_l <= 0;
      end

      // The inputs are read for the last time by the FE multiplies 33..36, so
      // only raise o_rdy once all four of them have been issued
      if (eq_wait[33] && eq_wait[34] && eq_wait[35] && eq_wait[36] && ~o_rdy_l) begin
        o_rdy <= 1;
        o_rdy_l <= 1;
      end

      // Check any results from multiplier
      if (i_mul_fe2_if.val && i_mul_fe2_if.rdy) begin
        eq_val[i_mul_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1;
        case(i_mul_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside
          0: zsquared <= i_mul_fe2_if.dat;
          1: t[0] <= i_mul_fe2_if.dat;
          4: t[1] <= i_mul_fe2_if.dat;
          5: t[2] <= i_mul_fe2_if.dat;
          7: t[3] <= i_mul_fe2_if.dat;
          12: t[5] <= i_mul_fe2_if.dat;
          16: o_g2_jb.z <= i_mul_fe2_if.dat;
          20: o_g2_jb.y <= i_mul_fe2_if.dat;
          21: t[2] <= i_mul_fe2_if.dat;
          23: t[3] <= i_mul_fe2_if.dat;
          26: t[6] <= i_mul_fe2_if.dat;
          29: t[1] <= i_mul_fe2_if.dat;
          31: t[0] <= i_mul_fe2_if.dat;
          default: o_err <= 1; // Index does not belong to this unit
        endcase
      end

      // Check any results from sub
      if (i_sub_fe2_if.val && i_sub_fe2_if.rdy) begin
        eq_val[i_sub_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1;
        case(i_sub_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside
          8: t[3] <= i_sub_fe2_if.dat;
          9: t[3] <= i_sub_fe2_if.dat;
          13: o_g2_jb.x <= i_sub_fe2_if.dat;
          14: o_g2_jb.x <= i_sub_fe2_if.dat;
          17: o_g2_jb.z <= i_sub_fe2_if.dat;
          18: o_g2_jb.z <= i_sub_fe2_if.dat;
          19: o_g2_jb.y <= i_sub_fe2_if.dat;
          22: o_g2_jb.y <= i_sub_fe2_if.dat;
          25: t[3] <= i_sub_fe2_if.dat;
          27: t[6] <= i_sub_fe2_if.dat;
          28: t[6] <= i_sub_fe2_if.dat;
          30: t[6] <= i_sub_fe2_if.dat;
          default: o_err <= 1;
        endcase
      end

      // Check any results from add
      if (i_add_fe2_if.val && i_add_fe2_if.rdy) begin
        eq_val[i_add_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1;
        case(i_add_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside
          2: t[4] <= i_add_fe2_if.dat;  // t4 (was aliased onto t[5])
          3: t[4] <= i_add_fe2_if.dat;  // t4 (was aliased onto t[5])
          6: t[3] <= i_add_fe2_if.dat;
          10: t[3] <= i_add_fe2_if.dat;
          11: t[6] <= i_add_fe2_if.dat;
          15: o_g2_jb.z <= i_add_fe2_if.dat;
          24: t[3] <= i_add_fe2_if.dat;
          32: t[0] <= i_add_fe2_if.dat;
          default: o_err <= 1;
        endcase
      end

      // Check any results from fe multiplier
      // Each FE result replaces one of the two FE halves of an FE2 register
      if (i_mul_fe_if.val && i_mul_fe_if.rdy) begin
        eq_val[i_mul_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]] <= 1;
        case(i_mul_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT]) inside
          33: t[0][0] <= i_mul_fe_if.dat;
          34: t[0][1] <= i_mul_fe_if.dat;
          35: t[3][0] <= i_mul_fe_if.dat;
          36: t[3][1] <= i_mul_fe_if.dat;
          default: o_err <= 1;
        endcase
      end

      // Issue new multiplies
      // eq_val[k] terms wait for a producer; eq_wait[k] terms wait until a
      // reader of the register about to be overwritten has captured it
      if (~eq_wait[0] && i_val) begin
        fe2_multiply(0, i_g2_jb.z, i_g2_jb.z);
      end else
      if (~eq_wait[1] && i_val) begin
        fe2_multiply(1, i_g2_jb.x, i_g2_jb.x);
      end else
      if (~eq_wait[4] && i_val) begin
        fe2_multiply(4, i_g2_jb.y, i_g2_jb.y);
      end else
      if (~eq_wait[5] && eq_val[4]) begin
        fe2_multiply(5, t[1], t[1]);
      end else
      if (~eq_wait[7] && eq_val[6]) begin
        fe2_multiply(7, t[3], t[3]);
      end else
      if (~eq_wait[12] && eq_val[3]) begin
        fe2_multiply(12, t[4], t[4]);           // t5 = t4 * t4
      end else
      if (~eq_wait[16] && eq_val[15]) begin
        fe2_multiply(16, o_g2_jb.z, o_g2_jb.z);
      end else
      if (~eq_wait[20] && eq_val[19] && eq_val[3]) begin
        fe2_multiply(20, o_g2_jb.y, t[4]);      // R.y = R.y * t4
      end else
      if (~eq_wait[21] && eq_wait[9]) begin
        fe2_multiply(21, 8, t[2]);
      end else
      if (~eq_wait[23] && eq_val[0] && eq_val[3] && eq_wait[14] && eq_wait[19]) begin
        fe2_multiply(23, t[4], zsquared);       // t3 = t4 * zsquared
      end else
      if (~eq_wait[26] && eq_val[11]) begin
        fe2_multiply(26, t[6], t[6]);
      end else
      if (~eq_wait[29] && eq_wait[17] && eq_val[4] && eq_wait[5] && eq_wait[6]) begin
        fe2_multiply(29, 4, t[1]);
      end else
      if (~eq_wait[31] && eq_val[0] && eq_val[18] && eq_wait[8] && eq_wait[27]) begin
        fe2_multiply(31, o_g2_jb.z, zsquared);  // Uses the final R.z of step 18
      end

      // Issue new adds
      if (~eq_wait[2] && eq_val[1]) begin
        fe2_addition(2, t[0], t[0]);
      end else
      if (~eq_wait[3] && eq_val[2]) begin
        fe2_addition(3, t[4], t[0]);
      end else
      if (~eq_wait[6] && eq_val[4]) begin
        fe2_addition(6, i_g2_jb.x, t[1]);
      end else
      if (~eq_wait[10] && eq_val[9]) begin
        fe2_addition(10, t[3], t[3]);
      end else
      if (~eq_wait[11] && eq_val[3]) begin
        fe2_addition(11, i_g2_jb.x, t[4]);
      end else
      if (~eq_wait[15] && i_val) begin
        fe2_addition(15, i_g2_jb.z, i_g2_jb.y);
      end else
      if (~eq_wait[24] && eq_val[23]) begin
        fe2_addition(24, t[3], t[3]);
      end else
      if (~eq_wait[32] && eq_val[31]) begin
        fe2_addition(32, t[0], t[0]);
      end

      // Issue new sub
      if (~eq_wait[8] && eq_val[7] && eq_val[1]) begin
        fe2_subtraction(8, t[3], t[0]);
      end else
      if (~eq_wait[9] && eq_val[8] && eq_val[5]) begin
        fe2_subtraction(9, t[3], t[2]);
      end else
      if (~eq_wait[13] && eq_val[12] && eq_val[10]) begin
        fe2_subtraction(13, t[5], t[3]);
      end else
      if (~eq_wait[14] && eq_val[13]) begin
        fe2_subtraction(14, o_g2_jb.x, t[3]);
      end else
      if (~eq_wait[17] && eq_val[16] && eq_val[4]) begin
        fe2_subtraction(17, o_g2_jb.z, t[1]);   // Needs the square of step 16
      end else
      if (~eq_wait[18] && eq_val[17] && eq_val[0]) begin
        fe2_subtraction(18, o_g2_jb.z, zsquared);
      end else
      if (~eq_wait[19] && eq_val[14] && eq_val[10]) begin
        fe2_subtraction(19, t[3], o_g2_jb.x);
      end else
      if (~eq_wait[22] && eq_val[20] && eq_val[21]) begin
        fe2_subtraction(22, o_g2_jb.y, t[2]);
      end else
      if (~eq_wait[25] && eq_val[24]) begin
        fe2_subtraction(25, 0, t[3]);           // Negate: 0 - t3
      end else
      if (~eq_wait[27] && eq_val[26] && eq_val[1]) begin
        fe2_subtraction(27, t[6], t[0]);
      end else
      if (~eq_wait[28] && eq_val[27] && eq_val[12]) begin
        fe2_subtraction(28, t[6], t[5]);
      end else
      if (~eq_wait[30] && eq_val[29] && eq_val[28]) begin
        fe2_subtraction(30, t[6], t[1]);
      end

      // Issue final fe multiplications
      if (~eq_wait[33] && eq_val[32]) begin
        fe_multiply(33, t[0][0], i_g1_af.y);
      end else
      if (~eq_wait[34] && eq_val[32]) begin
        fe_multiply(34, t[0][1], i_g1_af.y);
      end else
      if (~eq_wait[35] && eq_val[25]) begin
        fe_multiply(35, t[3][0], i_g1_af.x);
      end else
      if (~eq_wait[36] && eq_val[25]) begin
        fe_multiply(36, t[3][1], i_g1_af.x);
      end
    end
  end

  // The four tasks below share one shape: if the request stream is free (no
  // pending valid, or the pending valid is being accepted this clock) drive
  // operand a into the low half of dat and b into the high half, tag the
  // request with ctl, and mark the operation as issued. If the stream is busy
  // nothing happens and the caller retries on a later clock.

  // Task for subtractions
  task fe2_subtraction(input int unsigned ctl, input FE2_TYPE a, b);
    if (~o_sub_fe2_if.val || (o_sub_fe2_if.val && o_sub_fe2_if.rdy)) begin
      o_sub_fe2_if.val <= 1;
      o_sub_fe2_if.dat[0 +: $bits(FE2_TYPE)] <= a;
      o_sub_fe2_if.dat[$bits(FE2_TYPE) +: $bits(FE2_TYPE)] <= b;
      o_sub_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
      eq_wait[ctl] <= 1;
    end
  endtask

  // Task for addition
  task fe2_addition(input int unsigned ctl, input FE2_TYPE a, b);
    if (~o_add_fe2_if.val || (o_add_fe2_if.val && o_add_fe2_if.rdy)) begin
      o_add_fe2_if.val <= 1;
      o_add_fe2_if.dat[0 +: $bits(FE2_TYPE)] <= a;
      o_add_fe2_if.dat[$bits(FE2_TYPE) +: $bits(FE2_TYPE)] <= b;
      o_add_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
      eq_wait[ctl] <= 1;
    end
  endtask

  // Task for using mult
  task fe2_multiply(input int unsigned ctl, input FE2_TYPE a, b);
    if (~o_mul_fe2_if.val || (o_mul_fe2_if.val && o_mul_fe2_if.rdy)) begin
      o_mul_fe2_if.val <= 1;
      o_mul_fe2_if.dat[0 +: $bits(FE2_TYPE)] <= a;
      o_mul_fe2_if.dat[$bits(FE2_TYPE) +: $bits(FE2_TYPE)] <= b;
      o_mul_fe2_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
      eq_wait[ctl] <= 1;
    end
  endtask

  // Task for using mult (fe)
  task fe_multiply(input int unsigned ctl, input FE_TYPE a, b);
    if (~o_mul_fe_if.val || (o_mul_fe_if.val && o_mul_fe_if.rdy)) begin
      o_mul_fe_if.val <= 1;
      o_mul_fe_if.dat[0 +: $bits(FE_TYPE)] <= a;
      o_mul_fe_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= b;
      o_mul_fe_if.ctl[OVR_WRT_BIT +: NUM_OVR_WRT_BIT] <= ctl;
      eq_wait[ctl] <= 1;
    end
  endtask

endmodule

View File

@ -27,7 +27,8 @@ package bls12_381_pkg;
fe_t Gx = 381'h17F1D3A73197D7942695638C4FA9AC0FC3688C4F9774B905A14E3A3F171BAC586C55E83FF97A1AEFFB3AF00ADB22C6BB;
fe_t Gy = 381'h08B3F481E3AAA0F1A09E30ED741D8AE4FCF5E095D5D00AF600DB18CB2C04B3EDD03CC744A2888AE40CAA232946C5E7E1;
logic [63:0] ATE_X = 64'hd201000000010000;
localparam [63:0] ATE_X = 64'hd201000000010000;
localparam ATE_X_START = 63;
typedef enum logic [2:0] {
SCALAR = 0,
@ -61,27 +62,43 @@ package bls12_381_pkg;
fe_t x;
} jb_point_t;
// Affine points
// Packed with y in the upper bits and x in the lower bits
typedef struct packed {
fe_t y;
fe_t x;
} af_point_t;
// Extension field tower built from packed arrays of the level below:
// fe2_t is 2 x fe_t, fe6_t is 3 x fe2_t, fe12_t is 2 x fe6_t.
// Index 0 is the least significant element of each array.
typedef fe_t [1:0] fe2_t;
typedef fe2_t [2:0] fe6_t;
typedef fe6_t [1:0] fe12_t;
fe2_t G2x = {381'd3059144344244213709971259814753781636986470325476647558659373206291635324768958432433509563104347017837885763365758,
381'd352701069587466618187139116011060144890029952792775240219908644239793785735715026873347600343865175952761926303160};
fe2_t G2x = {381'h13e02b6052719f607dacd3a088274f65596bd0d09920b61ab5da61bbdc7f5049334cf11213945d57e5ac7d055d042b7e,
381'h024aa2b2f08f0a91260805272dc51051c6e47ad4fa403b02b4510b647ae3d1770bac0326a805bbefd48056c8c121bdb8};
fe2_t G2y = {381'd927553665492332455747201965776037880757740193453592970025027978793976877002675564980949289727957565575433344219582,
381'd1985150602287291935568054521177171638300868978215655730859378665066344726373823718423869104263333984641494340347905};
fe2_t G2y = {381'h606c4a02ea734cc32acd2b02bc28b99cb3e287e85a763af267492ab572e99ab3f370d275cec1da1aaa9075ff05f79be,
381'hce5d527727d6e118cc9cdc6da2e351aadfd9baa8cbdd3a76d429a695160d12c923ac9cc3baca289e193548608b82801};
fe2_t FE2_one = {381'd0, 381'd1};
fe2_t FE2_one = {381'd0, 381'd1};
fe2_t FE2_zero = {381'd0, 381'd0};
fe6_t FE6_one = {FE2_zero, FE2_zero, FE2_one};
fe6_t FE6_zero = {FE2_zero, FE2_zero, FE2_zero};
fe12_t FE12_one = {FE6_zero, FE6_one};
fe12_t FE12_zero = {FE6_zero, FE6_zero};
jb_point_t g_point = '{x:Gx, y:Gy, z:381'd1};
// Jacobian coordinates for Fp^2 elements
// Jacobian coordinates for Fp^2, Fp^12 elements
// Point with three fe2_t coordinates; packed with z in the upper bits and x
// in the lower bits
typedef struct packed {
fe2_t z;
fe2_t y;
fe2_t x;
} fp2_jb_point_t;
// Point with two fe2_t coordinates (no z); packed with y in the upper bits and
// x in the lower bits
typedef struct packed {
fe2_t y;
fe2_t x;
} fp2_af_point_t;
typedef struct packed {
fe12_t z;
fe12_t y;
@ -477,23 +494,213 @@ package bls12_381_pkg;
fe12_mul[0] = fe6_add(bb, aa); // 8. fe6_mul[0] = add(add_i0, bb) [0, 1, 7]
endfunction
function fp12_jb_point_t untiwst(fp2_jb_point_t P);
// This performs the miller loop
// P is an affine Fp point in G1
// Q is an affine Fp^2 point in G2 on the twisted curve
// f is a Fp^12 element, the result of the miller loop
//
// The bits of ATE_X are scanned from ATE_X_START down to 0. Everything up to
// and including the first set bit is skipped; each remaining bit performs one
// doubling step, an extra addition step when the bit is 1, and then updates
// f = f^2 * line. Finally the upper fe6 half of f is negated.
//
// The task is automatic so that found_one is re-initialised on every call.
// As a static task the declaration initialiser ran only once, so a second call
// started with found_one already set and no longer skipped the leading bit.
task automatic miller_loop(input af_point_t P, input fp2_af_point_t Q, output fe12_t f);
  fp2_jb_point_t R;
  fe12_t lv_d, lv_a, f_sq;
  logic found_one;

  found_one = 0;
  f = FE12_one;
  // R starts as Q with z = 1
  R.x = Q.x;
  R.y = Q.y;
  R.z = 1;

  for (int i = ATE_X_START; i >= 0; i--) begin
    if (found_one == 0) begin
      // Still looking for the most significant set bit; that bit itself is
      // consumed here as well
      found_one = ATE_X[i];
      continue;
    end

    miller_double_step(R, P, lv_d);

    if (ATE_X[i] == 1) begin
      miller_add_step(R, Q, P, lv_a);
      lv_d = fe12_mul(lv_d, lv_a); // Very sparse multiplication
    end

    f_sq = fe12_mul(f, f); // Full multiplication
    f = fe12_mul(f_sq, lv_d); // Sparse multiplication
  end

  f[1] = fe6_sub(0, f[1]);
endtask
// Doubling step of the miller loop: doubles R in place and evaluates the line
// coefficients, scaling two of them by the coordinates of P.
// R is updated in place, f receives a sparse fe12 (three non-zero fe2 slots).
// The numbers in the comments are the operation indices used by the hardware
// implementation of this step; they follow the original evaluation order.
task automatic miller_double_step(ref fp2_jb_point_t R, input af_point_t P, ref fe12_t f);
  fe2_t rz_sq;            // Square of the incoming R.z
  fe2_t xx, yy, yyyy;     // R.x^2, R.y^2, R.y^4
  fe2_t m, mm;            // 3 * R.x^2 and its square
  fe2_t s;                // 2 * ((R.x + R.y^2)^2 - R.x^2 - R.y^4)
  fe2_t w;                // Accumulator for the third line coefficient
  fe2_t ln_x, ln_y;       // Line coefficients later scaled by P.x / P.y

  rz_sq = fe2_mul(R.z, R.z);                  // 0.
  xx    = fe2_mul(R.x, R.x);                  // 1.
  m     = fe2_add(fe2_add(xx, xx), xx);       // 2. 3.
  yy    = fe2_mul(R.y, R.y);                  // 4.
  yyyy  = fe2_mul(yy, yy);                    // 5.

  s = fe2_add(R.x, yy);                       // 6.
  s = fe2_mul(s, s);                          // 7.
  s = fe2_sub(fe2_sub(s, xx), yyyy);          // 8. 9.
  s = fe2_add(s, s);                          // 10.

  w  = fe2_add(R.x, m);                       // 11.
  mm = fe2_mul(m, m);                         // 12.

  // New R.x
  R.x = fe2_sub(fe2_sub(mm, s), s);           // 13. 14.

  // New R.z
  R.z = fe2_add(R.z, R.y);                    // 15.
  R.z = fe2_mul(R.z, R.z);                    // 16.
  R.z = fe2_sub(fe2_sub(R.z, yy), rz_sq);     // 17. 18.

  // New R.y
  R.y  = fe2_mul(fe2_sub(s, R.x), m);         // 19. 20.
  yyyy = fe2_mul(yyyy, 8);                    // 21.
  R.y  = fe2_sub(R.y, yyyy);                  // 22.

  // Line coefficient scaled by P.x below
  ln_x = fe2_mul(m, rz_sq);                   // 23.
  ln_x = fe2_sub(0, fe2_add(ln_x, ln_x));     // 24. 25.

  // Unscaled line coefficient
  w  = fe2_mul(w, w);                         // 26.
  w  = fe2_sub(fe2_sub(w, xx), mm);           // 27. 28.
  yy = fe2_mul(4, yy);                        // 29.
  w  = fe2_sub(w, yy);                        // 30.

  // Line coefficient scaled by P.y below (uses the updated R.z)
  ln_y = fe2_mul(R.z, rz_sq);                 // 31.
  ln_y = fe2_add(ln_y, ln_y);                 // 32.

  // Scale both fe_t halves of each coefficient by the coordinate of P
  ln_y[0] = fe_mul(ln_y[0], P.y);             // 33.
  ln_y[1] = fe_mul(ln_y[1], P.y);             // 34.
  ln_x[0] = fe_mul(ln_x[0], P.x);             // 35.
  ln_x[1] = fe_mul(ln_x[1], P.x);             // 36.

  f = {{FE2_zero, ln_y, FE2_zero}, {FE2_zero, ln_x, w}};
endtask
// This performs both the line evaluation and the addition
//
// R : passed by reference and overwritten in place (R.x, R.y and R.z are all reassigned).
// Q : two-coordinate Fp^2 point that is combined with R; only read.
// P : its y scales both Fp components of t10, its x scales both Fp components of t1.
// f : passed by reference and overwritten with
//     {{FE2_zero, t10, FE2_zero}, {FE2_zero, t1, t9}} - only three fe2 slots are non-zero.
//
// Statement order matters: steps 0-15 read the input R, step 28 reads R.y before it is
// overwritten at step 30, and zsquared is recomputed from the new R.z at step 24.
// Integer literals (4, 0) passed to fe2_mul / fe2_sub are converted to fe2_t.
// NOTE(review): the trailing "N. [..]" comments number each operation; the bracketed
// lists look like the steps an operation waits on in the hardware schedule - confirm.
// NOTE(review): the sequence appears to follow Algorithm 27 of eprint 2010/354 - confirm.
task automatic miller_add_step(ref fp2_jb_point_t R, input fp2_af_point_t Q, input af_point_t P, ref fe12_t f);
fe2_t zsquared, ysquared, t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10;
zsquared = fe2_mul(R.z, R.z); // 0. [R.val]
ysquared = fe2_mul(Q.y, Q.y); // 1. [Q.val]
t0 = fe2_mul(zsquared, Q.x); // 2. [0]
// t1 = ((R.z + Q.y)^2 - Q.y^2 - R.z^2) * R.z^2
t1 = fe2_add(R.z, Q.y); // 3. [R.val]
t1 = fe2_mul(t1, t1); // 4. [3]
t1 = fe2_sub(t1, ysquared); // 5. [4, 1]
t1 = fe2_sub(t1, zsquared); // 6. [5, 0]
t1 = fe2_mul(t1, zsquared); // 7. [6]
// t2 = Q.x * R.z^2 - R.x, t3 = t2^2, t4 = 4 * t3, t5 = t4 * t2
t2 = fe2_sub(t0, R.x); // 8. [2, R.val]
t3 = fe2_mul(t2, t2); // 9. [8]
t4 = fe2_mul(t3, 4); // 10. [9]
t5 = fe2_mul(t4, t2); // 11. [10, 8]
// t6 = t1 - 2 * R.y
t6 = fe2_sub(t1, R.y); // 12. [3]
t6 = fe2_sub(t6, R.y); // 13. [12]
t9 = fe2_mul(t6, Q.x); // 14. [13]
t7 = fe2_mul(t4, R.x); // 15. [10]
// New R.x = t6^2 - t5 - 2 * t7
R.x = fe2_mul(t6, t6); // 16. [13]
R.x = fe2_sub(R.x, t5); // 17. [11, 16]
R.x = fe2_sub(R.x, t7); // 18. [17, 10]
R.x = fe2_sub(R.x, t7); // 19. [18, 15]
// New R.z = (R.z + t2)^2 - R.z^2 - t2^2, using the input R.z
R.z = fe2_add(R.z, t2); // 20. [8]
R.z = fe2_mul(R.z, R.z); // 21. [20]
R.z = fe2_sub(R.z, zsquared); // 22. [21, 0]
R.z = fe2_sub(R.z, t3); // 23. [22, 9]
// zsquared now holds the square of the new R.z
zsquared = fe2_mul(R.z, R.z);// 24. [23]
t10 = fe2_add(Q.y, R.z); // 25.[23]
// New R.y = (t7 - new R.x) * t6 - 2 * (input R.y) * t5
t8 = fe2_sub(t7, R.x); // 26. [19, 15]
t8 = fe2_mul(t8, t6); // 27. [26, 13]
t0 = fe2_mul(R.y, t5); // 28. [11]
t0 = fe2_add(t0, t0); // 29. [28]
R.y = fe2_sub(t8, t0); // 30. [29, 27]
// t10 = (Q.y + new R.z)^2 - Q.y^2 - (new R.z)^2
t10 = fe2_mul(t10, t10); // 31. [23]
t10 = fe2_sub(t10, ysquared); // 32. [31, 1]
t10 = fe2_sub(t10, zsquared); // 33. [32, 24]
// t9 = 2 * t6 * Q.x - t10
t9 = fe2_add(t9, t9); // 34. [14]
t9 = fe2_sub(t9, t10); // 35. [34, 33]
// t10 is reused: t10 = 2 * new R.z; t1 is reused: t1 = -2 * t6
t10 = fe2_add(R.z, R.z); // 36. [wait 35, 23]
t6 = fe2_sub(0, t6); // 37. [wait 27]
t1 = fe2_add(t6, t6); // 38. [37]
// Scale each Fp component of t10 by P.y and of t1 by P.x
t10[0] = fe_mul(t10[0], P.y); // 39. [36]
t10[1] = fe_mul(t10[1], P.y); // 40. [36]
t1[0] = fe_mul(t1[0], P.x); // 41. [38]
t1[1] = fe_mul(t1[1], P.x); // 42. [38]
f = {{FE2_zero, t10, FE2_zero}, {FE2_zero, t1, t9}};
endtask
// Calculates the final exponent used in ate pairing
/*task automatic final_exponent(ref fe12_t f);
f = fe12_sub(0, f); // TODO can remove this?
endtask*/
// Multiplies the Fp^12 element f by a sparse element described by the three
// fe2 coefficients c0, c1 and c4, and returns the product.
//
// The sparse operand is expanded into full fe6 values:
//   lower half  = {FE2_zero, c1, c0}
//   upper half  = {FE2_zero, c4, FE2_zero}
// and the two halves of the result are built from three fe6 multiplications
// (lower * lower, upper * upper, and (lower + upper) * (sum of the operands)).
function fe12_t f12_sparse_mul_014(fe12_t f, fe2_t c0, c1, c4);
  fe6_t lower_operand, upper_operand, summed_operand;
  fe6_t lower_prod, upper_prod, cross_prod;
  fe12_t result;

  lower_operand  = {FE2_zero, c1, c0};
  upper_operand  = {FE2_zero, c4, FE2_zero};
  summed_operand = {FE2_zero, fe2_add(c1, c4), c0};

  lower_prod = fe6_mul(f[0], lower_operand); // TODO implement sparse fp6
  upper_prod = fe6_mul(f[1], upper_operand); // TODO implement sparse fp6

  // Upper half: (f[1] + f[0]) * summed_operand - lower_prod - upper_prod
  cross_prod = fe6_mul(fe6_add(f[1], f[0]), summed_operand);
  cross_prod = fe6_sub(cross_prod, lower_prod);
  cross_prod = fe6_sub(cross_prod, upper_prod);

  // Lower half: nonresidue * upper_prod + lower_prod
  result[1] = cross_prod;
  result[0] = fe6_add(fe6_mul_by_nonresidue(upper_prod), lower_prod);
  return result;
endfunction
function jb_point_t to_affine(jb_point_t p);
// Converts the three-coordinate Fp point p into a two-coordinate point:
//   x = p.x * fe_inv(p.z^2)
//   y = p.y * fe_inv(p.z^3)
// fe_inv is assumed to return the field inverse of its argument.
//
// NOTE(review): the version of this function that returned jb_point_t also
// wrote to_affine.z = 1. That write is dropped here on the understanding that
// af_point_t, like fp2_af_point_t, only has x and y members - confirm against
// the af_point_t typedef.
function af_point_t to_affine(jb_point_t p);
  fe_t z_;
  z_ = fe_mul(p.z, p.z);                    // z^2
  to_affine.x = fe_mul(p.x, fe_inv(z_));
  z_ = fe_mul(z_, p.z);                     // z^3
  to_affine.y = fe_mul(p.y, fe_inv(z_));
endfunction
function fp2_jb_point_t fp2_to_affine(fp2_jb_point_t p);
function fp2_af_point_t fp2_to_affine(fp2_jb_point_t p);
fe2_t z_;
z_ = fe2_mul(p.z, p.z);
fp2_to_affine.z = FE2_one;
fp2_to_affine.x = fe2_mul(p.x, fe2_inv(z_));
z_ = fe2_mul(z_, p.z);
fp2_to_affine.y = fe2_mul(p.y, fe2_inv(z_));

View File

@ -0,0 +1,129 @@
/*
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
`timescale 1ps/1ps
// Testbench for the bls12-381 pairing engine.
// It instantiates bls12_381_pairing with all of its stream interfaces wired to
// local if_axi_stream instances, then runs the package-level miller_loop task
// on (Gx, Gy) and (G2x, G2y) and compares the result against a fixed expected
// Fp^12 value. No stimulus is sent into the instantiated engine by this test.
module bls12_381_pairing_tb ();

import common_pkg::*;
import bls12_381_pkg::*;

parameter type FE_TYPE   = bls12_381_pkg::fe_t;
parameter type FE2_TYPE  = bls12_381_pkg::fe2_t;
parameter type FE12_TYPE = bls12_381_pkg::fe12_t;
parameter P              = bls12_381_pkg::P;

localparam CTL_BITS = 32;

// clk toggles every CLK_PERIOD time units, so one full clock cycle lasts 2*CLK_PERIOD
localparam CLK_PERIOD = 100;

logic clk, rst;

// rst starts low, goes high after 20*CLK_PERIOD and low again after another 20*CLK_PERIOD
initial begin
  rst = 0;
  repeat(2) #(20*CLK_PERIOD) rst = ~rst;
end

initial begin
  clk = 0;
  forever #CLK_PERIOD clk = ~clk;
end

// Input carries the G1 point in the low bits and the G2 point above it; output carries the Fp^12 result
if_axi_stream #(.DAT_BYTS(($bits(af_point_t) + $bits(fp2_af_point_t)+7)/8), .CTL_BITS(CTL_BITS)) in_if(clk);
if_axi_stream #(.DAT_BYTS(($bits(fe12_t)+7)/8), .CTL_BITS(CTL_BITS)) out_if(clk);

// Arithmetic unit interfaces: *_in_if carries two operands, *_out_if carries one result
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_in_if(clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_out_if(clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE2_TYPE)), .CTL_BITS(CTL_BITS)) add_fe2_in_if (clk);
if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(CTL_BITS)) add_fe2_out_if (clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE2_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe2_in_if (clk);
if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe2_out_if (clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE2_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_in_if(clk);
if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_out_if(clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE12_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe12_in_if(clk);
if_axi_stream #(.DAT_BITS($bits(FE12_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe12_out_if(clk);

bls12_381_pairing #(
  .FE_TYPE     ( FE_TYPE   ),
  .FE2_TYPE    ( FE2_TYPE  ),
  .FE12_TYPE   ( FE12_TYPE ),
  .CTL_BITS    ( CTL_BITS  ),
  .OVR_WRT_BIT ( 0         )
)
bls12_381_pairing (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_val ( in_if.val ),
  .o_rdy ( in_if.rdy ),
  .i_g1_af ( in_if.dat[0 +: $bits(af_point_t)] ),
  .i_g2_af ( in_if.dat[$bits(af_point_t) +: $bits(fp2_af_point_t)] ),
  .o_val  ( out_if.val ),
  .i_rdy  ( out_if.rdy ),
  .o_fe12 ( out_if.dat ),
  .o_mul_fe2_if ( mul_fe2_in_if ),
  .i_mul_fe2_if ( mul_fe2_out_if ),
  .o_add_fe2_if ( add_fe2_in_if ),
  .i_add_fe2_if ( add_fe2_out_if ),
  .o_sub_fe2_if ( sub_fe2_in_if ),
  .i_sub_fe2_if ( sub_fe2_out_if ),
  .o_mul_fe12_if ( mul_fe12_in_if ),
  .i_mul_fe12_if ( mul_fe12_out_if ),
  .o_mul_fe_if ( mul_fe_in_if ),
  .i_mul_fe_if ( mul_fe_out_if )
);

// The engine output is a single word, so start and end of packet are tied high
always_comb begin
  out_if.sop = 1;
  out_if.eop = 1;
end

initial begin
  // Note: this local P hides the module parameter P inside this block
  af_point_t P;
  fp2_af_point_t Q;
  fe12_t f, f_exp;

  in_if.reset_source();
  out_if.rdy = 0;
  #100ns;

  P.x = Gx;
  P.y = Gy;
  Q.x = G2x;
  Q.y = G2y;
  f = FE12_zero;
  // Expected miller loop output for the inputs above
  f_exp = {381'h1562633d4f2387ff79a0f625a6989072296a946ca6bbfa3fef879defde15ed96d205b2eebb454f48fb76fa8a845bcba7,
           381'h1868172fbbeb861d69c6c10f315c273d08312812c643dbf60588d0de3d2c4b3e9b21acd402f7ddee53f1c4797646ba96,
           381'h07508024863ec263bded120e45deb29c1f1303a056b279e116cb5fdb03013db19f81e78fa2b2b409cb2ce8e3ba96f4e6,
           381'h1431225e128c5e2bfafb9eba23746150907688583f52e07fcde4cc93452b0c2bcd0f0893b48a696c403c6980d0940741,
           381'h159bfbbdc31bb5cb0082c59e5f744773335ef1fdddb8ed86a1c23f61f18800b647ff7dae335fb9ab5fcf2188cb64d72d,
           381'h05d928cb508feeb3329e51aa0bec4f33ba865a22da5a4e97eb31b78c0150c0c6134f0f94bd0154b28430ee4c6052e82b,
           381'h087d1320fe5bad5c2d8e12c49e6aff41a0b80e1497bbe85682e22ed853f256041bdf97ef02bdb5d80a5f9bc31d85f25e,
           381'h159ef660e2d84185f55c0ccae1dd7f8f71b12c0beb7a431fede9e62794d9154e9a0ce4715f64b032492459076224c99b,
           381'h0cbc592a19a3f60c9938676b257b9c01ed9d708f9428b29e272a811d13d734485970d9d3f1c097b12bfa3d1678096b1d,
           381'h0751a051e0beb4a0e2351a7527d813b371e189056307d718a446e4016a3df787568a842f3401768dc03b966bd1db90ac,
           381'h0e760e96f911ae38a6042da82d7b0e30787864e725e9d5462d224c91c4497104d838d566d894564bc19e09d8af706c3f,
           381'h05194f5785436c8debf0eb2bab4c6ef3de7dc0633c85769173777b782bf897fa45025fd03e7be941123c4ee19910e62e};

  miller_loop(P, Q, f);

  // Print before checking: $fatal ends the simulation, so printing afterwards
  // would never show the computed value when it does not match
  print_fe12(f);
  assert(f == f_exp) else $fatal(1, "Miller loop did not match result");

  #1us $finish();
end

endmodule