From 5f74545e778cfb2383eed2a471721e84a120045a Mon Sep 17 00:00:00 2001 From: bsdevlin Date: Wed, 14 Aug 2019 19:46:29 +0800 Subject: [PATCH] Update pairing engine to calculate full ate pairing, along with testbench with several cases, will add more self checking and optimizations next. --- .../bls12_381/bls12_381_fe12_inv_wrapper.sv | 2 +- .../src/rtl/bls12_381/bls12_381_pairing.sv | 205 +++++++++------ .../bls12_381/bls12_381_pairing_wrapper.sv | 237 ++++++++++++------ zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv | 22 +- zcash_fpga/src/rtl/bls12_381/bls12_381_top.sv | 34 ++- zcash_fpga/src/tb/bls12_381_pairing_tb.sv | 100 +++++--- 6 files changed, 377 insertions(+), 223 deletions(-) diff --git a/zcash_fpga/src/rtl/bls12_381/bls12_381_fe12_inv_wrapper.sv b/zcash_fpga/src/rtl/bls12_381/bls12_381_fe12_inv_wrapper.sv index 0e87f9b..ec90ca7 100644 --- a/zcash_fpga/src/rtl/bls12_381/bls12_381_fe12_inv_wrapper.sv +++ b/zcash_fpga/src/rtl/bls12_381/bls12_381_fe12_inv_wrapper.sv @@ -84,7 +84,7 @@ ec_fe2_inv_s( .o_inv_fe2_if ( inv_fe2_i_if ), .i_inv_fe2_if ( inv_fe2_o_if ), .o_inv_fe_if ( inv_fe_o_if ), - .i_inv_fe_if ( inv_fe_i_if ), // + .i_inv_fe_if ( inv_fe_i_if ), .o_mul_fe_if ( mul_fe_o_if[0] ), .i_mul_fe_if ( mul_fe_i_if[0] ), .o_add_fe_if ( add_fe_o_if[0] ), diff --git a/zcash_fpga/src/rtl/bls12_381/bls12_381_pairing.sv b/zcash_fpga/src/rtl/bls12_381/bls12_381_pairing.sv index e86b419..9fa6440 100644 --- a/zcash_fpga/src/rtl/bls12_381/bls12_381_pairing.sv +++ b/zcash_fpga/src/rtl/bls12_381/bls12_381_pairing.sv @@ -33,8 +33,10 @@ module bls12_381_pairing parameter type G2_FP_AF_TYPE = fp2_af_point_t, parameter type G2_FP_JB_TYPE = fp2_jb_point_t, parameter CTL_BITS = 32, - parameter OVR_WRT_BIT = 8, // We override 16 bits from here - parameter SQ_BIT = OVR_WRT_BIT + 2 // We can re-use this bit as it is not used by multiplier + parameter OVR_WRT_BIT = 8, // We override 16 bits from here for internal control + parameter SQ_BIT = OVR_WRT_BIT + 16, // We can re-use this 
bit as it is not used by multiplier + parameter FMAP_BIT = OVR_WRT_BIT + 17, // Bit used to store power for fmap operation + parameter POW_BIT = OVR_WRT_BIT + 19 // These bits hold the value for the exponentiation (need $bits(bls12_381_pkg::ATE_X), 64 bits) )( input i_clk, i_rst, // Inputs @@ -57,24 +59,33 @@ module bls12_381_pairing if_axi_stream.sink i_sub_fe_if, // Interface to FE12_TYPE multiplier (mod P) if_axi_stream.source o_mul_fe12_if, - if_axi_stream.sink i_mul_fe12_if + if_axi_stream.sink i_mul_fe12_if, + // Interface to FE12_TYPE exponentiation (mod P) + if_axi_stream.source o_pow_fe12_if, + if_axi_stream.sink i_pow_fe12_if, + // Interface to FE12_TYPE frobenius map (mod P) + if_axi_stream.source o_fmap_fe12_if, + if_axi_stream.sink i_fmap_fe12_if, + // Interface to FE12_TYPE inversion (mod P) + if_axi_stream.source o_inv_fe12_if, + if_axi_stream.sink i_inv_fe12_if ); -FE_TYPE temp_a, temp_b; -always_comb begin - temp_a = o_mul_fe12_if.dat[0 +: $bits(FE_TYPE)]; - temp_b = o_mul_fe12_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)]; -end if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_i_if [1:0] (i_clk); if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_o_if [1:0] (i_clk); +if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe12_o_if [1:0] (i_clk); +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe12_i_if [1:0] (i_clk); + if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_i_if [1:0] (i_clk); if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_o_if [1:0] (i_clk); if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_i_if [1:0] (i_clk); if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_o_if [1:0] (i_clk); -if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_i_if [1:0] (i_clk); -if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_o_if [1:0] (i_clk); 
+if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_i_if [2:0] (i_clk); +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_o_if [2:0] (i_clk); + +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) final_exp_fe12_o_if (i_clk); logic dbl_i_val, dbl_o_rdy; @@ -99,17 +110,24 @@ logic f_val; logic [3:0] out_cnt; always_comb begin - dbl_f12_o_if.rdy = f_val && (~o_mul_fe12_if.val || (o_mul_fe12_if.val && o_mul_fe12_if.rdy)) && ((out_cnt/2 == 0) || (out_cnt/2 == 1) || (out_cnt/2 == 4)); // As this is a sparse f12 using full f12_mul - add_f12_o_if.rdy = f_val && (~o_mul_fe12_if.val || (o_mul_fe12_if.val && o_mul_fe12_if.rdy)) && ((out_cnt/2 == 0) || (out_cnt/2 == 1) || (out_cnt/2 == 4)); // As this is a sparse f12 using full f12_mul + dbl_f12_o_if.rdy = f_val && (~mul_fe12_o_if[0].val || (mul_fe12_o_if[0].val && mul_fe12_o_if[0].rdy)) && ((out_cnt/2 == 0) || (out_cnt/2 == 1) || (out_cnt/2 == 4)); // As this is a sparse f12 using full f12_mul + add_f12_o_if.rdy = f_val && (~mul_fe12_o_if[0].val || (mul_fe12_o_if[0].val && mul_fe12_o_if[0].rdy)) && ((out_cnt/2 == 0) || (out_cnt/2 == 1) || (out_cnt/2 == 4)); // As this is a sparse f12 using full f12_mul + + final_exp_fe12_o_if.dat = f[0][0][0]; + final_exp_fe12_o_if.err = 0; + final_exp_fe12_o_if.ctl = 0; + final_exp_fe12_o_if.mod = 0; end always_ff @ (posedge i_clk) begin if (i_rst) begin - o_fe12_if.reset_source(); + final_exp_fe12_o_if.val <= 0; + final_exp_fe12_o_if.sop <= 0; + final_exp_fe12_o_if.eop <= 0; g1_af_i <= 0; g2_r_jb_i <= 0; - i_mul_fe12_if.rdy <= 0; - o_mul_fe12_if.reset_source(); + mul_fe12_i_if[0].rdy <= 0; + mul_fe12_o_if[0].reset_source(); pair_state <= IDLE; add_i_val <= 0; dbl_i_val <= 0; @@ -124,15 +142,15 @@ always_ff @ (posedge i_clk) begin out_cnt <= 0; end else begin - if (o_fe12_if.rdy) o_fe12_if.val <= 0; if (add_o_rdy) add_i_val <= 0; if (dbl_o_rdy) dbl_i_val <= 0; - if (o_mul_fe12_if.rdy) o_mul_fe12_if.val <= 0; + if (mul_fe12_o_if[0].rdy) 
mul_fe12_o_if[0].val <= 0; + if (final_exp_fe12_o_if.rdy) final_exp_fe12_o_if.val <= 0; - i_mul_fe12_if.rdy <= 1; - if (i_mul_fe12_if.val && i_mul_fe12_if.rdy) begin - f <= {i_mul_fe12_if.dat, f[1], f[0][2:1], f[0][0][1]}; - f_val <= i_mul_fe12_if.eop; + mul_fe12_i_if[0].rdy <= 1; + if (mul_fe12_i_if[0].val && mul_fe12_i_if[0].rdy) begin + f <= {mul_fe12_i_if[0].dat, f[1], f[0][2:1], f[0][0][1]}; + f_val <= mul_fe12_i_if[0].eop; end case(pair_state) @@ -177,15 +195,15 @@ always_ff @ (posedge i_clk) begin // Also three multiplications case(miller_mult_cnt) 0: begin // Square first - if(~o_mul_fe12_if.val || (o_mul_fe12_if.val && o_mul_fe12_if.rdy)) begin - o_mul_fe12_if.val <= 1; - o_mul_fe12_if.sop <= out_cnt == 0; - o_mul_fe12_if.eop <= out_cnt == 11; - o_mul_fe12_if.dat <= {f[0][0][0], f[0][0][0]}; //square - o_mul_fe12_if.ctl <= miller_mult_cnt; - o_mul_fe12_if.ctl[SQ_BIT] <= 1; + if(~mul_fe12_o_if[0].val || (mul_fe12_o_if[0].val && mul_fe12_o_if[0].rdy)) begin + mul_fe12_o_if[0].val <= 1; + mul_fe12_o_if[0].sop <= out_cnt == 0; + mul_fe12_o_if[0].eop <= out_cnt == 11; + mul_fe12_o_if[0].dat <= {f[0][0][0], f[0][0][0]}; //square + mul_fe12_o_if[0].ctl <= miller_mult_cnt; + mul_fe12_o_if[0].ctl[SQ_BIT] <= 1; out_cnt <= out_cnt + 1; - f <= {i_mul_fe12_if.dat, f[1], f[0][2:1], f[0][0][1]}; + f <= {mul_fe12_i_if[0].dat, f[1], f[0][2:1], f[0][0][1]}; if (out_cnt == 11) begin out_cnt <= 0; miller_mult_cnt <= 1; @@ -193,19 +211,19 @@ always_ff @ (posedge i_clk) begin end end 1: begin // Multiply by double result - if(~o_mul_fe12_if.val || (o_mul_fe12_if.val && o_mul_fe12_if.rdy)) begin + if(~mul_fe12_o_if[0].val || (mul_fe12_o_if[0].val && mul_fe12_o_if[0].rdy)) begin if ((dbl_f12_o_if.val && f_val) || (out_cnt/2 == 5)) begin - o_mul_fe12_if.sop <= out_cnt == 0; - o_mul_fe12_if.eop <= out_cnt == 11; - o_mul_fe12_if.val <= 1; + mul_fe12_o_if[0].sop <= out_cnt == 0; + mul_fe12_o_if[0].eop <= out_cnt == 11; + mul_fe12_o_if[0].val <= 1; case (out_cnt/2) inside - 0,1,4: 
o_mul_fe12_if.dat <= {dbl_f12_o_if.dat, f[0][0][0]}; - default: o_mul_fe12_if.dat <= {381'd0, f[0][0][0]}; + 0,1,4: mul_fe12_o_if[0].dat <= {dbl_f12_o_if.dat, f[0][0][0]}; + default: mul_fe12_o_if[0].dat <= {381'd0, f[0][0][0]}; endcase out_cnt <= out_cnt + 1; - f <= {i_mul_fe12_if.dat, f[1], f[0][2:1], f[0][0][1]}; - o_mul_fe12_if.ctl <= miller_mult_cnt; - o_mul_fe12_if.ctl[SQ_BIT] <= 0; + f <= {mul_fe12_i_if[0].dat, f[1], f[0][2:1], f[0][0][1]}; + mul_fe12_o_if[0].ctl <= miller_mult_cnt; + mul_fe12_o_if[0].ctl[SQ_BIT] <= 0; if (out_cnt == 11) begin f_val <= 0; out_cnt <= 0; @@ -215,20 +233,20 @@ always_ff @ (posedge i_clk) begin end end 2: begin // Multiply by add result - if(~o_mul_fe12_if.val || (o_mul_fe12_if.val && o_mul_fe12_if.rdy)) begin + if(~mul_fe12_o_if[0].val || (mul_fe12_o_if[0].val && mul_fe12_o_if[0].rdy)) begin if ((add_f12_o_if.val && f_val) || (out_cnt/2 == 5)) begin g2_r_jb_i <= add_g2_o; - o_mul_fe12_if.ctl <= miller_mult_cnt; - o_mul_fe12_if.ctl[SQ_BIT] <= 0; - o_mul_fe12_if.sop <= out_cnt == 0; - o_mul_fe12_if.eop <= out_cnt == 11; - o_mul_fe12_if.val <= 1; + mul_fe12_o_if[0].ctl <= miller_mult_cnt; + mul_fe12_o_if[0].ctl[SQ_BIT] <= 0; + mul_fe12_o_if[0].sop <= out_cnt == 0; + mul_fe12_o_if[0].eop <= out_cnt == 11; + mul_fe12_o_if[0].val <= 1; out_cnt <= out_cnt + 1; case (out_cnt/2) inside - 0,1,4: o_mul_fe12_if.dat <= {add_f12_o_if.dat, f[0][0][0]}; - default: o_mul_fe12_if.dat <= {381'd0, f[0][0][0]}; - endcase - f <= {i_mul_fe12_if.dat, f[1], f[0][2:1], f[0][0][1]}; + 0,1,4: mul_fe12_o_if[0].dat <= {add_f12_o_if.dat, f[0][0][0]}; + default: mul_fe12_o_if[0].dat <= {381'd0, f[0][0][0]}; + endcase + f <= {mul_fe12_i_if[0].dat, f[1], f[0][2:1], f[0][0][1]}; if (out_cnt == 11) begin f_val <= 0; out_cnt <= 0; @@ -254,13 +272,12 @@ always_ff @ (posedge i_clk) begin end FINAL_EXP: begin - if (~o_fe12_if.val || (o_fe12_if.val && o_fe12_if.rdy)) begin - o_fe12_if.dat <= f[0][0][0]; - o_fe12_if.val <= 1; - o_fe12_if.sop <= out_cnt == 0; - 
o_fe12_if.eop <= out_cnt == 11; + if (~final_exp_fe12_o_if.val || (final_exp_fe12_o_if.val && final_exp_fe12_o_if.rdy)) begin + final_exp_fe12_o_if.val <= 1; + final_exp_fe12_o_if.sop <= out_cnt == 0; + final_exp_fe12_o_if.eop <= out_cnt == 11; out_cnt <= out_cnt + 1; - f <= {i_mul_fe12_if.dat, f[1], f[0][2:1], f[0][0][1]}; + if (final_exp_fe12_o_if.val) f <= {mul_fe12_i_if[0].dat, f[1], f[0][2:1], f[0][0][1]}; if (out_cnt == 11) begin pair_state <= IDLE; end @@ -289,12 +306,12 @@ bls12_381_pairing_miller_dbl ( .o_g2_jb ( dbl_g2_o ), .o_mul_fe2_if ( mul_fe2_i_if[0] ), .i_mul_fe2_if ( mul_fe2_o_if[0] ), - .o_add_fe_if ( add_fe_i_if[0] ), - .i_add_fe_if ( add_fe_o_if[0] ), - .o_sub_fe_if ( sub_fe_i_if[0] ), - .i_sub_fe_if ( sub_fe_o_if[0] ), - .o_mul_fe_if ( mul_fe_i_if[0] ), - .i_mul_fe_if ( mul_fe_o_if[0] ) + .o_add_fe_if ( add_fe_i_if[0] ), + .i_add_fe_if ( add_fe_o_if[0] ), + .o_sub_fe_if ( sub_fe_i_if[0] ), + .i_sub_fe_if ( sub_fe_o_if[0] ), + .o_mul_fe_if ( mul_fe_i_if[0] ), + .i_mul_fe_if ( mul_fe_o_if[0] ) ); bls12_381_pairing_miller_add #( @@ -313,23 +330,63 @@ bls12_381_pairing_miller_add ( .i_g1_af ( g1_af_i ), .i_g2_jb ( dbl_g2_o ), .i_g2_q_af ( g2_af_i ), - .o_res_fe12_sparse_if ( add_f12_o_if ), - .o_g2_jb ( add_g2_o ), + .o_res_fe12_sparse_if ( add_f12_o_if ), + .o_g2_jb ( add_g2_o ), .o_mul_fe2_if ( mul_fe2_i_if[1] ), .i_mul_fe2_if ( mul_fe2_o_if[1] ), - .o_add_fe_if ( add_fe_i_if[1] ), - .i_add_fe_if ( add_fe_o_if[1] ), - .o_sub_fe_if ( sub_fe_i_if[1] ), - .i_sub_fe_if ( sub_fe_o_if[1] ), - .o_mul_fe_if ( mul_fe_i_if[1] ), - .i_mul_fe_if ( mul_fe_o_if[1] ) + .o_add_fe_if ( add_fe_i_if[1] ), + .i_add_fe_if ( add_fe_o_if[1] ), + .o_sub_fe_if ( sub_fe_i_if[1] ), + .i_sub_fe_if ( sub_fe_o_if[1] ), + .o_mul_fe_if ( mul_fe_i_if[1] ), + .i_mul_fe_if ( mul_fe_o_if[1] ) +); + +bls12_381_final_exponent #( + .OVR_WRT_BIT ( OVR_WRT_BIT ), // Control can overlap + .FMAP_BIT ( FMAP_BIT ), + .POW_BIT ( POW_BIT ), + .SQ_BIT ( SQ_BIT ) +) +bls12_381_final_exponent ( 
+ .i_clk ( i_clk ), + .i_rst ( i_rst ), + .o_mul_fe12_if ( mul_fe12_o_if[1] ), + .i_mul_fe12_if ( mul_fe12_i_if[1] ), + .o_pow_fe12_if ( o_pow_fe12_if ), + .i_pow_fe12_if ( i_pow_fe12_if ), + .o_fmap_fe12_if ( o_fmap_fe12_if ), + .i_fmap_fe12_if ( i_fmap_fe12_if ), + .o_inv_fe12_if ( o_inv_fe12_if ), + .i_inv_fe12_if ( i_inv_fe12_if ), + .o_sub_fe_if ( sub_fe_i_if[2] ), + .i_sub_fe_if ( sub_fe_o_if[2] ), + .o_final_exp_fe12_if ( o_fe12_if ), + .i_final_exp_fe12_if ( final_exp_fe12_o_if ) +); + +resource_share # ( + .NUM_IN ( 2 ), + .DAT_BITS ( 2*$bits(FE_TYPE) ), + .CTL_BITS ( CTL_BITS ), + .OVR_WRT_BIT ( OVR_WRT_BIT + 12 ), + .PIPELINE_IN ( 0 ), + .PIPELINE_OUT ( 0 ) +) +resource_share_fe12_mul ( + .i_clk ( i_clk ), + .i_rst ( i_rst ), + .i_axi ( mul_fe12_o_if[1:0] ), + .o_res ( o_mul_fe12_if ), + .i_res ( i_mul_fe12_if ), + .o_axi ( mul_fe12_i_if[1:0] ) ); resource_share # ( .NUM_IN ( 2 ), .DAT_BITS ( 2*$bits(FE_TYPE) ), .CTL_BITS ( CTL_BITS ), - .OVR_WRT_BIT ( OVR_WRT_BIT + 8 ), + .OVR_WRT_BIT ( OVR_WRT_BIT + 10 ), .PIPELINE_IN ( 0 ), .PIPELINE_OUT ( 0 ) ) @@ -367,7 +424,7 @@ resource_share # ( .PIPELINE_IN ( 0 ), .PIPELINE_OUT ( 0 ) ) -resource_share_fe2_add ( +resource_share_fe_add ( .i_clk ( i_clk ), .i_rst ( i_rst ), .i_axi ( add_fe_i_if[1:0] ), @@ -377,20 +434,20 @@ resource_share_fe2_add ( ); resource_share # ( - .NUM_IN ( 2 ), + .NUM_IN ( 3 ), .DAT_BITS ( 2*$bits(FE_TYPE) ), .CTL_BITS ( CTL_BITS ), .OVR_WRT_BIT ( OVR_WRT_BIT + 8 ), .PIPELINE_IN ( 0 ), .PIPELINE_OUT ( 0 ) ) -resource_share_fe2_sub ( +resource_share_fe_sub ( .i_clk ( i_clk ), .i_rst ( i_rst ), - .i_axi ( sub_fe_i_if[1:0] ), + .i_axi ( sub_fe_i_if[2:0] ), .o_res ( o_sub_fe_if ), .i_res ( i_sub_fe_if ), - .o_axi ( sub_fe_o_if[1:0] ) + .o_axi ( sub_fe_o_if[2:0] ) ); diff --git a/zcash_fpga/src/rtl/bls12_381/bls12_381_pairing_wrapper.sv b/zcash_fpga/src/rtl/bls12_381/bls12_381_pairing_wrapper.sv index ea83b2f..53c794a 100644 --- a/zcash_fpga/src/rtl/bls12_381/bls12_381_pairing_wrapper.sv +++ 
b/zcash_fpga/src/rtl/bls12_381/bls12_381_pairing_wrapper.sv @@ -28,29 +28,29 @@ module bls12_381_pairing_wrapper parameter type G2_FP_AF_TYPE = fp2_af_point_t, parameter type G2_FP_JB_TYPE = fp2_jb_point_t, parameter CTL_BITS = 32, - parameter OVR_WRT_BIT = 8 // Need 32 bits for control + parameter OVR_WRT_BIT = 8 // Need 82 bits for control )( input i_clk, i_rst, // Inputs input i_val, output logic o_rdy, - input G1_FP_AF_TYPE i_g1_af, - input G2_FP_AF_TYPE i_g2_af, - if_axi_stream.source o_fe12_if, + input G1_FP_AF_TYPE i_g1_af, // G1 input point + input G2_FP_AF_TYPE i_g2_af, // G2 input point + if_axi_stream.source o_fe12_if, // Result fe12 of ate pairing // Interface to FE_TYPE multiplier (mod P) if_axi_stream.source o_mul_fe_if, if_axi_stream.sink i_mul_fe_if ); -if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_o_if [1:0] (i_clk); -if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_i_if [1:0] (i_clk); +if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_o_if [3:0] (i_clk); +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_i_if [3:0] (i_clk); if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_o_if [5:0] (i_clk); if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_i_if [5:0] (i_clk); -if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_o_if [5:0] (i_clk); -if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_i_if [5:0] (i_clk); +if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_o_if [6:0] (i_clk); +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_i_if [6:0] (i_clk); -if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_o_if [2:0] (i_clk); -if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_i_if [2:0] (i_clk); +if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_o_if [3:0] (i_clk); 
+if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_i_if [3:0] (i_clk); if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mnr_fe2_o_if [2:0] (i_clk); if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mnr_fe2_i_if [2:0] (i_clk); @@ -59,17 +59,27 @@ if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe6_i_if if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mnr_fe6_o_if (i_clk); if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mnr_fe6_i_if (i_clk); -if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe12_o_if (i_clk); -if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe12_i_if (i_clk); +if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe12_o_if [2:0] (i_clk); +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe12_i_if [2:0] (i_clk); +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) pow_fe12_o_if (i_clk); +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) pow_fe12_i_if (i_clk); + +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) fmap_fe12_o_if (i_clk); +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) fmap_fe12_i_if (i_clk); + +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) inv_fe12_o_if (i_clk); +if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) inv_fe12_i_if (i_clk); bls12_381_pairing #( .FE_TYPE ( FE_TYPE ), .FE2_TYPE ( FE2_TYPE ), .FE12_TYPE ( FE12_TYPE ), .CTL_BITS ( CTL_BITS ), - .OVR_WRT_BIT ( OVR_WRT_BIT + 0 ),// 0 to 15 - .SQ_BIT ( OVR_WRT_BIT + 2 ) + .OVR_WRT_BIT ( OVR_WRT_BIT ), // 16 bits + .SQ_BIT ( OVR_WRT_BIT + 16 ), + .FMAP_BIT ( OVR_WRT_BIT + 17 ), + .POW_BIT ( OVR_WRT_BIT + 17 ) ) bls12_381_pairing ( .i_clk ( i_clk ), @@ -78,17 +88,70 @@ bls12_381_pairing ( .o_rdy ( o_rdy ), .i_g1_af ( i_g1_af ), .i_g2_af ( i_g2_af ), - .o_fe12_if ( o_fe12_if ), - .o_mul_fe2_if ( mul_fe2_o_if[1] ), - 
.i_mul_fe2_if ( mul_fe2_i_if[1] ), - .o_add_fe_if ( add_fe_o_if[4] ), - .i_add_fe_if ( add_fe_i_if[4] ), - .o_sub_fe_if ( sub_fe_o_if[4] ), - .i_sub_fe_if ( sub_fe_i_if[4] ), - .o_mul_fe12_if ( mul_fe12_o_if ), - .i_mul_fe12_if ( mul_fe12_i_if ), - .o_mul_fe_if ( mul_fe_o_if[1] ), - .i_mul_fe_if ( mul_fe_i_if[1] ) + .o_fe12_if ( o_fe12_if ), + .o_mul_fe2_if ( mul_fe2_o_if[1] ), + .i_mul_fe2_if ( mul_fe2_i_if[1] ), + .o_add_fe_if ( add_fe_o_if[4] ), + .i_add_fe_if ( add_fe_i_if[4] ), + .o_sub_fe_if ( sub_fe_o_if[4] ), + .i_sub_fe_if ( sub_fe_i_if[4] ), + .o_mul_fe12_if ( mul_fe12_o_if[0] ), + .i_mul_fe12_if ( mul_fe12_i_if[0] ), + .o_mul_fe_if ( mul_fe_o_if[1] ), + .i_mul_fe_if ( mul_fe_i_if[1] ), + .o_pow_fe12_if ( pow_fe12_o_if ), + .i_pow_fe12_if ( pow_fe12_i_if ), + .o_fmap_fe12_if ( fmap_fe12_o_if ), + .i_fmap_fe12_if ( fmap_fe12_i_if ), + .o_inv_fe12_if ( inv_fe12_o_if ), + .i_inv_fe12_if ( inv_fe12_i_if ) +); + +bls12_381_fe12_fmap_wrapper #( + .FE_TYPE ( FE_TYPE ), + .CTL_BITS ( CTL_BITS ), + .CTL_BIT_POW ( OVR_WRT_BIT + 17 ) +) +bls12_381_fe12_fmap_wrapper ( + .i_clk ( i_clk ), + .i_rst ( i_rst ), + .o_fmap_fe12_if ( fmap_fe12_i_if ), + .i_fmap_fe12_if ( fmap_fe12_o_if ), + .o_mul_fe2_if ( mul_fe2_o_if[2] ), + .i_mul_fe2_if ( mul_fe2_i_if[2] ), + .o_mul_fe_if ( mul_fe_o_if[2] ), + .i_mul_fe_if ( mul_fe_i_if[2] ) +); + +bls12_381_fe12_inv_wrapper #( + .FE_TYPE ( FE_TYPE ), + .CTL_BITS ( CTL_BITS ), + .OVR_WRT_BIT ( OVR_WRT_BIT + 0 ) // Can overlap as we restore control on output when valid +) +bls12_381_fe12_inv_wrapper ( + .i_clk ( i_clk ), + .i_rst ( i_rst ), + .o_inv_fe12_if ( inv_fe12_i_if ), + .i_inv_fe12_if ( inv_fe12_o_if ), + .o_mul_fe_if ( mul_fe_o_if[3] ), + .i_mul_fe_if ( mul_fe_i_if[3] ) +); + +ec_fe12_pow_s #( + .FE_TYPE ( FE_TYPE ), + .CTL_BIT_POW ( OVR_WRT_BIT + 17 ), + .POW_BITS ( $bits(bls12_381_pkg::ATE_X) ), + .SQ_BIT ( OVR_WRT_BIT + 16 ) +) +ec_fe12_pow_s ( + .i_clk ( i_clk ), + .i_rst ( i_rst ), + .o_mul_fe12_if ( mul_fe12_o_if[1] ), + 
.i_mul_fe12_if ( mul_fe12_i_if[1] ), + .o_sub_fe_if ( sub_fe_o_if[5] ), + .i_sub_fe_if ( sub_fe_i_if[5] ), + .o_pow_fe12_if ( pow_fe12_i_if ), + .i_pow_fe12_if ( pow_fe12_o_if ) ); ec_fe2_mul_s #( @@ -98,14 +161,14 @@ ec_fe2_mul_s #( ec_fe2_mul_s ( .i_clk ( i_clk ), .i_rst ( i_rst ), - .o_mul_fe2_if ( mul_fe2_i_if[2] ), - .i_mul_fe2_if ( mul_fe2_o_if[2] ), - .o_add_fe_if ( add_fe_o_if[0] ), - .i_add_fe_if ( add_fe_i_if[0] ), - .o_sub_fe_if ( sub_fe_o_if[0] ), - .i_sub_fe_if ( sub_fe_i_if[0] ), - .o_mul_fe_if ( mul_fe_o_if[0] ), - .i_mul_fe_if ( mul_fe_i_if[0] ) + .o_mul_fe2_if ( mul_fe2_i_if[3] ), + .i_mul_fe2_if ( mul_fe2_o_if[3] ), + .o_add_fe_if ( add_fe_o_if[0] ), + .i_add_fe_if ( add_fe_i_if[0] ), + .o_sub_fe_if ( sub_fe_o_if[0] ), + .i_sub_fe_if ( sub_fe_i_if[0] ), + .o_mul_fe_if ( mul_fe_o_if[0] ), + .i_mul_fe_if ( mul_fe_i_if[0] ) ); fe2_mul_by_nonresidue_s #( @@ -116,32 +179,31 @@ fe2_mul_by_nonresidue_s ( .i_rst ( i_rst ), .o_mnr_fe2_if ( mnr_fe2_i_if[2] ), .i_mnr_fe2_if ( mnr_fe2_o_if[2] ), - .o_add_fe_if ( add_fe_o_if[1] ), - .i_add_fe_if ( add_fe_i_if[1] ), - .o_sub_fe_if ( sub_fe_o_if[1] ), - .i_sub_fe_if ( sub_fe_i_if[1] ) + .o_add_fe_if ( add_fe_o_if[1] ), + .i_add_fe_if ( add_fe_i_if[1] ), + .o_sub_fe_if ( sub_fe_o_if[1] ), + .i_sub_fe_if ( sub_fe_i_if[1] ) ); ec_fe6_mul_s #( .FE_TYPE ( FE_TYPE ), .FE2_TYPE ( FE2_TYPE ), .FE6_TYPE ( FE6_TYPE ), - .CTL_BITS ( CTL_BITS ), - .OVR_WRT_BIT ( OVR_WRT_BIT + 16 ) // 16 to 19 + .OVR_WRT_BIT ( OVR_WRT_BIT + 8 ) // 3 bits ) ec_fe6_mul_s ( .i_clk ( i_clk ), .i_rst ( i_rst ), .o_mul_fe2_if ( mul_fe2_o_if[0] ), .i_mul_fe2_if ( mul_fe2_i_if[0] ), - .o_add_fe_if ( add_fe_o_if[2] ), - .i_add_fe_if ( add_fe_i_if[2] ), - .o_sub_fe_if ( sub_fe_o_if[2] ), - .i_sub_fe_if ( sub_fe_i_if[2] ), + .o_add_fe_if ( add_fe_o_if[2] ), + .i_add_fe_if ( add_fe_i_if[2] ), + .o_sub_fe_if ( sub_fe_o_if[2] ), + .i_sub_fe_if ( sub_fe_i_if[2] ), .o_mnr_fe2_if ( mnr_fe2_o_if[0] ), .i_mnr_fe2_if ( mnr_fe2_i_if[0] ), - .o_mul_fe6_if ( 
mul_fe6_i_if ), - .i_mul_fe6_if ( mul_fe6_o_if ) + .o_mul_fe6_if ( mul_fe6_i_if ), + .i_mul_fe6_if ( mul_fe6_o_if ) ); fe6_mul_by_nonresidue_s #( @@ -152,28 +214,28 @@ fe6_mul_by_nonresidue_s ( .i_rst ( i_rst ), .o_mnr_fe2_if ( mnr_fe2_o_if[1] ), .i_mnr_fe2_if ( mnr_fe2_i_if[1] ), - .o_mnr_fe6_if ( mnr_fe6_i_if ), - .i_mnr_fe6_if ( mnr_fe6_o_if ) + .o_mnr_fe6_if ( mnr_fe6_i_if ), + .i_mnr_fe6_if ( mnr_fe6_o_if ) ); ec_fe12_mul_s #( .FE_TYPE ( FE_TYPE ), - .OVR_WRT_BIT ( OVR_WRT_BIT + 20 ), // 20 to 23 - .SQ_BIT ( OVR_WRT_BIT + 2 ) + .OVR_WRT_BIT ( OVR_WRT_BIT + 0 ), // 3 bits + .SQ_BIT ( OVR_WRT_BIT + 16 ) ) ec_fe12_mul_s ( .i_clk ( i_clk ), .i_rst ( i_rst ), - .o_mul_fe6_if ( mul_fe6_o_if ), - .i_mul_fe6_if ( mul_fe6_i_if ), - .o_add_fe_if ( add_fe_o_if[3] ), - .i_add_fe_if ( add_fe_i_if[3] ), - .o_sub_fe_if ( sub_fe_o_if[3] ), - .i_sub_fe_if ( sub_fe_i_if[3] ), - .o_mnr_fe6_if ( mnr_fe6_o_if ), - .i_mnr_fe6_if ( mnr_fe6_i_if ), - .o_mul_fe12_if ( mul_fe12_i_if ), - .i_mul_fe12_if ( mul_fe12_o_if ) + .o_mul_fe6_if ( mul_fe6_o_if ), + .i_mul_fe6_if ( mul_fe6_i_if ), + .o_add_fe_if ( add_fe_o_if[3] ), + .i_add_fe_if ( add_fe_i_if[3] ), + .o_sub_fe_if ( sub_fe_o_if[3] ), + .i_sub_fe_if ( sub_fe_i_if[3] ), + .o_mnr_fe6_if ( mnr_fe6_o_if ), + .i_mnr_fe6_if ( mnr_fe6_i_if ), + .o_mul_fe12_if ( mul_fe12_i_if[2] ), + .i_mul_fe12_if ( mul_fe12_o_if[2] ) ); adder_pipe # ( @@ -198,15 +260,15 @@ subtractor_pipe # ( subtractor_pipe ( .i_clk ( i_clk ), .i_rst ( i_rst ), - .i_sub ( sub_fe_o_if[5] ), - .o_sub ( sub_fe_i_if[5] ) + .i_sub ( sub_fe_o_if[6] ), + .o_sub ( sub_fe_i_if[6] ) ); resource_share # ( .NUM_IN ( 5 ), .DAT_BITS ( 2*$bits(FE_TYPE) ), .CTL_BITS ( CTL_BITS ), - .OVR_WRT_BIT ( OVR_WRT_BIT + 24 ), // 24 to 27 + .OVR_WRT_BIT ( OVR_WRT_BIT + 32 ), // 3 bits .PIPELINE_IN ( 1 ), .PIPELINE_OUT ( 1 ) ) @@ -220,61 +282,78 @@ resource_share_fe_add ( ); resource_share # ( - .NUM_IN ( 5 ), + .NUM_IN ( 6 ), .DAT_BITS ( 2*$bits(FE_TYPE) ), .CTL_BITS ( CTL_BITS ), - .OVR_WRT_BIT 
( OVR_WRT_BIT + 24 ), // 24 to 27 + .OVR_WRT_BIT ( OVR_WRT_BIT + 32 ), // 3 bits .PIPELINE_IN ( 1 ), .PIPELINE_OUT ( 1 ) ) resource_share_fe_sub ( .i_clk ( i_clk ), .i_rst ( i_rst ), - .i_axi ( sub_fe_o_if[4:0] ), - .o_res ( sub_fe_o_if[5] ), - .i_res ( sub_fe_i_if[5] ), - .o_axi ( sub_fe_i_if[4:0] ) + .i_axi ( sub_fe_o_if[5:0] ), + .o_res ( sub_fe_o_if[6] ), + .i_res ( sub_fe_i_if[6] ), + .o_axi ( sub_fe_i_if[5:0] ) ); resource_share # ( - .NUM_IN ( 2 ), + .NUM_IN ( 4 ), .DAT_BITS ( 2*$bits(FE_TYPE) ), .CTL_BITS ( CTL_BITS ), - .OVR_WRT_BIT ( OVR_WRT_BIT + 24 ), // 24 to 27 + .OVR_WRT_BIT ( OVR_WRT_BIT + 32 ), // 3 bits .PIPELINE_IN ( 1 ), .PIPELINE_OUT ( 1 ) ) resource_share_fe_mul ( .i_clk ( i_clk ), .i_rst ( i_rst ), - .i_axi ( mul_fe_o_if[1:0] ), + .i_axi ( mul_fe_o_if[3:0] ), .o_res ( o_mul_fe_if ), .i_res ( i_mul_fe_if ), - .o_axi ( mul_fe_i_if[1:0] ) + .o_axi ( mul_fe_i_if[3:0] ) ); resource_share # ( - .NUM_IN ( 2 ), + .NUM_IN ( 3 ), .DAT_BITS ( 2*$bits(FE_TYPE) ), .CTL_BITS ( CTL_BITS ), - .OVR_WRT_BIT ( OVR_WRT_BIT + 30 ), // 30 to 31 + .OVR_WRT_BIT ( OVR_WRT_BIT + 38 ), // 2 bits .PIPELINE_IN ( 1 ), .PIPELINE_OUT ( 1 ) ) resource_share_fe2_mul ( .i_clk ( i_clk ), .i_rst ( i_rst ), - .i_axi ( mul_fe2_o_if[1:0] ), - .o_res ( mul_fe2_o_if[2] ), - .i_res ( mul_fe2_i_if[2] ), - .o_axi ( mul_fe2_i_if[1:0] ) + .i_axi ( mul_fe2_o_if[2:0] ), + .o_res ( mul_fe2_o_if[3] ), + .i_res ( mul_fe2_i_if[3] ), + .o_axi ( mul_fe2_i_if[2:0] ) ); resource_share # ( .NUM_IN ( 2 ), .DAT_BITS ( 2*$bits(FE_TYPE) ), .CTL_BITS ( CTL_BITS ), - .OVR_WRT_BIT ( OVR_WRT_BIT + 32 ), // 32 to 33 + .OVR_WRT_BIT ( OVR_WRT_BIT + 42 ), // 2 bits + .PIPELINE_IN ( 1 ), + .PIPELINE_OUT ( 1 ) +) +resource_share_fe12_mul ( + .i_clk ( i_clk ), + .i_rst ( i_rst ), + .i_axi ( mul_fe12_o_if[1:0] ), + .o_res ( mul_fe12_o_if[2] ), + .i_res ( mul_fe12_i_if[2] ), + .o_axi ( mul_fe12_i_if[1:0] ) +); + +resource_share # ( + .NUM_IN ( 2 ), + .DAT_BITS ( 2*$bits(FE_TYPE) ), + .CTL_BITS ( CTL_BITS ), + 
.OVR_WRT_BIT ( OVR_WRT_BIT + 46 ), // 2 bits .PIPELINE_IN ( 1 ), .PIPELINE_OUT ( 1 ) ) diff --git a/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv b/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv index d04dad5..a7757c5 100644 --- a/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv +++ b/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv @@ -654,7 +654,6 @@ package bls12_381_pkg; t3 = fe2_add(t3, t3); // 10. [9] t6 = fe2_add(R.x, t4); // 11. [3] - t5 = fe2_mul(t4, t4); // 12. [3] R.x = fe2_sub(t5, t3); // 13. [12, 10] @@ -896,20 +895,25 @@ package bls12_381_pkg; endtask task print_jb_point(jb_point_t p); - $display("x:%h", p.x); - $display("y:%h", p.y); - $display("z:%h", p.z); + $display("x:0x%h", p.x); + $display("y:0x%h", p.y); + $display("z:0x%h", p.z); endtask task print_fp2_jb_point(fp2_jb_point_t p); - $display("x:(c1:%h, c0:%h)", p.x[1], p.x[0]); - $display("y:(c1:%h, c0:%h)", p.y[1], p.y[0]); - $display("z:(c1:%h, c0:%h)", p.z[1], p.z[0]); + $display("x:(c1:0x%h, c0:0x%h)", p.x[1], p.x[0]); + $display("y:(c1:0x%h, c0:0x%h)", p.y[1], p.y[0]); + $display("z:(c1:0x%h, c0:0x%h)", p.z[1], p.z[0]); endtask + + task print_af_point(af_point_t p); + $display("x:(0x%h)", p.x); + $display("y:(0x%h)", p.y); + endtask task print_fp2_af_point(fp2_af_point_t p); - $display("x:(c1:%h, c0:%h)", p.x[1], p.x[0]); - $display("y:(c1:%h, c0:%h)", p.y[1], p.y[0]); + $display("x:(c1:0x%h, c0:0x%h)", p.x[1], p.x[0]); + $display("y:(c1:0x%h, c0:0x%h)", p.y[1], p.y[0]); endtask endpackage \ No newline at end of file diff --git a/zcash_fpga/src/rtl/bls12_381/bls12_381_top.sv b/zcash_fpga/src/rtl/bls12_381/bls12_381_top.sv index f0399b6..2902bea 100644 --- a/zcash_fpga/src/rtl/bls12_381/bls12_381_top.sv +++ b/zcash_fpga/src/rtl/bls12_381/bls12_381_top.sv @@ -78,10 +78,10 @@ localparam CTL_BITS = 70; // 67:32 Pairing engine - TODO conslidate the logic used here with the point multiplication if_axi_stream #(.DAT_BITS(2*$bits(bls12_381_pkg::fe_t)), .CTL_BITS(CTL_BITS)) mul_in_if [4:0] (i_clk) ; 
if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fe_t)), .CTL_BITS(CTL_BITS)) mul_out_if [4:0](i_clk); -if_axi_stream #(.DAT_BITS(2*$bits(bls12_381_pkg::fe_t)), .CTL_BITS(CTL_BITS)) add_in_if [4:0] (i_clk); -if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fe_t)), .CTL_BITS(CTL_BITS)) add_out_if [4:0] (i_clk); -if_axi_stream #(.DAT_BITS(2*$bits(bls12_381_pkg::fe_t)), .CTL_BITS(CTL_BITS)) sub_in_if [4:0] (i_clk); -if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fe_t)), .CTL_BITS(CTL_BITS)) sub_out_if [4:0] (i_clk); +if_axi_stream #(.DAT_BITS(2*$bits(bls12_381_pkg::fe_t)), .CTL_BITS(CTL_BITS)) add_in_if [3:0] (i_clk); +if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fe_t)), .CTL_BITS(CTL_BITS)) add_out_if [3:0] (i_clk); +if_axi_stream #(.DAT_BITS(2*$bits(bls12_381_pkg::fe_t)), .CTL_BITS(CTL_BITS)) sub_in_if [3:0] (i_clk); +if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fe_t)), .CTL_BITS(CTL_BITS)) sub_out_if [3:0] (i_clk); if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fe_t))) binv_i_if(i_clk); if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fe_t))) binv_o_if(i_clk); @@ -356,11 +356,7 @@ bls12_381_pairing_wrapper ( .i_g2_af ( pair_i_g2 ), .o_fe12_if ( pair_o_res_if ), .o_mul_fe_if ( mul_in_if[3] ), - .i_mul_fe_if ( mul_out_if[3] ), - .o_add_fe_if ( add_in_if[3] ), - .i_add_fe_if ( add_out_if[3] ), - .o_sub_fe_if ( sub_in_if[3] ), - .i_sub_fe_if ( sub_out_if[3] ) + .i_mul_fe_if ( mul_out_if[3] ) ); resource_share # ( @@ -381,7 +377,7 @@ resource_share_mul ( ); resource_share # ( - .NUM_IN ( 4 ), + .NUM_IN ( 3 ), .DAT_BITS ( 2*$bits(bls12_381_pkg::fe_t) ), .CTL_BITS ( CTL_BITS ), .OVR_WRT_BIT ( 24 ), @@ -391,14 +387,14 @@ resource_share # ( resource_share_sub ( .i_clk ( i_clk ), .i_rst ( i_rst ), - .i_axi ( sub_in_if[3:0] ), - .o_res ( sub_in_if[4] ), - .i_res ( sub_out_if[4] ), - .o_axi ( sub_out_if[3:0] ) + .i_axi ( sub_in_if[2:0] ), + .o_res ( sub_in_if[3] ), + .i_res ( sub_out_if[3] ), + .o_axi ( sub_out_if[2:0] ) ); resource_share # ( - .NUM_IN ( 4 ), + .NUM_IN ( 3 ), 
.DAT_BITS ( 2*$bits(bls12_381_pkg::fe_t) ), .CTL_BITS ( CTL_BITS ), .OVR_WRT_BIT ( 24 ), @@ -408,10 +404,10 @@ resource_share # ( resource_share_add ( .i_clk ( i_clk ), .i_rst ( i_rst ), - .i_axi ( add_in_if[3:0] ), - .o_res ( add_in_if[4] ), - .i_res ( add_out_if[4] ), - .o_axi ( add_out_if[3:0] ) + .i_axi ( add_in_if[2:0] ), + .o_res ( add_in_if[3] ), + .i_res ( add_out_if[3] ), + .o_axi ( add_out_if[2:0] ) ); ec_fp_mult_mod #( diff --git a/zcash_fpga/src/tb/bls12_381_pairing_tb.sv b/zcash_fpga/src/tb/bls12_381_pairing_tb.sv index 59051a1..53778b5 100644 --- a/zcash_fpga/src/tb/bls12_381_pairing_tb.sv +++ b/zcash_fpga/src/tb/bls12_381_pairing_tb.sv @@ -30,7 +30,7 @@ parameter P = bls12_381_pkg::P; af_point_t G1 = {Gy, Gx}; fp2_af_point_t G2 = {G2y, G2x}; -localparam CTL_BITS = 48; +localparam CTL_BITS = 84; localparam CLK_PERIOD = 100; @@ -51,10 +51,6 @@ if_axi_stream #(.DAT_BYTS(($bits(FE_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) out_if(clk if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_o_if(clk); if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_i_if(clk); -if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_o_if (clk); -if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_i_if (clk); -if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_o_if (clk); -if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_i_if (clk); ec_fp_mult_mod #( .P ( P ), @@ -68,32 +64,6 @@ ec_fp_mult_mod ( .o_mul ( mul_fe_i_if ) ); -adder_pipe # ( - .BITS ( bls12_381_pkg::DAT_BITS ), - .P ( P ), - .CTL_BITS ( CTL_BITS ), - .LEVEL ( 2 ) -) -adder_pipe ( - .i_clk ( clk ), - .i_rst ( rst ), - .i_add ( add_fe_o_if ), - .o_add ( add_fe_i_if ) -); - -subtractor_pipe # ( - .BITS ( bls12_381_pkg::DAT_BITS ), - .P ( P ), - .CTL_BITS ( CTL_BITS ), - .LEVEL ( 2 ) -) -subtractor_pipe ( - .i_clk ( clk ), - .i_rst ( rst ), - .i_sub ( sub_fe_o_if ), - .o_sub ( sub_fe_i_if ) -); - 
bls12_381_pairing_wrapper #( .CTL_BITS ( CTL_BITS ), .OVR_WRT_BIT ( 0 ) @@ -107,14 +77,9 @@ bls12_381_pairing_wrapper ( .i_g2_af ( in_if.dat[$bits(af_point_t) +: $bits(fp2_af_point_t)] ), .o_fe12_if ( out_if ), .o_mul_fe_if ( mul_fe_o_if ), - .i_mul_fe_if ( mul_fe_i_if ), - .o_add_fe_if ( add_fe_o_if ), - .i_add_fe_if ( add_fe_i_if ), - .o_sub_fe_if ( sub_fe_o_if ), - .i_sub_fe_if ( sub_fe_i_if ) + .i_mul_fe_if ( mul_fe_i_if ) ); - // This just tests our software model vs a known good result task test0(); af_point_t P; @@ -144,7 +109,6 @@ task test0(); endtask - task test1(input af_point_t G1_p, fp2_af_point_t G2_p); begin integer signed get_len; @@ -153,7 +117,7 @@ begin FE12_TYPE f_out, f_exp; $display("Running test1 ..."); - miller_loop(G1_p, G2_p, f_exp); + ate_pairing(G1_p, G2_p, f_exp); start_time = $time; fork @@ -182,14 +146,68 @@ begin end endtask; +task test_linear(); // Checks SW model e(G1, n*G2) == e(n*G1, G2) for n = 2, then compares the DUT result for (n*G1, G2) against the model +begin + integer signed get_len; + logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat; + integer start_time, finish_time, n; + FE12_TYPE f_out, f_exp0, f_exp1; + af_point_t G1_a, G1_a_n; + jb_point_t G1_j; // G1 generator in Jacobian form (z = 1); must be a G1 type, not an Fp2 affine point + fp2_af_point_t G2_a, G2_a_n; + fp2_jb_point_t G2_j; + + $display("Running test_linear ..."); + + G1_a = {Gy, Gx}; + G2_a = {G2y, G2x}; + G1_j = {381'd1, Gy, Gx}; + G2_j = {381'd1, G2y, G2x}; + n = 2; + G1_a_n = to_affine(point_mult(n, G1_j)); + G2_a_n = fp2_to_affine(fp2_point_mult(n, G2_j)); + + ate_pairing(G1_a, G2_a_n, f_exp0); + ate_pairing(G1_a_n, G2_a, f_exp1); + + assert(f_exp0 == f_exp1) else $fatal(1, "Error in test_linear with sw model"); + + start_time = $time; + fork + in_if.put_stream({G2_a, G1_a_n}, (($bits(af_point_t) + $bits(fp2_af_point_t))+7)/8); + out_if.get_stream(get_dat, get_len); + join + finish_time = $time; + + for (int i = 0; i < 2; i++) + for (int j = 0; j < 3; j++) + for (int k = 0; k < 2; k++) + f_out[i][j][k] = get_dat[(i*6+j*2+k)*384 +: $bits(FE_TYPE)]; + + $display("Expected:"); + print_fe12(f_exp1); + $display("Was:"); + print_fe12(f_out); + + 
$display("test_linear finished in %d clocks", (finish_time-start_time)/(CLK_PERIOD)); + + if (f_exp1 != f_out) begin + $fatal(1, "%m %t ERROR: output was wrong", $time); + end + + $display("test_linear PASSED"); +end +endtask; + initial begin in_if.reset_source(); out_if.rdy = 0; #100ns; test0(); // Test SW model - test1(G1, G2); - + test1(G1, G2); // Pairing of generators + test_linear(); // test linear properties e(n*G1,G2) == e(G1, n*G2), ... + #1us $finish(); end