diff --git a/ip_cores/ec/src/rtl/ec_fp2_point_add.sv b/ip_cores/ec/src/rtl/ec_fp2_point_add.sv index 558c460..b563194 100644 --- a/ip_cores/ec/src/rtl/ec_fp2_point_add.sv +++ b/ip_cores/ec/src/rtl/ec_fp2_point_add.sv @@ -49,236 +49,10 @@ module ec_fp2_point_add if_axi_stream #(.DAT_BITS(2*$bits(FE2_TYPE)), .CTL_BITS(8)) mul_if_fe2_i(i_clk); if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(8)) mul_if_fe2_o(i_clk); - -localparam ADD_CTL_BIT = 8; if_axi_stream #(.DAT_BITS(2*$bits(FE2_TYPE)), .CTL_BITS(8)) add_if_fe2_i(i_clk); if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(8)) add_if_fe2_o(i_clk); -if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16)) add_if_fe_i [2] (i_clk); -if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) add_if_fe_o [2] (i_clk); - if_axi_stream #(.DAT_BITS(2*$bits(FE2_TYPE)), .CTL_BITS(8)) sub_if_fe2_i(i_clk); if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(8)) sub_if_fe2_o(i_clk); -if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16)) sub_if_fe_i [2] (i_clk); -if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) sub_if_fe_o [2] (i_clk); - - - - -// Point addtions are simple additions on each of the Fp elements -enum {ADD0, ADD1} add_state; -always_comb begin - add_if_fe2_i.rdy = add_state == ADD1 && (~add_if_fe_o[0].val || (add_if_fe_o[0].val && add_if_fe_o[0].rdy)); - add_if_fe_i[0].rdy = ~add_if_fe2_o.val || (add_if_fe2_o.val && add_if_fe2_o.rdy); -end - -always_ff @ (posedge i_clk) begin - if (i_rst) begin - add_if_fe2_o.reset_source(); - add_state <= ADD0; - add_if_fe_o[0].reset_source(); - end else begin - - if (add_if_fe_o[0].val && add_if_fe_o[0].rdy) add_if_fe_o[0].val <= 0; - if (add_if_fe2_o.val && add_if_fe2_o.rdy) add_if_fe2_o.val <= 0; - - // One process to parse inputs and send them to the adder - case(add_state) - ADD0: begin - if (~add_if_fe_o[0].val || (add_if_fe_o[0].val && add_if_fe_o[0].rdy)) begin - add_if_fe_o[0].copy_if({add_if_fe2_i.dat[0 +: $bits(FE_TYPE)], - add_if_fe2_i.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)]}, - add_if_fe2_i.val, 1, 1, add_if_fe2_i.err, add_if_fe2_i.mod, add_if_fe2_i.ctl); - add_if_fe_o[0].ctl[ADD_CTL_BIT] <= 0; - if (add_if_fe2_i.val) add_state <= ADD1; - end - end - ADD1: begin - if (~add_if_fe_o[0].val || (add_if_fe_o[0].val && add_if_fe_o[0].rdy)) begin - add_if_fe_o[0].copy_if({add_if_fe2_i.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)], - add_if_fe2_i.dat[$bits(FE2_TYPE)+$bits(FE_TYPE) +: $bits(FE_TYPE)]}, - add_if_fe2_i.val, 1, 1, add_if_fe2_i.err, add_if_fe2_i.mod, add_if_fe2_i.ctl); - add_if_fe_o[0].ctl[ADD_CTL_BIT] <= 1; - if (add_if_fe2_i.val) add_state <= ADD0; - end - end - endcase - - // One process to assign outputs - if (~add_if_fe2_o.val || (add_if_fe2_o.val && add_if_fe2_o.rdy)) begin - add_if_fe2_o.ctl <= add_if_fe_i[0].ctl; - if (add_if_fe_i[0].ctl[ADD_CTL_BIT] == 0) begin - if (add_if_fe_i[0].val) - add_if_fe2_o.dat[0 +: $bits(FE_TYPE)] <= add_if_fe_i[0].dat; - end else begin - add_if_fe2_o.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= add_if_fe_i[0].dat; - add_if_fe2_o.val <= add_if_fe_i[0].val; - end - end - end -end - -// Point subtractions are simple subtractions on each of the Fp elements -enum {SUB0, SUB1} sub_state; -always_comb begin - sub_if_fe2_i.rdy = sub_state == ADD1 && (~sub_if_fe_o[0].val || (sub_if_fe_o[0].val && sub_if_fe_o[0].rdy)); - sub_if_fe_i[0].rdy = ~sub_if_fe2_o.val || (sub_if_fe2_o.val && sub_if_fe2_o.rdy); -end - -always_ff @ (posedge i_clk) begin - if (i_rst) begin - sub_if_fe2_o.reset_source(); - sub_state <= SUB0; - sub_if_fe_o[0].reset_source(); - end else begin - - if (sub_if_fe_o[0].val && sub_if_fe_o[0].rdy) sub_if_fe_o[0].val <= 0; - if (sub_if_fe2_o.val && sub_if_fe2_o.rdy) sub_if_fe2_o.val <= 0; - - // One process to parse inputs and send them to the subtractor - case(sub_state) - SUB0: begin - if (~sub_if_fe_o[0].val || (sub_if_fe_o[0].val && sub_if_fe_o[0].rdy)) begin - sub_if_fe_o[0].copy_if({sub_if_fe2_i.dat[0 +: $bits(FE_TYPE)], - sub_if_fe2_i.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)]}, - sub_if_fe2_i.val, 1, 1, sub_if_fe2_i.err, sub_if_fe2_i.mod, sub_if_fe2_i.ctl); - sub_if_fe_o[0].ctl[ADD_CTL_BIT] <= 0; - if (sub_if_fe2_i.val) sub_state <= SUB1; - end - end - SUB1: begin - if (~sub_if_fe_o[0].val || (sub_if_fe_o[0].val && sub_if_fe_o[0].rdy)) begin - sub_if_fe_o[0].copy_if({sub_if_fe2_i.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)], - sub_if_fe2_i.dat[$bits(FE_TYPE) + $bits(FE2_TYPE) +: $bits(FE_TYPE)]}, - sub_if_fe2_i.val, 1, 1, sub_if_fe2_i.err, sub_if_fe2_i.mod, sub_if_fe2_i.ctl); - sub_if_fe_o[0].ctl[ADD_CTL_BIT] <= 1; - if (sub_if_fe2_i.val) sub_state <= SUB0; - end - end - endcase - - // One process to assign outputs - if (~sub_if_fe2_o.val || (sub_if_fe2_o.val && sub_if_fe2_o.rdy)) begin - sub_if_fe2_o.ctl <= sub_if_fe_i[0].ctl; - if (sub_if_fe_i[0].ctl[ADD_CTL_BIT] == 0) begin - if (sub_if_fe_i[0].val) - sub_if_fe2_o.dat[0 +: $bits(FE_TYPE)] <= sub_if_fe_i[0].dat; - end else begin - sub_if_fe2_o.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= sub_if_fe_i[0].dat; - sub_if_fe2_o.val <= sub_if_fe_i[0].val; - end - end - end -end - -// Multiplications are calculated as (a + bi)x(a' +b'i) = (aa' - bb') + (ab' + a'b)i -// First 4 multiplications are issued, then 1 add and 1 subtraction (so we need arbitrator) -enum {MUL0, MUL1, MUL2, MUL3} mul_state; -logic [1:0] add_sub_val; -always_comb begin - mul_if_fe2_i.rdy = mul_state == MUL3 && (~o_mul_if.val || (o_mul_if.val && o_mul_if.rdy)); - i_mul_if.rdy = (mul_state == MUL1 || mul_state == MUL2) ? (~add_if_fe_o[1].val || (add_if_fe_o[1].val && add_if_fe_o[1].rdy)) : - (~sub_if_fe_o[1].val || (sub_if_fe_o[1].val && sub_if_fe_o[1].rdy)); - - // TODO check - - mul_if_fe2_o.val = &add_sub_val; - sub_if_fe_i[1].rdy = ~add_sub_val[1] || (~mul_if_fe2_o.val || (mul_if_fe2_o.val && mul_if_fe2_o.rdy)); - add_if_fe_i[1].rdy = ~add_sub_val[0] || (~mul_if_fe2_o.val || (mul_if_fe2_o.val && mul_if_fe2_o.rdy)); -end - -always_ff @ (posedge i_clk) begin - if (i_rst) begin - add_sub_val <= 0; - mul_if_fe2_o.sop <= 0; - mul_if_fe2_o.eop <= 0; - mul_if_fe2_o.ctl <= 0; - mul_if_fe2_o.dat <= 0; - mul_if_fe2_o.mod <= 0; - mul_state <= MUL0; - o_mul_if.reset_source(); - sub_if_fe_o[1].copy_if(0, 0, 1, 1, 0, 0, 0); - add_if_fe_o[1].copy_if(0, 0, 1, 1, 0, 0, 0); - end else begin - - if (mul_if_fe2_o.val && mul_if_fe2_o.rdy) begin - add_sub_val <= 0; - end - if (o_mul_if.val && o_mul_if.rdy) o_mul_if.val <= 0; - if (sub_if_fe_o[1].val && sub_if_fe_o[1].rdy) sub_if_fe_o[1].val <= 0; - if (add_if_fe_o[1].val && add_if_fe_o[1].rdy) add_if_fe_o[1].val <= 0; - - // One process to parse inputs and send them to the multiplier - if (~o_mul_if.val || (o_mul_if.val && o_mul_if.rdy)) begin - case (mul_state) - MUL0: begin - o_mul_if.copy_if({mul_if_fe2_i.dat[0 +: $bits(FE_TYPE)], - mul_if_fe2_i.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)]}, - mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl); - o_mul_if.ctl[ADD_CTL_BIT +: 2] <= 0; - if (mul_if_fe2_i.val) mul_state <= MUL1; - end - MUL1: begin - o_mul_if.copy_if({mul_if_fe2_i.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)], - mul_if_fe2_i.dat[$bits(FE2_TYPE) + $bits(FE_TYPE) +: $bits(FE_TYPE)]}, - mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl); - o_mul_if.ctl[ADD_CTL_BIT +: 2] <= 1; - if (mul_if_fe2_i.val) mul_state <= MUL2; - end - MUL2: begin - o_mul_if.copy_if({mul_if_fe2_i.dat[0 +: $bits(FE_TYPE)], - mul_if_fe2_i.dat[$bits(FE2_TYPE) + $bits(FE_TYPE) +: $bits(FE_TYPE)]}, - mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl); - o_mul_if.ctl[ADD_CTL_BIT +: 2] <= 2; - if (mul_if_fe2_i.val) mul_state <= MUL3; - end - MUL3: begin - o_mul_if.copy_if({mul_if_fe2_i.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)], - mul_if_fe2_i.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)]}, - mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl); - o_mul_if.ctl[ADD_CTL_BIT +: 2] <= 3; - if (mul_if_fe2_i.val) mul_state <= MUL0; - end - endcase - end - - // Process multiplications and do subtraction - if (~sub_if_fe_o[1].val || (sub_if_fe_o[1].val && sub_if_fe_o[1].rdy)) begin - if (i_mul_if.ctl[ADD_CTL_BIT +: 2] == 0) begin - if (i_mul_if.val) sub_if_fe_o[1].dat[0 +: $bits(FE_TYPE)] <= i_mul_if.dat; - end - if (i_mul_if.ctl[ADD_CTL_BIT +: 2] == 1) begin - sub_if_fe_o[1].val <= i_mul_if.val; - sub_if_fe_o[1].dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= i_mul_if.dat; - end - sub_if_fe_o[1].ctl <= i_mul_if.ctl; - end - - // Process multiplications and do addition - if (~add_if_fe_o[1].val || (add_if_fe_o[1].val && add_if_fe_o[1].rdy)) begin - if (i_mul_if.ctl[ADD_CTL_BIT +: 2] == 2) begin - if (i_mul_if.val) add_if_fe_o[1].dat[0 +: $bits(FE_TYPE)] <= i_mul_if.dat; - end - if (i_mul_if.ctl[ADD_CTL_BIT +: 2] == 3) begin - add_if_fe_o[1].val <= i_mul_if.val; - add_if_fe_o[1].dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= i_mul_if.dat; - end - add_if_fe_o[1].ctl <= i_mul_if.ctl; - end - - // One process to assign output - if (~mul_if_fe2_o.val || (mul_if_fe2_o.val && mul_if_fe2_o.rdy)) begin - mul_if_fe2_o.ctl <= add_if_fe_i[1].ctl; - if (~add_sub_val[0]) begin - mul_if_fe2_o.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= add_if_fe_i[1].dat; - add_sub_val[0] <= add_if_fe_i[1].val; - end - if (~add_sub_val[1]) begin - mul_if_fe2_o.dat[0 +: $bits(FE_TYPE)] <= sub_if_fe_i[1].dat; - add_sub_val[1] <= sub_if_fe_i[1].val; - end - end - end -end ec_point_add #( .FP_TYPE ( FP2_TYPE ), @@ -304,34 +78,25 @@ ec_point_add ( .i_sub_if ( sub_if_fe2_o ) ); -resource_share # ( - .NUM_IN ( 2 ), - .OVR_WRT_BIT ( 10 ), - .PIPELINE_IN ( 0 ), - .PIPELINE_OUT ( 0 ) +ec_fe2_arithmetic +#( + .FE_TYPE ( FE_TYPE ), + .FE2_TYPE ( FE2_TYPE ) ) -resource_share_sub ( +ec_fe2_arithmetic ( .i_clk ( i_clk ), .i_rst ( i_rst ), - .i_axi ( sub_if_fe_o ), - .o_res ( o_sub_if ), - .i_res ( i_sub_if ), - .o_axi ( sub_if_fe_i ) + .o_mul_fe_if ( o_mul_if ), + .i_mul_fe_if ( i_mul_if ), + .o_add_fe_if ( o_add_if ), + .i_add_fe_if ( i_add_if ), + .o_sub_fe_if ( o_sub_if ), + .i_sub_fe_if ( i_sub_if ), + .o_mul_fe2_if ( mul_if_fe2_o ), + .i_mul_fe2_if ( mul_if_fe2_i ), + .o_add_fe2_if ( add_if_fe2_o ), + .i_add_fe2_if ( add_if_fe2_i ), + .o_sub_fe2_if ( sub_if_fe2_o ), + .i_sub_fe2_if ( sub_if_fe2_i ) ); - -resource_share # ( - .NUM_IN ( 2 ), - .OVR_WRT_BIT ( 10 ), - .PIPELINE_IN ( 0 ), - .PIPELINE_OUT ( 0 ) -) -resource_share_add ( - .i_clk ( i_clk ), - .i_rst ( i_rst ), - .i_axi ( add_if_fe_o ), - .o_res ( o_add_if ), - .i_res ( i_add_if ), - .o_axi ( add_if_fe_i ) -); - endmodule \ No newline at end of file diff --git a/ip_cores/ec/src/tb/ec_fp2_point_add_tb.sv b/ip_cores/ec/src/tb/ec_fp2_point_add_tb.sv index 519f977..b107b7d 100644 --- a/ip_cores/ec/src/tb/ec_fp2_point_add_tb.sv +++ b/ip_cores/ec/src/tb/ec_fp2_point_add_tb.sv @@ -184,12 +184,14 @@ begin p_out = get_dat; - $display("Expected:"); - print_fp2_jb_point(p_exp); $display("Was:"); print_fp2_jb_point(p_out); if (p_exp != p_out) begin + $display("Expected (affine):"); + print_fp2_jb_point(fp2_to_affine(p_exp)); + $display("Was (affine):"); + print_fp2_jb_point(fp2_to_affine(p_out)); $fatal(1, "%m %t ERROR: test_0 point was wrong", $time); end @@ -198,28 +200,12 @@ begin end endtask; -fp2_jb_point_t one_point = '{x:FE2_one, y:FE2_one, z:FE2_one}; -fp2_jb_point_t two_point = '{x:'{c1:381'd2, c0:381'd2}, y:'{c1:381'd2, c0:381'd2}, z:FE2_one}; - -fp2_jb_point_t g2_point_dbl = '{x:'{c0:381'd2004569552561385659566932407633616698939912674197491321901037400001042336021538860336682240104624979660689237563240, - c1:381'd3955604752108186662342584665293438104124851975447411601471797343177761394177049673802376047736772242152530202962941}, - y:'{c0:381'd978142457653236052983988388396292566217089069272380812666116929298652861694202207333864830606577192738105844024927, - c1:381'd2248711152455689790114026331322133133284196260289964969465268080325775757898907753181154992709229860715480504777099}, - z:'{c0:381'd3145673658656250241340817105688138628074744674635286712244193301767486380727788868972774468795689607869551989918920, - c1:381'd968254395890002185853925600926112283510369004782031018144050081533668188797348331621250985545304947843412000516197}}; - initial begin out_if.rdy = 0; in_if.val = 0; #(40*CLK_PERIOD); - test(g2_point, g2_point_dbl, add_fp2_jb_point(g2_point, g2_point_dbl) - /*'{x:'{c0:381'd2260316515795278483227354417550273673937385151660885802822200676798473320332386191812885909324314180009401590033496, - c1:381'd3157705674295752746643045744187038651144673626385096899515739718638356953289853357506730468806346866010850469607484}, - y:'{c0:381'd3116406908094559010983016654096953279342014296159903648784769141704444407188785914041577477129027384530629024324101, - c1:381'd624739198846365065958511422206549337298084868949577950118937104460230094422413163466712508875838914229203179007739}, - z:'{c0:381'd1372365362697527824661960056804989242334959973433633343888520294361286317391588271032081626721722944066233963018813, - c1:381'd135340553306575460225879133388402231094623862625345515492709522456301372944095308361691014711792956665222682354141}}*/); + test(g2_point, dbl_fp2_jb_point(g2_point), add_fp2_jb_point(g2_point, dbl_fp2_jb_point(g2_point))); #1us $finish(); end diff --git a/ip_cores/ec/src/tb/ec_point_add_tb.sv b/ip_cores/ec/src/tb/ec_point_add_tb.sv index dabcd76..0b1aa17 100644 --- a/ip_cores/ec/src/tb/ec_point_add_tb.sv +++ b/ip_cores/ec/src/tb/ec_point_add_tb.sv @@ -82,8 +82,8 @@ ec_point_add ( .o_err ( out_if.err ), .i_rdy ( out_if.rdy ), .o_val ( out_if.val ) , - .o_mult_if ( mult_in_if ), - .i_mult_if ( mult_out_if ), + .o_mul_if ( mult_in_if ), + .i_mul_if ( mult_out_if ), .o_add_if ( add_in_if ), .i_add_if ( add_out_if ), .o_sub_if ( sub_in_if ), diff --git a/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv b/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv index 538c643..8b3dc2b 100644 --- a/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv +++ b/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv @@ -99,7 +99,7 @@ package bls12_381_pkg; endfunction function jb_point_t add_jb_point(jb_point_t p1, p2); - logic signed [1023:0] A, U1, U2, S1, S2, H, H3, R; + fe_t A, U1, U2, S1, S2, H, H3, R; if (p1.z == 0) return p2; if (p2.z == 0) return p1; @@ -107,33 +107,33 @@ package bls12_381_pkg; if (p1.y == p2.y && p1.x == p2.x) return (dbl_jb_point(p1)); - U1 = (p1.x*p2.z) % P; - U1 = (U1*p2.z) % P; + U1 = fe_mul(p1.x, p2.z); + U1 = fe_mul(U1, p2.z); - U2 = (p2.x*p1.z) % P; - U2 = (U2 *p1.z) % P; - S1 = p1.y *p2.z % P; - S1 = (S1*p2.z % P) *p2.z % P; - S2 = p2.y * p1.z % P; - S2 = (S2*p1.z % P) *p1.z % P; + U2 = fe_mul(p2.x, p1.z); + U2 = fe_mul(U2, p1.z); + S1 = fe_mul(p1.y, p2.z); + S1 = fe_mul(fe_mul(S1, p2.z), p2.z); + S2 = fe_mul(p2.y, p1.z); + S2 = fe_mul(fe_mul(S2, p1.z), p1.z); - H = U2 + (U1 > U2 ? P : 0) -U1; - R = S2 + (S1 > S2 ? P : 0) -S1; - H3 = ((H * H %P ) * H ) % P; - A = (((2*U1 % P) *H % P) * H % P); + H = fe_sub(U2, U1); + R = fe_sub(S2, S1); + H3 = fe_mul(fe_mul(H, H), H); + A = fe_mul(fe_mul(fe_mul(2, U1), H), H); - add_jb_point.z = ((H * p1.z % P) * p2.z) % P; - add_jb_point.x = R*R % P; + add_jb_point.z = fe_mul(fe_mul(H, p1.z), p2.z); + add_jb_point.x = fe_mul(R, R); - add_jb_point.x = add_jb_point.x + (H3 > add_jb_point.x ? P : 0) - H3; - add_jb_point.x = add_jb_point.x + (A > add_jb_point.x ? P : 0) - A; + add_jb_point.x = fe_sub(add_jb_point.x, H3); + add_jb_point.x = fe_sub(add_jb_point.x, A); - A = (U1*H % P) * H % P; - A = A + (add_jb_point.x > A ? P : 0) - add_jb_point.x; - A = A*R % P; - add_jb_point.y = S1*H3 % P; + A = fe_mul(fe_mul(U1, H), H); + A = fe_sub(A, add_jb_point.x); + A = fe_mul(A, R); + add_jb_point.y = fe_mul(S1, H3); - add_jb_point.y = A + (add_jb_point.y > A ? P : 0) - add_jb_point.y; + add_jb_point.y = fe_sub(A, add_jb_point.y); endfunction @@ -223,39 +223,43 @@ package bls12_381_pkg; endfunction function fp2_jb_point_t add_fp2_jb_point(fp2_jb_point_t p1, p2); - fe2_t A, U1, U2, S1, S2, H, H3, R; + fe2_t A, U1, U2, S1, S2, H, H3, R; + + if (p1.z == 0) return p2; + if (p2.z == 0) return p1; + + if (p1.y == p2.y && p1.x == p2.x) + return (dbl_fp2_jb_point(p1)); + + U1 = fe2_mul(p1.x, p2.z); + U1 = fe2_mul(U1, p2.z); + + U2 = fe2_mul(p2.x, p1.z); + U2 = fe2_mul(U2, p1.z); + S1 = fe2_mul(p1.y, p2.z); + S1 = fe2_mul(fe2_mul(S1, p2.z), p2.z); + S2 = fe2_mul(p2.y, p1.z); + S2 = fe2_mul(fe2_mul(S2, p1.z), p1.z); + + H = fe2_sub(U2, U1); + R = fe2_sub(S2, S1); + H3 = fe2_mul(fe2_mul(H, H), H); + A = fe2_mul(fe2_mul(fe2_mul(2, U1), H), H); + + add_fp2_jb_point.z = fe2_mul(fe2_mul(H, p1.z), p2.z); + add_fp2_jb_point.x = fe2_mul(R, R); + + add_fp2_jb_point.x = fe2_sub(add_fp2_jb_point.x, H3); + add_fp2_jb_point.x = fe2_sub(add_fp2_jb_point.x, A); + + A = fe2_mul(fe2_mul(U1, H), H); + A = fe2_sub(A, add_fp2_jb_point.x); + A = fe2_mul(A, R); + add_fp2_jb_point.y = fe2_mul(S1, H3); + + add_fp2_jb_point.y = fe2_sub(A, add_fp2_jb_point.y); - if (p1.y == p2.y && p1.x == p2.x) - return (dbl_fp2_jb_point(p1)); - - U1 = fe2_mul(p1.x, p2.z); - U1 = fe2_mul(U1, p2.z); - - U2 = fe2_mul(p2.x, p1.z); - U2 = fe2_mul(U2, p1.z); - S1 = fe2_mul(p1.y, p2.z); - S1 = fe2_mul(fe2_mul(S1, p2.z), p2.z); - S2 = fe2_mul(p2.y, p1.z); - S2 = fe2_mul(fe2_mul(S2, p1.z), p1.z); - H = fe2_sub(U2, U1); - R = fe2_sub(S2, S1); - H3 = fe2_mul(fe2_mul(H, H), H); - A = fe2_mul(fe2_mul(fe2_mul(2, U1), H), H); - - add_fp2_jb_point.z = fe2_mul(fe2_mul(H, p1.z), p2.z); - add_fp2_jb_point.x = fe2_mul(R, R); - - add_fp2_jb_point.x = fe2_add(add_fp2_jb_point.x, H3); - add_fp2_jb_point.x = fe2_sub(add_fp2_jb_point.x, A); - - A = fe2_mul(fe2_mul(U1, H), H); - A = fe2_sub(A, add_fp2_jb_point.x); - A = fe2_mul(A, R); - add_fp2_jb_point.y = fe2_mul(S1, H3); - - add_fp2_jb_point.y = fe2_sub(A, add_fp2_jb_point.y); - - endfunction + endfunction function jb_point_t point_mult(logic [DAT_BITS-1:0] c, jb_point_t p); jb_point_t result, addend;