Fp2 point addition

This commit is contained in:
bsdevlin 2019-06-12 18:46:05 +08:00
parent 18115bc7eb
commit c3e527b87e
4 changed files with 82 additions and 327 deletions

View File

@ -49,236 +49,10 @@ module ec_fp2_point_add
// Fp2-level streams used by this module. The *_fe2_i streams are 2*$bits(FE2_TYPE) wide (two packed FE2
// operands, the second one starting at bit offset $bits(FE2_TYPE)); the *_fe2_o streams carry one FE2 result.
if_axi_stream #(.DAT_BITS(2*$bits(FE2_TYPE)), .CTL_BITS(8)) mul_if_fe2_i(i_clk);
if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(8)) mul_if_fe2_o(i_clk);
// Bit position inside the Fp-level ctl field used to tag which partial operation a request/result belongs to.
// The add/sub paths use the single bit [ADD_CTL_BIT]; the multiply path uses the two bits [ADD_CTL_BIT +: 2].
// NOTE(review): the Fp2 streams are declared with CTL_BITS(8), so this bit only exists on the 16-bit Fp streams.
localparam ADD_CTL_BIT = 8;
if_axi_stream #(.DAT_BITS(2*$bits(FE2_TYPE)), .CTL_BITS(8)) add_if_fe2_i(i_clk);
if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(8)) add_if_fe2_o(i_clk);
// Fp-level adder streams, two of each: index 0 is driven by the Fp2 addition logic, index 1 by the Fp2
// multiplication logic. *_fe_o carries a request (two FE operands), *_fe_i carries a single FE result.
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16)) add_if_fe_i [2] (i_clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) add_if_fe_o [2] (i_clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE2_TYPE)), .CTL_BITS(8)) sub_if_fe2_i(i_clk);
if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(8)) sub_if_fe2_o(i_clk);
// Fp-level subtractor streams, same arrangement as the adder streams above.
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16)) sub_if_fe_i [2] (i_clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) sub_if_fe_o [2] (i_clk);
// Point additions are simple additions on each of the Fp elements.
// add_state selects which operand pair of the Fp2 input word is forwarded to the Fp adder:
// ADD0 forwards the FE slices at bit offsets 0 and $bits(FE2_TYPE), ADD1 forwards the slices
// one $bits(FE_TYPE) above those.
enum {ADD0, ADD1} add_state;
always_comb begin
// The Fp2 input word is only acknowledged in ADD1 (after its second operand pair has been taken), and only
// while the Fp adder request register is empty or is being accepted this cycle.
add_if_fe2_i.rdy = add_state == ADD1 && (~add_if_fe_o[0].val || (add_if_fe_o[0].val && add_if_fe_o[0].rdy));
// Fp adder results are accepted whenever the Fp2 output register is empty or is being read this cycle.
add_if_fe_i[0].rdy = ~add_if_fe2_o.val || (add_if_fe2_o.val && add_if_fe2_o.rdy);
end
// Sequential logic for Fp2 addition: splits each Fp2 request into two Fp adder requests (tagged with
// ctl[ADD_CTL_BIT] = 0 then 1) and reassembles the two Fp results into one Fp2 output word.
always_ff @ (posedge i_clk) begin
if (i_rst) begin
add_if_fe2_o.reset_source();
add_state <= ADD0;
add_if_fe_o[0].reset_source();
end else begin
// Default: drop val once a word has been handshaked. These come first so that the non-blocking
// assignments further down in this block take precedence when a new word is loaded in the same cycle.
if (add_if_fe_o[0].val && add_if_fe_o[0].rdy) add_if_fe_o[0].val <= 0;
if (add_if_fe2_o.val && add_if_fe2_o.rdy) add_if_fe2_o.val <= 0;
// One process to parse inputs and send them to the adder
case(add_state)
ADD0: begin
// Only load a new request when the Fp request register is free or being accepted.
if (~add_if_fe_o[0].val || (add_if_fe_o[0].val && add_if_fe_o[0].rdy)) begin
// First operand pair: FE slice at offset 0 and FE slice at offset $bits(FE2_TYPE).
// NOTE(review): the two literal 1 arguments are presumably sop/eop - confirm against if_axi_stream.copy_if.
add_if_fe_o[0].copy_if({add_if_fe2_i.dat[0 +: $bits(FE_TYPE)],
add_if_fe2_i.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)]},
add_if_fe2_i.val, 1, 1, add_if_fe2_i.err, add_if_fe2_i.mod, add_if_fe2_i.ctl);
// Tag 0: the result belongs in the lower FE slice of the Fp2 output.
add_if_fe_o[0].ctl[ADD_CTL_BIT] <= 0;
if (add_if_fe2_i.val) add_state <= ADD1;
end
end
ADD1: begin
if (~add_if_fe_o[0].val || (add_if_fe_o[0].val && add_if_fe_o[0].rdy)) begin
// Second operand pair: the FE slices one $bits(FE_TYPE) above those used in ADD0.
add_if_fe_o[0].copy_if({add_if_fe2_i.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)],
add_if_fe2_i.dat[$bits(FE2_TYPE)+$bits(FE_TYPE) +: $bits(FE_TYPE)]},
add_if_fe2_i.val, 1, 1, add_if_fe2_i.err, add_if_fe2_i.mod, add_if_fe2_i.ctl);
// Tag 1: the result belongs in the upper FE slice and completes the Fp2 output.
add_if_fe_o[0].ctl[ADD_CTL_BIT] <= 1;
if (add_if_fe2_i.val) add_state <= ADD0;
end
end
endcase
// One process to assign outputs
if (~add_if_fe2_o.val || (add_if_fe2_o.val && add_if_fe2_o.rdy)) begin
add_if_fe2_o.ctl <= add_if_fe_i[0].ctl;
if (add_if_fe_i[0].ctl[ADD_CTL_BIT] == 0) begin
// Tag-0 result: store the lower slice only; the output does not become valid yet.
if (add_if_fe_i[0].val)
add_if_fe2_o.dat[0 +: $bits(FE_TYPE)] <= add_if_fe_i[0].dat;
end else begin
// Tag-1 result: store the upper slice; the output becomes valid when this result is valid.
add_if_fe2_o.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= add_if_fe_i[0].dat;
add_if_fe2_o.val <= add_if_fe_i[0].val;
end
end
end
end
// Point subtractions are simple subtractions on each of the Fp elements.
// sub_state selects which operand pair of the Fp2 input word is forwarded to the Fp subtractor:
// SUB0 forwards the FE slices at bit offsets 0 and $bits(FE2_TYPE), SUB1 forwards the slices
// one $bits(FE_TYPE) above those.
enum {SUB0, SUB1} sub_state;
always_comb begin
  // The Fp2 input word is only acknowledged in SUB1 (after its second operand pair has been taken), and
  // only while the Fp subtractor request register is empty or is being accepted this cycle.
  // Compare against this FSM's own label (SUB1) rather than the adder FSM's ADD1: the two only matched
  // because both enums happen to encode their second label as 1.
  sub_if_fe2_i.rdy = sub_state == SUB1 && (~sub_if_fe_o[0].val || (sub_if_fe_o[0].val && sub_if_fe_o[0].rdy));
  // Fp subtractor results are accepted whenever the Fp2 output register is empty or is being read this cycle.
  sub_if_fe_i[0].rdy = ~sub_if_fe2_o.val || (sub_if_fe2_o.val && sub_if_fe2_o.rdy);
end
// Sequential logic for Fp2 subtraction: splits each Fp2 request into two Fp subtractor requests (tagged
// with ctl[ADD_CTL_BIT] = 0 then 1) and reassembles the two Fp results into one Fp2 output word.
always_ff @ (posedge i_clk) begin
if (i_rst) begin
sub_if_fe2_o.reset_source();
sub_state <= SUB0;
sub_if_fe_o[0].reset_source();
end else begin
// Default: drop val once a word has been handshaked. These come first so that the non-blocking
// assignments further down in this block take precedence when a new word is loaded in the same cycle.
if (sub_if_fe_o[0].val && sub_if_fe_o[0].rdy) sub_if_fe_o[0].val <= 0;
if (sub_if_fe2_o.val && sub_if_fe2_o.rdy) sub_if_fe2_o.val <= 0;
// One process to parse inputs and send them to the subtractor
case(sub_state)
SUB0: begin
// Only load a new request when the Fp request register is free or being accepted.
if (~sub_if_fe_o[0].val || (sub_if_fe_o[0].val && sub_if_fe_o[0].rdy)) begin
// First operand pair: FE slice at offset 0 and FE slice at offset $bits(FE2_TYPE).
// NOTE(review): the two literal 1 arguments are presumably sop/eop - confirm against if_axi_stream.copy_if.
sub_if_fe_o[0].copy_if({sub_if_fe2_i.dat[0 +: $bits(FE_TYPE)],
sub_if_fe2_i.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)]},
sub_if_fe2_i.val, 1, 1, sub_if_fe2_i.err, sub_if_fe2_i.mod, sub_if_fe2_i.ctl);
// Tag 0: the result belongs in the lower FE slice of the Fp2 output.
sub_if_fe_o[0].ctl[ADD_CTL_BIT] <= 0;
if (sub_if_fe2_i.val) sub_state <= SUB1;
end
end
SUB1: begin
if (~sub_if_fe_o[0].val || (sub_if_fe_o[0].val && sub_if_fe_o[0].rdy)) begin
// Second operand pair: the FE slices one $bits(FE_TYPE) above those used in SUB0.
sub_if_fe_o[0].copy_if({sub_if_fe2_i.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)],
sub_if_fe2_i.dat[$bits(FE_TYPE) + $bits(FE2_TYPE) +: $bits(FE_TYPE)]},
sub_if_fe2_i.val, 1, 1, sub_if_fe2_i.err, sub_if_fe2_i.mod, sub_if_fe2_i.ctl);
// Tag 1: the result belongs in the upper FE slice and completes the Fp2 output.
sub_if_fe_o[0].ctl[ADD_CTL_BIT] <= 1;
if (sub_if_fe2_i.val) sub_state <= SUB0;
end
end
endcase
// One process to assign outputs
if (~sub_if_fe2_o.val || (sub_if_fe2_o.val && sub_if_fe2_o.rdy)) begin
sub_if_fe2_o.ctl <= sub_if_fe_i[0].ctl;
if (sub_if_fe_i[0].ctl[ADD_CTL_BIT] == 0) begin
// Tag-0 result: store the lower slice only; the output does not become valid yet.
if (sub_if_fe_i[0].val)
sub_if_fe2_o.dat[0 +: $bits(FE_TYPE)] <= sub_if_fe_i[0].dat;
end else begin
// Tag-1 result: store the upper slice; the output becomes valid when this result is valid.
sub_if_fe2_o.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= sub_if_fe_i[0].dat;
sub_if_fe2_o.val <= sub_if_fe_i[0].val;
end
end
end
end
// Multiplications are calculated as (a + bi)x(a' +b'i) = (aa' - bb') + (ab' + a'b)i
// First 4 multiplications are issued, then 1 add and 1 subtraction (so we need an arbiter,
// because the Fp adder and subtractor are shared with the Fp2 add/sub logic on index 0).
// mul_state selects which of the four Fp products is issued next.
enum {MUL0, MUL1, MUL2, MUL3} mul_state;
// add_sub_val[0]: the adder result has been captured into the Fp2 product output.
// add_sub_val[1]: the subtractor result has been captured into the Fp2 product output.
logic [1:0] add_sub_val;
always_comb begin
// The Fp2 input word is only acknowledged in MUL3 (when its last product is issued) and only while the
// multiplier request register is empty or is being accepted this cycle.
mul_if_fe2_i.rdy = mul_state == MUL3 && (~o_mul_if.val || (o_mul_if.val && o_mul_if.rdy));
// NOTE(review): back-pressure on multiplier results is selected by the request-side state mul_state,
// while the registered logic routes results by the tag in i_mul_if.ctl[ADD_CTL_BIT +: 2]
// (0/1 -> subtractor, 2/3 -> adder). Confirm these stay aligned for the multiplier latency in use.
i_mul_if.rdy = (mul_state == MUL1 || mul_state == MUL2) ? (~add_if_fe_o[1].val || (add_if_fe_o[1].val && add_if_fe_o[1].rdy)) :
(~sub_if_fe_o[1].val || (sub_if_fe_o[1].val && sub_if_fe_o[1].rdy));
// TODO check
// The Fp2 product is valid once both the adder and the subtractor results have been captured.
mul_if_fe2_o.val = &add_sub_val;
// Each result stream is accepted while its half has not been captured yet, or while the Fp2 product
// output is empty or is being read this cycle.
sub_if_fe_i[1].rdy = ~add_sub_val[1] || (~mul_if_fe2_o.val || (mul_if_fe2_o.val && mul_if_fe2_o.rdy));
add_if_fe_i[1].rdy = ~add_sub_val[0] || (~mul_if_fe2_o.val || (mul_if_fe2_o.val && mul_if_fe2_o.rdy));
end
// Sequential logic for Fp2 multiplication. Three stages share this block:
//  1. issue four Fp multiplications per Fp2 request, tagged 0..3 in ctl[ADD_CTL_BIT +: 2];
//  2. pair the products into one subtractor request (tags 0 and 1) and one adder request (tags 2 and 3);
//  3. collect the subtractor and adder results into the lower and upper FE slices of the Fp2 product.
always_ff @ (posedge i_clk) begin
if (i_rst) begin
add_sub_val <= 0;
// mul_if_fe2_o.val is driven combinationally from add_sub_val, so the remaining fields are reset one by one.
mul_if_fe2_o.sop <= 0;
mul_if_fe2_o.eop <= 0;
mul_if_fe2_o.ctl <= 0;
mul_if_fe2_o.dat <= 0;
mul_if_fe2_o.mod <= 0;
mul_state <= MUL0;
o_mul_if.reset_source();
sub_if_fe_o[1].copy_if(0, 0, 1, 1, 0, 0, 0);
add_if_fe_o[1].copy_if(0, 0, 1, 1, 0, 0, 0);
end else begin
// Once the Fp2 product has been read, both capture flags are released together.
if (mul_if_fe2_o.val && mul_if_fe2_o.rdy) begin
add_sub_val <= 0;
end
// Default: drop val once a word has been handshaked; later non-blocking assignments in this block
// take precedence when a new word is loaded in the same cycle.
if (o_mul_if.val && o_mul_if.rdy) o_mul_if.val <= 0;
if (sub_if_fe_o[1].val && sub_if_fe_o[1].rdy) sub_if_fe_o[1].val <= 0;
if (add_if_fe_o[1].val && add_if_fe_o[1].rdy) add_if_fe_o[1].val <= 0;
// One process to parse inputs and send them to the multiplier
if (~o_mul_if.val || (o_mul_if.val && o_mul_if.rdy)) begin
case (mul_state)
MUL0: begin
// Product 0: slice at offset 0 times slice at offset $bits(FE2_TYPE).
o_mul_if.copy_if({mul_if_fe2_i.dat[0 +: $bits(FE_TYPE)],
mul_if_fe2_i.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)]},
mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl);
o_mul_if.ctl[ADD_CTL_BIT +: 2] <= 0;
if (mul_if_fe2_i.val) mul_state <= MUL1;
end
MUL1: begin
// Product 1: slice at offset $bits(FE_TYPE) times slice at offset $bits(FE2_TYPE)+$bits(FE_TYPE).
o_mul_if.copy_if({mul_if_fe2_i.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)],
mul_if_fe2_i.dat[$bits(FE2_TYPE) + $bits(FE_TYPE) +: $bits(FE_TYPE)]},
mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl);
o_mul_if.ctl[ADD_CTL_BIT +: 2] <= 1;
if (mul_if_fe2_i.val) mul_state <= MUL2;
end
MUL2: begin
// Product 2 (first cross term): slice at offset 0 times slice at offset $bits(FE2_TYPE)+$bits(FE_TYPE).
o_mul_if.copy_if({mul_if_fe2_i.dat[0 +: $bits(FE_TYPE)],
mul_if_fe2_i.dat[$bits(FE2_TYPE) + $bits(FE_TYPE) +: $bits(FE_TYPE)]},
mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl);
o_mul_if.ctl[ADD_CTL_BIT +: 2] <= 2;
if (mul_if_fe2_i.val) mul_state <= MUL3;
end
MUL3: begin
// Product 3 (second cross term): slice at offset $bits(FE_TYPE) times slice at offset $bits(FE2_TYPE).
o_mul_if.copy_if({mul_if_fe2_i.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)],
mul_if_fe2_i.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)]},
mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl);
o_mul_if.ctl[ADD_CTL_BIT +: 2] <= 3;
if (mul_if_fe2_i.val) mul_state <= MUL0;
end
endcase
end
// Process multiplications and do subtraction
if (~sub_if_fe_o[1].val || (sub_if_fe_o[1].val && sub_if_fe_o[1].rdy)) begin
// Tag 0 fills the lower operand slice of the subtractor request without making it valid.
if (i_mul_if.ctl[ADD_CTL_BIT +: 2] == 0) begin
if (i_mul_if.val) sub_if_fe_o[1].dat[0 +: $bits(FE_TYPE)] <= i_mul_if.dat;
end
// Tag 1 fills the upper operand slice and makes the request valid when the product is valid.
if (i_mul_if.ctl[ADD_CTL_BIT +: 2] == 1) begin
sub_if_fe_o[1].val <= i_mul_if.val;
sub_if_fe_o[1].dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= i_mul_if.dat;
end
sub_if_fe_o[1].ctl <= i_mul_if.ctl;
end
// Process multiplications and do addition
if (~add_if_fe_o[1].val || (add_if_fe_o[1].val && add_if_fe_o[1].rdy)) begin
// Tag 2 fills the lower operand slice of the adder request without making it valid.
if (i_mul_if.ctl[ADD_CTL_BIT +: 2] == 2) begin
if (i_mul_if.val) add_if_fe_o[1].dat[0 +: $bits(FE_TYPE)] <= i_mul_if.dat;
end
// Tag 3 fills the upper operand slice and makes the request valid when the product is valid.
if (i_mul_if.ctl[ADD_CTL_BIT +: 2] == 3) begin
add_if_fe_o[1].val <= i_mul_if.val;
add_if_fe_o[1].dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= i_mul_if.dat;
end
add_if_fe_o[1].ctl <= i_mul_if.ctl;
end
// One process to assign output
if (~mul_if_fe2_o.val || (mul_if_fe2_o.val && mul_if_fe2_o.rdy)) begin
mul_if_fe2_o.ctl <= add_if_fe_i[1].ctl;
// The adder result goes to the upper FE slice of the Fp2 product; flag [0] records that it has arrived.
if (~add_sub_val[0]) begin
mul_if_fe2_o.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= add_if_fe_i[1].dat;
add_sub_val[0] <= add_if_fe_i[1].val;
end
// The subtractor result goes to the lower FE slice; flag [1] records that it has arrived.
if (~add_sub_val[1]) begin
mul_if_fe2_o.dat[0 +: $bits(FE_TYPE)] <= sub_if_fe_i[1].dat;
add_sub_val[1] <= sub_if_fe_i[1].val;
end
end
end
end
ec_point_add #(
.FP_TYPE ( FP2_TYPE ),
@ -304,34 +78,25 @@ ec_point_add (
.i_sub_if ( sub_if_fe2_o )
);
resource_share # (
.NUM_IN ( 2 ),
.OVR_WRT_BIT ( 10 ),
.PIPELINE_IN ( 0 ),
.PIPELINE_OUT ( 0 )
ec_fe2_arithmetic
#(
.FE_TYPE ( FE_TYPE ),
.FE2_TYPE ( FE2_TYPE )
)
resource_share_sub (
ec_fe2_arithmetic (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_axi ( sub_if_fe_o ),
.o_res ( o_sub_if ),
.i_res ( i_sub_if ),
.o_axi ( sub_if_fe_i )
.o_mul_fe_if ( o_mul_if ),
.i_mul_fe_if ( i_mul_if ),
.o_add_fe_if ( o_add_if ),
.i_add_fe_if ( i_add_if ),
.o_sub_fe_if ( o_sub_if ),
.i_sub_fe_if ( i_sub_if ),
.o_mul_fe2_if ( mul_if_fe2_o ),
.i_mul_fe2_if ( mul_if_fe2_i ),
.o_add_fe2_if ( add_if_fe2_o ),
.i_add_fe2_if ( add_if_fe2_i ),
.o_sub_fe2_if ( sub_if_fe2_o ),
.i_sub_fe2_if ( sub_if_fe2_i )
);
resource_share # (
.NUM_IN ( 2 ),
.OVR_WRT_BIT ( 10 ),
.PIPELINE_IN ( 0 ),
.PIPELINE_OUT ( 0 )
)
resource_share_add (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_axi ( add_if_fe_o ),
.o_res ( o_add_if ),
.i_res ( i_add_if ),
.o_axi ( add_if_fe_i )
);
endmodule

View File

@ -184,12 +184,14 @@ begin
p_out = get_dat;
$display("Expected:");
print_fp2_jb_point(p_exp);
$display("Was:");
print_fp2_jb_point(p_out);
if (p_exp != p_out) begin
$display("Expected (affine):");
print_fp2_jb_point(fp2_to_affine(p_exp));
$display("Was (affine):");
print_fp2_jb_point(fp2_to_affine(p_out));
$fatal(1, "%m %t ERROR: test_0 point was wrong", $time);
end
@ -198,28 +200,12 @@ begin
end
endtask;
fp2_jb_point_t one_point = '{x:FE2_one, y:FE2_one, z:FE2_one};
fp2_jb_point_t two_point = '{x:'{c1:381'd2, c0:381'd2}, y:'{c1:381'd2, c0:381'd2}, z:FE2_one};
fp2_jb_point_t g2_point_dbl = '{x:'{c0:381'd2004569552561385659566932407633616698939912674197491321901037400001042336021538860336682240104624979660689237563240,
c1:381'd3955604752108186662342584665293438104124851975447411601471797343177761394177049673802376047736772242152530202962941},
y:'{c0:381'd978142457653236052983988388396292566217089069272380812666116929298652861694202207333864830606577192738105844024927,
c1:381'd2248711152455689790114026331322133133284196260289964969465268080325775757898907753181154992709229860715480504777099},
z:'{c0:381'd3145673658656250241340817105688138628074744674635286712244193301767486380727788868972774468795689607869551989918920,
c1:381'd968254395890002185853925600926112283510369004782031018144050081533668188797348331621250985545304947843412000516197}};
// Test sequence: idle the stream handshakes, wait 40 clock periods, run the point-addition checks,
// then end the simulation 1us after the last check returns.
initial begin
// Start with the output not ready and the input not valid.
out_if.rdy = 0;
in_if.val = 0;
#(40*CLK_PERIOD);
// Check g2_point + g2_point_dbl against the add_fp2_jb_point reference function. The block comment inside
// the call holds a literal point value that is not used.
test(g2_point, g2_point_dbl, add_fp2_jb_point(g2_point, g2_point_dbl)
/*'{x:'{c0:381'd2260316515795278483227354417550273673937385151660885802822200676798473320332386191812885909324314180009401590033496,
c1:381'd3157705674295752746643045744187038651144673626385096899515739718638356953289853357506730468806346866010850469607484},
y:'{c0:381'd3116406908094559010983016654096953279342014296159903648784769141704444407188785914041577477129027384530629024324101,
c1:381'd624739198846365065958511422206549337298084868949577950118937104460230094422413163466712508875838914229203179007739},
z:'{c0:381'd1372365362697527824661960056804989242334959973433633343888520294361286317391588271032081626721722944066233963018813,
c1:381'd135340553306575460225879133388402231094623862625345515492709522456301372944095308361691014711792956665222682354141}}*/);
// Same check with the doubled point computed by dbl_fp2_jb_point instead of the hard-coded constant.
test(g2_point, dbl_fp2_jb_point(g2_point), add_fp2_jb_point(g2_point, dbl_fp2_jb_point(g2_point)));
#1us $finish();
end

View File

@ -82,8 +82,8 @@ ec_point_add (
.o_err ( out_if.err ),
.i_rdy ( out_if.rdy ),
.o_val ( out_if.val ) ,
.o_mult_if ( mult_in_if ),
.i_mult_if ( mult_out_if ),
.o_mul_if ( mult_in_if ),
.i_mul_if ( mult_out_if ),
.o_add_if ( add_in_if ),
.i_add_if ( add_out_if ),
.o_sub_if ( sub_in_if ),

View File

@ -99,7 +99,7 @@ package bls12_381_pkg;
endfunction
function jb_point_t add_jb_point(jb_point_t p1, p2);
logic signed [1023:0] A, U1, U2, S1, S2, H, H3, R;
fe_t A, U1, U2, S1, S2, H, H3, R;
if (p1.z == 0) return p2;
if (p2.z == 0) return p1;
@ -107,33 +107,33 @@ package bls12_381_pkg;
if (p1.y == p2.y && p1.x == p2.x)
return (dbl_jb_point(p1));
U1 = (p1.x*p2.z) % P;
U1 = (U1*p2.z) % P;
U1 = fe_mul(p1.x, p2.z);
U1 = fe_mul(U1, p2.z);
U2 = (p2.x*p1.z) % P;
U2 = (U2 *p1.z) % P;
S1 = p1.y *p2.z % P;
S1 = (S1*p2.z % P) *p2.z % P;
S2 = p2.y * p1.z % P;
S2 = (S2*p1.z % P) *p1.z % P;
U2 = fe_mul(p2.x, p1.z);
U2 = fe_mul(U2, p1.z);
S1 = fe_mul(p1.y, p2.z);
S1 = fe_mul(fe_mul(S1, p2.z), p2.z);
S2 = fe_mul(p2.y, p1.z);
S2 = fe_mul(fe_mul(S2, p1.z), p1.z);
H = U2 + (U1 > U2 ? P : 0) -U1;
R = S2 + (S1 > S2 ? P : 0) -S1;
H3 = ((H * H %P ) * H ) % P;
A = (((2*U1 % P) *H % P) * H % P);
H = fe_sub(U2, U1);
R = fe_sub(S2, S1);
H3 = fe_mul(fe_mul(H, H), H);
A = fe_mul(fe_mul(fe_mul(2, U1), H), H);
add_jb_point.z = ((H * p1.z % P) * p2.z) % P;
add_jb_point.x = R*R % P;
add_jb_point.z = fe_mul(fe_mul(H, p1.z), p2.z);
add_jb_point.x = fe_mul(R, R);
add_jb_point.x = add_jb_point.x + (H3 > add_jb_point.x ? P : 0) - H3;
add_jb_point.x = add_jb_point.x + (A > add_jb_point.x ? P : 0) - A;
add_jb_point.x = fe_sub(add_jb_point.x, H3);
add_jb_point.x = fe_sub(add_jb_point.x, A);
A = (U1*H % P) * H % P;
A = A + (add_jb_point.x > A ? P : 0) - add_jb_point.x;
A = A*R % P;
add_jb_point.y = S1*H3 % P;
A = fe_mul(fe_mul(U1, H), H);
A = fe_sub(A, add_jb_point.x);
A = fe_mul(A, R);
add_jb_point.y = fe_mul(S1, H3);
add_jb_point.y = A + (add_jb_point.y > A ? P : 0) - add_jb_point.y;
add_jb_point.y = fe_sub(A, add_jb_point.y);
endfunction
@ -223,39 +223,43 @@ package bls12_381_pkg;
endfunction
// Adds two Fp2 points given in Jacobian coordinates and returns the sum in Jacobian coordinates.
//
//   p1, p2  - operands; a point whose z is 0 acts as the additive identity (the other operand is returned).
//   returns - p1 + p2. When both operands have identical x and y, dbl_fp2_jb_point(p1) is returned instead.
//
// Formulas used (all arithmetic through the fe2_* helpers):
//   U1 = X1*Z2^2, U2 = X2*Z1^2, S1 = Y1*Z2^3, S2 = Y2*Z1^3, H = U2 - U1, R = S2 - S1
//   X3 = R^2 - H^3 - 2*U1*H^2
//   Y3 = R*(U1*H^2 - X3) - S1*H^3
//   Z3 = H*Z1*Z2
// NOTE(review): the doubling shortcut compares raw x/y, so it only triggers when both operands share the
// same representation - confirm callers do not rely on it for equal points with different z.
function fp2_jb_point_t add_fp2_jb_point(fp2_jb_point_t p1, p2);
  fe2_t A, U1, U2, S1, S2, H, H3, R;

  if (p1.z == 0) return p2;
  if (p2.z == 0) return p1;

  if (p1.y == p2.y && p1.x == p2.x)
    return (dbl_fp2_jb_point(p1));

  U1 = fe2_mul(p1.x, p2.z);
  U1 = fe2_mul(U1, p2.z);

  U2 = fe2_mul(p2.x, p1.z);
  U2 = fe2_mul(U2, p1.z);

  S1 = fe2_mul(p1.y, p2.z);
  S1 = fe2_mul(fe2_mul(S1, p2.z), p2.z);

  S2 = fe2_mul(p2.y, p1.z);
  S2 = fe2_mul(fe2_mul(S2, p1.z), p1.z);

  H = fe2_sub(U2, U1);
  R = fe2_sub(S2, S1);
  H3 = fe2_mul(fe2_mul(H, H), H);
  // A = 2*U1*H^2. NOTE(review): the literal 2 relies on implicit conversion to fe2_t - confirm it maps to (2, 0).
  A = fe2_mul(fe2_mul(fe2_mul(2, U1), H), H);

  add_fp2_jb_point.z = fe2_mul(fe2_mul(H, p1.z), p2.z);

  // X3 = R^2 - H^3 - 2*U1*H^2: H^3 must be subtracted, not added.
  add_fp2_jb_point.x = fe2_mul(R, R);
  add_fp2_jb_point.x = fe2_sub(add_fp2_jb_point.x, H3);
  add_fp2_jb_point.x = fe2_sub(add_fp2_jb_point.x, A);

  // Y3 = R*(U1*H^2 - X3) - S1*H^3; A is reused as scratch for the first term.
  A = fe2_mul(fe2_mul(U1, H), H);
  A = fe2_sub(A, add_fp2_jb_point.x);
  A = fe2_mul(A, R);
  add_fp2_jb_point.y = fe2_mul(S1, H3);
  add_fp2_jb_point.y = fe2_sub(A, add_fp2_jb_point.y);
endfunction
function jb_point_t point_mult(logic [DAT_BITS-1:0] c, jb_point_t p);
jb_point_t result, addend;