updates to point multiplication

This commit is contained in:
bsdevlin 2019-06-19 20:53:36 +08:00
parent c996d487e4
commit fef2e26a1d
8 changed files with 212 additions and 99 deletions

View File

@ -1,6 +1,8 @@
/*
Commonly used interfaces:
Commonly used interfaces and tasks:
- AXI stream
- AXI 4
- AXI lite
- RAM
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
@ -19,6 +21,7 @@
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// This is a simplified version of axi stream
interface if_axi_stream # (
parameter DAT_BYTS = 8,
parameter DAT_BITS = DAT_BYTS*8,
@ -160,64 +163,73 @@ interface if_axi_stream # (
endinterface
// This uses byte addressing
interface if_axi_mm # (
parameter D_BITS = 64,
parameter A_BITS = 8
interface if_axi_lite # (
parameter A_BITS = 32
)(
input i_clk
);
logic [A_BITS-1:0] addr;
logic [D_BITS-1:0] rd_dat;
logic [D_BITS-1:0] wr_dat;
logic wr;
logic rd;
logic rd_dat_val;
logic wait_rq;
logic [A_BITS-1:0] awaddr;
logic awvalid;
logic awready;
logic [31:0] wdata;
logic [3:0] wstrb;
logic wvalid;
logic wready;
logic [1:0] bresp;
logic bvalid;
logic bready;
logic [A_BITS-1:0] araddr;
logic arvalid;
logic arready;
logic [31:0] rdata;
logic [1:0] rresp;
logic rvalid;
logic rready;
modport sink (input addr, wr_dat, wr, rd, i_clk, output rd_dat, rd_dat_val, wait_rq, import task reset_sink());
modport source (input rd_dat, rd_dat_val, wait_rq , i_clk, output addr, wr_dat, wr, rd, import task reset_source());
modport sink (input awaddr, awvalid, wdata, wstrb, wvalid, bready, araddr, arvalid, rready,
output awready, wready, bresp, bvalid, arready, rdata, rresp, rvalid);
modport source (input awready, wready, bresp, bvalid, arready, rdata, rresp, rvalid,
output awaddr, awvalid, wdata, wstrb, wvalid, bready, araddr, arvalid, rready);
task reset_source();
addr <= 0;
wr_dat <= 0;
wr <= 0;
rd <= 0;
awaddr <= 0;
awvalid <= 0;
wdata <= 0;
wstrb <= 0;
wvalid <= 0;
bready <= 0;
araddr <= 0;
arvalid <= 0;
rready <= 0;
endtask
task reset_sink();
rd_dat <= 0;
rd_dat_val <= 0;
wait_rq <= 0;
awready <= 0;
wready <= 0;
bresp <= 0;
bvalid <= 0;
arready <= 0;
rdata <= 0;
rresp <= 0;
rvalid <= 0;
endtask
task automatic put_data(input logic [D_BITS-1:0] data, [A_BITS-1:0] addr_in);
reset_source();
task automatic poke(input logic [31:0] data, [A_BITS-1:0] addr_in);
/* reset_source();
@(posedge i_clk);
wr = 1;
wr_dat = data;
addr = addr_in;
@(posedge i_clk); // Go to next clock edge
while (wait_rq) @(posedge i_clk); // If not rdy then wait here
reset_source();
endtask
// For writing multiple words
task automatic put_data_multiple(input logic [common_pkg::MAX_SIM_BYTS*8-1:0] data,
input logic [A_BITS-1:0] addr);
while (data != 0) begin
put_data(data, addr);
data = data >> D_BITS;
addr = addr + D_BITS/8;
end
reset_source();*/
endtask
task automatic get_data(ref logic [D_BITS-1:0] data, input logic [A_BITS-1:0] addr_in);
reset_source();
task automatic peek(ref logic [31:0] data, input logic [A_BITS-1:0] addr_in);
/* reset_source();
@(posedge i_clk);
rd = 1;
addr = addr_in;
@ -228,7 +240,7 @@ interface if_axi_mm # (
@(posedge i_clk);
end
data = rd_dat;
reset_source();
reset_source();*/
endtask
endinterface
@ -281,3 +293,58 @@ interface if_ram # (
endtask
endinterface
// AXI 4 interface bundle.
// Groups the write address (aw*), write data (w*), write response (b*),
// read address (ar*) and read data (r*) signals into a single interface.
//
// Parameters:
//   A_WIDTH  - width in bits of awaddr and araddr
//   D_WIDTH  - width in bits of wdata and rdata (wstrb has one bit per byte)
//   ID_WIDTH - width in bits of the awid, bid, arid and rid fields
//
// Modports:
//   sink   - takes the address / write data / *ready-for-response signals as
//            inputs and drives the ready and response signals
//   source - the mirror image of sink
interface if_axi4 # (
  parameter A_WIDTH = 64,
  parameter D_WIDTH = 512,
  parameter ID_WIDTH = 1
);

  // Write address signals
  logic [ID_WIDTH-1:0]  awid;
  logic [A_WIDTH-1:0]   awaddr;
  logic [7:0]           awlen;
  logic [2:0]           awsize;
  logic [1:0]           awburst;
  logic                 awlock;
  logic [3:0]           awcache;
  logic [2:0]           awprot;
  logic                 awvalid;
  logic                 awready;

  // Write data signals
  logic [D_WIDTH-1:0]   wdata;
  logic [D_WIDTH/8-1:0] wstrb;
  logic                 wlast;
  logic                 wvalid;
  logic                 wready;

  // Write response signals
  logic [ID_WIDTH-1:0]  bid;
  logic [1:0]           bresp;
  logic                 bvalid;
  logic                 bready;

  // Read address signals
  // arid is sized by ID_WIDTH like awid / bid / rid so that it is not
  // truncated to a single bit when ID_WIDTH > 1.
  logic [ID_WIDTH-1:0]  arid;
  logic [A_WIDTH-1:0]   araddr;
  logic [7:0]           arlen;
  logic [2:0]           arsize;
  logic [1:0]           arburst;
  logic                 arlock;
  logic [3:0]           arcache;
  logic [2:0]           arprot;
  logic                 arvalid;
  logic                 arready;

  // Read data signals
  logic [ID_WIDTH-1:0]  rid;
  logic [D_WIDTH-1:0]   rdata;
  logic [1:0]           rresp;
  logic                 rlast;
  logic                 rvalid;
  logic                 rready;

  modport sink (input awid, awaddr, awlen, awsize, awburst, awlock, awcache, awprot, awvalid, wdata,
                      wstrb, wlast, wvalid, bready, arid, araddr, arlen, arsize, arburst, arlock,
                      arcache, arprot, arvalid, rready,
                output awready, wready, bid, bresp, bvalid, arready, rid, rdata, rresp, rlast, rvalid);

  modport source (output awid, awaddr, awlen, awsize, awburst, awlock, awcache, awprot, awvalid, wdata,
                         wstrb, wlast, wvalid, bready, arid, araddr, arlen, arsize, arburst, arlock,
                         arcache, arprot, arvalid, rready,
                  input awready, wready, bid, bresp, bvalid, arready, rid, rdata, rresp, rlast, rvalid);

endinterface

View File

@ -27,6 +27,7 @@ module ec_fe2_arithmetic
parameter CTL_BIT = 8 // From this bit 2 bits are used for control
)(
input i_clk, i_rst,
input i_fp_mode, // If this bit is high then we operate in fp mode
// Interface to FE_TYPE multiplier (mod P)
if_axi_stream.source o_mul_fe_if,
if_axi_stream.sink i_mul_fe_if,
@ -47,12 +48,13 @@ module ec_fe2_arithmetic
if_axi_stream.sink i_sub_fe2_if
);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16)) add_if_fe_i [2] (i_clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) add_if_fe_o [2] (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16)) add_if_fe_i [1:0] (i_clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) add_if_fe_o [1:0] (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16)) sub_if_fe_i [2] (i_clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) sub_if_fe_o [2] (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16)) sub_if_fe_i [1:0] (i_clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) sub_if_fe_o [1:0] (i_clk);
logic fp_mode_add, fp_mode_sub, fp_mode_mul;
// Point additions are simple additions on each of the Fp elements
enum {ADD0, ADD1} add_state;
@ -66,7 +68,9 @@ always_ff @ (posedge i_clk) begin
o_add_fe2_if.reset_source();
add_state <= ADD0;
add_if_fe_o[0].reset_source();
fp_mode_add <= 0;
end else begin
fp_mode_add <= i_fp_mode;
if (add_if_fe_o[0].val && add_if_fe_o[0].rdy) add_if_fe_o[0].val <= 0;
if (o_add_fe2_if.val && o_add_fe2_if.rdy) o_add_fe2_if.val <= 0;
@ -79,7 +83,7 @@ always_ff @ (posedge i_clk) begin
i_add_fe2_if.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)]},
i_add_fe2_if.val, 1, 1, i_add_fe2_if.err, i_add_fe2_if.mod, i_add_fe2_if.ctl);
add_if_fe_o[0].ctl[CTL_BIT] <= 0;
if (i_add_fe2_if.val) add_state <= ADD1;
if (i_add_fe2_if.val && ~fp_mode_add) add_state <= ADD1;
end
end
ADD1: begin
@ -99,6 +103,7 @@ always_ff @ (posedge i_clk) begin
if (add_if_fe_i[0].ctl[CTL_BIT] == 0) begin
if (add_if_fe_i[0].val)
o_add_fe2_if.dat[0 +: $bits(FE_TYPE)] <= add_if_fe_i[0].dat;
if (fp_mode_add) o_add_fe2_if.val <= add_if_fe_i[0].val;
end else begin
o_add_fe2_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= add_if_fe_i[0].dat;
o_add_fe2_if.val <= add_if_fe_i[0].val;
@ -119,8 +124,11 @@ always_ff @ (posedge i_clk) begin
o_sub_fe2_if.reset_source();
sub_state <= SUB0;
sub_if_fe_o[0].reset_source();
fp_mode_sub <= 0;
end else begin
fp_mode_sub <= i_fp_mode;
if (sub_if_fe_o[0].val && sub_if_fe_o[0].rdy) sub_if_fe_o[0].val <= 0;
if (o_sub_fe2_if.val && o_sub_fe2_if.rdy) o_sub_fe2_if.val <= 0;
@ -132,7 +140,7 @@ always_ff @ (posedge i_clk) begin
i_sub_fe2_if.dat[0 +: $bits(FE_TYPE)]},
i_sub_fe2_if.val, 1, 1, i_sub_fe2_if.err, i_sub_fe2_if.mod, i_sub_fe2_if.ctl);
sub_if_fe_o[0].ctl[CTL_BIT] <= 0;
if (i_sub_fe2_if.val) sub_state <= SUB1;
if (i_sub_fe2_if.val && ~fp_mode_sub) sub_state <= SUB1;
end
end
SUB1: begin
@ -152,6 +160,7 @@ always_ff @ (posedge i_clk) begin
if (sub_if_fe_i[0].ctl[CTL_BIT] == 0) begin
if (sub_if_fe_i[0].val)
o_sub_fe2_if.dat[0 +: $bits(FE_TYPE)] <= sub_if_fe_i[0].dat;
if (fp_mode_sub) o_sub_fe2_if.val <= sub_if_fe_i[0].val;
end else begin
o_sub_fe2_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= sub_if_fe_i[0].dat;
o_sub_fe2_if.val <= sub_if_fe_i[0].val;
@ -179,8 +188,9 @@ end
always_ff @ (posedge i_clk) begin
if (i_rst) begin
add_sub_val <= 0;
o_mul_fe2_if.sop <= 0;
o_mul_fe2_if.eop <= 0;
o_mul_fe2_if.sop <= 1;
o_mul_fe2_if.eop <= 1;
o_mul_fe2_if.err <= 0;
o_mul_fe2_if.ctl <= 0;
o_mul_fe2_if.dat <= 0;
o_mul_fe2_if.mod <= 0;
@ -188,8 +198,11 @@ always_ff @ (posedge i_clk) begin
o_mul_fe_if.reset_source();
sub_if_fe_o[1].copy_if(0, 0, 1, 1, 0, 0, 0);
add_if_fe_o[1].copy_if(0, 0, 1, 1, 0, 0, 0);
fp_mode_mul <= 0;
end else begin
fp_mode_mul <= i_fp_mode;
if (o_mul_fe2_if.val && o_mul_fe2_if.rdy) begin
add_sub_val <= 0;
end
@ -205,7 +218,7 @@ always_ff @ (posedge i_clk) begin
mul_if_fe2_i.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)]},
mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl);
o_mul_fe_if.ctl[CTL_BIT +: 2] <= 0;
if (mul_if_fe2_i.val) mul_state <= MUL1;
if (mul_if_fe2_i.val && ~fp_mode_mul) mul_state <= MUL1;
end
MUL1: begin
o_mul_fe_if.copy_if({mul_if_fe2_i.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)],
@ -258,45 +271,58 @@ always_ff @ (posedge i_clk) begin
// One process to assign output
if (~add_sub_val[0] || (o_mul_fe2_if.val && o_mul_fe2_if.rdy)) begin
o_mul_fe2_if.ctl <= add_if_fe_i[1].ctl;
o_mul_fe2_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= add_if_fe_i[1].dat;
add_sub_val[0] <= add_if_fe_i[1].val;
o_mul_fe2_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= add_if_fe_i[1].dat;
add_sub_val[0] <= add_if_fe_i[1].val;
end
if (~add_sub_val[1] || (o_mul_fe2_if.val && o_mul_fe2_if.rdy)) begin
o_mul_fe2_if.dat[0 +: $bits(FE_TYPE)] <= sub_if_fe_i[1].dat;
add_sub_val[1] <= sub_if_fe_i[1].val;
end
// If we are in fp_mode
if (fp_mode_mul) begin
if (~add_sub_val[0] || (o_mul_fe2_if.val && o_mul_fe2_if.rdy)) begin
o_mul_fe2_if.dat[0 +: $bits(FE_TYPE)] <= i_mul_fe_if.dat;
add_sub_val <= {2{i_mul_fe_if.val}};
end
end
end
end
resource_share # (
.NUM_IN ( 2 ),
.OVR_WRT_BIT ( 12 ),
.PIPELINE_IN ( 0 ),
.PIPELINE_OUT ( 0 )
.NUM_IN ( 2 ),
.DAT_BITS ( 2*$bits(FE_TYPE) ),
.CTL_BITS ( 16 ),
.OVR_WRT_BIT ( 12 ),
.PIPELINE_IN ( 0 ),
.PIPELINE_OUT ( 0 )
)
resource_share_sub (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_axi ( sub_if_fe_o ),
.i_axi ( sub_if_fe_o[1:0] ),
.o_res ( o_sub_fe_if ),
.i_res ( i_sub_fe_if ),
.o_axi ( sub_if_fe_i )
.o_axi ( sub_if_fe_i[1:0] )
);
resource_share # (
.NUM_IN ( 2 ),
.OVR_WRT_BIT ( 12 ),
.PIPELINE_IN ( 0 ),
.PIPELINE_OUT ( 0 )
.NUM_IN ( 2 ),
.DAT_BITS ( 2*$bits(FE_TYPE) ),
.CTL_BITS ( 16 ),
.OVR_WRT_BIT ( 12 ),
.PIPELINE_IN ( 0 ),
.PIPELINE_OUT ( 0 )
)
resource_share_add (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_axi ( add_if_fe_o ),
.i_axi ( add_if_fe_o[1:0] ),
.o_res ( o_add_fe_if ),
.i_res ( i_add_fe_if ),
.o_axi ( add_if_fe_i )
.o_axi ( add_if_fe_i[1:0] )
);
endmodule

View File

@ -86,6 +86,7 @@ ec_fe2_arithmetic
ec_fe2_arithmetic (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_fp_mode ( 1'd0 ),
.o_mul_fe_if ( o_mul_if ),
.i_mul_fe_if ( i_mul_if ),
.o_add_fe_if ( o_add_if ),

View File

@ -84,6 +84,7 @@ ec_fe2_arithmetic
ec_fe2_arithmetic (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_fp_mode ( 1'd0 ),
.o_mul_fe_if ( o_mul_if ),
.i_mul_fe_if ( i_mul_if ),
.o_add_fe_if ( o_add_if ),

View File

@ -116,4 +116,11 @@ barret_mod_pipe (
.i_mult_if_1 ( mult_if[4] )
);
// Constant drive for the o_mul fields that this module never varies:
// sop and eop are held at 1, mod and err are held at 0.
always_comb begin
  o_mul.sop = 1;
  o_mul.eop = 1;
  o_mul.mod = 0;
  o_mul.err = 0;
end
endmodule

View File

@ -61,6 +61,7 @@ always_comb begin
o_rdy = (~o_mult_if_0.val || (o_mult_if_0.val && o_mult_if_0.rdy)) && fifo_in_if.rdy;
end
always_ff @ (posedge i_clk) begin
if (i_rst) begin
o_mult_if_0.reset_source();
@ -140,7 +141,7 @@ end
// Fifo to store inputs (as we need to do final subtraction)
axi_stream_fifo #(
.SIZE ( 32 ),
.SIZE ( 32 ),
.DAT_BITS ( DAT_BITS )
)
axi_stream_fifo (

View File

@ -21,6 +21,9 @@
module resource_share # (
parameter NUM_IN = 4,
parameter CTL_BITS = 16,
parameter DAT_BYTS = 8,
parameter DAT_BITS = DAT_BYTS*8,
parameter OVR_WRT_BIT = 0,
parameter PIPELINE_IN = 0,
parameter PIPELINE_OUT = 0,
@ -37,11 +40,11 @@ module resource_share # (
// Arbitration to the resource
packet_arb # (
.DAT_BITS ( i_axi[0].DAT_BITS ),
.DAT_BYTS ( i_axi[0].DAT_BYTS ),
.CTL_BITS ( i_axi[0].CTL_BITS ),
.NUM_IN ( NUM_IN ),
.OVR_WRT_BIT ( OVR_WRT_BIT ),
.DAT_BITS ( DAT_BITS ),
.DAT_BYTS ( DAT_BYTS ),
.CTL_BITS ( CTL_BITS ),
.NUM_IN ( NUM_IN ),
.OVR_WRT_BIT ( OVR_WRT_BIT ),
.PIPELINE ( PIPELINE_IN ),
.PRIORITY_IN ( PRIORITY_IN )
)
@ -53,7 +56,7 @@ packet_arb_mult (
);
// Demuxing
if_axi_stream #(.DAT_BYTS(i_res.DAT_BYTS), .DAT_BITS(i_res.DAT_BITS), .CTL_BITS(i_res.CTL_BITS)) int_axi [NUM_IN-1:0] (i_res.i_clk);
if_axi_stream #(.DAT_BYTS(DAT_BYTS), .DAT_BITS(DAT_BITS), .CTL_BITS(CTL_BITS)) int_axi [NUM_IN-1:0] (i_clk);
genvar gen0;
logic [NUM_IN-1:0] rdy;
@ -67,10 +70,10 @@ generate
end
pipeline_if #(
.DAT_BITS ( i_res.DAT_BITS ),
.DAT_BYTS ( i_res.DAT_BYTS ),
.CTL_BITS ( i_res.CTL_BITS ),
.NUM_STAGES ( PIPELINE_OUT )
.DAT_BITS ( DAT_BITS ),
.DAT_BYTS ( DAT_BYTS ),
.CTL_BITS ( CTL_BITS ),
.NUM_STAGES ( PIPELINE_OUT )
)
pipeline_if (
.i_rst ( i_rst ),

View File

@ -43,17 +43,17 @@ package bls12_381_pkg;
fe_t x, y, z;
} jb_point_t;
typedef struct packed {
fe_t c1, c0;
} fe2_t;
typedef fe_t [1:0] fe2_t;
typedef fe2_t [2:0] fe6_t;
typedef fe6_t [1:0] fe12_t;
fe2_t G2x = '{c0:381'd352701069587466618187139116011060144890029952792775240219908644239793785735715026873347600343865175952761926303160,
c1:381'd3059144344244213709971259814753781636986470325476647558659373206291635324768958432433509563104347017837885763365758};
fe2_t G2x = {381'd3059144344244213709971259814753781636986470325476647558659373206291635324768958432433509563104347017837885763365758,
381'd352701069587466618187139116011060144890029952792775240219908644239793785735715026873347600343865175952761926303160};
fe2_t G2y = '{c0:381'd1985150602287291935568054521177171638300868978215655730859378665066344726373823718423869104263333984641494340347905,
c1:381'd927553665492332455747201965776037880757740193453592970025027978793976877002675564980949289727957565575433344219582};
fe2_t G2y = {381'd927553665492332455747201965776037880757740193453592970025027978793976877002675564980949289727957565575433344219582,
381'd1985150602287291935568054521177171638300868978215655730859378665066344726373823718423869104263333984641494340347905};
fe2_t FE2_one = '{c0:381'd1, c1:381'd0};
fe2_t FE2_one = {381'd0, 381'd1};
jb_point_t g_point = '{x:Gx, y:Gy, z:381'd1};
@ -69,7 +69,12 @@ package bls12_381_pkg;
typedef enum logic [7:0] {
NOOP_WAIT = 8'h0,
COPY_REG = 8'h1,
FP_FPOINT_MULT = 8'h26
SCALAR_INV = 8'h13,
POINT_MULT = 8'h24,
FP_FPOINT_MULT = 8'h25,
FP2_FPOINT_MULT = 8'h26
} code_t;
// Instruction format
@ -78,7 +83,12 @@ package bls12_381_pkg;
code_t code;
} inst_t;
localparam DATA_RAM_WIDTH = $bits(point_type_t) + DAT_BITS;
typedef struct packed {
fe_t dat;
point_type_t pt;
} data_t;
localparam DATA_RAM_WIDTH = $bits(data_t);
localparam DATA_RAM_ALIGN_BYTE = 64;
localparam DATA_RAM_DEPTH = 8;
localparam DATA_RAM_USR_WIDTH = 4;
@ -90,9 +100,6 @@ package bls12_381_pkg;
localparam INST_RAM_USR_WIDTH = 4;
localparam INST_RAM_USR_DEPTH = INST_RAM_DEPTH*INST_RAM_ALIGN_BYTE/INST_RAM_USR_WIDTH;
function is_zero(jb_point_t p);
is_zero = (p.x == 0 && p.y == 0 && p.z == 1);
return is_zero;
@ -145,8 +152,8 @@ package bls12_381_pkg;
endfunction
function fe2_t fe2_add(fe2_t a, b);
fe2_add.c0 = fe_add(a.c0,b.c0);
fe2_add.c1 = fe_add(a.c1,b.c1);
fe2_add[0] = fe_add(a[0], b[0]);
fe2_add[1] = fe_add(a[1] ,b[1]);
endfunction
function fe_t fe_sub(fe_t a, b);
@ -157,8 +164,8 @@ package bls12_381_pkg;
endfunction
function fe2_t fe2_sub(fe2_t a, b);
fe2_sub.c0 = fe_sub(a.c0, b.c0);
fe2_sub.c1 = fe_sub(a.c1, b.c1);
fe2_sub[0] = fe_sub(a[0], b[0]);
fe2_sub[1] = fe_sub(a[1], b[1]);
endfunction
function fe_t fe_mul(fe2_t a, b);
@ -166,8 +173,8 @@ package bls12_381_pkg;
endfunction
function fe2_t fe2_mul(fe2_t a, b);
fe2_mul.c0 = fe_sub(fe_mul(a.c0, b.c0), fe_mul(a.c1, b.c1));
fe2_mul.c1 = fe_add(fe_mul(a.c0, b.c1), fe_mul(a.c1, b.c0));
fe2_mul[0] = fe_sub(fe_mul(a[0], b[0]), fe_mul(a[1], b[1]));
fe2_mul[1] = fe_add(fe_mul(a[0], b[1]), fe_mul(a[1], b[0]));
endfunction
// Function to double point in Jacobian coordinates (for comparison in testbench)
@ -336,11 +343,11 @@ package bls12_381_pkg;
function fe2_t fe2_inv(fe2_t a);
fe_t factor, t0, t1;
t0 = fe_mul(a.c0, a.c0);
t1 = fe_mul(a.c1, a.c1);
t0 = fe_mul(a[0], a[0]);
t1 = fe_mul(a[1], a[1]);
factor = fe_inv(fe_add(t0, t1));
fe2_inv.c0 = fe_mul(a.c0, factor);
fe2_inv.c1 = fe_mul(fe_sub(P, a.c1), factor);
fe2_inv[0]= fe_mul(a[0], factor);
fe2_inv[1] = fe_mul(fe_sub(P, a[1]), factor);
endfunction
function jb_point_t to_affine(jb_point_t p);
@ -369,9 +376,9 @@ package bls12_381_pkg;
endfunction
function print_fp2_jb_point(fp2_jb_point_t p);
$display("x:(c1:%h, c0:%h)", p.x.c1, p.x.c0);
$display("y:(c1:%h, c0:%h)", p.y.c1, p.y.c0);
$display("z:(c1:%h, c0:%h)", p.z.c1, p.z.c0);
$display("x:(c1:%h, c0:%h)", p.x[1], p.x[0]);
$display("y:(c1:%h, c0:%h)", p.y[1], p.y[0]);
$display("z:(c1:%h, c0:%h)", p.z[1], p.z[0]);
return;
endfunction