updates to point multiplication

2019-06-19 20:53:36 +08:00 · 2019-06-19 20:53:36 +08:00 · fef2e26a1d
parent c996d487e4
commit fef2e26a1d
8 changed files with 212 additions and 99 deletions
--- a/ip_cores/common/src/rtl/common_if.sv
+++ b/ip_cores/common/src/rtl/common_if.sv
@ -1,6 +1,8 @@
 /*
-  Commonly used interfaces:
+  Commonly used interfaces and tasks:
    - AXI stream
+    - AXI 4
+    - AXI lite
    - RAM

  Copyright (C) 2019  Benjamin Devlin and Zcash Foundation
@ -19,6 +21,7 @@
  along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

+// This is a simplified version of axi stream
 interface if_axi_stream # (
  parameter DAT_BYTS = 8,
  parameter DAT_BITS = DAT_BYTS*8,
@ -160,64 +163,73 @@ interface if_axi_stream # (

 endinterface

-// This uses byte addressing
-interface if_axi_mm # (
-  parameter D_BITS = 64,
-  parameter A_BITS = 8
+
+interface if_axi_lite # (
+  parameter A_BITS = 32
 )(
  input i_clk
 );

-  logic [A_BITS-1:0] addr;
-  logic [D_BITS-1:0] rd_dat;
-  logic [D_BITS-1:0] wr_dat;
-  logic              wr;
-  logic              rd;
-  logic              rd_dat_val;
-  logic              wait_rq;
+  logic [A_BITS-1:0] awaddr;
+  logic              awvalid;
+  logic              awready;
+  logic [31:0]       wdata;
+  logic [3:0]        wstrb;
+  logic              wvalid;
+  logic              wready;
+  logic [1:0]        bresp;
+  logic              bvalid;
+  logic              bready;
+  logic [A_BITS-1:0] araddr;
+  logic              arvalid;
+  logic              arready;
+  logic [31:0]       rdata;
+  logic [1:0]        rresp;
+  logic              rvalid;
+  logic              rready;

-  modport sink (input addr, wr_dat, wr, rd, i_clk, output rd_dat, rd_dat_val, wait_rq, import task reset_sink());
-  modport source (input rd_dat, rd_dat_val, wait_rq , i_clk, output addr, wr_dat, wr, rd, import task reset_source());
+  modport sink (input awaddr, awvalid, wdata, wstrb, wvalid, bready, araddr, arvalid, rready,
+                output awready, wready, bresp, bvalid, arready, rdata, rresp, rvalid);
+  modport source (input awready, wready, bresp, bvalid, arready, rdata, rresp, rvalid,
+                  output awaddr, awvalid, wdata, wstrb, wvalid, bready, araddr, arvalid, rready);

  task reset_source();
-    addr <= 0;
-    wr_dat <= 0;
-    wr <= 0;
-    rd <= 0;
+    awaddr <= 0;
+    awvalid <= 0;
+    wdata <= 0;
+    wstrb <= 0;
+    wvalid <= 0;
+    bready <= 0;
+    araddr <= 0;
+    arvalid <= 0;
+    rready <= 0;
  endtask

  task reset_sink();
-    rd_dat <= 0;
-    rd_dat_val <= 0;
-    wait_rq <= 0;
+    awready <= 0;
+    wready <= 0;
+    bresp <= 0;
+    bvalid <= 0;
+    arready <= 0;
+    rdata <= 0;
+    rresp <= 0;
+    rvalid <= 0;
  endtask

-  task automatic put_data(input logic [D_BITS-1:0] data, [A_BITS-1:0] addr_in);
-    reset_source();
+  task automatic poke(input logic [31:0] data, [A_BITS-1:0] addr_in);
+  /*  reset_source();
    @(posedge i_clk);
    wr = 1;
    wr_dat = data;
    addr = addr_in;
    @(posedge i_clk); // Go to next clock edge
    while (wait_rq) @(posedge i_clk); // If not rdy then wait here
-    reset_source();
-  endtask
-
-  // For writing multiple words
-  task automatic put_data_multiple(input logic [common_pkg::MAX_SIM_BYTS*8-1:0] data,
-                                   input logic [A_BITS-1:0] addr);
-
-    while (data != 0) begin
-      put_data(data, addr);
-      data = data >> D_BITS;
-      addr = addr + D_BITS/8;
-    end
-
+    reset_source();*/
  endtask


-  task automatic get_data(ref logic [D_BITS-1:0] data, input logic [A_BITS-1:0] addr_in);
-    reset_source();
+  task automatic peek(ref logic [31:0] data, input logic [A_BITS-1:0] addr_in);
+   /* reset_source();
    @(posedge i_clk);
    rd = 1;
    addr = addr_in;
@ -228,7 +240,7 @@ interface if_axi_mm # (
      @(posedge i_clk);
    end
    data = rd_dat;
-    reset_source();
+    reset_source();*/
  endtask

 endinterface
@ -281,3 +293,58 @@ interface if_ram # (
  endtask

 endinterface
+
+// AXI4 signal bundle (aw/w/b/ar/r signal groups); the sink and source modports give the two opposite port directions.
+interface if_axi4 # (
+  A_WIDTH = 64,   // Width in bits of awaddr and araddr
+  D_WIDTH = 512,  // Width in bits of wdata and rdata; wstrb has one bit per data byte (D_WIDTH/8)
+  ID_WIDTH = 1    // Width in bits of every ID signal: awid, bid, arid and rid
+);
+
+  logic [ID_WIDTH-1:0]  awid;     // aw*: write address group
+  logic [A_WIDTH-1:0]   awaddr;
+  logic [7:0]           awlen;
+  logic [2:0]           awsize;
+  logic [1:0]           awburst;
+  logic                 awlock;
+  logic [3:0]           awcache;
+  logic [2:0]           awprot;
+  logic                 awvalid;
+  logic                 awready;
+  logic [D_WIDTH-1:0]   wdata;    // w*: write data group
+  logic [D_WIDTH/8-1:0] wstrb;
+  logic                 wlast;
+  logic                 wvalid;
+  logic                 wready;
+  logic [ID_WIDTH-1:0]  bid;      // b*: write response group
+  logic [1:0]           bresp;
+  logic                 bvalid;
+  logic                 bready;
+  logic [ID_WIDTH-1:0]  arid;     // ar*: read address group; sized by ID_WIDTH so it matches awid, bid and rid
+  logic [A_WIDTH-1:0]   araddr;
+  logic [7:0]           arlen;
+  logic [2:0]           arsize;
+  logic [1:0]           arburst;
+  logic                 arlock;
+  logic [3:0]           arcache;
+  logic [2:0]           arprot;
+  logic                 arvalid;
+  logic                 arready;
+  logic [ID_WIDTH-1:0]  rid;      // r*: read data group
+  logic [D_WIDTH-1:0]   rdata;
+  logic [1:0]           rresp;
+  logic                 rlast;
+  logic                 rvalid;
+  logic                 rready;
+
+  modport sink (input awid, awaddr, awlen, awsize, awburst, awlock, awcache, awprot, awvalid, wdata,
+                      wstrb, wlast, wvalid, bready, arid, araddr, arlen, arsize, arburst, arlock,
+                      arcache, arprot, arvalid, rready,
+                output awready, wready, bid, bresp, bvalid, arready, rid, rdata, rresp, rlast, rvalid); // sink drives the *ready signals plus the b* and r* payloads
+
+  modport source (output awid, awaddr, awlen, awsize, awburst, awlock, awcache, awprot, awvalid, wdata,
+                         wstrb, wlast, wvalid, bready, arid, araddr, arlen, arsize, arburst, arlock,
+                         arcache, arprot, arvalid, rready,
+                  input awready, wready, bid, bresp, bvalid, arready, rid, rdata, rresp, rlast, rvalid); // source drives the aw*, w* and ar* payloads plus bready and rready
+
+endinterface
--- a/ip_cores/ec/src/rtl/ec_fp2_arithmetic.sv
+++ b/ip_cores/ec/src/rtl/ec_fp2_arithmetic.sv
@ -27,6 +27,7 @@ module ec_fe2_arithmetic
  parameter CTL_BIT = 8        // From this bit 2 bits are used for control
 )(
  input i_clk, i_rst,
+  input i_fp_mode,      // If this bit is high then we operate in fp mode
  // Interface to FE_TYPE multiplier (mod P)
  if_axi_stream.source o_mul_fe_if,
  if_axi_stream.sink   i_mul_fe_if,
@ -47,12 +48,13 @@ module ec_fe2_arithmetic
  if_axi_stream.sink   i_sub_fe2_if
 );

-if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16))   add_if_fe_i [2] (i_clk);
-if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) add_if_fe_o [2] (i_clk);
+if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16))   add_if_fe_i [1:0] (i_clk);
+if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) add_if_fe_o [1:0] (i_clk);

-if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16))   sub_if_fe_i [2] (i_clk);
-if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) sub_if_fe_o [2] (i_clk);
+if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16))   sub_if_fe_i [1:0] (i_clk);
+if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) sub_if_fe_o [1:0] (i_clk);

+logic fp_mode_add, fp_mode_sub, fp_mode_mul;

 // Point additions are simple additions on each of the Fp elements
 enum {ADD0, ADD1} add_state;
@ -66,7 +68,9 @@ always_ff @ (posedge i_clk) begin
    o_add_fe2_if.reset_source();
    add_state <= ADD0;
    add_if_fe_o[0].reset_source();
+    fp_mode_add <= 0;
  end else begin
+    fp_mode_add <= i_fp_mode;

    if (add_if_fe_o[0].val && add_if_fe_o[0].rdy) add_if_fe_o[0].val <= 0;
    if (o_add_fe2_if.val && o_add_fe2_if.rdy) o_add_fe2_if.val <= 0;
@ -79,7 +83,7 @@ always_ff @ (posedge i_clk) begin
                                  i_add_fe2_if.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)]},
                                  i_add_fe2_if.val, 1, 1, i_add_fe2_if.err, i_add_fe2_if.mod, i_add_fe2_if.ctl);
          add_if_fe_o[0].ctl[CTL_BIT] <= 0;
-          if (i_add_fe2_if.val) add_state <= ADD1;
+          if (i_add_fe2_if.val && ~fp_mode_add) add_state <= ADD1;
        end
      end
      ADD1: begin
@ -99,6 +103,7 @@ always_ff @ (posedge i_clk) begin
      if (add_if_fe_i[0].ctl[CTL_BIT] == 0) begin
        if (add_if_fe_i[0].val)
          o_add_fe2_if.dat[0 +: $bits(FE_TYPE)] <= add_if_fe_i[0].dat;
+        if (fp_mode_add) o_add_fe2_if.val <= add_if_fe_i[0].val;
      end else begin
        o_add_fe2_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= add_if_fe_i[0].dat;
        o_add_fe2_if.val <= add_if_fe_i[0].val;
@ -119,8 +124,11 @@ always_ff @ (posedge i_clk) begin
    o_sub_fe2_if.reset_source();
    sub_state <= SUB0;
    sub_if_fe_o[0].reset_source();
+    fp_mode_sub <= 0;
  end else begin

+    fp_mode_sub <= i_fp_mode;
+
    if (sub_if_fe_o[0].val && sub_if_fe_o[0].rdy) sub_if_fe_o[0].val <= 0;
    if (o_sub_fe2_if.val && o_sub_fe2_if.rdy) o_sub_fe2_if.val <= 0;

@ -132,7 +140,7 @@ always_ff @ (posedge i_clk) begin
                                  i_sub_fe2_if.dat[0 +: $bits(FE_TYPE)]},
                                  i_sub_fe2_if.val, 1, 1, i_sub_fe2_if.err, i_sub_fe2_if.mod, i_sub_fe2_if.ctl);
          sub_if_fe_o[0].ctl[CTL_BIT] <= 0;
-          if (i_sub_fe2_if.val) sub_state <= SUB1;
+          if (i_sub_fe2_if.val && ~fp_mode_sub) sub_state <= SUB1;
        end
      end
      SUB1: begin
@ -152,6 +160,7 @@ always_ff @ (posedge i_clk) begin
      if (sub_if_fe_i[0].ctl[CTL_BIT] == 0) begin
        if (sub_if_fe_i[0].val)
          o_sub_fe2_if.dat[0 +: $bits(FE_TYPE)] <= sub_if_fe_i[0].dat;
+        if (fp_mode_sub) o_sub_fe2_if.val <= sub_if_fe_i[0].val;
      end else begin
        o_sub_fe2_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= sub_if_fe_i[0].dat;
        o_sub_fe2_if.val <= sub_if_fe_i[0].val;
@ -179,8 +188,9 @@ end
 always_ff @ (posedge i_clk) begin
  if (i_rst) begin
    add_sub_val <= 0;
-    o_mul_fe2_if.sop <= 0;
-    o_mul_fe2_if.eop <= 0;
+    o_mul_fe2_if.sop <= 1;
+    o_mul_fe2_if.eop <= 1;
+    o_mul_fe2_if.err <= 0;
    o_mul_fe2_if.ctl <= 0;
    o_mul_fe2_if.dat <= 0;
    o_mul_fe2_if.mod <= 0;
@ -188,8 +198,11 @@ always_ff @ (posedge i_clk) begin
    o_mul_fe_if.reset_source();
    sub_if_fe_o[1].copy_if(0, 0, 1, 1, 0, 0, 0);
    add_if_fe_o[1].copy_if(0, 0, 1, 1, 0, 0, 0);
+    fp_mode_mul <= 0;
  end else begin

+    fp_mode_mul <= i_fp_mode;
+
    if (o_mul_fe2_if.val && o_mul_fe2_if.rdy) begin
      add_sub_val <= 0;
    end
@ -205,7 +218,7 @@ always_ff @ (posedge i_clk) begin
                            mul_if_fe2_i.dat[$bits(FE2_TYPE)  +: $bits(FE_TYPE)]},
                            mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl);
          o_mul_fe_if.ctl[CTL_BIT +: 2] <= 0;
-          if (mul_if_fe2_i.val) mul_state <= MUL1;
+          if (mul_if_fe2_i.val && ~fp_mode_mul) mul_state <= MUL1;
        end
        MUL1: begin
          o_mul_fe_if.copy_if({mul_if_fe2_i.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)],
@ -258,45 +271,58 @@ always_ff @ (posedge i_clk) begin
    // One process to assign output
    if (~add_sub_val[0] || (o_mul_fe2_if.val && o_mul_fe2_if.rdy)) begin
      o_mul_fe2_if.ctl <= add_if_fe_i[1].ctl;
-        o_mul_fe2_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= add_if_fe_i[1].dat;
-        add_sub_val[0] <= add_if_fe_i[1].val;
+      o_mul_fe2_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= add_if_fe_i[1].dat;
+      add_sub_val[0] <= add_if_fe_i[1].val;
    end

    if (~add_sub_val[1] || (o_mul_fe2_if.val && o_mul_fe2_if.rdy)) begin
        o_mul_fe2_if.dat[0 +: $bits(FE_TYPE)] <= sub_if_fe_i[1].dat;
        add_sub_val[1] <= sub_if_fe_i[1].val;
    end
+
+    // If we are in fp_mode
+    if (fp_mode_mul) begin
+      if (~add_sub_val[0] || (o_mul_fe2_if.val && o_mul_fe2_if.rdy)) begin
+        o_mul_fe2_if.dat[0 +: $bits(FE_TYPE)] <= i_mul_fe_if.dat;
+        add_sub_val <= {2{i_mul_fe_if.val}};
+      end
+    end
+
  end
 end

 resource_share # (
-  .NUM_IN ( 2 ),
-  .OVR_WRT_BIT ( 12 ),
-  .PIPELINE_IN ( 0  ),
-  .PIPELINE_OUT ( 0 )
+  .NUM_IN       ( 2                ),
+  .DAT_BITS     ( 2*$bits(FE_TYPE) ),
+  .CTL_BITS     ( 16               ),
+  .OVR_WRT_BIT  ( 12               ),
+  .PIPELINE_IN  ( 0                ),
+  .PIPELINE_OUT ( 0                )
 )
 resource_share_sub (
  .i_clk ( i_clk ),
  .i_rst ( i_rst ),
-  .i_axi ( sub_if_fe_o ),
+  .i_axi ( sub_if_fe_o[1:0] ),
  .o_res ( o_sub_fe_if ),
  .i_res ( i_sub_fe_if ),
-  .o_axi ( sub_if_fe_i )
+  .o_axi ( sub_if_fe_i[1:0] )
 );

 resource_share # (
-  .NUM_IN ( 2 ),
-  .OVR_WRT_BIT ( 12 ),
-  .PIPELINE_IN ( 0  ),
-  .PIPELINE_OUT ( 0 )
+  .NUM_IN       ( 2                ),
+  .DAT_BITS     ( 2*$bits(FE_TYPE) ),
+  .CTL_BITS     ( 16               ),
+  .OVR_WRT_BIT  ( 12               ),
+  .PIPELINE_IN  ( 0                ),
+  .PIPELINE_OUT ( 0                )
 )
 resource_share_add (
  .i_clk ( i_clk ),
  .i_rst ( i_rst ),
-  .i_axi ( add_if_fe_o ),
+  .i_axi ( add_if_fe_o[1:0] ),
  .o_res ( o_add_fe_if ),
  .i_res ( i_add_fe_if ),
-  .o_axi ( add_if_fe_i )
+  .o_axi ( add_if_fe_i[1:0] )
 );

 endmodule
--- a/ip_cores/ec/src/rtl/ec_fp2_point_add.sv
+++ b/ip_cores/ec/src/rtl/ec_fp2_point_add.sv
@ -86,6 +86,7 @@ ec_fe2_arithmetic
 ec_fe2_arithmetic (
  .i_clk ( i_clk ),
  .i_rst ( i_rst ),
+  .i_fp_mode ( 1'd0 ),
  .o_mul_fe_if ( o_mul_if ),
  .i_mul_fe_if ( i_mul_if ),
  .o_add_fe_if ( o_add_if ),
--- a/ip_cores/ec/src/rtl/ec_fp2_point_dbl.sv
+++ b/ip_cores/ec/src/rtl/ec_fp2_point_dbl.sv
@ -84,6 +84,7 @@ ec_fe2_arithmetic
 ec_fe2_arithmetic (
  .i_clk ( i_clk ),
  .i_rst ( i_rst ),
+  .i_fp_mode ( 1'd0 ),
  .o_mul_fe_if ( o_mul_if ),
  .i_mul_fe_if ( i_mul_if ),
  .o_add_fe_if ( o_add_if ),
--- a/ip_cores/ec/src/rtl/ec_fp_mult_mod.sv
+++ b/ip_cores/ec/src/rtl/ec_fp_mult_mod.sv
@ -116,4 +116,11 @@ barret_mod_pipe (
  .i_mult_if_1 ( mult_if[4] )
 );

+always_comb begin  // Continuously drive four o_mul fields with fixed values
+  o_mul.err = 0;   // err is held at 0
+  o_mul.sop = 1;   // sop and eop are both held at 1
+  o_mul.eop = 1;   // NOTE(review): presumably each output word is a complete one-beat packet - confirm against if_axi_stream
+  o_mul.mod = 0;   // mod is held at 0
+end
+
 endmodule
--- a/ip_cores/util/src/rtl/barret_mod_pipe.sv
+++ b/ip_cores/util/src/rtl/barret_mod_pipe.sv
@ -61,6 +61,7 @@ always_comb begin
  o_rdy = (~o_mult_if_0.val || (o_mult_if_0.val && o_mult_if_0.rdy)) && fifo_in_if.rdy;
 end

+
 always_ff @ (posedge i_clk) begin
  if (i_rst) begin
    o_mult_if_0.reset_source();
@ -140,7 +141,7 @@ end

 // Fifo to store inputs (as we need to do final subtraction)
 axi_stream_fifo #(
-  .SIZE     ( 32       ),
+  .SIZE     ( 32      ),
  .DAT_BITS ( DAT_BITS )
 )
 axi_stream_fifo (
--- a/ip_cores/util/src/rtl/resource_share.sv
+++ b/ip_cores/util/src/rtl/resource_share.sv
@ -21,6 +21,9 @@

 module resource_share # (
  parameter NUM_IN = 4,
+  parameter CTL_BITS = 16,
+  parameter DAT_BYTS = 8,
+  parameter DAT_BITS = DAT_BYTS*8,
  parameter OVR_WRT_BIT = 0,
  parameter PIPELINE_IN = 0,
  parameter PIPELINE_OUT = 0,
@ -37,11 +40,11 @@ module resource_share # (

 // Arbitration to the resource
 packet_arb # (
-  .DAT_BITS    ( i_axi[0].DAT_BITS ),
-  .DAT_BYTS    ( i_axi[0].DAT_BYTS ),
-  .CTL_BITS    ( i_axi[0].CTL_BITS ),
-  .NUM_IN      ( NUM_IN       ),
-  .OVR_WRT_BIT ( OVR_WRT_BIT  ),
+  .DAT_BITS    ( DAT_BITS ),
+  .DAT_BYTS    ( DAT_BYTS ),
+  .CTL_BITS    ( CTL_BITS ),
+  .NUM_IN      ( NUM_IN      ),
+  .OVR_WRT_BIT ( OVR_WRT_BIT ),
  .PIPELINE    ( PIPELINE_IN ),
  .PRIORITY_IN ( PRIORITY_IN )
 )
@ -53,7 +56,7 @@ packet_arb_mult (
 );

 // Demuxing
-if_axi_stream #(.DAT_BYTS(i_res.DAT_BYTS), .DAT_BITS(i_res.DAT_BITS), .CTL_BITS(i_res.CTL_BITS)) int_axi [NUM_IN-1:0] (i_res.i_clk);
+if_axi_stream #(.DAT_BYTS(DAT_BYTS), .DAT_BITS(DAT_BITS), .CTL_BITS(CTL_BITS)) int_axi [NUM_IN-1:0] (i_clk);

 genvar gen0;
 logic [NUM_IN-1:0] rdy;
@ -67,10 +70,10 @@ generate
    end 
    
    pipeline_if  #(
-      .DAT_BITS   ( i_res.DAT_BITS ),
-      .DAT_BYTS   ( i_res.DAT_BYTS ),
-      .CTL_BITS   ( i_res.CTL_BITS ),
-      .NUM_STAGES ( PIPELINE_OUT   )
+      .DAT_BITS   ( DAT_BITS ),
+      .DAT_BYTS   ( DAT_BYTS ),
+      .CTL_BITS   ( CTL_BITS ),
+      .NUM_STAGES ( PIPELINE_OUT )
    )
    pipeline_if (
      .i_rst ( i_rst         ),
--- a/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv
+++ b/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv
@ -43,17 +43,17 @@ package bls12_381_pkg;
    fe_t x, y, z;
  } jb_point_t;

-  typedef struct packed {
-    fe_t c1, c0;
-  } fe2_t;
+  typedef fe_t  [1:0] fe2_t;
+  typedef fe2_t [2:0] fe6_t;
+  typedef fe6_t [1:0] fe12_t;

-  fe2_t G2x = '{c0:381'd352701069587466618187139116011060144890029952792775240219908644239793785735715026873347600343865175952761926303160,
-                c1:381'd3059144344244213709971259814753781636986470325476647558659373206291635324768958432433509563104347017837885763365758};
+  fe2_t G2x = {381'd3059144344244213709971259814753781636986470325476647558659373206291635324768958432433509563104347017837885763365758,
+               381'd352701069587466618187139116011060144890029952792775240219908644239793785735715026873347600343865175952761926303160};

-  fe2_t G2y = '{c0:381'd1985150602287291935568054521177171638300868978215655730859378665066344726373823718423869104263333984641494340347905,
-                c1:381'd927553665492332455747201965776037880757740193453592970025027978793976877002675564980949289727957565575433344219582};
+  fe2_t G2y = {381'd927553665492332455747201965776037880757740193453592970025027978793976877002675564980949289727957565575433344219582,
+               381'd1985150602287291935568054521177171638300868978215655730859378665066344726373823718423869104263333984641494340347905};

-  fe2_t FE2_one =  '{c0:381'd1, c1:381'd0};
+  fe2_t FE2_one =  {381'd0, 381'd1};

  jb_point_t g_point = '{x:Gx, y:Gy, z:381'd1};

@ -69,7 +69,12 @@ package bls12_381_pkg;
  typedef enum logic [7:0] {
    NOOP_WAIT       = 8'h0,
    COPY_REG        = 8'h1,
-    FP_FPOINT_MULT  = 8'h26
+
+    SCALAR_INV      = 8'h13,
+
+    POINT_MULT     = 8'h24,
+    FP_FPOINT_MULT = 8'h25,
+    FP2_FPOINT_MULT = 8'h26
  } code_t;

  // Instruction format
@ -78,7 +83,12 @@ package bls12_381_pkg;
    code_t code;
  } inst_t;

-  localparam DATA_RAM_WIDTH = $bits(point_type_t) + DAT_BITS;
+  typedef struct packed {
+    fe_t dat;
+    point_type_t pt;
+  } data_t;
+
+  localparam DATA_RAM_WIDTH = $bits(data_t);
  localparam DATA_RAM_ALIGN_BYTE = 64;
  localparam DATA_RAM_DEPTH = 8;
  localparam DATA_RAM_USR_WIDTH = 4;
@ -90,9 +100,6 @@ package bls12_381_pkg;
  localparam INST_RAM_USR_WIDTH = 4;
  localparam INST_RAM_USR_DEPTH = INST_RAM_DEPTH*INST_RAM_ALIGN_BYTE/INST_RAM_USR_WIDTH;

-
-
-
  function is_zero(jb_point_t p);
    is_zero = (p.x == 0 && p.y == 0 && p.z == 1);
    return is_zero;
@ -145,8 +152,8 @@ package bls12_381_pkg;
   endfunction

   function fe2_t fe2_add(fe2_t a, b);
-     fe2_add.c0 = fe_add(a.c0,b.c0);
-     fe2_add.c1 = fe_add(a.c1,b.c1);
+     fe2_add[0] = fe_add(a[0], b[0]);
+     fe2_add[1] = fe_add(a[1] ,b[1]);
   endfunction

   function fe_t fe_sub(fe_t a, b);
@ -157,8 +164,8 @@ package bls12_381_pkg;
   endfunction

   function fe2_t fe2_sub(fe2_t a, b);
-     fe2_sub.c0 = fe_sub(a.c0, b.c0);
-     fe2_sub.c1 = fe_sub(a.c1, b.c1);
+     fe2_sub[0] = fe_sub(a[0], b[0]);
+     fe2_sub[1] = fe_sub(a[1], b[1]);
   endfunction

   function fe_t fe_mul(fe2_t a, b);
@ -166,8 +173,8 @@ package bls12_381_pkg;
   endfunction

   function fe2_t fe2_mul(fe2_t a, b);
-     fe2_mul.c0 = fe_sub(fe_mul(a.c0, b.c0), fe_mul(a.c1, b.c1));
-     fe2_mul.c1 = fe_add(fe_mul(a.c0, b.c1), fe_mul(a.c1, b.c0));
+     fe2_mul[0] = fe_sub(fe_mul(a[0], b[0]), fe_mul(a[1], b[1]));
+     fe2_mul[1] = fe_add(fe_mul(a[0], b[1]), fe_mul(a[1], b[0]));
   endfunction

      // Function to double point in Jacobian coordinates (for comparison in testbench)
@ -336,11 +343,11 @@ package bls12_381_pkg;

   function fe2_t fe2_inv(fe2_t a);
     fe_t factor, t0, t1;
-     t0 = fe_mul(a.c0, a.c0);
-     t1 = fe_mul(a.c1, a.c1);
+     t0 = fe_mul(a[0], a[0]);
+     t1 = fe_mul(a[1], a[1]);
     factor = fe_inv(fe_add(t0, t1));
-     fe2_inv.c0 = fe_mul(a.c0, factor);
-     fe2_inv.c1 = fe_mul(fe_sub(P, a.c1), factor);
+     fe2_inv[0]= fe_mul(a[0], factor);
+     fe2_inv[1] = fe_mul(fe_sub(P, a[1]), factor);
   endfunction

   function jb_point_t to_affine(jb_point_t p);
@ -369,9 +376,9 @@ package bls12_381_pkg;
   endfunction

   function print_fp2_jb_point(fp2_jb_point_t p);
-     $display("x:(c1:%h, c0:%h)", p.x.c1, p.x.c0);
-     $display("y:(c1:%h, c0:%h)", p.y.c1, p.y.c0);
-     $display("z:(c1:%h, c0:%h)", p.z.c1, p.z.c0);
+     $display("x:(c1:%h, c0:%h)", p.x[1], p.x[0]);
+     $display("y:(c1:%h, c0:%h)", p.y[1], p.y[0]);
+     $display("z:(c1:%h, c0:%h)", p.z[1], p.z[0]);
     return;
   endfunction