updates for Fp^2 point logic

This commit is contained in:
bsdevlin 2019-06-11 22:55:11 +08:00
parent dcdbc97957
commit 4cef72abda
14 changed files with 1655 additions and 250 deletions

View File

@ -0,0 +1,337 @@
/*
This performs Fp^2 point addition.
Is a wrapper around the Fp point addition module, but with logic
to handle the multiplications / subtractions / additions
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Fp^2 point addition.
//
// Wraps the generic ec_point_add module (instantiated with FE2_TYPE field
// elements) and converts each Fp^2 add / subtract / multiply request it issues
// into a sequence of Fp operations on the external Fp adder, subtractor and
// multiplier:
//   - Fp^2 add / sub : two Fp operations, one per element.
//   - Fp^2 multiply  : four Fp multiplies, then one Fp subtract and one Fp add,
//                      (a + bi)(a' + b'i) = (aa' - bb') + (ab' + a'b)i.
// Because both the pass-through path and the multiply path need the Fp adder
// and subtractor, each of those is shared through a resource_share instance.
//
// Parameters:
//   FP2_TYPE - point type whose coordinates are FE2_TYPE
//   FE_TYPE  - Fp field element type
//   FE2_TYPE - Fp^2 field element type (two FE_TYPE elements)
module ec_fp2_point_add
#(
  parameter type FP2_TYPE, // Should have FE2_TYPE elements
  parameter type FE_TYPE,
  parameter type FE2_TYPE
)(
  input i_clk, i_rst,
  // Input points
  input FP2_TYPE i_p1,
  input FP2_TYPE i_p2,
  input logic i_val,
  output logic o_rdy,
  // Output point
  output FP2_TYPE o_p,
  input logic i_rdy,
  output logic o_val,
  output logic o_err,
  // Interface to FE_TYPE multiplier (mod P)
  if_axi_stream.source o_mul_if,
  if_axi_stream.sink i_mul_if,
  // Interface to FE_TYPE adder (mod P)
  if_axi_stream.source o_add_if,
  if_axi_stream.sink i_add_if,
  // Interface to FE_TYPE subtractor (mod P)
  if_axi_stream.source o_sub_if,
  if_axi_stream.sink i_sub_if
);

// Naming: *_fe2_i carries requests from the wrapped ec_point_add (two FE2
// operands), *_fe2_o carries FE2 results back to it.
// *_fe_o[n] / *_fe_i[n] are Fp requests / results on port n of the shared
// adder or subtractor: port 0 is the element-wise pass-through path, port 1 is
// used by the Fp^2 multiplier logic.
if_axi_stream #(.DAT_BITS(2*$bits(FE2_TYPE)), .CTL_BITS(8)) mul_if_fe2_i(i_clk);
if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(8)) mul_if_fe2_o(i_clk);

// First ctl bit above the 8 ctl bits of the FE2 streams. Used to tag which
// element (add/sub) or which partial product (multiply, 2 bits) a Fp request
// belongs to so the result can be steered when it returns.
localparam ADD_CTL_BIT = 8;

if_axi_stream #(.DAT_BITS(2*$bits(FE2_TYPE)), .CTL_BITS(8)) add_if_fe2_i(i_clk);
if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(8)) add_if_fe2_o(i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16)) add_if_fe_i [2] (i_clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) add_if_fe_o [2] (i_clk);

if_axi_stream #(.DAT_BITS(2*$bits(FE2_TYPE)), .CTL_BITS(8)) sub_if_fe2_i(i_clk);
if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(8)) sub_if_fe2_o(i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16)) sub_if_fe_i [2] (i_clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) sub_if_fe_o [2] (i_clk);

// ---------------------------------------------------------------------------
// Point additions are simple additions on each of the Fp elements
// ---------------------------------------------------------------------------
enum {ADD0, ADD1} add_state;

always_comb begin
  // The FE2 request is only consumed once its second element has been issued
  add_if_fe2_i.rdy = add_state == ADD1 && (~add_if_fe_o[0].val || (add_if_fe_o[0].val && add_if_fe_o[0].rdy));
  add_if_fe_i[0].rdy = ~add_if_fe2_o.val || (add_if_fe2_o.val && add_if_fe2_o.rdy);
end

always_ff @ (posedge i_clk) begin
  if (i_rst) begin
    add_if_fe2_o.reset_source();
    add_state <= ADD0;
    add_if_fe_o[0].reset_source();
  end else begin
    // Clear valids once the beat has been accepted (may be re-set below)
    if (add_if_fe_o[0].val && add_if_fe_o[0].rdy) add_if_fe_o[0].val <= 0;
    if (add_if_fe2_o.val && add_if_fe2_o.rdy) add_if_fe2_o.val <= 0;

    // One process to parse inputs and send them to the adder
    case(add_state)
      // Element 0 of each operand, tagged with ctl[ADD_CTL_BIT] = 0
      ADD0: begin
        if (~add_if_fe_o[0].val || (add_if_fe_o[0].val && add_if_fe_o[0].rdy)) begin
          add_if_fe_o[0].copy_if({add_if_fe2_i.dat[0 +: $bits(FE_TYPE)],
                                  add_if_fe2_i.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)]},
                                 add_if_fe2_i.val, 1, 1, add_if_fe2_i.err, add_if_fe2_i.mod, add_if_fe2_i.ctl);
          add_if_fe_o[0].ctl[ADD_CTL_BIT] <= 0;
          if (add_if_fe2_i.val) add_state <= ADD1;
        end
      end
      // Element 1 of each operand, tagged with ctl[ADD_CTL_BIT] = 1
      ADD1: begin
        if (~add_if_fe_o[0].val || (add_if_fe_o[0].val && add_if_fe_o[0].rdy)) begin
          add_if_fe_o[0].copy_if({add_if_fe2_i.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)],
                                  add_if_fe2_i.dat[$bits(FE2_TYPE)+$bits(FE_TYPE) +: $bits(FE_TYPE)]},
                                 add_if_fe2_i.val, 1, 1, add_if_fe2_i.err, add_if_fe2_i.mod, add_if_fe2_i.ctl);
          add_if_fe_o[0].ctl[ADD_CTL_BIT] <= 1;
          if (add_if_fe2_i.val) add_state <= ADD0;
        end
      end
    endcase

    // One process to assign outputs: the FE2 result becomes valid only when
    // the result tagged as element 1 arrives.
    if (~add_if_fe2_o.val || (add_if_fe2_o.val && add_if_fe2_o.rdy)) begin
      add_if_fe2_o.ctl <= add_if_fe_i[0].ctl;
      if (add_if_fe_i[0].ctl[ADD_CTL_BIT] == 0) begin
        if (add_if_fe_i[0].val)
          add_if_fe2_o.dat[0 +: $bits(FE_TYPE)] <= add_if_fe_i[0].dat;
      end else begin
        add_if_fe2_o.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= add_if_fe_i[0].dat;
        add_if_fe2_o.val <= add_if_fe_i[0].val;
      end
    end
  end
end

// ---------------------------------------------------------------------------
// Point subtractions are simple subtractions on each of the Fp elements
// ---------------------------------------------------------------------------
enum {SUB0, SUB1} sub_state;

always_comb begin
  // Compare against this FSM's own enum (was ADD1, which only matched because
  // both enums encode their second state as 1).
  sub_if_fe2_i.rdy = sub_state == SUB1 && (~sub_if_fe_o[0].val || (sub_if_fe_o[0].val && sub_if_fe_o[0].rdy));
  sub_if_fe_i[0].rdy = ~sub_if_fe2_o.val || (sub_if_fe2_o.val && sub_if_fe2_o.rdy);
end

always_ff @ (posedge i_clk) begin
  if (i_rst) begin
    sub_if_fe2_o.reset_source();
    sub_state <= SUB0;
    sub_if_fe_o[0].reset_source();
  end else begin
    if (sub_if_fe_o[0].val && sub_if_fe_o[0].rdy) sub_if_fe_o[0].val <= 0;
    if (sub_if_fe2_o.val && sub_if_fe2_o.rdy) sub_if_fe2_o.val <= 0;

    // One process to parse inputs and send them to the subtractor.
    // Subtraction is not commutative, so operand placement matters: the first
    // FE2 operand (lower half of the FE2 request) must land in the lower half
    // of the Fp request. This matches the layout the multiplier logic below
    // uses for aa' - bb' (minuend in dat[0 +: $bits(FE_TYPE)]).
    // NOTE(review): assumes the Fp subtractor computes low half - high half;
    // confirm against the subtractor implementation.
    case(sub_state)
      SUB0: begin
        if (~sub_if_fe_o[0].val || (sub_if_fe_o[0].val && sub_if_fe_o[0].rdy)) begin
          sub_if_fe_o[0].copy_if({sub_if_fe2_i.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)],
                                  sub_if_fe2_i.dat[0 +: $bits(FE_TYPE)]},
                                 sub_if_fe2_i.val, 1, 1, sub_if_fe2_i.err, sub_if_fe2_i.mod, sub_if_fe2_i.ctl);
          sub_if_fe_o[0].ctl[ADD_CTL_BIT] <= 0;
          if (sub_if_fe2_i.val) sub_state <= SUB1;
        end
      end
      SUB1: begin
        if (~sub_if_fe_o[0].val || (sub_if_fe_o[0].val && sub_if_fe_o[0].rdy)) begin
          sub_if_fe_o[0].copy_if({sub_if_fe2_i.dat[$bits(FE2_TYPE) + $bits(FE_TYPE) +: $bits(FE_TYPE)],
                                  sub_if_fe2_i.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)]},
                                 sub_if_fe2_i.val, 1, 1, sub_if_fe2_i.err, sub_if_fe2_i.mod, sub_if_fe2_i.ctl);
          sub_if_fe_o[0].ctl[ADD_CTL_BIT] <= 1;
          if (sub_if_fe2_i.val) sub_state <= SUB0;
        end
      end
    endcase

    // One process to assign outputs
    if (~sub_if_fe2_o.val || (sub_if_fe2_o.val && sub_if_fe2_o.rdy)) begin
      sub_if_fe2_o.ctl <= sub_if_fe_i[0].ctl;
      if (sub_if_fe_i[0].ctl[ADD_CTL_BIT] == 0) begin
        if (sub_if_fe_i[0].val)
          sub_if_fe2_o.dat[0 +: $bits(FE_TYPE)] <= sub_if_fe_i[0].dat;
      end else begin
        sub_if_fe2_o.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= sub_if_fe_i[0].dat;
        sub_if_fe2_o.val <= sub_if_fe_i[0].val;
      end
    end
  end
end

// ---------------------------------------------------------------------------
// Multiplications are calculated as (a + bi)x(a' +b'i) = (aa' - bb') + (ab' + a'b)i
// First 4 multiplications are issued, then 1 add and 1 subtraction (so we need arbitrator)
// ---------------------------------------------------------------------------
enum {MUL0, MUL1, MUL2, MUL3} mul_state;
// add_sub_val[0]: imaginary part (adder result) latched in mul_if_fe2_o.dat
// add_sub_val[1]: real part (subtractor result) latched in mul_if_fe2_o.dat
logic [1:0] add_sub_val;

always_comb begin
  mul_if_fe2_i.rdy = mul_state == MUL3 && (~o_mul_if.val || (o_mul_if.val && o_mul_if.rdy));

  // Back-pressure the multiplier according to where the *returning* product
  // is headed (its ctl tag), not according to the issue-side state machine:
  // products 0/1 feed the subtractor request, products 2/3 the adder request.
  i_mul_if.rdy = (i_mul_if.ctl[ADD_CTL_BIT +: 2] == 0 || i_mul_if.ctl[ADD_CTL_BIT +: 2] == 1) ?
                 (~sub_if_fe_o[1].val || (sub_if_fe_o[1].val && sub_if_fe_o[1].rdy)) :
                 (~add_if_fe_o[1].val || (add_if_fe_o[1].val && add_if_fe_o[1].rdy));

  // The FE2 product is valid once both halves have been latched
  mul_if_fe2_o.val = &add_sub_val;

  // Accept a half only when its slot is empty or the full result is leaving
  // this cycle. These conditions must mirror the capture conditions in the
  // always_ff below, otherwise an accepted beat would be dropped.
  sub_if_fe_i[1].rdy = ~add_sub_val[1] || (mul_if_fe2_o.val && mul_if_fe2_o.rdy);
  add_if_fe_i[1].rdy = ~add_sub_val[0] || (mul_if_fe2_o.val && mul_if_fe2_o.rdy);
end

always_ff @ (posedge i_clk) begin
  if (i_rst) begin
    add_sub_val <= 0;
    // mul_if_fe2_o.val is driven combinationally above, so reset the
    // remaining source signals individually
    mul_if_fe2_o.sop <= 0;
    mul_if_fe2_o.eop <= 0;
    mul_if_fe2_o.ctl <= 0;
    mul_if_fe2_o.dat <= 0;
    mul_if_fe2_o.mod <= 0;
    mul_state <= MUL0;
    o_mul_if.reset_source();
    sub_if_fe_o[1].copy_if(0, 0, 1, 1, 0, 0, 0);
    add_if_fe_o[1].copy_if(0, 0, 1, 1, 0, 0, 0);
  end else begin
    // Result consumed: free both slots (either may be refilled below)
    if (mul_if_fe2_o.val && mul_if_fe2_o.rdy) begin
      add_sub_val <= 0;
    end
    if (o_mul_if.val && o_mul_if.rdy) o_mul_if.val <= 0;
    if (sub_if_fe_o[1].val && sub_if_fe_o[1].rdy) sub_if_fe_o[1].val <= 0;
    if (add_if_fe_o[1].val && add_if_fe_o[1].rdy) add_if_fe_o[1].val <= 0;

    // One process to parse inputs and send them to the multiplier.
    // Each partial product is tagged in ctl[ADD_CTL_BIT +: 2].
    if (~o_mul_if.val || (o_mul_if.val && o_mul_if.rdy)) begin
      case (mul_state)
        // a * a'
        MUL0: begin
          o_mul_if.copy_if({mul_if_fe2_i.dat[0 +: $bits(FE_TYPE)],
                            mul_if_fe2_i.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)]},
                           mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl);
          o_mul_if.ctl[ADD_CTL_BIT +: 2] <= 0;
          if (mul_if_fe2_i.val) mul_state <= MUL1;
        end
        // b * b'
        MUL1: begin
          o_mul_if.copy_if({mul_if_fe2_i.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)],
                            mul_if_fe2_i.dat[$bits(FE2_TYPE) + $bits(FE_TYPE) +: $bits(FE_TYPE)]},
                           mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl);
          o_mul_if.ctl[ADD_CTL_BIT +: 2] <= 1;
          if (mul_if_fe2_i.val) mul_state <= MUL2;
        end
        // a * b'
        MUL2: begin
          o_mul_if.copy_if({mul_if_fe2_i.dat[0 +: $bits(FE_TYPE)],
                            mul_if_fe2_i.dat[$bits(FE2_TYPE) + $bits(FE_TYPE) +: $bits(FE_TYPE)]},
                           mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl);
          o_mul_if.ctl[ADD_CTL_BIT +: 2] <= 2;
          if (mul_if_fe2_i.val) mul_state <= MUL3;
        end
        // b * a'
        MUL3: begin
          o_mul_if.copy_if({mul_if_fe2_i.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)],
                            mul_if_fe2_i.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)]},
                           mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl);
          o_mul_if.ctl[ADD_CTL_BIT +: 2] <= 3;
          if (mul_if_fe2_i.val) mul_state <= MUL0;
        end
      endcase
    end

    // Process multiplications and do subtraction: product 0 (aa') is stored
    // in the lower half, product 1 (bb') in the upper half and triggers the
    // request.
    if (~sub_if_fe_o[1].val || (sub_if_fe_o[1].val && sub_if_fe_o[1].rdy)) begin
      if (i_mul_if.ctl[ADD_CTL_BIT +: 2] == 0) begin
        if (i_mul_if.val) sub_if_fe_o[1].dat[0 +: $bits(FE_TYPE)] <= i_mul_if.dat;
      end
      if (i_mul_if.ctl[ADD_CTL_BIT +: 2] == 1) begin
        sub_if_fe_o[1].val <= i_mul_if.val;
        sub_if_fe_o[1].dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= i_mul_if.dat;
      end
      sub_if_fe_o[1].ctl <= i_mul_if.ctl;
    end

    // Process multiplications and do addition: product 2 (ab') is stored in
    // the lower half, product 3 (a'b) in the upper half and triggers the
    // request.
    if (~add_if_fe_o[1].val || (add_if_fe_o[1].val && add_if_fe_o[1].rdy)) begin
      if (i_mul_if.ctl[ADD_CTL_BIT +: 2] == 2) begin
        if (i_mul_if.val) add_if_fe_o[1].dat[0 +: $bits(FE_TYPE)] <= i_mul_if.dat;
      end
      if (i_mul_if.ctl[ADD_CTL_BIT +: 2] == 3) begin
        add_if_fe_o[1].val <= i_mul_if.val;
        add_if_fe_o[1].dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= i_mul_if.dat;
      end
      add_if_fe_o[1].ctl <= i_mul_if.ctl;
    end

    // One process to assign output. Each half is captured exactly when its
    // ready (see always_comb above) is asserted, so these assignments also
    // override the add_sub_val clear above when a new half arrives in the
    // same cycle the previous result is consumed.
    if (~add_sub_val[0] || (mul_if_fe2_o.val && mul_if_fe2_o.rdy)) begin
      mul_if_fe2_o.ctl <= add_if_fe_i[1].ctl;
      mul_if_fe2_o.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= add_if_fe_i[1].dat;
      add_sub_val[0] <= add_if_fe_i[1].val;
    end
    if (~add_sub_val[1] || (mul_if_fe2_o.val && mul_if_fe2_o.rdy)) begin
      mul_if_fe2_o.dat[0 +: $bits(FE_TYPE)] <= sub_if_fe_i[1].dat;
      add_sub_val[1] <= sub_if_fe_i[1].val;
    end
  end
end

// Generic point adder operating on FE2_TYPE elements; its arithmetic requests
// are serviced by the logic above.
ec_point_add #(
  .FP_TYPE ( FP2_TYPE ),
  .FE_TYPE ( FE2_TYPE )
)
ec_point_add (
  .i_clk ( i_clk ),
  .i_rst ( i_rst ),
  // Input points
  .i_p1 ( i_p1 ),
  .i_p2 ( i_p2 ),
  .i_val ( i_val ),
  .o_rdy ( o_rdy ),
  .o_p ( o_p ),
  .o_err ( o_err ),
  .i_rdy ( i_rdy ),
  .o_val ( o_val ) ,
  .o_mul_if ( mul_if_fe2_i ),
  .i_mul_if ( mul_if_fe2_o ),
  .o_add_if ( add_if_fe2_i ),
  .i_add_if ( add_if_fe2_o ),
  .o_sub_if ( sub_if_fe2_i ),
  .i_sub_if ( sub_if_fe2_o )
);

// Share the external Fp subtractor between the pass-through path (port 0) and
// the multiplier logic (port 1).
// NOTE(review): OVR_WRT_BIT = 10 is the first ctl bit above the two tag bits
// used here (8 and 9); presumably resource_share uses it to mark the
// requesting port - confirm against resource_share.
resource_share # (
  .NUM_IN ( 2 ),
  .OVR_WRT_BIT ( 10 ),
  .PIPELINE_IN ( 0 ),
  .PIPELINE_OUT ( 0 )
)
resource_share_sub (
  .i_clk ( i_clk ),
  .i_rst ( i_rst ),
  .i_axi ( sub_if_fe_o ),
  .o_res ( o_sub_if ),
  .i_res ( i_sub_if ),
  .o_axi ( sub_if_fe_i )
);

// Share the external Fp adder in the same way
resource_share # (
  .NUM_IN ( 2 ),
  .OVR_WRT_BIT ( 10 ),
  .PIPELINE_IN ( 0 ),
  .PIPELINE_OUT ( 0 )
)
resource_share_add (
  .i_clk ( i_clk ),
  .i_rst ( i_rst ),
  .i_axi ( add_if_fe_o ),
  .o_res ( o_add_if ),
  .i_res ( i_add_if ),
  .o_axi ( add_if_fe_i )
);

endmodule

View File

@ -0,0 +1,338 @@
/*
This performs Fp^2 point doubling.
Is a wrapper around the Fp point doubling module, but with logic
to handle the multiplications / subtractions / additions
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Fp^2 point doubling.
//
// Wraps the generic ec_point_dbl module (instantiated with FE2_TYPE field
// elements) and converts each Fp^2 add / subtract / multiply request it issues
// into a sequence of Fp operations on the external Fp adder, subtractor and
// multiplier:
//   - Fp^2 add / sub : two Fp operations, one per element.
//   - Fp^2 multiply  : four Fp multiplies, then one Fp subtract and one Fp add,
//                      (a + bi)(a' + b'i) = (aa' - bb') + (ab' + a'b)i.
// Because both the pass-through path and the multiply path need the Fp adder
// and subtractor, each of those is shared through a resource_share instance.
//
// Parameters:
//   FP2_TYPE - point type whose coordinates are FE2_TYPE
//   FE_TYPE  - Fp field element type
//   FE2_TYPE - Fp^2 field element type (two FE_TYPE elements)
module ec_fp2_point_dbl
#(
  parameter type FP2_TYPE, // Should have FE2_TYPE elements
  parameter type FE_TYPE,
  parameter type FE2_TYPE
)(
  input i_clk, i_rst,
  // Input points
  input FP2_TYPE i_p,
  input logic i_val,
  output logic o_rdy,
  // Output point
  output FP2_TYPE o_p,
  input logic i_rdy,
  output logic o_val,
  output logic o_err,
  // Interface to FE_TYPE multiplier (mod P)
  if_axi_stream.source o_mul_if,
  if_axi_stream.sink i_mul_if,
  // Interface to FE_TYPE adder (mod P)
  if_axi_stream.source o_add_if,
  if_axi_stream.sink i_add_if,
  // Interface to FE_TYPE subtractor (mod P)
  if_axi_stream.source o_sub_if,
  if_axi_stream.sink i_sub_if
);

// Naming: *_fe2_i carries requests from the wrapped ec_point_dbl (two FE2
// operands), *_fe2_o carries FE2 results back to it.
// *_fe_o[n] / *_fe_i[n] are Fp requests / results on port n of the shared
// adder or subtractor: port 0 is the element-wise pass-through path, port 1 is
// used by the Fp^2 multiplier logic.
if_axi_stream #(.DAT_BITS(2*$bits(FE2_TYPE)), .CTL_BITS(8)) mul_if_fe2_i(i_clk);
if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(8)) mul_if_fe2_o(i_clk);

// First ctl bit above the 8 ctl bits of the FE2 streams. Used to tag which
// element (add/sub) or which partial product (multiply, 2 bits) a Fp request
// belongs to so the result can be steered when it returns.
localparam ADD_CTL_BIT = 8;

if_axi_stream #(.DAT_BITS(2*$bits(FE2_TYPE)), .CTL_BITS(8)) add_if_fe2_i(i_clk);
if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(8)) add_if_fe2_o(i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16)) add_if_fe_i [2] (i_clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) add_if_fe_o [2] (i_clk);

if_axi_stream #(.DAT_BITS(2*$bits(FE2_TYPE)), .CTL_BITS(8)) sub_if_fe2_i(i_clk);
if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(8)) sub_if_fe2_o(i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16)) sub_if_fe_i [2] (i_clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) sub_if_fe_o [2] (i_clk);

// ---------------------------------------------------------------------------
// Point additions are simple additions on each of the Fp elements
// ---------------------------------------------------------------------------
enum {ADD0, ADD1} add_state;

always_comb begin
  // The FE2 request is only consumed once its second element has been issued
  add_if_fe2_i.rdy = add_state == ADD1 && (~add_if_fe_o[0].val || (add_if_fe_o[0].val && add_if_fe_o[0].rdy));
  add_if_fe_i[0].rdy = ~add_if_fe2_o.val || (add_if_fe2_o.val && add_if_fe2_o.rdy);
end

always_ff @ (posedge i_clk) begin
  if (i_rst) begin
    add_if_fe2_o.reset_source();
    add_state <= ADD0;
    add_if_fe_o[0].reset_source();
  end else begin
    // Clear valids once the beat has been accepted (may be re-set below)
    if (add_if_fe_o[0].val && add_if_fe_o[0].rdy) add_if_fe_o[0].val <= 0;
    if (add_if_fe2_o.val && add_if_fe2_o.rdy) add_if_fe2_o.val <= 0;

    // One process to parse inputs and send them to the adder
    case(add_state)
      // Element 0 of each operand, tagged with ctl[ADD_CTL_BIT] = 0
      ADD0: begin
        if (~add_if_fe_o[0].val || (add_if_fe_o[0].val && add_if_fe_o[0].rdy)) begin
          add_if_fe_o[0].copy_if({add_if_fe2_i.dat[0 +: $bits(FE_TYPE)],
                                  add_if_fe2_i.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)]},
                                 add_if_fe2_i.val, 1, 1, add_if_fe2_i.err, add_if_fe2_i.mod, add_if_fe2_i.ctl);
          add_if_fe_o[0].ctl[ADD_CTL_BIT] <= 0;
          if (add_if_fe2_i.val) add_state <= ADD1;
        end
      end
      // Element 1 of each operand, tagged with ctl[ADD_CTL_BIT] = 1
      ADD1: begin
        if (~add_if_fe_o[0].val || (add_if_fe_o[0].val && add_if_fe_o[0].rdy)) begin
          add_if_fe_o[0].copy_if({add_if_fe2_i.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)],
                                  add_if_fe2_i.dat[$bits(FE2_TYPE)+$bits(FE_TYPE) +: $bits(FE_TYPE)]},
                                 add_if_fe2_i.val, 1, 1, add_if_fe2_i.err, add_if_fe2_i.mod, add_if_fe2_i.ctl);
          add_if_fe_o[0].ctl[ADD_CTL_BIT] <= 1;
          if (add_if_fe2_i.val) add_state <= ADD0;
        end
      end
    endcase

    // One process to assign outputs: the FE2 result becomes valid only when
    // the result tagged as element 1 arrives.
    if (~add_if_fe2_o.val || (add_if_fe2_o.val && add_if_fe2_o.rdy)) begin
      add_if_fe2_o.ctl <= add_if_fe_i[0].ctl;
      if (add_if_fe_i[0].ctl[ADD_CTL_BIT] == 0) begin
        if (add_if_fe_i[0].val)
          add_if_fe2_o.dat[0 +: $bits(FE_TYPE)] <= add_if_fe_i[0].dat;
      end else begin
        add_if_fe2_o.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= add_if_fe_i[0].dat;
        add_if_fe2_o.val <= add_if_fe_i[0].val;
      end
    end
  end
end

// ---------------------------------------------------------------------------
// Point subtractions are simple subtractions on each of the Fp elements
// ---------------------------------------------------------------------------
enum {SUB0, SUB1} sub_state;

always_comb begin
  // Compare against this FSM's own enum (was ADD1, which only matched because
  // both enums encode their second state as 1).
  sub_if_fe2_i.rdy = sub_state == SUB1 && (~sub_if_fe_o[0].val || (sub_if_fe_o[0].val && sub_if_fe_o[0].rdy));
  sub_if_fe_i[0].rdy = ~sub_if_fe2_o.val || (sub_if_fe2_o.val && sub_if_fe2_o.rdy);
end

always_ff @ (posedge i_clk) begin
  if (i_rst) begin
    sub_if_fe2_o.reset_source();
    sub_state <= SUB0;
    sub_if_fe_o[0].reset_source();
  end else begin
    if (sub_if_fe_o[0].val && sub_if_fe_o[0].rdy) sub_if_fe_o[0].val <= 0;
    if (sub_if_fe2_o.val && sub_if_fe2_o.rdy) sub_if_fe2_o.val <= 0;

    // One process to parse inputs and send them to the subtractor.
    // Subtraction is not commutative, so operand placement matters: the first
    // FE2 operand (lower half of the FE2 request) must land in the lower half
    // of the Fp request. This matches the layout the multiplier logic below
    // uses for aa' - bb' (minuend in dat[0 +: $bits(FE_TYPE)]).
    // NOTE(review): assumes the Fp subtractor computes low half - high half;
    // confirm against the subtractor implementation.
    case(sub_state)
      SUB0: begin
        if (~sub_if_fe_o[0].val || (sub_if_fe_o[0].val && sub_if_fe_o[0].rdy)) begin
          sub_if_fe_o[0].copy_if({sub_if_fe2_i.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)],
                                  sub_if_fe2_i.dat[0 +: $bits(FE_TYPE)]},
                                 sub_if_fe2_i.val, 1, 1, sub_if_fe2_i.err, sub_if_fe2_i.mod, sub_if_fe2_i.ctl);
          sub_if_fe_o[0].ctl[ADD_CTL_BIT] <= 0;
          if (sub_if_fe2_i.val) sub_state <= SUB1;
        end
      end
      SUB1: begin
        if (~sub_if_fe_o[0].val || (sub_if_fe_o[0].val && sub_if_fe_o[0].rdy)) begin
          sub_if_fe_o[0].copy_if({sub_if_fe2_i.dat[$bits(FE2_TYPE) + $bits(FE_TYPE) +: $bits(FE_TYPE)],
                                  sub_if_fe2_i.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)]},
                                 sub_if_fe2_i.val, 1, 1, sub_if_fe2_i.err, sub_if_fe2_i.mod, sub_if_fe2_i.ctl);
          sub_if_fe_o[0].ctl[ADD_CTL_BIT] <= 1;
          if (sub_if_fe2_i.val) sub_state <= SUB0;
        end
      end
    endcase

    // One process to assign outputs
    if (~sub_if_fe2_o.val || (sub_if_fe2_o.val && sub_if_fe2_o.rdy)) begin
      sub_if_fe2_o.ctl <= sub_if_fe_i[0].ctl;
      if (sub_if_fe_i[0].ctl[ADD_CTL_BIT] == 0) begin
        if (sub_if_fe_i[0].val)
          sub_if_fe2_o.dat[0 +: $bits(FE_TYPE)] <= sub_if_fe_i[0].dat;
      end else begin
        sub_if_fe2_o.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= sub_if_fe_i[0].dat;
        sub_if_fe2_o.val <= sub_if_fe_i[0].val;
      end
    end
  end
end

// ---------------------------------------------------------------------------
// Multiplications are calculated as (a + bi)x(a' +b'i) = (aa' - bb') + (ab' + a'b)i
// First 4 multiplications are issued, then 1 add and 1 subtraction (so we need arbitrator)
// ---------------------------------------------------------------------------
enum {MUL0, MUL1, MUL2, MUL3} mul_state;
// add_sub_val[0]: imaginary part (adder result) latched in mul_if_fe2_o.dat
// add_sub_val[1]: real part (subtractor result) latched in mul_if_fe2_o.dat
logic [1:0] add_sub_val;

always_comb begin
  mul_if_fe2_i.rdy = mul_state == MUL3 && (~o_mul_if.val || (o_mul_if.val && o_mul_if.rdy));

  // Back-pressure the multiplier according to where the returning product is
  // headed (its ctl tag): products 0/1 feed the subtractor request, products
  // 2/3 the adder request.
  i_mul_if.rdy = (i_mul_if.ctl[ADD_CTL_BIT +: 2] == 0 || i_mul_if.ctl[ADD_CTL_BIT +: 2] == 1) ?
                 (~sub_if_fe_o[1].val || (sub_if_fe_o[1].val && sub_if_fe_o[1].rdy)) :
                 (~add_if_fe_o[1].val || (add_if_fe_o[1].val && add_if_fe_o[1].rdy));

  // The FE2 product is valid once both halves have been latched
  mul_if_fe2_o.val = &add_sub_val;

  // Accept a half only when its slot is empty or the full result is leaving
  // this cycle; mirrors the capture conditions in the always_ff below.
  sub_if_fe_i[1].rdy = ~add_sub_val[1] || (mul_if_fe2_o.val && mul_if_fe2_o.rdy);
  add_if_fe_i[1].rdy = ~add_sub_val[0] || (mul_if_fe2_o.val && mul_if_fe2_o.rdy);
end

always_ff @ (posedge i_clk) begin
  if (i_rst) begin
    add_sub_val <= 0;
    // mul_if_fe2_o.val is driven combinationally above, so reset the
    // remaining source signals individually
    mul_if_fe2_o.sop <= 0;
    mul_if_fe2_o.eop <= 0;
    mul_if_fe2_o.ctl <= 0;
    mul_if_fe2_o.dat <= 0;
    mul_if_fe2_o.mod <= 0;
    mul_state <= MUL0;
    o_mul_if.reset_source();
    sub_if_fe_o[1].copy_if(0, 0, 1, 1, 0, 0, 0);
    add_if_fe_o[1].copy_if(0, 0, 1, 1, 0, 0, 0);
  end else begin
    // Result consumed: free both slots (either may be refilled below)
    if (mul_if_fe2_o.val && mul_if_fe2_o.rdy) begin
      add_sub_val <= 0;
    end
    if (o_mul_if.val && o_mul_if.rdy) o_mul_if.val <= 0;
    if (sub_if_fe_o[1].val && sub_if_fe_o[1].rdy) sub_if_fe_o[1].val <= 0;
    if (add_if_fe_o[1].val && add_if_fe_o[1].rdy) add_if_fe_o[1].val <= 0;

    // One process to parse inputs and send them to the multiplier.
    // Each partial product is tagged in ctl[ADD_CTL_BIT +: 2].
    if (~o_mul_if.val || (o_mul_if.val && o_mul_if.rdy)) begin
      case (mul_state)
        // a * a'
        MUL0: begin
          o_mul_if.copy_if({mul_if_fe2_i.dat[0 +: $bits(FE_TYPE)],
                            mul_if_fe2_i.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)]},
                           mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl);
          o_mul_if.ctl[ADD_CTL_BIT +: 2] <= 0;
          if (mul_if_fe2_i.val) mul_state <= MUL1;
        end
        // b * b'
        MUL1: begin
          o_mul_if.copy_if({mul_if_fe2_i.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)],
                            mul_if_fe2_i.dat[$bits(FE2_TYPE) + $bits(FE_TYPE) +: $bits(FE_TYPE)]},
                           mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl);
          o_mul_if.ctl[ADD_CTL_BIT +: 2] <= 1;
          if (mul_if_fe2_i.val) mul_state <= MUL2;
        end
        // a * b'
        MUL2: begin
          o_mul_if.copy_if({mul_if_fe2_i.dat[0 +: $bits(FE_TYPE)],
                            mul_if_fe2_i.dat[$bits(FE2_TYPE) + $bits(FE_TYPE) +: $bits(FE_TYPE)]},
                           mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl);
          o_mul_if.ctl[ADD_CTL_BIT +: 2] <= 2;
          if (mul_if_fe2_i.val) mul_state <= MUL3;
        end
        // b * a'
        MUL3: begin
          o_mul_if.copy_if({mul_if_fe2_i.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)],
                            mul_if_fe2_i.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)]},
                           mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl);
          o_mul_if.ctl[ADD_CTL_BIT +: 2] <= 3;
          if (mul_if_fe2_i.val) mul_state <= MUL0;
        end
      endcase
    end

    // Process multiplications and do subtraction: product 0 (aa') is stored
    // in the lower half, product 1 (bb') in the upper half and triggers the
    // request.
    if (~sub_if_fe_o[1].val || (sub_if_fe_o[1].val && sub_if_fe_o[1].rdy)) begin
      if (i_mul_if.ctl[ADD_CTL_BIT +: 2] == 0) begin
        if (i_mul_if.val) sub_if_fe_o[1].dat[0 +: $bits(FE_TYPE)] <= i_mul_if.dat;
      end
      if (i_mul_if.ctl[ADD_CTL_BIT +: 2] == 1) begin
        sub_if_fe_o[1].val <= i_mul_if.val;
        sub_if_fe_o[1].dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= i_mul_if.dat;
      end
      sub_if_fe_o[1].ctl <= i_mul_if.ctl;
    end

    // Process multiplications and do addition: product 2 (ab') is stored in
    // the lower half, product 3 (a'b) in the upper half and triggers the
    // request.
    if (~add_if_fe_o[1].val || (add_if_fe_o[1].val && add_if_fe_o[1].rdy)) begin
      if (i_mul_if.ctl[ADD_CTL_BIT +: 2] == 2) begin
        if (i_mul_if.val) add_if_fe_o[1].dat[0 +: $bits(FE_TYPE)] <= i_mul_if.dat;
      end
      if (i_mul_if.ctl[ADD_CTL_BIT +: 2] == 3) begin
        add_if_fe_o[1].val <= i_mul_if.val;
        add_if_fe_o[1].dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= i_mul_if.dat;
      end
      add_if_fe_o[1].ctl <= i_mul_if.ctl;
    end

    // One process to assign output. Each half is captured exactly when its
    // ready (see always_comb above) is asserted, so these assignments also
    // override the add_sub_val clear above when a new half arrives in the
    // same cycle the previous result is consumed.
    if (~add_sub_val[0] || (mul_if_fe2_o.val && mul_if_fe2_o.rdy)) begin
      mul_if_fe2_o.ctl <= add_if_fe_i[1].ctl;
      mul_if_fe2_o.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= add_if_fe_i[1].dat;
      add_sub_val[0] <= add_if_fe_i[1].val;
    end
    if (~add_sub_val[1] || (mul_if_fe2_o.val && mul_if_fe2_o.rdy)) begin
      mul_if_fe2_o.dat[0 +: $bits(FE_TYPE)] <= sub_if_fe_i[1].dat;
      add_sub_val[1] <= sub_if_fe_i[1].val;
    end
  end
end

// Generic point doubler operating on FE2_TYPE elements; its arithmetic
// requests are serviced by the logic above.
ec_point_dbl #(
  .FP_TYPE ( FP2_TYPE ),
  .FE_TYPE ( FE2_TYPE )
)
ec_point_dbl (
  .i_clk ( i_clk ),
  .i_rst ( i_rst ),
  // Input points
  .i_p ( i_p ),
  .i_val ( i_val ),
  .o_rdy ( o_rdy ),
  .o_p ( o_p ),
  .o_err ( o_err ),
  .i_rdy ( i_rdy ),
  .o_val ( o_val ) ,
  .o_mul_if ( mul_if_fe2_i ),
  .i_mul_if ( mul_if_fe2_o ),
  .o_add_if ( add_if_fe2_i ),
  .i_add_if ( add_if_fe2_o ),
  .o_sub_if ( sub_if_fe2_i ),
  .i_sub_if ( sub_if_fe2_o )
);

// Share the external Fp subtractor between the pass-through path (port 0) and
// the multiplier logic (port 1).
// NOTE(review): OVR_WRT_BIT = 10 is the first ctl bit above the two tag bits
// used here (8 and 9); presumably resource_share uses it to mark the
// requesting port - confirm against resource_share.
resource_share # (
  .NUM_IN ( 2 ),
  .OVR_WRT_BIT ( 10 ),
  .PIPELINE_IN ( 0 ),
  .PIPELINE_OUT ( 0 )
)
resource_share_sub (
  .i_clk ( i_clk ),
  .i_rst ( i_rst ),
  .i_axi ( sub_if_fe_o ),
  .o_res ( o_sub_if ),
  .i_res ( i_sub_if ),
  .o_axi ( sub_if_fe_i )
);

// Share the external Fp adder in the same way
resource_share # (
  .NUM_IN ( 2 ),
  .OVR_WRT_BIT ( 10 ),
  .PIPELINE_IN ( 0 ),
  .PIPELINE_OUT ( 0 )
)
resource_share_add (
  .i_clk ( i_clk ),
  .i_rst ( i_rst ),
  .i_axi ( add_if_fe_o ),
  .o_res ( o_add_if ),
  .i_res ( i_add_if ),
  .o_axi ( add_if_fe_i )
);

endmodule

View File

@ -333,7 +333,7 @@ generate
ec_fp_mult_mod #(
.P ( P ),
.KARATSUBA_LVL ( 3 ),
.KARATSUBA_LVL ( 2 ),
.CTL_BITS ( 16 )
)
ec_fp_mult_mod (

View File

@ -19,7 +19,6 @@
module ec_point_add
#(
parameter P,
parameter type FP_TYPE,
parameter type FE_TYPE
)(
@ -35,8 +34,8 @@ module ec_point_add
output logic o_val,
output logic o_err,
// Interface to multiplier (mod P)
if_axi_stream.source o_mult_if,
if_axi_stream.sink i_mult_if,
if_axi_stream.source o_mul_if,
if_axi_stream.sink i_mul_if,
// Interface to adder (mod P)
if_axi_stream.source o_add_if,
if_axi_stream.sink i_add_if,
@ -103,11 +102,11 @@ always_ff @ (posedge i_clk) begin
o_val <= 0;
o_rdy <= 0;
o_p <= 0;
o_mult_if.copy_if(0, 0, 1, 1, 0, 0, 0);
o_mul_if.copy_if(0, 0, 1, 1, 0, 0, 0);
o_add_if.copy_if(0, 0, 1, 1, 0, 0, 0);
o_sub_if.copy_if(0, 0, 1, 1, 0, 0, 0);
i_add_if.rdy <= 0;
i_mult_if.rdy <= 0;
i_mul_if.rdy <= 0;
i_sub_if.rdy <= 0;
eq_val <= 0;
state <= IDLE;
@ -121,7 +120,7 @@ always_ff @ (posedge i_clk) begin
D <= 0;
end else begin
if (o_mult_if.rdy) o_mult_if.val <= 0;
if (o_mul_if.rdy) o_mul_if.val <= 0;
if (o_add_if.rdy) o_add_if.val <= 0;
if (o_sub_if.rdy) o_sub_if.val <= 0;
@ -131,7 +130,7 @@ always_ff @ (posedge i_clk) begin
eq_val <= 0;
eq_wait <= 0;
o_err <= 0;
i_mult_if.rdy <= 1;
i_mul_if.rdy <= 1;
i_add_if.rdy <= 1;
i_sub_if.rdy <= 1;
i_p1_l <= i_p1;
@ -168,25 +167,25 @@ always_ff @ (posedge i_clk) begin
{START}: begin
// Check any results from multiplier
if (i_mult_if.val && i_mult_if.rdy) begin
eq_val[i_mult_if.ctl[5:0]] <= 1;
case(i_mult_if.ctl[5:0]) inside
0: A <= i_mult_if.dat;
1: i_p1_l.x <= i_mult_if.dat;
2: C <= i_mult_if.dat;
3: i_p2_l.x <= i_mult_if.dat;
4: A <= i_mult_if.dat;
5: A <= i_mult_if.dat;
6: C <= i_mult_if.dat;
7: C <= i_mult_if.dat;
10: o_p.x <= i_mult_if.dat;
11: D <= i_mult_if.dat;
12: i_p2_l.x <= i_mult_if.dat;
14: i_p1_l.x <= i_mult_if.dat;
19: o_p.y <= i_mult_if.dat;
20: i_p2_l.x <= i_mult_if.dat;
22: o_p.z <= i_mult_if.dat;
23: o_p.z <= i_mult_if.dat;
if (i_mul_if.val && i_mul_if.rdy) begin
eq_val[i_mul_if.ctl[5:0]] <= 1;
case(i_mul_if.ctl[5:0]) inside
0: A <= FE_TYPE'(i_mul_if.dat);
1: i_p1_l.x <= i_mul_if.dat;
2: C <= i_mul_if.dat;
3: i_p2_l.x <= i_mul_if.dat;
4: A <= i_mul_if.dat;
5: A <= i_mul_if.dat;
6: C <= i_mul_if.dat;
7: C <= i_mul_if.dat;
10: o_p.x <= i_mul_if.dat;
11: D <= i_mul_if.dat;
12: i_p2_l.x <= i_mul_if.dat;
14: i_p1_l.x <= i_mul_if.dat;
19: o_p.y <= i_mul_if.dat;
20: i_p2_l.x <= i_mul_if.dat;
22: o_p.z <= i_mul_if.dat;
23: o_p.z <= i_mul_if.dat;
default: o_err <= 1;
endcase
end
@ -345,11 +344,11 @@ endtask
// Task for using multiplies
task multiply(input int unsigned ctl, input FE_TYPE a, b);
if (~o_mult_if.val || (o_mult_if.val && o_mult_if.rdy)) begin
o_mult_if.val <= 1;
o_mult_if.dat[0 +: $bits(FE_TYPE)] <= a;
o_mult_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= b;
o_mult_if.ctl[5:0] <= ctl;
if (~o_mul_if.val || (o_mul_if.val && o_mul_if.rdy)) begin
o_mul_if.val <= 1;
o_mul_if.dat[0 +: $bits(FE_TYPE)] <= a;
o_mul_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= b;
o_mul_if.ctl[5:0] <= ctl;
eq_wait[ctl] <= 1;
end
endtask

View File

@ -17,31 +17,30 @@
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
module ec_fp_point_dbl
module ec_point_dbl
#(
parameter P,
parameter type POINT_TYPE
parameter type FP_TYPE,
parameter type FE_TYPE
)(
input i_clk, i_rst,
// Input point
input POINT_TYPE i_p,
input logic i_val,
output logic o_rdy,
input FP_TYPE i_p,
input logic i_val,
output logic o_rdy,
// Output point
output POINT_TYPE o_p,
input logic i_rdy,
output logic o_val,
output logic o_err,
output FP_TYPE o_p,
input logic i_rdy,
output logic o_val,
output logic o_err,
// Interface to multiplier (mod p)
if_axi_stream.source o_mult_if,
if_axi_stream.sink i_mult_if,
if_axi_stream.source o_mul_if,
if_axi_stream.sink i_mul_if,
if_axi_stream.source o_add_if,
if_axi_stream.sink i_add_if,
if_axi_stream.source o_sub_if,
if_axi_stream.sink i_sub_if
);
localparam DAT_BITS = $clog2(P);
/*
* These are the equations that need to be computed, they are issued as variables
* become valid. We have a bitmask to track what equation results are valid which
@ -66,8 +65,8 @@ localparam DAT_BITS = $clog2(P);
logic [14:0] eq_val, eq_wait;
// Temporary variables
logic [DAT_BITS-1:0] A, B, C, D, E;
POINT_TYPE i_p_l;
FE_TYPE A, B, C, D, E;
FP_TYPE i_p_l;
enum {IDLE, START, FINISHED} state;
@ -76,12 +75,12 @@ always_ff @ (posedge i_clk) begin
o_val <= 0;
o_rdy <= 0;
o_p <= 0;
o_mult_if.copy_if(0, 0, 1, 1, 0, 0, 0);
o_mul_if.copy_if(0, 0, 1, 1, 0, 0, 0);
o_add_if.copy_if(0, 0, 1, 1, 0, 0, 0);
o_sub_if.copy_if(0, 0, 1, 1, 0, 0, 0);
i_mult_if.rdy <= 0;
i_add_if.rdy <= 0;
i_sub_if.rdy <= 0;
i_mul_if.rdy <= 0;
i_add_if.rdy <= 0;
i_sub_if.rdy <= 0;
eq_val <= 0;
state <= IDLE;
eq_wait <= 0;
@ -94,7 +93,7 @@ always_ff @ (posedge i_clk) begin
E <= 0;
end else begin
if (o_mult_if.rdy) o_mult_if.val <= 0;
if (o_mul_if.rdy) o_mul_if.val <= 0;
if (o_add_if.rdy) o_add_if.val <= 0;
if (o_sub_if.rdy) o_sub_if.val <= 0;
@ -104,7 +103,7 @@ always_ff @ (posedge i_clk) begin
eq_val <= 0;
eq_wait <= 0;
o_err <= 0;
i_mult_if.rdy <= 1;
i_mul_if.rdy <= 1;
i_add_if.rdy <= 1;
i_sub_if.rdy <= 1;
i_p_l <= i_p;
@ -127,26 +126,26 @@ always_ff @ (posedge i_clk) begin
// Just a big if tree where we issue equations if the required inputs
// are valid
{START}: begin
i_mult_if.rdy <= 1;
i_mul_if.rdy <= 1;
// Check any results from multiplier
if (i_mult_if.val && i_mult_if.rdy) begin
eq_val[i_mult_if.ctl[5:0]] <= 1;
case(i_mult_if.ctl[5:0]) inside
0: A <= i_mult_if.dat;
1: B <= i_mult_if.dat;
2: B <= i_mult_if.dat;
3: C <= i_mult_if.dat;
4: C <= i_mult_if.dat;
5: D <= i_mult_if.dat;
6: D <= i_mult_if.dat;
7: o_p.x <= i_mult_if.dat;
11: o_p.y <= i_mult_if.dat;
14: o_p.z <= i_mult_if.dat;
if (i_mul_if.val && i_mul_if.rdy) begin
eq_val[i_mul_if.ctl[5:0]] <= 1;
case(i_mul_if.ctl[5:0]) inside
0: A <= i_mul_if.dat;
1: B <= i_mul_if.dat;
2: B <= i_mul_if.dat;
3: C <= i_mul_if.dat;
4: C <= i_mul_if.dat;
5: D <= i_mul_if.dat;
6: D <= i_mul_if.dat;
7: o_p.x <= i_mul_if.dat;
11: o_p.y <= i_mul_if.dat;
14: o_p.z <= i_mul_if.dat;
default: o_err <= 1;
endcase
end
// Check any results from adder
if (i_add_if.val && i_add_if.rdy) begin
eq_val[i_add_if.ctl[5:0]] <= 1;
@ -156,7 +155,7 @@ always_ff @ (posedge i_clk) begin
default: o_err <= 1;
endcase
end
// Check any results from subtractor
if (i_sub_if.val && i_sub_if.rdy) begin
eq_val[i_sub_if.ctl[5:0]] <= 1;
@ -219,9 +218,6 @@ always_ff @ (posedge i_clk) begin
addition(13, i_p_l.y, i_p_l.y);
end
if (&eq_val) begin
state <= FINISHED;
o_val <= 1;
@ -248,38 +244,36 @@ always_ff @ (posedge i_clk) begin
end
// Task for subtractions
task subtraction(input int unsigned ctl, input logic [DAT_BITS-1:0] a, b);
task subtraction(input int unsigned ctl, input FE_TYPE a, b);
if (~o_sub_if.val || (o_sub_if.val && o_sub_if.rdy)) begin
o_sub_if.val <= 1;
o_sub_if.dat[0 +: DAT_BITS] <= a;
o_sub_if.dat[DAT_BITS +: DAT_BITS] <= b;
o_sub_if.dat[0 +: $bits(FE_TYPE)] <= a;
o_sub_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= b;
o_sub_if.ctl[5:0] <= ctl;
eq_wait[ctl] <= 1;
end
endtask
// Task for addition
task addition(input int unsigned ctl, input logic [DAT_BITS-1:0] a, b);
task addition(input int unsigned ctl, input FE_TYPE a, b);
if (~o_add_if.val || (o_add_if.val && o_add_if.rdy)) begin
o_add_if.val <= 1;
o_add_if.dat[0 +: DAT_BITS] <= a;
o_add_if.dat[DAT_BITS +: DAT_BITS] <= b;
o_add_if.dat[0 +: $bits(FE_TYPE)] <= a;
o_add_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= b;
o_add_if.ctl[5:0] <= ctl;
eq_wait[ctl] <= 1;
end
endtask
// Task for using multiplies
task multiply(input int unsigned ctl, input logic [DAT_BITS-1:0] a, b);
if (~o_mult_if.val || (o_mult_if.val && o_mult_if.rdy)) begin
o_mult_if.val <= 1;
o_mult_if.dat[0 +: DAT_BITS] <= a;
o_mult_if.dat[DAT_BITS +: DAT_BITS] <= b;
o_mult_if.ctl[5:0] <= ctl;
task multiply(input int unsigned ctl, input FE_TYPE a, b);
if (~o_mul_if.val || (o_mul_if.val && o_mul_if.rdy)) begin
o_mul_if.val <= 1;
o_mul_if.dat[0 +: $bits(FE_TYPE)] <= a;
o_mul_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= b;
o_mul_if.ctl[5:0] <= ctl;
eq_wait[ctl] <= 1;
end
endtask
endmodule

View File

@ -0,0 +1,226 @@
/*
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
`timescale 1ps/1ps
module ec_fp2_point_add_tb ();
import common_pkg::*;
import bls12_381_pkg::*;
localparam CLK_PERIOD = 1000;
logic clk, rst;
if_axi_stream #(.DAT_BYTS(($bits(fp2_jb_point_t)*2+7)/8)) in_if(clk); // Two points
if_axi_stream #(.DAT_BYTS(($bits(fp2_jb_point_t)+7)/8)) out_if(clk);
if_axi_stream #(.DAT_BITS(2*bls12_381_pkg::DAT_BITS), .CTL_BITS(16)) mult_in_if(clk);
if_axi_stream #(.DAT_BITS(bls12_381_pkg::DAT_BITS), .CTL_BITS(16)) mult_out_if(clk);
if_axi_stream #(.DAT_BITS(2*bls12_381_pkg::DAT_BITS), .CTL_BITS(16)) add_in_if(clk);
if_axi_stream #(.DAT_BITS(bls12_381_pkg::DAT_BITS), .CTL_BITS(16)) add_out_if(clk);
if_axi_stream #(.DAT_BITS(2*bls12_381_pkg::DAT_BITS), .CTL_BITS(16)) sub_in_if(clk);
if_axi_stream #(.DAT_BITS(bls12_381_pkg::DAT_BITS), .CTL_BITS(16)) sub_out_if(clk);
fp2_jb_point_t in_p1, in_p2, out_p;
always_comb begin
in_p1 = in_if.dat[0 +: $bits(fp2_jb_point_t)];
in_p2 = in_if.dat[$bits(fp2_jb_point_t) +: $bits(fp2_jb_point_t)];
out_if.dat = out_p;
end
initial begin
rst = 0;
repeat(2) #(20*CLK_PERIOD) rst = ~rst;
end
initial begin
clk = 0;
forever #CLK_PERIOD clk = ~clk;
end
always_comb begin
out_if.sop = 1;
out_if.eop = 1;
out_if.ctl = 0;
out_if.mod = 0;
end
// Check for errors: report whenever the DUT flags .err on a valid output beat.
// $error takes only a format string and arguments (unlike $fatal it has no
// leading finish_number), so the stray "1" that used to be printed is removed.
always_ff @ (posedge clk)
  if (out_if.val && out_if.err)
    $error("%m %t ERROR: output .err asserted", $time);
ec_fp2_point_add #(
.FP2_TYPE ( fp2_jb_point_t ),
.FE_TYPE ( fe_t ),
.FE2_TYPE ( fe2_t )
)
ec_fp2_point_add (
.i_clk ( clk ),
.i_rst ( rst ),
// Input points
.i_p1 ( in_p1 ),
.i_p2 ( in_p2 ),
.i_val ( in_if.val ),
.o_rdy ( in_if.rdy ),
.o_p ( out_p ),
.o_err ( out_if.err ),
.i_rdy ( out_if.rdy ),
.o_val ( out_if.val ) ,
.o_mul_if ( mult_in_if ),
.i_mul_if ( mult_out_if ),
.o_add_if ( add_in_if ),
.i_add_if ( add_out_if ),
.o_sub_if ( sub_in_if ),
.i_sub_if ( sub_out_if )
);
always_comb begin
mult_out_if.sop = 1;
mult_out_if.eop = 1;
mult_out_if.err = 0;
mult_out_if.mod = 1;
add_out_if.sop = 1;
add_out_if.eop = 1;
add_out_if.err = 0;
add_out_if.mod = 1;
sub_out_if.sop = 1;
sub_out_if.eop = 1;
sub_out_if.err = 0;
sub_out_if.mod = 1;
end
// Attach a mod reduction unit and multiply - mod unit
ec_fp_mult_mod #(
.P ( P ),
.KARATSUBA_LVL ( 3 ),
.CTL_BITS ( 16 )
)
ec_fp_mult_mod (
.i_clk( clk ),
.i_rst( rst ),
.i_dat_a ( mult_in_if.dat[0 +: bls12_381_pkg::DAT_BITS] ),
.i_dat_b ( mult_in_if.dat[bls12_381_pkg::DAT_BITS +: bls12_381_pkg::DAT_BITS] ),
.i_val ( mult_in_if.val ),
.i_ctl ( mult_in_if.ctl ),
.o_rdy ( mult_in_if.rdy ),
.o_dat ( mult_out_if.dat ),
.i_rdy ( mult_out_if.rdy ),
.o_val ( mult_out_if.val ),
.o_ctl ( mult_out_if.ctl )
);
adder_pipe # (
.BITS ( bls12_381_pkg::DAT_BITS ),
.P ( P ),
.CTL_BITS ( 16 ),
.LEVEL ( 2 )
)
adder_pipe (
.i_clk ( clk ),
.i_rst ( rst ),
.i_dat_a ( add_in_if.dat[0 +: bls12_381_pkg::DAT_BITS] ),
.i_dat_b ( add_in_if.dat[bls12_381_pkg::DAT_BITS +: bls12_381_pkg::DAT_BITS] ),
.i_ctl ( add_in_if.ctl ),
.i_val ( add_in_if.val ),
.o_rdy ( add_in_if.rdy ),
.o_dat ( add_out_if.dat ),
.o_val ( add_out_if.val ),
.o_ctl ( add_out_if.ctl ),
.i_rdy ( add_out_if.rdy )
);
subtractor_pipe # (
.BITS ( bls12_381_pkg::DAT_BITS ),
.P ( P ),
.CTL_BITS ( 16 ),
.LEVEL ( 2 )
)
subtractor_pipe (
.i_clk ( clk ),
.i_rst ( rst ),
.i_dat_a ( sub_in_if.dat[0 +: bls12_381_pkg::DAT_BITS] ),
.i_dat_b ( sub_in_if.dat[bls12_381_pkg::DAT_BITS +: bls12_381_pkg::DAT_BITS] ),
.i_ctl ( sub_in_if.ctl ),
.i_val ( sub_in_if.val ),
.o_rdy ( sub_in_if.rdy ),
.o_dat ( sub_out_if.dat ),
.o_val ( sub_out_if.val ),
.o_ctl ( sub_out_if.ctl ),
.i_rdy ( sub_out_if.rdy )
);
// Runs one Fp^2 point-addition check against the DUT.
//   p1, p2 : input points, sent together on in_if as {p2, p1}
//            (p1 occupies the low bits, matching the in_p1 / in_p2 slicing above)
//   p_exp  : expected result point
// Terminates the simulation with $fatal when the received point differs from p_exp.
// NOTE(review): put_stream / get_stream are if_axi_stream helpers defined
// elsewhere; presumably they drive and collect one full packet each.
task test(input fp2_jb_point_t p1, p2, p_exp);
begin
  integer signed get_len;
  logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat;
  fp2_jb_point_t p_out;
  $display("Running test ...");
  // Send the stimulus and collect the response in parallel
  fork
    in_if.put_stream({p2, p1}, ((2*$bits(fp2_jb_point_t)+7)/8));
    out_if.get_stream(get_dat, get_len);
  join

  // Only the low $bits(fp2_jb_point_t) bits of the receive buffer are kept
  p_out = get_dat;

  $display("Expected:");
  print_fp2_jb_point(p_exp);
  $display("Was:");
  print_fp2_jb_point(p_out);

  if (p_exp != p_out) begin
    $fatal(1, "%m %t ERROR: test_0 point was wrong", $time);
  end

  $display("test PASSED");
end
endtask
fp2_jb_point_t one_point = '{x:FE2_one, y:FE2_one, z:FE2_one};
fp2_jb_point_t two_point = '{x:'{c1:381'd2, c0:381'd2}, y:'{c1:381'd2, c0:381'd2}, z:FE2_one};
fp2_jb_point_t g2_point_dbl = '{x:'{c0:381'd2004569552561385659566932407633616698939912674197491321901037400001042336021538860336682240104624979660689237563240,
c1:381'd3955604752108186662342584665293438104124851975447411601471797343177761394177049673802376047736772242152530202962941},
y:'{c0:381'd978142457653236052983988388396292566217089069272380812666116929298652861694202207333864830606577192738105844024927,
c1:381'd2248711152455689790114026331322133133284196260289964969465268080325775757898907753181154992709229860715480504777099},
z:'{c0:381'd3145673658656250241340817105688138628074744674635286712244193301767486380727788868972774468795689607869551989918920,
c1:381'd968254395890002185853925600926112283510369004782031018144050081533668188797348331621250985545304947843412000516197}};
initial begin
out_if.rdy = 0;
in_if.val = 0;
#(40*CLK_PERIOD);
test(g2_point, g2_point_dbl, add_fp2_jb_point(g2_point, g2_point_dbl)
/*'{x:'{c0:381'd2260316515795278483227354417550273673937385151660885802822200676798473320332386191812885909324314180009401590033496,
c1:381'd3157705674295752746643045744187038651144673626385096899515739718638356953289853357506730468806346866010850469607484},
y:'{c0:381'd3116406908094559010983016654096953279342014296159903648784769141704444407188785914041577477129027384530629024324101,
c1:381'd624739198846365065958511422206549337298084868949577950118937104460230094422413163466712508875838914229203179007739},
z:'{c0:381'd1372365362697527824661960056804989242334959973433633343888520294361286317391588271032081626721722944066233963018813,
c1:381'd135340553306575460225879133388402231094623862625345515492709522456301372944095308361691014711792956665222682354141}}*/);
#1us $finish();
end
endmodule

View File

@ -0,0 +1,211 @@
/*
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
`timescale 1ps/1ps
module ec_fp2_point_dbl_tb ();
import common_pkg::*;
import bls12_381_pkg::*;
localparam CLK_PERIOD = 1000;
logic clk, rst;
if_axi_stream #(.DAT_BYTS(($bits(fp2_jb_point_t)+7)/8)) in_if(clk); // One point
if_axi_stream #(.DAT_BYTS(($bits(fp2_jb_point_t)+7)/8)) out_if(clk);
if_axi_stream #(.DAT_BITS(2*bls12_381_pkg::DAT_BITS), .CTL_BITS(16)) mult_in_if(clk);
if_axi_stream #(.DAT_BITS(bls12_381_pkg::DAT_BITS), .CTL_BITS(16)) mult_out_if(clk);
if_axi_stream #(.DAT_BITS(2*bls12_381_pkg::DAT_BITS), .CTL_BITS(16)) add_in_if(clk);
if_axi_stream #(.DAT_BITS(bls12_381_pkg::DAT_BITS), .CTL_BITS(16)) add_out_if(clk);
if_axi_stream #(.DAT_BITS(2*bls12_381_pkg::DAT_BITS), .CTL_BITS(16)) sub_in_if(clk);
if_axi_stream #(.DAT_BITS(bls12_381_pkg::DAT_BITS), .CTL_BITS(16)) sub_out_if(clk);
fp2_jb_point_t in_p1, out_p;
always_comb begin
in_p1 = in_if.dat;
out_if.dat = out_p;
end
initial begin
rst = 0;
repeat(2) #(20*CLK_PERIOD) rst = ~rst;
end
initial begin
clk = 0;
forever #CLK_PERIOD clk = ~clk;
end
always_comb begin
out_if.sop = 1;
out_if.eop = 1;
out_if.ctl = 0;
out_if.mod = 0;
end
// Check for errors: report whenever the DUT flags .err on a valid output beat.
// $error takes only a format string and arguments (unlike $fatal it has no
// leading finish_number), so the stray "1" that used to be printed is removed.
always_ff @ (posedge clk)
  if (out_if.val && out_if.err)
    $error("%m %t ERROR: output .err asserted", $time);
ec_fp2_point_dbl #(
.FP2_TYPE ( fp2_jb_point_t ),
.FE_TYPE ( fe_t ),
.FE2_TYPE ( fe2_t )
)
ec_fp2_point_dbl (
.i_clk ( clk ),
.i_rst ( rst ),
// Input points
.i_p ( in_p1 ),
.i_val ( in_if.val ),
.o_rdy ( in_if.rdy ),
.o_p ( out_p ),
.o_err ( out_if.err ),
.i_rdy ( out_if.rdy ),
.o_val ( out_if.val ) ,
.o_mul_if ( mult_in_if ),
.i_mul_if ( mult_out_if ),
.o_add_if ( add_in_if ),
.i_add_if ( add_out_if ),
.o_sub_if ( sub_in_if ),
.i_sub_if ( sub_out_if )
);
always_comb begin
mult_out_if.sop = 1;
mult_out_if.eop = 1;
mult_out_if.err = 0;
mult_out_if.mod = 1;
add_out_if.sop = 1;
add_out_if.eop = 1;
add_out_if.err = 0;
add_out_if.mod = 1;
sub_out_if.sop = 1;
sub_out_if.eop = 1;
sub_out_if.err = 0;
sub_out_if.mod = 1;
end
// Attach a mod reduction unit and multiply - mod unit
ec_fp_mult_mod #(
.P ( P ),
.KARATSUBA_LVL ( 3 ),
.CTL_BITS ( 16 )
)
ec_fp_mult_mod (
.i_clk( clk ),
.i_rst( rst ),
.i_dat_a ( mult_in_if.dat[0 +: bls12_381_pkg::DAT_BITS] ),
.i_dat_b ( mult_in_if.dat[bls12_381_pkg::DAT_BITS +: bls12_381_pkg::DAT_BITS] ),
.i_val ( mult_in_if.val ),
.i_ctl ( mult_in_if.ctl ),
.o_rdy ( mult_in_if.rdy ),
.o_dat ( mult_out_if.dat ),
.i_rdy ( mult_out_if.rdy ),
.o_val ( mult_out_if.val ),
.o_ctl ( mult_out_if.ctl )
);
adder_pipe # (
.BITS ( bls12_381_pkg::DAT_BITS ),
.P ( P ),
.CTL_BITS ( 16 ),
.LEVEL ( 2 )
)
adder_pipe (
.i_clk ( clk ),
.i_rst ( rst ),
.i_dat_a ( add_in_if.dat[0 +: bls12_381_pkg::DAT_BITS] ),
.i_dat_b ( add_in_if.dat[bls12_381_pkg::DAT_BITS +: bls12_381_pkg::DAT_BITS] ),
.i_ctl ( add_in_if.ctl ),
.i_val ( add_in_if.val ),
.o_rdy ( add_in_if.rdy ),
.o_dat ( add_out_if.dat ),
.o_val ( add_out_if.val ),
.o_ctl ( add_out_if.ctl ),
.i_rdy ( add_out_if.rdy )
);
subtractor_pipe # (
.BITS ( bls12_381_pkg::DAT_BITS ),
.P ( P ),
.CTL_BITS ( 16 ),
.LEVEL ( 2 )
)
subtractor_pipe (
.i_clk ( clk ),
.i_rst ( rst ),
.i_dat_a ( sub_in_if.dat[0 +: bls12_381_pkg::DAT_BITS] ),
.i_dat_b ( sub_in_if.dat[bls12_381_pkg::DAT_BITS +: bls12_381_pkg::DAT_BITS] ),
.i_ctl ( sub_in_if.ctl ),
.i_val ( sub_in_if.val ),
.o_rdy ( sub_in_if.rdy ),
.o_dat ( sub_out_if.dat ),
.o_val ( sub_out_if.val ),
.o_ctl ( sub_out_if.ctl ),
.i_rdy ( sub_out_if.rdy )
);
// Runs one Fp^2 point-doubling check against the DUT.
//   p1    : input point, sent on in_if
//   p_exp : expected result point
// Terminates the simulation with $fatal when the received point differs from p_exp.
// NOTE(review): put_stream / get_stream are if_axi_stream helpers defined
// elsewhere; the meaning of get_stream's third argument (0) is not visible here.
task test(input fp2_jb_point_t p1, p_exp);
begin
  integer signed get_len;
  logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat;
  fp2_jb_point_t p_out;
  $display("Running test ...");
  // Send the stimulus and collect the response in parallel
  fork
    in_if.put_stream(p1, (($bits(fp2_jb_point_t)+7)/8));
    out_if.get_stream(get_dat, get_len, 0);
  join

  // Only the low $bits(fp2_jb_point_t) bits of the receive buffer are kept
  p_out = get_dat;

  $display("Input:");
  print_fp2_jb_point(p1);
  $display("Expected:");
  print_fp2_jb_point(p_exp);
  $display("Was:");
  print_fp2_jb_point(p_out);

  if (p_exp != p_out) begin
    $fatal(1, "%m %t ERROR: test_0 point was wrong", $time);
  end

  $display("test PASSED");
end
endtask
initial begin
out_if.rdy = 0;
in_if.val = 0;
#(40*CLK_PERIOD);
test(g2_point, dbl_fp2_jb_point(g2_point));
#1us $finish();
end
endmodule

View File

@ -16,7 +16,7 @@
*/
`timescale 1ps/1ps
module ec_fp_point_dbl_tb ();
module ec_point_dbl_tb ();
import common_pkg::*;
import bls12_381_pkg::*;
@ -68,11 +68,11 @@ always_ff @ (posedge clk)
if (out_if.val && out_if.err)
$error(1, "%m %t ERROR: output .err asserted", $time);
ec_fp_point_dbl #(
.P ( P ),
.POINT_TYPE ( jb_point_t )
ec_point_dbl #(
.FP_TYPE ( jb_point_t ),
.FE_TYPE ( fe_t )
)
ec_fp_point_dbl (
ec_point_dbl (
.i_clk ( clk ),
.i_rst ( rst ),
.i_p ( in_p ),
@ -82,8 +82,8 @@ ec_fp_point_dbl (
.o_err ( out_if.err ),
.i_rdy ( out_if.rdy ),
.o_val ( out_if.val ) ,
.o_mult_if ( mult_in_if ),
.i_mult_if ( mult_out_if ),
.o_mul_if ( mult_in_if ),
.i_mul_if ( mult_out_if ),
.o_add_if ( add_in_if ),
.i_add_if ( add_out_if ),
.o_sub_if ( sub_in_if ),
@ -102,7 +102,6 @@ ec_fp_mult_mod (
.i_dat_a ( mult_in_if.dat[0 +: 381] ),
.i_dat_b ( mult_in_if.dat[381 +: 381] ),
.i_val ( mult_in_if.val ),
.i_err ( mult_in_if.err ),
.i_ctl ( mult_in_if.ctl ),
.o_rdy ( mult_in_if.rdy ),
.o_dat ( mult_out_if.dat ),
@ -156,16 +155,16 @@ always_comb begin
mult_out_if.eop = 1;
mult_out_if.err = 0;
mult_out_if.mod = 1;
add_out_if.sop = 1;
add_out_if.eop = 1;
add_out_if.err = 0;
add_out_if.mod = 1;
sub_out_if.sop = 1;
sub_out_if.eop = 1;
sub_out_if.err = 0;
sub_out_if.mod = 1;
sub_out_if.mod = 1;
end
task test_0();
@ -200,14 +199,13 @@ begin
end
endtask;
function compare_point();
endfunction
initial begin
out_if.rdy = 0;
in_if.val = 0;
#(40*CLK_PERIOD);
print_jb_point(to_affine(g_point));
print_jb_point(to_affine(dbl_jb_point(g_point)));
test_0();

View File

@ -148,9 +148,10 @@ initial begin
in_if.val = 0;
#(40*CLK_PERIOD);
test(381'h1);
/*test(381'h1);
test(381'h5);
test(381'h10);
test(381'h10);*/
//test(1 << 380);
test(381'h9f5193de96ab6e65e7c7df8adcec4e82b971dd5f54d1c62103776d3eef0255ae346eba9e29eb08c3a957e9a53afc3ce);
#1us $finish();

View File

@ -39,8 +39,8 @@ if_axi_stream #(.DAT_BITS(bls12_381_pkg::DAT_BITS), .CTL_BITS(8)) sub_out_if(clk
jb_point_t in_p1, in_p2, out_p;
always_comb begin
in_p1 = in_if.dat[0 +: bls12_381_pkg::DAT_BITS*3];
in_p2 = in_if.dat[bls12_381_pkg::DAT_BITS*3 +: bls12_381_pkg::DAT_BITS*3];
in_p1 = in_if.dat[0 +: $bits(jb_point_t)];
in_p2 = in_if.dat[$bits(jb_point_t) +: $bits(jb_point_t)];
out_if.dat = out_p;
end
@ -67,9 +67,8 @@ always_ff @ (posedge clk)
$error(1, "%m %t ERROR: output .err asserted", $time);
ec_point_add #(
.P ( P ),
.FP_TYPE ( jb_point_t ),
.FE_TPYE ( fe_t )
.FE_TYPE ( fe_t )
)
ec_point_add (
.i_clk ( clk ),

View File

@ -16,7 +16,7 @@ module uram_reset #(
if_ram.sink b
);
if_ram #(.RAM_WIDTH(RAM_WIDTH), .RAM_DEPTH(RAM_DEPTH)) if_ram_a(.i_clk(a.i_clk), .i_rst(a.i_rst));
if_ram #(.RAM_WIDTH(RAM_WIDTH), .RAM_DEPTH(RAM_DEPTH), .BYT_EN($bits(a.we))) if_ram_a(.i_clk(a.i_clk), .i_rst(a.i_rst));
logic reset_done;
logic [RAM_DEPTH-1:0] addr;
@ -34,7 +34,7 @@ end
always_comb begin
if_ram_a.a = reset_done ? a.a : addr;
if_ram_a.en = reset_done ? a.en : 1'd1;
if_ram_a.we = reset_done ? a.we : 1'd1;
if_ram_a.we = reset_done ? a.we : {$bits(a.we){1'd1}};
if_ram_a.re = a.re;
if_ram_a.d = reset_done ? a.d : {RAM_WIDTH{1'd0}};
a.q = if_ram_a.q;
@ -64,85 +64,96 @@ module uram #(
// Check RAM sizes match the interface
initial begin
assert ($bits(a.d) == RAM_WIDTH) else $fatal(1, "%m %t ERROR: bram RAM_WIDTH (%d) does not match interface a (%d)", $time, RAM_WIDTH, $bits(a.d));
assert ($bits(a.a) == $clog2(RAM_DEPTH)) else $fatal(1, "%m %t ERROR: bram $clog2(RAM_DEPTH) (%d) does not match interface a (%d)", $time, $clog2(RAM_DEPTH), $bits(a.a));
assert ($bits(a.a) == RAM_DEPTH) else $fatal(1, "%m %t ERROR: bram $clog2(RAM_DEPTH) (%d) does not match interface a (%d)", $time, (RAM_DEPTH), $bits(a.a));
assert ($bits(b.d) == RAM_WIDTH) else $fatal(1, "%m %t ERROR: bram RAM_WIDTH (%d) does not match interface b (%d)", $time, RAM_WIDTH, $bits(b.d));
assert ($bits(b.a) == $clog2(RAM_DEPTH)) else $fatal(1, "%m %t ERROR: bram $clog2(RAM_DEPTH) (%d) does not match interface b (%d)", $time, $clog2(RAM_DEPTH), $bits(b.a));
assert ($bits(b.a) == RAM_DEPTH) else $fatal(1, "%m %t ERROR: bram $clog2(RAM_DEPTH) (%d) does not match interface b (%d)", $time, (RAM_DEPTH), $bits(b.a));
end
xilinx_ultraram_true_dual_port #(
.AWIDTH ( $clog2(RAM_DEPTH) ),
.DWIDTH ( RAM_WIDTH ),
.NBPIPE ( PIPELINES )
)
uram_instance (
.addra(a.a),
.addrb(b.a),
.dina(a.d),
.dinb(b.d),
.clk(a.i_clk),
.wea(a.we),
.web(b.we),
.mem_ena(a.en),
.mem_enb(b.en),
.rsta(a.i_rst),
.rstb(b.i_rst),
.regcea(a.re),
.regceb(b.re),
.douta(a.q),
.doutb(b.q)
);
// xilinx_ultraram_true_dual_port
xilinx_ultraram_true_dual_port_bytewrite #(
.AWIDTH ( RAM_DEPTH ),
.DWIDTH ( RAM_WIDTH ),
.NBPIPE ( PIPELINES ),
.NUM_COL ( $bits(a.we) )
)
uram_instance (
.addra(a.a),
.addrb(b.a),
.dina(a.d),
.dinb(b.d),
.clk(a.i_clk),
.wea(a.we),
.web(b.we),
.mem_ena(a.en),
.mem_enb(b.en),
.rsta(a.i_rst),
.rstb(b.i_rst),
.regcea(a.re),
.regceb(b.re),
.douta(a.q),
.doutb(b.q)
);
endmodule
module xilinx_ultraram_true_dual_port #(
parameter AWIDTH = 12, // Address Width
parameter DWIDTH = 72, // Data Width
parameter NBPIPE = 3 // Number of pipeline Registers
) (
input clk, // Clock
// Port A
input rsta, // Reset
input wea, // Write Enable
input regcea, // Output Register Enable
input mem_ena, // Memory Enable
input [DWIDTH-1:0] dina, // Data Input
input [AWIDTH-1:0] addra, // Address Input
output reg [DWIDTH-1:0] douta,// Data Output
module xilinx_ultraram_true_dual_port_bytewrite #(
parameter AWIDTH = 12, // Address Width
parameter NUM_COL = 9, // Number of columns
parameter DWIDTH = 72, // Data Width, (Byte * NUM_COL)
parameter NBPIPE = 3 // Number of pipeline Registers
) (
input clk, // Clock
// Port A
input rsta, // Reset
input [NUM_COL-1:0] wea, // Write Enable
input regcea, // Output Register Enable
input mem_ena, // Memory Enable
input [DWIDTH-1:0] dina, // Data Input
input [AWIDTH-1:0] addra, // Address Input
output reg [DWIDTH-1:0] douta,// Data Output
// Port B
input rstb, // Reset
input web, // Write Enable
input regceb, // Output Register Enable
input mem_enb, // Memory Enable
input [DWIDTH-1:0] dinb, // Data Input
input [AWIDTH-1:0] addrb, // Address Input
output reg [DWIDTH-1:0] doutb // Data Output
);
// Port B
input rstb, // Reset
input [NUM_COL-1:0] web, // Write Enable
input regceb, // Output Register Enable
input mem_enb, // Memory Enable
input [DWIDTH-1:0] dinb, // Data Input
input [AWIDTH-1:0] addrb, // Address Input
output reg [DWIDTH-1:0] doutb // Data Output
);
(* ram_style = "ultra" *)
reg [DWIDTH-1:0] mem[(1<<AWIDTH)-1:0]; // Memory Declaration
reg [DWIDTH-1:0] memrega;
reg [DWIDTH-1:0] memrega;
reg [DWIDTH-1:0] mem_pipe_rega[NBPIPE-1:0]; // Pipelines for memory
reg mem_en_pipe_rega[NBPIPE:0]; // Pipelines for memory enable
reg mem_en_pipe_rega[NBPIPE:0]; // Pipelines for memory enable
reg [DWIDTH-1:0] memregb;
reg [DWIDTH-1:0] memregb;
reg [DWIDTH-1:0] mem_pipe_regb[NBPIPE-1:0]; // Pipelines for memory
reg mem_en_pipe_regb[NBPIPE:0]; // Pipelines for memory enable
reg mem_en_pipe_regb[NBPIPE:0]; // Pipelines for memory enable
integer i;
localparam CWIDTH = DWIDTH/NUM_COL;
// RAM : Read has one latency, Write has one latency as well.
always @ (posedge clk)
begin
if(mem_ena)
if(mem_ena)
begin
if(wea)
mem[addra] <= dina;
else
memrega <= mem[addra];
end
for(i = 0;i<NUM_COL;i=i+1)
if(wea[i])
mem[addra][i*CWIDTH +: CWIDTH] <= dina[i*CWIDTH +: CWIDTH];
end
end
always @ (posedge clk)
begin
if(mem_ena)
if(~|wea)
memrega <= mem[addra];
end
// The enable of the RAM goes through a pipeline to produce a
// series of pipelined enable signals required to control the data
// pipeline.
@ -175,19 +186,26 @@ begin
douta <= 0;
else if (mem_en_pipe_rega[NBPIPE] && regcea)
douta <= mem_pipe_rega[NBPIPE-1];
end
end
// RAM : Read has one latency, Write has one latency as well.
always @ (posedge clk)
begin
if(mem_enb)
if(mem_enb)
begin
if(web)
mem[addrb] <= dinb;
else
memregb <= mem[addrb];
end
for(i=0;i<NUM_COL;i=i+1)
if(web[i])
mem[addrb][i*CWIDTH +: CWIDTH] <= dinb[i*CWIDTH +: CWIDTH];
end
end
always @ (posedge clk)
begin
if(mem_enb)
if(~|web)
memregb <= mem[addrb];
end
// The enable of the RAM goes through a pipeline to produce a
// series of pipelined enable signals required to control the data
// pipeline.
@ -220,6 +238,7 @@ begin
doutb <= 0;
else if (mem_en_pipe_regb[NBPIPE] && regceb)
doutb <= mem_pipe_regb[NBPIPE-1];
end
end
endmodule

View File

@ -21,25 +21,55 @@ package bls12_381_pkg;
localparam DAT_BITS = 381;
localparam MUL_BITS = 384;
localparam [DAT_BITS-1:0] P = 381'h1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab;
localparam [DAT_BITS-1:0] Gx = 381'h17F1D3A73197D7942695638C4FA9AC0FC3688C4F9774B905A14E3A3F171BAC586C55E83FF97A1AEFFB3AF00ADB22C6BB;
localparam [DAT_BITS-1:0] Gy = 381'h08B3F481E3AAA0F1A09E30ED741D8AE4FCF5E095D5D00AF600DB18CB2C04B3EDD03CC744A2888AE40CAA232946C5E7E1;
typedef logic [DAT_BITS-1:0] fe_t;
fe_t Gx = 381'h17F1D3A73197D7942695638C4FA9AC0FC3688C4F9774B905A14E3A3F171BAC586C55E83FF97A1AEFFB3AF00ADB22C6BB;
fe_t Gy = 381'h08B3F481E3AAA0F1A09E30ED741D8AE4FCF5E095D5D00AF600DB18CB2C04B3EDD03CC744A2888AE40CAA232946C5E7E1;
typedef enum logic [2:0] {
SCALAR = 0,
FE = 1,
FE2 = 2,
FE12 = 3,
FP_AF = 4,
FP_JB = 5,
FP2_AF = 6,
FP2_JB = 7
} point_type_t;
// Jacobian coordinates for Fp elements
typedef struct packed {
logic [DAT_BITS-1:0] x, y, z;
fe_t x, y, z;
} jb_point_t;
typedef struct packed {
fe_t c1, c0;
} fe2_t;
fe2_t G2x = '{c0:381'd352701069587466618187139116011060144890029952792775240219908644239793785735715026873347600343865175952761926303160,
c1:381'd3059144344244213709971259814753781636986470325476647558659373206291635324768958432433509563104347017837885763365758};
fe2_t G2y = '{c0:381'd1985150602287291935568054521177171638300868978215655730859378665066344726373823718423869104263333984641494340347905,
c1:381'd927553665492332455747201965776037880757740193453592970025027978793976877002675564980949289727957565575433344219582};
fe2_t FE2_one = '{c0:381'd1, c1:381'd0};
jb_point_t g_point = '{x:Gx, y:Gy, z:381'd1};
// Jacobian coordinates for Fp^2 elements
typedef struct packed {
jb_point_t fp1_a, fp1_b;
fe2_t x, y, z;
} fp2_jb_point_t;
fp2_jb_point_t g2_point = '{x:G2x, y:G2y, z:FE2_one};
// Instruction codes
typedef enum logic [7:0] {
NOOP_WAIT = 8'h0,
FP_POINT_MULT = 8'h20
NOOP_WAIT = 8'h0,
COPY_REG = 8'h1,
FP_FPOINT_MULT = 8'h26
} code_t;
// Instruction format
@ -48,45 +78,26 @@ package bls12_381_pkg;
code_t code;
} inst_t;
localparam DATA_RAM_WIDTH = 381;
localparam DATA_RAM_DEPTH = $clog2(64);
localparam INST_RAM_WIDTH = $bits(inst_t);
localparam INST_RAM_DEPTH = $clog2(1024);
localparam DATA_RAM_WIDTH = $bits(point_type_t) + DAT_BITS;
localparam DATA_RAM_ALIGN_BYTE = 64;
localparam DATA_RAM_DEPTH = 8;
localparam DATA_RAM_USR_WIDTH = 4;
localparam DATA_RAM_USR_DEPTH = DATA_RAM_DEPTH*DATA_RAM_ALIGN_BYTE/DATA_RAM_USR_WIDTH;
localparam INST_RAM_WIDTH = $bits(inst_t);
localparam INST_RAM_ALIGN_BYTE = 8;
localparam INST_RAM_DEPTH = 8;
localparam INST_RAM_USR_WIDTH = 4;
localparam INST_RAM_USR_DEPTH = INST_RAM_DEPTH*INST_RAM_ALIGN_BYTE/INST_RAM_USR_WIDTH;
jb_point_t g_point = '{x:Gx, y:Gy, z:1};
function is_zero(jb_point_t p);
is_zero = (p.x == 0 && p.y == 0 && p.z == 1);
return is_zero;
endfunction
// Function to double point in Jacobian coordinates (for comparison in testbench)
// Here a is 0, and we also mod the result
function jb_point_t dbl_jb_point(input jb_point_t p);
logic signed [1023:0] I_X, I_Y, I_Z, A, B, C, D, X, Y, Z;
if (p.z == 0) return p;
I_X = p.x;
I_Y = p.y;
I_Z = p.z;
A = (I_Y*I_Y) % P;
B = (((4*I_X) % P)*A) % P;
C = (((8*A) % P)*A) % P;
D = (((3*I_X)% P)*I_X) % P;
X = (D*D)% P;
X = X + ((2*B) % P > X ? P : 0) - (2*B) % P;
Y = (D*((B + (X > B ? P : 0)-X) % P)) % P;
Y = Y + (C > Y ? P : 0) - C;
Z = (((2*I_Y)% P)*I_Z) % P;
dbl_jb_point.x = X;
dbl_jb_point.y = Y;
dbl_jb_point.z = Z;
return dbl_jb_point;
endfunction
function jb_point_t add_jb_point(jb_point_t p1, p2);
logic signed [1023:0] A, U1, U2, S1, S2, H, H3, R;
@ -125,7 +136,127 @@ package bls12_381_pkg;
add_jb_point.y = A + (add_jb_point.y > A ? P : 0) - add_jb_point.y;
endfunction
// Fp addition: returns a + b, with P subtracted once when the sum reaches P.
// The sum is formed one bit wider than fe_t so the carry out is not lost.
function fe_t fe_add(fe_t a, b);
  logic [$bits(fe_t):0] sum;
  sum = {1'b0, a} + {1'b0, b};
  fe_add = (sum < P) ? sum : sum - P;
endfunction
// Fp^2 addition: the c0 and c1 coefficients are added independently with fe_add.
function fe2_t fe2_add(fe2_t a, b);
  fe2_add = '{c1: fe_add(a.c1, b.c1),
              c0: fe_add(a.c0, b.c0)};
endfunction
// Fp subtraction: returns a - b, adding P back once when b is larger than a.
// The difference is formed one bit wider than fe_t; the final result is
// truncated back to fe_t on assignment.
function fe_t fe_sub(fe_t a, b);
  logic [$bits(fe_t):0] diff;
  diff = {1'b0, a} - {1'b0, b};
  fe_sub = (a < b) ? diff + P : diff;
endfunction
// Fp^2 subtraction: the c0 and c1 coefficients are subtracted independently with fe_sub.
function fe2_t fe2_sub(fe2_t a, b);
  fe2_sub = '{c1: fe_sub(a.c1, b.c1),
              c0: fe_sub(a.c0, b.c0)};
endfunction
// Fp multiplication: returns (a * b) mod P.
// The operands are single field elements (fe_t). The product is held in an
// explicit double-width temporary so no high bits are lost before the
// reduction; previously this relied on the operands being declared with the
// (twice as wide) fe2_t type to get the same effect implicitly.
function fe_t fe_mul(fe_t a, b);
  logic [2*$bits(fe_t)-1:0] prod;
  prod = a * b;        // evaluated at the width of prod (2 * $bits(fe_t))
  fe_mul = prod % P;   // result is < P, so it fits in fe_t
endfunction
// Fp^2 multiplication of (a.c0 + a.c1*u) and (b.c0 + b.c1*u):
//   c0 = a.c0*b.c0 - a.c1*b.c1
//   c1 = a.c0*b.c1 + a.c1*b.c0
function fe2_t fe2_mul(fe2_t a, b);
  fe_t a0b0, a1b1, a0b1, a1b0;
  a0b0 = fe_mul(a.c0, b.c0);
  a1b1 = fe_mul(a.c1, b.c1);
  a0b1 = fe_mul(a.c0, b.c1);
  a1b0 = fe_mul(a.c1, b.c0);
  fe2_mul.c1 = fe_add(a0b1, a1b0);
  fe2_mul.c0 = fe_sub(a0b0, a1b1);
endfunction
// Reference model (testbench use): doubles a point given in Jacobian
// coordinates over Fp. The curve coefficient a is taken as 0 and every
// intermediate value is reduced mod P by the fe_* helpers.
// A point with z == 0 is returned unchanged.
function jb_point_t dbl_jb_point(input jb_point_t p);
  fe_t y_sq, s, c8, m, x3, y3, z3;

  if (p.z == 0) return p;

  y_sq = fe_mul(p.y, p.y);                       // Y^2
  s    = fe_mul(fe_mul(4, p.x), y_sq);           // 4*X*Y^2
  c8   = fe_mul(fe_mul(8, y_sq), y_sq);          // 8*Y^4
  m    = fe_mul(fe_mul(3, p.x), p.x);            // 3*X^2

  x3 = fe_sub(fe_mul(m, m), fe_mul(2, s));       // M^2 - 2*S
  y3 = fe_sub(fe_mul(m, fe_sub(s, x3)), c8);     // M*(S - X3) - 8*Y^4
  z3 = fe_mul(fe_mul(2, p.y), p.z);              // 2*Y*Z

  dbl_jb_point.z = z3;
  dbl_jb_point.y = y3;
  dbl_jb_point.x = x3;
  return dbl_jb_point;
endfunction
// Reference model (testbench use): doubles a point given in Jacobian
// coordinates over Fp^2, using the same sequence of operations as
// dbl_jb_point but with the fe2_* helpers.
// Integer constants passed to fe2_mul are converted to fe2_t on the call,
// so they land in the low (c0) coefficient.
// A point with z == 0 is returned unchanged.
function fp2_jb_point_t dbl_fp2_jb_point(input fp2_jb_point_t p);
  fe2_t y_sq, s, c8, m, x3, y3, z3;

  if (p.z == 0) return p;

  y_sq = fe2_mul(p.y, p.y);                        // Y^2
  s    = fe2_mul(fe2_mul(4, p.x), y_sq);           // 4*X*Y^2
  c8   = fe2_mul(fe2_mul(8, y_sq), y_sq);          // 8*Y^4
  m    = fe2_mul(fe2_mul(3, p.x), p.x);            // 3*X^2

  x3 = fe2_sub(fe2_mul(m, m), fe2_mul(2, s));      // M^2 - 2*S
  y3 = fe2_sub(fe2_mul(m, fe2_sub(s, x3)), c8);    // M*(S - X3) - 8*Y^4
  z3 = fe2_mul(fe2_mul(2, p.y), p.z);              // 2*Y*Z

  dbl_fp2_jb_point.z = z3;
  dbl_fp2_jb_point.y = y3;
  dbl_fp2_jb_point.x = x3;
  return dbl_fp2_jb_point;
endfunction
// Reference model (testbench use): adds two points given in Jacobian
// coordinates over Fp^2.
//
//   U1 = X1*Z2^2        U2 = X2*Z1^2
//   S1 = Y1*Z2^3        S2 = Y2*Z1^3
//   H  = U2 - U1        R  = S2 - S1
//   X3 = R^2 - H^3 - 2*U1*H^2
//   Y3 = R*(U1*H^2 - X3) - S1*H^3
//   Z3 = H*Z1*Z2
//
// A point with z == 0 is treated as the point at infinity (the same
// convention dbl_fp2_jb_point uses), so the other operand is returned.
// NOTE(review): the "same point" shortcut compares raw x and y only; two
// representations of one point with different z are not detected here.
function fp2_jb_point_t add_fp2_jb_point(fp2_jb_point_t p1, p2);
  fe2_t A, U1, U2, S1, S2, H, H3, R;

  // Identity element handling
  if (p1.z == 0) return p2;
  if (p2.z == 0) return p1;

  if (p1.y == p2.y && p1.x == p2.x)
    return (dbl_fp2_jb_point(p1));

  U1 = fe2_mul(p1.x, p2.z);
  U1 = fe2_mul(U1, p2.z);

  U2 = fe2_mul(p2.x, p1.z);
  U2 = fe2_mul(U2, p1.z);

  S1 = fe2_mul(p1.y, p2.z);
  S1 = fe2_mul(fe2_mul(S1, p2.z), p2.z);

  S2 = fe2_mul(p2.y, p1.z);
  S2 = fe2_mul(fe2_mul(S2, p1.z), p1.z);

  H  = fe2_sub(U2, U1);
  R  = fe2_sub(S2, S1);
  H3 = fe2_mul(fe2_mul(H, H), H);
  A  = fe2_mul(fe2_mul(fe2_mul(2, U1), H), H);      // 2*U1*H^2

  add_fp2_jb_point.z = fe2_mul(fe2_mul(H, p1.z), p2.z);

  // X3 = R^2 - H^3 - 2*U1*H^2  (H^3 is subtracted, not added)
  add_fp2_jb_point.x = fe2_mul(R, R);
  add_fp2_jb_point.x = fe2_sub(add_fp2_jb_point.x, H3);
  add_fp2_jb_point.x = fe2_sub(add_fp2_jb_point.x, A);

  // Y3 = R*(U1*H^2 - X3) - S1*H^3
  A = fe2_mul(fe2_mul(U1, H), H);
  A = fe2_sub(A, add_fp2_jb_point.x);
  A = fe2_mul(A, R);
  add_fp2_jb_point.y = fe2_mul(S1, H3);
  add_fp2_jb_point.y = fe2_sub(A, add_fp2_jb_point.y);
endfunction
function jb_point_t point_mult(logic [DAT_BITS-1:0] c, jb_point_t p);
jb_point_t result, addend;
result = 0;
@ -143,12 +274,78 @@ package bls12_381_pkg;
// Evaluates y^2 - x^3 - a*x*z^4 - b*z^6 for the Jacobian point p and returns it.
// NOTE(review): the curve constants are read from secp256k1_pkg rather than
// from this package - confirm this is intended for BLS12-381.
// NOTE(review): no return type is declared, so the result is the implicit
// single-bit function type, and no reduction mod P is applied - confirm how
// callers are expected to interpret the value.
function on_curve(jb_point_t p);
return (p.y*p.y - p.x*p.x*p.x - secp256k1_pkg::a*p.x*p.z*p.z*p.z*p.z - secp256k1_pkg::b*p.z*p.z*p.z*p.z*p.z*p.z);
endfunction
// Inversion using the (binary) extended Euclidean algorithm.
// Returns b * a^-1 mod P; with the default b = 1 this is the inverse of a.
//
// Loop invariant: a*x1 == b*u and a*x2 == b*v (mod P). u and v are reduced
// until one of them reaches 1, at which point the matching x holds the result.
//
// a == 0 (or a == P) has no inverse and would keep the loop running forever,
// so 0 is returned for that case.
function fe_t fe_inv(fe_t a, b = 1);
  fe_t u, v;
  logic [$bits(fe_t):0] x1, x2;   // one extra bit so (x + P) cannot overflow before halving

  if (a % P == 0) return 0;

  u = a; v = P;
  x1 = b; x2 = 0;

  while (u != 1 && v != 1) begin
    // Strip factors of two from u, halving x1 mod P alongside
    while (u % 2 == 0) begin
      u = u / 2;
      if (x1 % 2 == 0)
        x1 = x1 / 2;
      else
        x1 = (x1 + P) / 2;
    end
    // Strip factors of two from v, halving x2 mod P alongside
    while (v % 2 == 0) begin
      v = v / 2;
      if (x2 % 2 == 0)
        x2 = x2 / 2;
      else
        x2 = (x2 + P) / 2;
    end
    // Subtract the smaller from the larger, mirroring it on the coefficients
    if (u >= v) begin
      u = u - v;
      x1 = fe_sub(x1, x2);
    end else begin
      v = v - u;
      x2 = fe_sub(x2, x1);
    end
  end

  if (u == 1)
    return x1;
  else
    return x2;
endfunction
// Fp division: returns a / b mod P.
// fe_inv(x, y) evaluates y * x^-1, so the divisor is passed as its first
// argument and the dividend as its second.
function fe_t fe_div(fe_t a, b);
  return fe_inv(b, a);
endfunction
// Fp^2 inversion: scales the conjugate (c0, -c1) by the inverse of
// c0^2 + c1^2, with the negation of c1 computed as P - c1.
function fe2_t fe2_inv(fe2_t a);
  fe_t norm_inv;
  norm_inv = fe_inv(fe_add(fe_mul(a.c0, a.c0), fe_mul(a.c1, a.c1)));
  fe2_inv = '{c0: fe_mul(a.c0, norm_inv),
              c1: fe_mul(fe_sub(P, a.c1), norm_inv)};
endfunction
// Converts a Jacobian point to its affine representation:
//   x = X / Z^2,  y = Y / Z^3,  z = 1
// A point with z == 0 has no affine form and is returned unchanged.
function jb_point_t to_affine(jb_point_t p);
  fe_t z_;

  if (p.z == 0) return p;

  z_ = fe_mul(p.z, p.z);                    // Z^2
  to_affine.z = 1;
  to_affine.x = fe_mul(p.x, fe_inv(z_));
  z_ = fe_mul(z_, p.z);                     // Z^3
  to_affine.y = fe_mul(p.y, fe_inv(z_));    // y comes from Y (previously used X)
endfunction
// Prints the x, y and z coordinates of a Jacobian point in hex, one per line.
// Declared void: it is only ever called as a statement and produces no value.
function void print_jb_point(jb_point_t p);
  $display("x:%h", p.x);
  $display("y:%h", p.y);
  $display("z:%h", p.z);
endfunction
// Prints the x, y and z coordinates of an Fp^2 Jacobian point in hex,
// showing both coefficients (c1, c0) of each coordinate.
// Declared void: it is only ever called as a statement and produces no value.
function void print_fp2_jb_point(fp2_jb_point_t p);
  $display("x:(c1:%h, c0:%h)", p.x.c1, p.x.c0);
  $display("y:(c1:%h, c0:%h)", p.y.c1, p.y.c0);
  $display("z:(c1:%h, c0:%h)", p.z.c1, p.z.c0);
endfunction
endpackage

View File

@ -26,8 +26,8 @@ module bls12_381_top
// Only tx interface is used to send messages to SW on a SEND-INTERRUPT instruction
if_axi_stream.source tx_if,
// User access to the instruction and register RAM
if_ram.sink inst_ram_usr_if,
if_ram.sink data_ram_usr_if,
if_axi_mm.sink inst_usr_if,
if_axi_mm.sink data_usr_if,
// Configuration memory
if_axi_mm.sink cfg_usr_if
);
@ -35,20 +35,23 @@ module bls12_381_top
localparam DAT_BITS = bls12_381_pkg::DAT_BITS;
// Instruction RAM
localparam INST_READ_CYCLE = 3;
logic [INST_READ_CYCLE:0] inst_ram_read;
localparam DATA_READ_CYCLE = 3;
logic [INST_READ_CYCLE:0] data_ram_read;
localparam READ_CYCLE = 3;
logic [READ_CYCLE:0] inst_ram_read, inst_usr_ram_read;
logic [READ_CYCLE:0] data_ram_read, data_usr_ram_read;
if_ram #(.RAM_WIDTH(bls12_381_pkg::INST_RAM_WIDTH), .RAM_DEPTH(bls12_381_pkg::INST_RAM_DEPTH)) inst_ram_sys_if(.i_clk(i_clk), .i_rst(i_rst));
if_ram #(.RAM_WIDTH(bls12_381_pkg::INST_RAM_WIDTH), .RAM_DEPTH(bls12_381_pkg::INST_RAM_DEPTH)) inst_ram_usr_if(.i_clk(i_clk), .i_rst(i_rst));
inst_t curr_inst;
// Data RAM
if_ram #(.RAM_WIDTH(bls12_381_pkg::DATA_RAM_WIDTH), .RAM_DEPTH(bls12_381_pkg::DATA_RAM_DEPTH)) data_ram_sys_if(.i_clk(i_clk), .i_rst(i_rst));
if_ram #(.RAM_WIDTH(bls12_381_pkg::DATA_RAM_WIDTH), .RAM_DEPTH(bls12_381_pkg::DATA_RAM_DEPTH), .BYT_EN(48)) data_ram_sys_if(.i_clk(i_clk), .i_rst(i_rst));
if_ram #(.RAM_WIDTH(bls12_381_pkg::DATA_RAM_WIDTH), .RAM_DEPTH(bls12_381_pkg::DATA_RAM_DEPTH), .BYT_EN(48)) data_ram_usr_if(.i_clk(i_clk), .i_rst(i_rst));
// Fp point multiplication
if_axi_stream #(.DAT_BITS(DAT_BITS*3)) fp_pt_mult_in_if(i_clk);
if_axi_stream #(.DAT_BITS(DAT_BITS*3)) fp_pt_mult_out_if(i_clk);
logic [DAT_BITS-1:0] k_fp_in;
logic [7:0] cnt;
@ -74,12 +77,12 @@ always_ff @ (posedge i_clk) begin
inst_ram_sys_if.re <= 1;
inst_ram_sys_if.en <= 1;
inst_ram_read <= inst_ram_read << 1;
data_ram_sys_if.re <= 1;
data_ram_sys_if.en <= 1;
data_ram_sys_if.we <= 0;
data_ram_read <= data_ram_read << 1;
if (fp_pt_mult_in_if.val && fp_pt_mult_in_if.rdy) fp_pt_mult_in_if.val <= 0;
fp_pt_mult_out_if.rdy <= 1;
@ -87,8 +90,26 @@ always_ff @ (posedge i_clk) begin
{NOOP_WAIT}: begin
// Wait in this state
inst_state <= curr_inst.code;
cnt <= 0;
end
{FP_POINT_MULT}: begin
{COPY_REG}: begin
inst_ram_sys_if.a <= inst_ram_sys_if.a + 1;
inst_ram_read[0] <= 1;
data_ram_sys_if.a <= curr_inst.a;
data_ram_read[0] <= 1;
if (data_ram_read[READ_CYCLE]) begin
data_ram_sys_if.a <= curr_inst.b;
data_ram_sys_if.d <= data_ram_sys_if.q;
data_ram_sys_if.we <= -1;
end
if (inst_ram_read[READ_CYCLE]) begin
inst_state <= curr_inst.code;
end
end
{FP_FPOINT_MULT}: begin
case(cnt) inside
0: begin
data_ram_sys_if.a <= curr_inst.a;
@ -96,9 +117,9 @@ always_ff @ (posedge i_clk) begin
cnt <= cnt + 1;
end
1: begin
if (data_ram_read[DATA_READ_CYCLE]) begin
if (data_ram_read[READ_CYCLE]) begin
data_ram_sys_if.a <= curr_inst.b;
k_fp_in <= data_ram_sys_if.q;
k_fp_in <= data_ram_sys_if.q;
fp_pt_mult_in_if.dat <= bls12_381_pkg::g_point;
fp_pt_mult_in_if.val <= 1;
data_ram_read[0] <= 1;
@ -110,7 +131,7 @@ always_ff @ (posedge i_clk) begin
fp_pt_mult_out_if.rdy <= 0;
if (fp_pt_mult_out_if.val) begin
data_ram_sys_if.d <= fp_pt_mult_out_if.dat;
data_ram_sys_if.we <= 1;
data_ram_sys_if.we <= -1;
cnt <= cnt + 1;
end
end
@ -118,40 +139,99 @@ always_ff @ (posedge i_clk) begin
fp_pt_mult_out_if.rdy <= 0;
data_ram_sys_if.d <= fp_pt_mult_out_if.dat >> DAT_BITS;
data_ram_sys_if.a <= data_ram_sys_if.a + 1;
data_ram_sys_if.we <= 1;
data_ram_sys_if.we <= -1;
cnt <= cnt + 1;
end
4: begin
data_ram_sys_if.d <= fp_pt_mult_out_if.dat >> (2*DAT_BITS);
data_ram_sys_if.we <= 1;
data_ram_sys_if.we <= -1;
data_ram_sys_if.a <= data_ram_sys_if.a + 1;
cnt <= cnt + 1;
inst_ram_sys_if.a <= inst_ram_sys_if.a + 1;
inst_ram_read[0] <= 1;
end
5: begin
if (inst_ram_read[INST_READ_CYCLE]) begin
if (inst_ram_read[READ_CYCLE]) begin
inst_state <= curr_inst.code;
cnt <= 0;
end
end
endcase
end
endcase
end
end
// Configuration registers
// Configuration registers, instruction, data RAM
always_ff @ (posedge i_clk) begin
if (i_rst) begin
cfg_usr_if.reset_sink();
end else begin
cfg_usr_if.rd_dat_val <= 0;
if (cfg_usr_if.wr) begin
inst_usr_if.reset_sink();
data_usr_if.reset_sink();
inst_ram_usr_if.reset_source();
data_ram_usr_if.reset_source();
inst_usr_ram_read <= 0;
data_usr_ram_read <= 0;
end else begin
data_usr_ram_read <= data_usr_ram_read << 1;
inst_usr_ram_read <= inst_usr_ram_read << 1;
cfg_usr_if.rd_dat_val <= 0;
data_usr_if.rd_dat <= data_ram_usr_if.q;
inst_usr_if.rd_dat <= inst_ram_usr_if.q;
data_usr_if.rd_dat_val <= data_usr_ram_read[READ_CYCLE];
inst_usr_if.rd_dat_val <= inst_usr_ram_read[READ_CYCLE];
inst_ram_usr_if.en <= 1;
inst_ram_usr_if.re <= 1;
inst_ram_usr_if.we <= 0;
data_ram_usr_if.en <= 1;
data_ram_usr_if.re <= 1;
data_ram_usr_if.we <= 0;
// Write access
if (data_usr_if.wr) begin
data_ram_usr_if.a <= data_usr_if.addr >> DATA_RAM_ALIGN_BYTE/DATA_RAM_USR_WIDTH;
data_ram_usr_if.d <= data_usr_if.wr_dat << (data_usr_if.addr % DATA_RAM_ALIGN_BYTE)*8;
data_ram_usr_if.we <= {8{1'd1}} << (data_usr_if.addr % DATA_RAM_ALIGN_BYTE);
end
if (inst_usr_if.wr) begin
inst_ram_usr_if.a <= inst_usr_if.addr >> INST_RAM_ALIGN_BYTE/INST_RAM_USR_WIDTH;
inst_ram_usr_if.d <= inst_usr_if.wr_dat;
inst_ram_usr_if.we <= 1;
end
if (cfg_usr_if.wr) begin
// Currently no write supported
end
// Read access
if (data_usr_if.rd) begin
data_usr_ram_read[0] <= 1;
data_ram_usr_if.a <= data_usr_if.addr >> DATA_RAM_ALIGN_BYTE/DATA_RAM_USR_WIDTH;
end
if (inst_usr_if.rd) begin
inst_usr_ram_read[0] <= 1;
inst_ram_usr_if.a <= inst_usr_if.addr >> INST_RAM_ALIGN_BYTE/INST_RAM_USR_WIDTH;
end
if (cfg_usr_if.rd) begin
cfg_usr_if.rd_dat_val <= 1;
case(cfg_usr_if.addr)
0: begin
cfg_usr_if.rd_dat <= inst_ram_sys_if.a;
end
endcase
end
end
end
@ -159,7 +239,7 @@ end
uram_reset #(
.RAM_WIDTH(bls12_381_pkg::INST_RAM_WIDTH),
.RAM_DEPTH(bls12_381_pkg::INST_RAM_DEPTH),
.PIPELINES( INST_READ_CYCLE - 2 )
.PIPELINES( READ_CYCLE - 2 )
)
inst_uram_reset (
.a ( inst_ram_usr_if ),
@ -169,7 +249,7 @@ inst_uram_reset (
uram_reset #(
.RAM_WIDTH(bls12_381_pkg::DATA_RAM_WIDTH),
.RAM_DEPTH(bls12_381_pkg::DATA_RAM_DEPTH),
.PIPELINES( DATA_READ_CYCLE - 2 )
.PIPELINES( READ_CYCLE - 2 )
)
data_uram_reset (
.a ( data_ram_usr_if ),

View File

@ -36,9 +36,10 @@ initial begin
end
if_axi_stream #(.DAT_BYTS(8)) out_if(clk);
if_ram #(.RAM_WIDTH(bls12_381_pkg::INST_RAM_WIDTH), .RAM_DEPTH(bls12_381_pkg::INST_RAM_DEPTH)) inst_ram_usr_if(.i_clk(clk), .i_rst(rst));
if_ram #(.RAM_WIDTH(bls12_381_pkg::DATA_RAM_WIDTH), .RAM_DEPTH(bls12_381_pkg::DATA_RAM_DEPTH)) data_ram_usr_if(.i_clk(clk), .i_rst(rst));
if_axi_mm #(.D_BITS(64), .A_BITS(8)) cfg_usr_if(clk);
if_axi_mm #(.D_BITS(32), .A_BITS(8)) cfg_usr_if(clk);
if_axi_mm #(.D_BITS(32), .A_BITS(DATA_RAM_DEPTH * DATA_RAM_ALIGN_BYTE)) data_usr_if(clk);
if_axi_mm #(.D_BITS(32), .A_BITS(INST_RAM_DEPTH * INST_RAM_ALIGN_BYTE)) inst_usr_if(clk);
bls12_381_top bls12_381_top (
.i_clk ( clk ),
@ -46,23 +47,28 @@ bls12_381_top bls12_381_top (
// Only tx interface is used to send messages to SW on a SEND-INTERRUPT instruction
.tx_if ( out_if ),
// User access to the instruction and register RAM
.inst_ram_usr_if ( inst_ram_usr_if ),
.data_ram_usr_if ( data_ram_usr_if ),
.inst_usr_if ( inst_usr_if ),
.data_usr_if ( data_usr_if ),
// Configuration memory
.cfg_usr_if ( cfg_usr_if )
);
task test_0();
task test_fp_point_mult();
begin
integer signed get_len;
logic [common_pkg::MAX_SIM_BYTS*8-1:0] expected, get_dat;
inst_t inst;
point_type_t pt;
logic [DAT_BITS-1:0] data = 0;
$display("Running test_0...");
inst = '{code:FP_POINT_MULT, a:0, b:0, c:0};
data_ram_usr_if.write_data(0, 100);
inst_ram_usr_if.write_data(0, inst);
pt = SCALAR;
data = 0;
inst = '{code:FP_FPOINT_MULT, a:0, b:0, c:0};
data_usr_if.put_data_multiple({pt, data}, 0);
inst_usr_if.put_data_multiple(inst, 0);
$display("test_0 PASSED");
end
@ -70,15 +76,15 @@ endtask;
initial begin
inst_ram_usr_if.reset_source();
data_ram_usr_if.reset_source();
inst_usr_if.reset_source();
data_usr_if.reset_source();
cfg_usr_if.reset_source();
#100ns;
// Wait for memories to reset
while(!bls12_381_top.inst_uram_reset.reset_done ||
!bls12_381_top.data_uram_reset.reset_done)
@(posedge clk);
test_0();
#1us $finish();