Updates to logic for point multiplication in Fp2

This commit is contained in:
bsdevlin 2019-06-13 17:57:03 +08:00
parent c3e527b87e
commit 8ebfdeb734
13 changed files with 757 additions and 781 deletions

View File

@ -1,6 +1,8 @@
/*
This provides the interface to perform
Fp^2 point logic (adding, subtracting, multiplication)
Fp^2 point logic (adding, subtracting, multiplication).
A possible improvement would be a control to bypass the Fp2 logic so that plain Fp operations run faster.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
@ -21,7 +23,8 @@
module ec_fe2_arithmetic
#(
parameter type FE_TYPE,
parameter type FE2_TYPE
parameter type FE2_TYPE,
parameter CTL_BIT = 8 // From this bit 2 bits are used for control
)(
input i_clk, i_rst,
// Interface to FE_TYPE multiplier (mod P)
@ -44,7 +47,6 @@ module ec_fe2_arithmetic
if_axi_stream.sink i_sub_fe2_if
);
localparam ADD_CTL_BIT = 8;
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16)) add_if_fe_i [2] (i_clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) add_if_fe_o [2] (i_clk);
@ -76,7 +78,7 @@ always_ff @ (posedge i_clk) begin
add_if_fe_o[0].copy_if({i_add_fe2_if.dat[0 +: $bits(FE_TYPE)],
i_add_fe2_if.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)]},
i_add_fe2_if.val, 1, 1, i_add_fe2_if.err, i_add_fe2_if.mod, i_add_fe2_if.ctl);
add_if_fe_o[0].ctl[ADD_CTL_BIT] <= 0;
add_if_fe_o[0].ctl[CTL_BIT] <= 0;
if (i_add_fe2_if.val) add_state <= ADD1;
end
end
@ -85,7 +87,7 @@ always_ff @ (posedge i_clk) begin
add_if_fe_o[0].copy_if({i_add_fe2_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)],
i_add_fe2_if.dat[$bits(FE2_TYPE)+$bits(FE_TYPE) +: $bits(FE_TYPE)]},
i_add_fe2_if.val, 1, 1, i_add_fe2_if.err, i_add_fe2_if.mod, i_add_fe2_if.ctl);
add_if_fe_o[0].ctl[ADD_CTL_BIT] <= 1;
add_if_fe_o[0].ctl[CTL_BIT] <= 1;
if (i_add_fe2_if.val) add_state <= ADD0;
end
end
@ -94,7 +96,7 @@ always_ff @ (posedge i_clk) begin
// One process to assign outputs
if (~o_add_fe2_if.val || (o_add_fe2_if.val && o_add_fe2_if.rdy)) begin
o_add_fe2_if.ctl <= add_if_fe_i[0].ctl;
if (add_if_fe_i[0].ctl[ADD_CTL_BIT] == 0) begin
if (add_if_fe_i[0].ctl[CTL_BIT] == 0) begin
if (add_if_fe_i[0].val)
o_add_fe2_if.dat[0 +: $bits(FE_TYPE)] <= add_if_fe_i[0].dat;
end else begin
@ -129,7 +131,7 @@ always_ff @ (posedge i_clk) begin
sub_if_fe_o[0].copy_if({i_sub_fe2_if.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)],
i_sub_fe2_if.dat[0 +: $bits(FE_TYPE)]},
i_sub_fe2_if.val, 1, 1, i_sub_fe2_if.err, i_sub_fe2_if.mod, i_sub_fe2_if.ctl);
sub_if_fe_o[0].ctl[ADD_CTL_BIT] <= 0;
sub_if_fe_o[0].ctl[CTL_BIT] <= 0;
if (i_sub_fe2_if.val) sub_state <= SUB1;
end
end
@ -138,7 +140,7 @@ always_ff @ (posedge i_clk) begin
sub_if_fe_o[0].copy_if({i_sub_fe2_if.dat[$bits(FE_TYPE) + $bits(FE2_TYPE) +: $bits(FE_TYPE)],
i_sub_fe2_if.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)]},
i_sub_fe2_if.val, 1, 1, i_sub_fe2_if.err, i_sub_fe2_if.mod, i_sub_fe2_if.ctl);
sub_if_fe_o[0].ctl[ADD_CTL_BIT] <= 1;
sub_if_fe_o[0].ctl[CTL_BIT] <= 1;
if (i_sub_fe2_if.val) sub_state <= SUB0;
end
end
@ -147,7 +149,7 @@ always_ff @ (posedge i_clk) begin
// One process to assign outputs
if (~o_sub_fe2_if.val || (o_sub_fe2_if.val && o_sub_fe2_if.rdy)) begin
o_sub_fe2_if.ctl <= sub_if_fe_i[0].ctl;
if (sub_if_fe_i[0].ctl[ADD_CTL_BIT] == 0) begin
if (sub_if_fe_i[0].ctl[CTL_BIT] == 0) begin
if (sub_if_fe_i[0].val)
o_sub_fe2_if.dat[0 +: $bits(FE_TYPE)] <= sub_if_fe_i[0].dat;
end else begin
@ -165,7 +167,7 @@ logic [1:0] add_sub_val;
always_comb begin
mul_if_fe2_i.rdy = mul_state == MUL3 && (~o_mul_fe_if.val || (o_mul_fe_if.val && o_mul_fe_if.rdy));
i_mul_fe_if.rdy = (i_mul_fe_if.ctl[ADD_CTL_BIT +: 2] == 0 || i_mul_fe_if.ctl[ADD_CTL_BIT +: 2] == 1) ?
i_mul_fe_if.rdy = (i_mul_fe_if.ctl[CTL_BIT +: 2] == 0 || i_mul_fe_if.ctl[CTL_BIT +: 2] == 1) ?
(~sub_if_fe_o[1].val || (sub_if_fe_o[1].val && sub_if_fe_o[1].rdy)) :
(~add_if_fe_o[1].val || (add_if_fe_o[1].val && add_if_fe_o[1].rdy));
@ -202,28 +204,28 @@ always_ff @ (posedge i_clk) begin
o_mul_fe_if.copy_if({mul_if_fe2_i.dat[0 +: $bits(FE_TYPE)],
mul_if_fe2_i.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)]},
mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl);
o_mul_fe_if.ctl[ADD_CTL_BIT +: 2] <= 0;
o_mul_fe_if.ctl[CTL_BIT +: 2] <= 0;
if (mul_if_fe2_i.val) mul_state <= MUL1;
end
MUL1: begin
o_mul_fe_if.copy_if({mul_if_fe2_i.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)],
mul_if_fe2_i.dat[$bits(FE2_TYPE) + $bits(FE_TYPE) +: $bits(FE_TYPE)]},
mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl);
o_mul_fe_if.ctl[ADD_CTL_BIT +: 2] <= 1;
o_mul_fe_if.ctl[CTL_BIT +: 2] <= 1;
if (mul_if_fe2_i.val) mul_state <= MUL2;
end
MUL2: begin
o_mul_fe_if.copy_if({mul_if_fe2_i.dat[0 +: $bits(FE_TYPE)],
mul_if_fe2_i.dat[$bits(FE2_TYPE) + $bits(FE_TYPE) +: $bits(FE_TYPE)]},
mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl);
o_mul_fe_if.ctl[ADD_CTL_BIT +: 2] <= 2;
o_mul_fe_if.ctl[CTL_BIT +: 2] <= 2;
if (mul_if_fe2_i.val) mul_state <= MUL3;
end
MUL3: begin
o_mul_fe_if.copy_if({mul_if_fe2_i.dat[$bits(FE_TYPE) +: $bits(FE_TYPE)],
mul_if_fe2_i.dat[$bits(FE2_TYPE) +: $bits(FE_TYPE)]},
mul_if_fe2_i.val, 1, 1, mul_if_fe2_i.err, mul_if_fe2_i.mod, mul_if_fe2_i.ctl);
o_mul_fe_if.ctl[ADD_CTL_BIT +: 2] <= 3;
o_mul_fe_if.ctl[CTL_BIT +: 2] <= 3;
if (mul_if_fe2_i.val) mul_state <= MUL0;
end
endcase
@ -231,10 +233,10 @@ always_ff @ (posedge i_clk) begin
// Process multiplications and do subtraction
if (~sub_if_fe_o[1].val || (sub_if_fe_o[1].val && sub_if_fe_o[1].rdy)) begin
if (i_mul_fe_if.ctl[ADD_CTL_BIT +: 2] == 0) begin
if (i_mul_fe_if.ctl[CTL_BIT +: 2] == 0) begin
if (i_mul_fe_if.val) sub_if_fe_o[1].dat[0 +: $bits(FE_TYPE)] <= i_mul_fe_if.dat;
end
if (i_mul_fe_if.ctl[ADD_CTL_BIT +: 2] == 1) begin
if (i_mul_fe_if.ctl[CTL_BIT +: 2] == 1) begin
sub_if_fe_o[1].val <= i_mul_fe_if.val;
sub_if_fe_o[1].dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= i_mul_fe_if.dat;
end
@ -243,10 +245,10 @@ always_ff @ (posedge i_clk) begin
// Process multiplications and do addition
if (~add_if_fe_o[1].val || (add_if_fe_o[1].val && add_if_fe_o[1].rdy)) begin
if (i_mul_fe_if.ctl[ADD_CTL_BIT +: 2] == 2) begin
if (i_mul_fe_if.ctl[CTL_BIT +: 2] == 2) begin
if (i_mul_fe_if.val) add_if_fe_o[1].dat[0 +: $bits(FE_TYPE)] <= i_mul_fe_if.dat;
end
if (i_mul_fe_if.ctl[ADD_CTL_BIT +: 2] == 3) begin
if (i_mul_fe_if.ctl[CTL_BIT +: 2] == 3) begin
add_if_fe_o[1].val <= i_mul_fe_if.val;
add_if_fe_o[1].dat[$bits(FE_TYPE) +: $bits(FE_TYPE)] <= i_mul_fe_if.dat;
end

View File

@ -27,17 +27,8 @@ module ec_fp_mult_mod #(
parameter CTL_BITS = 16
)(
input i_clk, i_rst,
// Input value
input [DAT_BITS-1:0] i_dat_a,
input [DAT_BITS-1:0] i_dat_b,
input i_val,
input [CTL_BITS-1:0] i_ctl,
output logic o_rdy,
// output
output logic [DAT_BITS-1:0] o_dat,
output logic [CTL_BITS-1:0] o_ctl,
input i_rdy,
output logic o_val
if_axi_stream.sink i_mul,
if_axi_stream.source o_mul
);
// The reduction mod takes DAT_BITS + 1 bits, but we also need to make sure we are a multiple of KARATSUBA_LVL*2
@ -54,12 +45,12 @@ karatsuba_ofman_mult # (
karatsuba_ofman_mult_0 (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_ctl ( i_ctl ),
.i_dat_a( {3'd0, i_dat_a} ),
.i_dat_b( {3'd0, i_dat_b} ),
.i_val ( i_val ),
.o_rdy ( o_rdy ),
.o_dat ( mult_if[0].dat ),
.i_ctl ( i_mul.ctl ),
.i_dat_a( {{(MLT_BITS-DAT_BITS){1'd0}}, i_mul.dat[0 +: DAT_BITS]} ),
.i_dat_b( {{(MLT_BITS-DAT_BITS){1'd0}}, i_mul.dat[DAT_BITS +: DAT_BITS]} ),
.i_val ( i_mul.val ),
.o_rdy ( i_mul.rdy ),
.o_dat ( mult_if[0].dat ),
.o_val ( mult_if[0].val ),
.i_rdy ( mult_if[0].rdy ),
.o_ctl ( mult_if[0].ctl )
@ -114,11 +105,11 @@ barret_mod_pipe (
.i_dat ( mult_if[0].dat ),
.i_val ( mult_if[0].val ),
.i_ctl ( mult_if[0].ctl ),
.o_ctl ( o_ctl ),
.o_ctl ( o_mul.ctl ),
.o_rdy ( mult_if[0].rdy ),
.o_dat ( o_dat ),
.o_val ( o_val ),
.i_rdy ( i_rdy ),
.o_dat ( o_mul.dat ),
.o_val ( o_mul.val ),
.i_rdy ( o_mul.rdy ),
.o_mult_if_0 ( mult_if[1] ),
.i_mult_if_0 ( mult_if[2] ),
.o_mult_if_1 ( mult_if[3] ),

View File

@ -1,394 +0,0 @@
/*
This performs point multiplication. We use the standard double
and add algorithm.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// ec_fp_point_mult
//
// Scalar point multiplication driven by a double-and-add state machine.
// The scalar i_k is consumed LSB first: on every step the point register
// p_n is doubled and, when the current scalar bit is set, added into the
// accumulator p_q.
//
// Parameters:
//   P              - modulus, forwarded to the dbl/add/mult/add/sub sub-blocks.
//   POINT_TYPE     - point type; the logic below reads its .x and .y members.
//   DAT_BITS       - width of the scalar and of one field element.
//   RESOURCE_SHARE - "YES": arithmetic requests are routed out through the
//                    o_*_if / i_*_if ports; anything else: local multiplier,
//                    adder and subtractor instances are used and the ports are
//                    tied off.
//
// Handshake: i_val/o_rdy accept a new (i_p, i_k) job, o_val/i_rdy hand back
// the result o_p. i_p2_val/o_rdy instead start a single i_p + i_p2 operation.
module ec_fp_point_mult
#(
parameter P,
parameter type POINT_TYPE,
parameter DAT_BITS = $clog2(P),
parameter RESOURCE_SHARE = "NO"
)(
input i_clk, i_rst,
// Input point and value to multiply
input POINT_TYPE i_p,
input logic [DAT_BITS-1:0] i_k,
input logic i_val,
output logic o_rdy,
// Output point
output POINT_TYPE o_p,
input logic i_rdy,
output logic o_val,
output logic o_err,
// Interface to shared logic (mod p) (if RESOURCE_SHARE == "YES")
if_axi_stream.source o_mult_if,
if_axi_stream.sink i_mult_if,
if_axi_stream.source o_add_if,
if_axi_stream.sink i_add_if,
if_axi_stream.source o_sub_if,
if_axi_stream.sink i_sub_if,
// We provide another input so that the final point addition can be done
input POINT_TYPE i_p2,
input i_p2_val
);
// [0] is connection from/to dbl block, [1] is add block, [2] is arbitrated value
if_axi_stream #(.DAT_BITS(DAT_BITS*2), .CTL_BITS(16)) mult_in_if [2:0] (i_clk);
if_axi_stream #(.DAT_BITS(DAT_BITS), .CTL_BITS(16)) mult_out_if [2:0] (i_clk);
if_axi_stream #(.DAT_BITS(DAT_BITS*2), .CTL_BITS(16)) add_in_if [2:0] (i_clk);
if_axi_stream #(.DAT_BITS(DAT_BITS), .CTL_BITS(16)) add_out_if [2:0] (i_clk);
if_axi_stream #(.DAT_BITS(DAT_BITS*2), .CTL_BITS(16)) sub_in_if [2:0] (i_clk);
if_axi_stream #(.DAT_BITS(DAT_BITS), .CTL_BITS(16)) sub_out_if [2:0] (i_clk);
// k_l: remaining scalar bits, shifted right by one each step.
logic [DAT_BITS-1:0] k_l;
// p_n: point fed to the doubler (and second adder operand); p_q: accumulator
// (first adder operand); p_dbl / p_add: outputs of the doubler / adder.
POINT_TYPE p_n, p_q, p_dbl, p_add;
logic p_dbl_in_val, p_dbl_in_rdy, p_dbl_out_err, p_dbl_out_val, p_dbl_out_rdy, p_dbl_done;
logic p_add_in_val, p_add_in_rdy, p_add_out_err, p_add_out_val, p_add_out_rdy, p_add_done;
// special_dbl: the add was skipped because both operands were equal, so the
// next doubler result is also written into p_q.
// lookahead_dbl: the next double was already issued before the add finished.
logic special_dbl, lookahead_dbl;
enum {IDLE, DOUBLE_ADD, ADD_ONLY, FINISHED} state;
always_ff @ (posedge i_clk) begin
if (i_rst) begin
o_val <= 0;
o_err <= 0;
o_rdy <= 0;
k_l <= 0;
p_q <= 0;
p_dbl_in_val <= 0;
p_dbl_out_rdy <= 0;
p_add_in_val <= 0;
p_add_out_rdy <= 0;
state <= IDLE;
o_p <= 0;
p_n <= 0;
p_dbl_done <= 0;
p_add_done <= 0;
special_dbl <= 0;
lookahead_dbl <= 0;
end else begin
case (state)
// IDLE: keep sampling the inputs until a job is accepted. Both *_done flags
// are preset to 1 so the first DOUBLE_ADD cycle immediately issues work.
{IDLE}: begin
p_dbl_out_rdy <= 1;
p_add_out_rdy <= 1;
p_dbl_done <= 1;
p_add_done <= 1;
special_dbl <= 0;
lookahead_dbl <= 0;
o_rdy <= 1;
o_err <= 0;
p_q <= 0; // p_q starts at 0
p_n <= i_p;
k_l <= i_k;
if (o_rdy && i_val) begin
o_rdy <= 0;
state <= DOUBLE_ADD;
end
// This block comes after the i_val block, so if both i_val and i_p2_val are
// high in the same cycle the ADD_ONLY assignments below win.
if (o_rdy && i_p2_val) begin
o_rdy <= 0;
p_n <= i_p;
p_q <= i_p2;
state <= ADD_ONLY;
// Check for special cases to determine double or add
if (i_p.x == i_p2.x && i_p.y == i_p2.y) begin
p_dbl_in_val <= 1;
end else begin
p_add_in_val <= 1;
end
end
end
{DOUBLE_ADD}: begin
// Drop each request valid once the corresponding block has accepted it.
p_dbl_in_val <= (p_dbl_in_val && p_dbl_in_rdy) ? 0 : p_dbl_in_val;
p_add_in_val <= (p_add_in_val && p_add_in_rdy) ? 0 : p_add_in_val;
if (p_dbl_out_val && p_dbl_out_rdy) begin
p_dbl_done <= 1;
if (special_dbl) begin
p_q <= p_dbl;
special_dbl <= 0;
end
p_n <= p_dbl;
// We can look ahead and start the next double
if ((k_l >> 1) != 0 && ~lookahead_dbl && ~p_add_done) begin
p_dbl_in_val <= 1;
lookahead_dbl <= 1;
p_dbl_out_rdy <= 0; // Want to make sure we don't output while still waiting for add
end
end
if (p_add_out_val && p_add_out_rdy) begin
p_add_done <= 1;
p_q <= p_add;
end
// Update variables and issue new commands
// The assignments in this block are order dependent: later non-blocking
// assignments to the same signal override the earlier defaults.
if (p_add_done && p_dbl_done) begin
lookahead_dbl <= 0;
p_dbl_out_rdy <= 1;
p_add_done <= 0;
p_dbl_done <= 0;
k_l <= k_l >> 1;
if (k_l[0]) begin
p_add_in_val <= 1;
// Need to check for special case where the x, y point is the same
if (p_q.x == p_n.x && p_q.y == p_n.y) begin
special_dbl <= 1;
p_add_in_val <= 0;
p_add_done <= 1;
end
end else begin
// Current scalar bit is 0: no add needed this step.
p_add_done <= 1;
end
// Don't need to double on the final bit
if ((k_l >> 1) != 0)
p_dbl_in_val <= ~lookahead_dbl; // Don't do if we already started
else
p_dbl_done <= 1;
// All scalar bits consumed: publish the result and cancel any request.
// NOTE(review): the result is taken from p_add rather than the accumulator
// p_q; these differ when the last accumulator update came from the doubler
// (special_dbl) or when no add was ever issued - confirm this is intended.
if (k_l == 0) begin
state <= FINISHED;
o_p <= p_add;
o_val <= 1;
p_dbl_in_val <= 0;
p_add_in_val <= 0;
end
end
end
// ADD_ONLY: a single double or add was issued from IDLE; forward whichever
// block produces a result.
{ADD_ONLY}: begin
p_dbl_in_val <= (p_dbl_in_val && p_dbl_in_rdy) ? 0 : p_dbl_in_val;
p_add_in_val <= (p_add_in_val && p_add_in_rdy) ? 0 : p_add_in_val;
if (p_dbl_out_val && p_dbl_out_rdy) begin
state <= FINISHED;
o_p <= p_dbl;
o_val <= 1;
end
if (p_add_out_val && p_add_out_rdy) begin
state <= FINISHED;
o_p <= p_add;
o_val <= 1;
end
end
// FINISHED: hold o_val until the consumer accepts the result.
{FINISHED}: begin
if (i_rdy && o_val) begin
o_val <= 0;
state <= IDLE;
end
end
endcase
// An error from either sub-block overrides the state machine above and
// terminates the job with o_err set.
if (p_dbl_out_err || p_add_out_err) begin
o_err <= 1;
o_val <= 1;
state <= FINISHED;
end
end
end
// Point doubler: doubles p_n, arithmetic requests go out on index [0].
ec_fp_point_dbl #(
.P ( P ),
.POINT_TYPE ( POINT_TYPE )
)
ec_fp_point_dbl (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_p ( p_n ),
.i_val ( p_dbl_in_val ),
.o_rdy ( p_dbl_in_rdy ),
// Output point
.o_p ( p_dbl ),
.o_err ( p_dbl_out_err ),
.i_rdy ( p_dbl_out_rdy ),
.o_val ( p_dbl_out_val ),
// Interfaces to shared logic
.o_mult_if ( mult_in_if[0] ),
.i_mult_if ( mult_out_if[0] ),
.o_add_if ( add_in_if[0] ),
.i_add_if ( add_out_if[0] ),
.o_sub_if ( sub_in_if[0] ),
.i_sub_if ( sub_out_if[0] )
);
// Point adder: computes p_q + p_n, arithmetic requests go out on index [1].
ec_fp_point_add #(
.P ( P ),
.POINT_TYPE ( POINT_TYPE )
)
ec_fp_point_add (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
// Input points
.i_p1 ( p_q ),
.i_p2 ( p_n ),
.i_val ( p_add_in_val ),
.o_rdy ( p_add_in_rdy ),
// Output point
.o_p ( p_add ),
.o_err ( p_add_out_err ),
.i_rdy ( p_add_out_rdy ),
.o_val ( p_add_out_val ),
// Interfaces to shared logic
.o_mult_if ( mult_in_if[1] ),
.i_mult_if ( mult_out_if[1] ),
.o_add_if ( add_in_if[1] ),
.i_add_if ( add_out_if[1] ),
.o_sub_if ( sub_in_if[1] ),
.i_sub_if ( sub_out_if[1] )
);
// Arbitrate the doubler [0] and adder [1] multiplier requests onto index [2].
resource_share # (
.NUM_IN ( 2 ),
.OVR_WRT_BIT ( 8 ),
.PIPELINE_IN ( 0 ),
.PIPELINE_OUT ( 0 )
)
resource_share_mult (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_axi ( mult_in_if[1:0] ),
.o_res ( mult_in_if[2] ),
.i_res ( mult_out_if[2] ),
.o_axi ( mult_out_if[1:0] )
);
// Same arbitration for the modular adder requests.
resource_share # (
.NUM_IN ( 2 ),
.OVR_WRT_BIT ( 8 ),
.PIPELINE_IN ( 0 ),
.PIPELINE_OUT ( 0 )
)
resource_share_add (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_axi ( add_in_if[1:0] ),
.o_res ( add_in_if[2] ),
.i_res ( add_out_if[2] ),
.o_axi ( add_out_if[1:0] )
);
// Same arbitration for the modular subtractor requests.
resource_share # (
.NUM_IN ( 2 ),
.OVR_WRT_BIT ( 8 ),
.PIPELINE_IN ( 0 ),
.PIPELINE_OUT ( 0 )
)
resource_share_sub (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_axi ( sub_in_if[1:0] ),
.o_res ( sub_in_if[2] ),
.i_res ( sub_out_if[2] ),
.o_axi ( sub_out_if[1:0] )
);
generate
// RESOURCE_SHARE == "YES": pass the arbitrated [2] streams straight through
// to the module ports so an external arithmetic unit can serve them.
if (RESOURCE_SHARE == "YES") begin: RESOURCE_GEN
always_comb begin
o_mult_if.copy_if_comb(mult_in_if[2].dat, mult_in_if[2].val, 1, 1, 0, 0, mult_in_if[2].ctl);
mult_in_if[2].rdy = o_mult_if.rdy;
mult_out_if[2].copy_if_comb(i_mult_if.dat, i_mult_if.val, 1, 1, 0, 0, i_mult_if.ctl);
i_mult_if.rdy = mult_out_if[2].rdy;
end
always_comb begin
o_add_if.copy_if_comb(add_in_if[2].dat, add_in_if[2].val, 1, 1, 0, 0, add_in_if[2].ctl);
add_in_if[2].rdy = o_add_if.rdy;
add_out_if[2].copy_if_comb(i_add_if.dat, i_add_if.val, 1, 1, 0, 0, i_add_if.ctl);
i_add_if.rdy = add_out_if[2].rdy;
end
always_comb begin
o_sub_if.copy_if_comb(sub_in_if[2].dat, sub_in_if[2].val, 1, 1, 0, 0, sub_in_if[2].ctl);
sub_in_if[2].rdy = o_sub_if.rdy;
sub_out_if[2].copy_if_comb(i_sub_if.dat, i_sub_if.val, 1, 1, 0, 0, i_sub_if.ctl);
i_sub_if.rdy = sub_out_if[2].rdy;
end
end else begin
// Otherwise: tie off the external ports and serve the [2] streams with
// local multiplier / adder / subtractor instances.
always_comb begin
o_mult_if.reset_source();
i_mult_if.rdy = 0;
o_add_if.reset_source();
i_add_if.rdy = 0;
o_sub_if.reset_source();
i_sub_if.rdy = 0;
end
ec_fp_mult_mod #(
.P ( P ),
.KARATSUBA_LVL ( 2 ),
.CTL_BITS ( 16 )
)
ec_fp_mult_mod (
.i_clk( i_clk ),
.i_rst( i_rst ),
.i_dat_a ( mult_in_if[2].dat[0 +: DAT_BITS] ),
.i_dat_b ( mult_in_if[2].dat[DAT_BITS +: DAT_BITS] ),
.i_val ( mult_in_if[2].val ),
.i_ctl ( mult_in_if[2].ctl ),
.o_rdy ( mult_in_if[2].rdy ),
.o_dat ( mult_out_if[2].dat ),
.i_rdy ( mult_out_if[2].rdy ),
.o_val ( mult_out_if[2].val ),
.o_ctl ( mult_out_if[2].ctl )
);
adder_pipe # (
.P ( P ),
.CTL_BITS ( 16 ),
.LEVEL ( 2 )
)
adder_pipe (
.i_clk( i_clk ),
.i_rst( i_rst ),
.i_dat_a ( add_in_if[2].dat[0 +: DAT_BITS] ),
.i_dat_b ( add_in_if[2].dat[DAT_BITS +: DAT_BITS] ),
.i_ctl ( add_in_if[2].ctl ),
.i_val ( add_in_if[2].val ),
.o_rdy ( add_in_if[2].rdy ),
.o_dat ( add_out_if[2].dat ),
.o_val ( add_out_if[2].val ),
.o_ctl ( add_out_if[2].ctl ),
.i_rdy ( add_out_if[2].rdy )
);
subtractor_pipe # (
.P ( P ),
.CTL_BITS ( 16 ),
.LEVEL ( 2 )
)
subtractor_pipe (
.i_clk( i_clk ),
.i_rst( i_rst ),
.i_dat_a ( sub_in_if[2].dat[0 +: DAT_BITS] ),
.i_dat_b ( sub_in_if[2].dat[DAT_BITS +: DAT_BITS] ),
.i_ctl ( sub_in_if[2].ctl ),
.i_val ( sub_in_if[2].val ),
.o_rdy ( sub_in_if[2].rdy ),
.o_dat ( sub_out_if[2].dat ),
.o_val ( sub_out_if[2].val ),
.o_ctl ( sub_out_if[2].ctl ),
.i_rdy ( sub_out_if[2].rdy )
);
end
endgenerate
endmodule

View File

@ -0,0 +1,165 @@
/*
This performs point multiplication. We use the standard double
and add algorithm, with some look ahead so we can perform
adds or doubles as early as possible.
A possible optimization would be to recode the scalar in non-adjacent form (NAF).
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// ec_point_mult
//
// Scalar point multiplication controller (double-and-add, LSB first). The
// point arithmetic itself is external: this module only sequences requests
// to a point doubler (o_dbl / i_dbl) and a point adder (o_add / i_add).
//
// Parameters:
//   P        - modulus, only used for the default of DAT_BITS.
//   FP_TYPE  - point type; $bits(FP_TYPE) sets the width of one point.
//   DAT_BITS - width of the scalar register k_l.
//
// Streams:
//   i_pt_mult - job input: .dat carries the point, .ctl carries the scalar.
//   o_pt_mult - result output: .dat carries the resulting point, .err is set
//               when the adder or doubler flagged an error.
//   o_dbl.dat - point currently being doubled (running 2^i * point).
//   o_add.dat - lower half: accumulator, upper half: mirror of o_dbl.dat.
module ec_point_mult
#(
  parameter P,
  parameter type FP_TYPE,
  parameter DAT_BITS = $clog2(P)
)(
  input i_clk, i_rst,
  // Input point and value to multiply in control
  if_axi_stream.source o_pt_mult,
  if_axi_stream.sink   i_pt_mult,
  // Interface to point adder / doubler
  if_axi_stream.source o_dbl,
  if_axi_stream.sink   i_dbl,
  if_axi_stream.source o_add,
  if_axi_stream.sink   i_add
);

// Remaining scalar bits, shifted right by one on every step
logic [DAT_BITS-1:0] k_l;
// p_*_done:      the add / double of the current step has completed (or was not needed)
// special_dbl:   the add was skipped because both operands were equal, so the
//                next doubler result is also written into the accumulator
// lookahead_dbl: the next double was issued before the current add finished
logic p_dbl_done, p_add_done, special_dbl, lookahead_dbl;

enum {IDLE, DOUBLE_ADD, FINISHED} state;

// The second adder operand is always the point currently held for doubling
always_comb begin
  o_add.dat[$bits(FP_TYPE) +: $bits(FP_TYPE)] = o_dbl.dat;
end

always_ff @ (posedge i_clk) begin
  if (i_rst) begin
    o_dbl.copy_if(0, 0, 1, 1, 0, 0, 0);
    // o_add.dat is partly driven by the always_comb above, so the remaining
    // fields are reset individually instead of through copy_if()
    o_add.val <= 0;
    o_add.sop <= 1;
    o_add.eop <= 1;
    o_add.err <= 0;
    o_add.ctl <= 0;
    o_add.mod <= 0;
    o_pt_mult.copy_if(0, 0, 1, 1, 0, 0, 0);
    i_add.rdy <= 0;
    i_dbl.rdy <= 0;
    i_pt_mult.rdy <= 0;
    k_l <= 0;
    state <= IDLE;
    p_dbl_done <= 0;
    p_add_done <= 0;
    special_dbl <= 0;
    lookahead_dbl <= 0;
  end else begin
    case (state)
      // Keep sampling the input until a job is accepted. Both *_done flags are
      // preset so the first DOUBLE_ADD cycle immediately issues work.
      {IDLE}: begin
        i_add.rdy <= 1;
        i_dbl.rdy <= 1;
        p_dbl_done <= 1;
        p_add_done <= 1;
        special_dbl <= 0;
        lookahead_dbl <= 0;
        i_pt_mult.rdy <= 1;
        o_pt_mult.err <= 0;
        o_add.dat[0 +: $bits(FP_TYPE)] <= 0; // Accumulator starts at 0
        o_dbl.dat <= i_pt_mult.dat;
        k_l <= i_pt_mult.ctl;
        if (i_pt_mult.rdy && i_pt_mult.val) begin
          i_pt_mult.rdy <= 0;
          state <= DOUBLE_ADD;
        end
      end
      {DOUBLE_ADD}: begin
        // Drop each request valid once it has been accepted
        if (o_dbl.val && o_dbl.rdy) o_dbl.val <= 0;
        if (o_add.val && o_add.rdy) o_add.val <= 0;

        if (i_dbl.val && i_dbl.rdy) begin
          p_dbl_done <= 1;
          if (special_dbl) begin
            o_add.dat[0 +: $bits(FP_TYPE)] <= i_dbl.dat;
            special_dbl <= 0;
          end
          o_dbl.dat <= i_dbl.dat;
          // We can look ahead and start the next double
          if ((k_l >> 1) != 0 && ~lookahead_dbl && ~p_add_done) begin
            o_dbl.val <= 1;
            lookahead_dbl <= 1;
            i_dbl.rdy <= 0; // Want to make sure we don't output while still waiting for add
          end
        end

        if (i_add.val && i_add.rdy) begin
          p_add_done <= 1;
          o_add.dat[0 +: $bits(FP_TYPE)] <= i_add.dat;
        end

        // Update variables and issue new commands.
        // Order matters here: later non-blocking assignments to the same
        // signal override the defaults set at the top of this block.
        if (p_add_done && p_dbl_done) begin
          lookahead_dbl <= 0;
          i_dbl.rdy <= 1;
          p_add_done <= 0;
          p_dbl_done <= 0;
          k_l <= k_l >> 1;
          if (k_l[0]) begin
            o_add.val <= 1;
            // Need to check for special case where the point coords are the same
            if (o_add.dat[0 +: $bits(FP_TYPE)] == o_dbl.dat) begin
              special_dbl <= 1;
              o_add.val <= 0;
              p_add_done <= 1;
            end
          end else begin
            // Current scalar bit is 0: no add needed this step
            p_add_done <= 1;
          end

          // Don't need to double on the final bit
          if ((k_l >> 1) != 0)
            o_dbl.val <= ~lookahead_dbl; // Don't do if we already started
          else
            p_dbl_done <= 1;

          if (k_l == 0) begin
            state <= FINISHED;
            // Take the result from the accumulator register. i_add.dat is only
            // guaranteed while i_add.val is high, and it does not reflect
            // accumulator updates that came from the doubler (special_dbl) or
            // the k == 0 case where no add was ever issued.
            o_pt_mult.dat <= o_add.dat[0 +: $bits(FP_TYPE)];
            o_pt_mult.val <= 1;
            o_dbl.val <= 0;
            o_add.val <= 0;
          end
        end
      end
      // Hold the result until the consumer accepts it
      {FINISHED}: begin
        if (o_pt_mult.rdy && o_pt_mult.val) begin
          o_pt_mult.val <= 0;
          state <= IDLE;
        end
      end
    endcase

    // An error from the adder or doubler overrides the state machine above
    // and terminates the job with .err set.
    // NOTE(review): .err is not qualified with .val here - confirm the point
    // add / double blocks only raise it together with a valid output.
    if (i_dbl.err || i_add.err) begin
      o_pt_mult.err <= 1;
      o_pt_mult.val <= 1;
      state <= FINISHED;
    end
  end
end

endmodule

View File

@ -91,24 +91,6 @@ ec_fp2_point_add (
.i_sub_if ( sub_out_if )
);
always_comb begin
mult_out_if.sop = 1;
mult_out_if.eop = 1;
mult_out_if.err = 0;
mult_out_if.mod = 1;
add_out_if.sop = 1;
add_out_if.eop = 1;
add_out_if.err = 0;
add_out_if.mod = 1;
sub_out_if.sop = 1;
sub_out_if.eop = 1;
sub_out_if.err = 0;
sub_out_if.mod = 1;
end
// Attach a mod reduction unit and multiply - mod unit
ec_fp_mult_mod #(
.P ( P ),
@ -118,15 +100,8 @@ ec_fp_mult_mod #(
ec_fp_mult_mod (
.i_clk( clk ),
.i_rst( rst ),
.i_dat_a ( mult_in_if.dat[0 +: bls12_381_pkg::DAT_BITS] ),
.i_dat_b ( mult_in_if.dat[bls12_381_pkg::DAT_BITS +: bls12_381_pkg::DAT_BITS] ),
.i_val ( mult_in_if.val ),
.i_ctl ( mult_in_if.ctl ),
.o_rdy ( mult_in_if.rdy ),
.o_dat ( mult_out_if.dat ),
.i_rdy ( mult_out_if.rdy ),
.o_val ( mult_out_if.val ),
.o_ctl ( mult_out_if.ctl )
.i_mul ( mult_in_if ),
.o_mul ( mult_out_if )
);
adder_pipe # (
@ -138,15 +113,8 @@ adder_pipe # (
adder_pipe (
.i_clk ( clk ),
.i_rst ( rst ),
.i_dat_a ( add_in_if.dat[0 +: bls12_381_pkg::DAT_BITS] ),
.i_dat_b ( add_in_if.dat[bls12_381_pkg::DAT_BITS +: bls12_381_pkg::DAT_BITS] ),
.i_ctl ( add_in_if.ctl ),
.i_val ( add_in_if.val ),
.o_rdy ( add_in_if.rdy ),
.o_dat ( add_out_if.dat ),
.o_val ( add_out_if.val ),
.o_ctl ( add_out_if.ctl ),
.i_rdy ( add_out_if.rdy )
.i_add ( add_in_if ),
.o_add ( add_out_if )
);
subtractor_pipe # (
@ -158,15 +126,8 @@ subtractor_pipe # (
subtractor_pipe (
.i_clk ( clk ),
.i_rst ( rst ),
.i_dat_a ( sub_in_if.dat[0 +: bls12_381_pkg::DAT_BITS] ),
.i_dat_b ( sub_in_if.dat[bls12_381_pkg::DAT_BITS +: bls12_381_pkg::DAT_BITS] ),
.i_ctl ( sub_in_if.ctl ),
.i_val ( sub_in_if.val ),
.o_rdy ( sub_in_if.rdy ),
.o_dat ( sub_out_if.dat ),
.o_val ( sub_out_if.val ),
.o_ctl ( sub_out_if.ctl ),
.i_rdy ( sub_out_if.rdy )
.i_sub ( sub_in_if ),
.o_sub ( sub_out_if )
);
task test(input fp2_jb_point_t p1, p2, p_exp);

View File

@ -89,25 +89,6 @@ ec_fp2_point_dbl (
.i_sub_if ( sub_out_if )
);
always_comb begin
mult_out_if.sop = 1;
mult_out_if.eop = 1;
mult_out_if.err = 0;
mult_out_if.mod = 1;
add_out_if.sop = 1;
add_out_if.eop = 1;
add_out_if.err = 0;
add_out_if.mod = 1;
sub_out_if.sop = 1;
sub_out_if.eop = 1;
sub_out_if.err = 0;
sub_out_if.mod = 1;
end
// Attach a mod reduction unit and multiply - mod unit
ec_fp_mult_mod #(
.P ( P ),
.KARATSUBA_LVL ( 3 ),
@ -116,15 +97,8 @@ ec_fp_mult_mod #(
ec_fp_mult_mod (
.i_clk( clk ),
.i_rst( rst ),
.i_dat_a ( mult_in_if.dat[0 +: bls12_381_pkg::DAT_BITS] ),
.i_dat_b ( mult_in_if.dat[bls12_381_pkg::DAT_BITS +: bls12_381_pkg::DAT_BITS] ),
.i_val ( mult_in_if.val ),
.i_ctl ( mult_in_if.ctl ),
.o_rdy ( mult_in_if.rdy ),
.o_dat ( mult_out_if.dat ),
.i_rdy ( mult_out_if.rdy ),
.o_val ( mult_out_if.val ),
.o_ctl ( mult_out_if.ctl )
.i_mul ( mult_in_if ),
.o_mul ( mult_out_if )
);
adder_pipe # (
@ -136,15 +110,8 @@ adder_pipe # (
adder_pipe (
.i_clk ( clk ),
.i_rst ( rst ),
.i_dat_a ( add_in_if.dat[0 +: bls12_381_pkg::DAT_BITS] ),
.i_dat_b ( add_in_if.dat[bls12_381_pkg::DAT_BITS +: bls12_381_pkg::DAT_BITS] ),
.i_ctl ( add_in_if.ctl ),
.i_val ( add_in_if.val ),
.o_rdy ( add_in_if.rdy ),
.o_dat ( add_out_if.dat ),
.o_val ( add_out_if.val ),
.o_ctl ( add_out_if.ctl ),
.i_rdy ( add_out_if.rdy )
.i_add ( add_in_if ),
.o_add ( add_out_if )
);
subtractor_pipe # (
@ -156,15 +123,8 @@ subtractor_pipe # (
subtractor_pipe (
.i_clk ( clk ),
.i_rst ( rst ),
.i_dat_a ( sub_in_if.dat[0 +: bls12_381_pkg::DAT_BITS] ),
.i_dat_b ( sub_in_if.dat[bls12_381_pkg::DAT_BITS +: bls12_381_pkg::DAT_BITS] ),
.i_ctl ( sub_in_if.ctl ),
.i_val ( sub_in_if.val ),
.o_rdy ( sub_in_if.rdy ),
.o_dat ( sub_out_if.dat ),
.o_val ( sub_out_if.val ),
.o_ctl ( sub_out_if.ctl ),
.i_rdy ( sub_out_if.rdy )
.i_sub ( sub_in_if ),
.o_sub ( sub_out_if )
);
task test(input fp2_jb_point_t p1, p_exp);

View File

@ -0,0 +1,267 @@
/*
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
`timescale 1ps/1ps
// Testbench for ec_point_mult operating on Fp2 Jacobian points.
//
// The DUT sequences a point adder (ec_fp2_point_add) and a point doubler
// (ec_fp2_point_dbl). Their field-element multiply / add / subtract requests
// are arbitrated by resource_share blocks onto one shared ec_fp_mult_mod,
// adder_pipe and subtractor_pipe. Each test multiplies `G_POINT by a scalar k
// and compares the result against the software model `MULT_FUNC.
module ec_fp2_point_mult_tb ();

import common_pkg::*;
import bls12_381_pkg::*;

localparam CLK_PERIOD = 1000;

logic clk, rst;

parameter type FP_TYPE  = bls12_381_pkg::fp2_jb_point_t;
parameter type FE_TYPE  = bls12_381_pkg::fe_t;
parameter type FE2_TYPE = bls12_381_pkg::fe2_t;
parameter KEY_BITS      = bls12_381_pkg::DAT_BITS;
parameter P             = bls12_381_pkg::P;

// Software reference model, print helper and generator point used by test()
`define MULT_FUNC(K, IN_POINT) fp2_point_mult(K, IN_POINT);
`define PRINT_FUNC(IN_POINT) print_fp2_jb_point(IN_POINT);
`define G_POINT bls12_381_pkg::g2_point

// Job input (point in .dat, scalar in .ctl) and result output of the DUT
if_axi_stream #(.DAT_BYTS(($bits(FP_TYPE)+7)/8), .CTL_BITS(KEY_BITS)) in_if(clk);
if_axi_stream #(.DAT_BYTS(($bits(FP_TYPE)+7)/8)) out_if(clk);

// Point-level streams between the DUT and the point adder / doubler
if_axi_stream #(.DAT_BITS(2*$bits(FP_TYPE))) add_i_if(clk);
if_axi_stream #(.DAT_BITS($bits(FP_TYPE))) add_o_if(clk);
if_axi_stream #(.DAT_BITS($bits(FP_TYPE))) dbl_i_if(clk);
if_axi_stream #(.DAT_BITS($bits(FP_TYPE))) dbl_o_if(clk);

// Field-element streams: [0] point adder, [1] point doubler, [2] arbitrated
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) mult_in_if [2:0] (clk) ;
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16)) mult_out_if [2:0](clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) add_in_if [2:0] (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16)) add_out_if [2:0] (clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) sub_in_if [2:0] (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16)) sub_out_if [2:0] (clk);

// Reset: low, then toggled twice at 20-clock intervals (high pulse)
initial begin
  rst = 0;
  repeat(2) #(20*CLK_PERIOD) rst = ~rst;
end

// Free-running clock
initial begin
  clk = 0;
  forever #(CLK_PERIOD/2) clk = ~clk;
end

// Fields of the result stream that the DUT does not need to vary
always_comb begin
  out_if.sop = 1;
  out_if.eop = 1;
  out_if.ctl = 0;
  out_if.mod = 0;
end

// Check for errors
// A plain clocked always is used (not always_ff) because out_if.rdy is also
// written by the stimulus initial block and the stream task, which always_ff
// does not permit. $error takes no finish number, only the message arguments.
always @ (posedge clk)
  if (out_if.val && out_if.err) begin
    out_if.rdy = 1;
    $error("%m %t ERROR: output .err asserted", $time);
  end

// Device under test
ec_point_mult #(
  .P       ( P       ),
  .FP_TYPE ( FP_TYPE )
)
ec_point_mult (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .o_pt_mult ( out_if ),
  .i_pt_mult ( in_if  ),
  // Interface to point adder / doubler
  .o_dbl ( dbl_i_if ),
  .i_dbl ( dbl_o_if ),
  .o_add ( add_i_if ),
  .i_add ( add_o_if )
);

// Point adder, field-element requests on index [0]
ec_fp2_point_add #(
  .FP2_TYPE ( FP_TYPE  ),
  .FE_TYPE  ( FE_TYPE  ),
  .FE2_TYPE ( FE2_TYPE )
)
ec_fp2_point_add (
  .i_clk ( clk ),
  .i_rst ( rst ),
  // Input points
  .i_p1  ( add_i_if.dat[0 +: $bits(FP_TYPE)] ),
  .i_p2  ( add_i_if.dat[$bits(FP_TYPE) +: $bits(FP_TYPE)] ),
  .i_val ( add_i_if.val ),
  .o_rdy ( add_i_if.rdy ),
  .o_p   ( add_o_if.dat ),
  .o_err ( add_o_if.err ),
  .i_rdy ( add_o_if.rdy ),
  .o_val ( add_o_if.val ) ,
  .o_mul_if ( mult_in_if[0] ),
  .i_mul_if ( mult_out_if[0] ),
  .o_add_if ( add_in_if[0] ),
  .i_add_if ( add_out_if[0] ),
  .o_sub_if ( sub_in_if[0] ),
  .i_sub_if ( sub_out_if[0] )
);

// Point doubler, field-element requests on index [1]
ec_fp2_point_dbl #(
  .FP2_TYPE ( FP_TYPE  ),
  .FE_TYPE  ( FE_TYPE  ),
  .FE2_TYPE ( FE2_TYPE )
)
ec_fp2_point_dbl (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_p   ( dbl_i_if.dat),
  .i_val ( dbl_i_if.val ),
  .o_rdy ( dbl_i_if.rdy ),
  .o_p   ( dbl_o_if.dat ),
  .o_err ( dbl_o_if.err ),
  .i_rdy ( dbl_o_if.rdy ),
  .o_val ( dbl_o_if.val ) ,
  .o_mul_if ( mult_in_if[1] ),
  .i_mul_if ( mult_out_if[1] ),
  .o_add_if ( add_in_if[1] ),
  .i_add_if ( add_out_if[1] ),
  .o_sub_if ( sub_in_if[1] ),
  .i_sub_if ( sub_out_if[1] )
);

// Arbitrate the [0] and [1] multiplier requests onto index [2]
resource_share # (
  .NUM_IN       ( 2  ),
  .OVR_WRT_BIT  ( 12 ),
  .PIPELINE_IN  ( 0  ),
  .PIPELINE_OUT ( 0  )
)
resource_share_mul (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_axi ( mult_in_if[1:0] ),
  .o_res ( mult_in_if[2] ),
  .i_res ( mult_out_if[2] ),
  .o_axi ( mult_out_if[1:0] )
);

// Arbitrate the [0] and [1] subtractor requests onto index [2]
resource_share # (
  .NUM_IN       ( 2  ),
  .OVR_WRT_BIT  ( 12 ),
  .PIPELINE_IN  ( 0  ),
  .PIPELINE_OUT ( 0  )
)
resource_share_sub (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_axi ( sub_in_if[1:0] ),
  .o_res ( sub_in_if[2] ),
  .i_res ( sub_out_if[2] ),
  .o_axi ( sub_out_if[1:0] )
);

// Arbitrate the [0] and [1] adder requests onto index [2]
resource_share # (
  .NUM_IN       ( 2  ),
  .OVR_WRT_BIT  ( 12 ),
  .PIPELINE_IN  ( 0  ),
  .PIPELINE_OUT ( 0  )
)
resource_share_add (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_axi ( add_in_if[1:0] ),
  .o_res ( add_in_if[2] ),
  .i_res ( add_out_if[2] ),
  .o_axi ( add_out_if[1:0] )
);

// Shared modular multiplier
ec_fp_mult_mod #(
  .P             ( P  ),
  .KARATSUBA_LVL ( 3  ),
  .CTL_BITS      ( 16 )
)
ec_fp_mult_mod (
  .i_clk( clk ),
  .i_rst( rst ),
  .i_mul ( mult_in_if[2] ),
  .o_mul ( mult_out_if[2] )
);

// Shared modular adder
adder_pipe # (
  .P        ( P  ),
  .CTL_BITS ( 16 ),
  .LEVEL    ( 2  )
)
adder_pipe (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_add ( add_in_if[2] ),
  .o_add ( add_out_if[2] )
);

// Shared modular subtractor
subtractor_pipe # (
  .P        ( P  ),
  .CTL_BITS ( 16 ),
  .LEVEL    ( 2  )
)
subtractor_pipe (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_sub ( sub_in_if[2] ),
  .o_sub ( sub_out_if[2] )
);

// Test a point: multiply `G_POINT by k in hardware and compare the result
// against the software model. Ends the simulation with $fatal on mismatch.
task test(input logic [KEY_BITS-1:0] k);
begin
  integer signed get_len;
  logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat;
  integer start_time, finish_time;
  FP_TYPE p_out, p_exp;
  $display("Running test with k= %d", k);
  p_exp = `MULT_FUNC(k, `G_POINT);
  start_time = $time;
  // Drive the job and collect the result concurrently
  fork
    in_if.put_stream(`G_POINT, ($bits(FP_TYPE)+7)/8, k);
    out_if.get_stream(get_dat, get_len);
  join
  finish_time = $time;

  p_out = get_dat;

  $display("Expected:");
  `PRINT_FUNC(p_exp);
  $display("Was:");
  `PRINT_FUNC(p_out);

  if (p_exp != p_out) begin
    $fatal(1, "%m %t ERROR: output was wrong", $time);
  end

  $display("test PASSED in %d clocks", (finish_time-start_time)/CLK_PERIOD);
end
endtask

logic [380:0] in_k;

// Stimulus: wait for reset to complete, then run the enabled tests
initial begin
  out_if.rdy = 0;
  in_if.val = 0;
  #(40*CLK_PERIOD);

  test(4);
  in_k = P-1;
  //test(381'haaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa);
  //test(in_k);

  #1us $finish();
end

endmodule

View File

@ -97,17 +97,10 @@ ec_fp_mult_mod #(
.CTL_BITS ( 8 )
)
ec_fp_mult_mod (
.i_clk( clk ),
.i_rst( rst ),
.i_dat_a ( mult_in_if.dat[0 +: 381] ),
.i_dat_b ( mult_in_if.dat[381 +: 381] ),
.i_val ( mult_in_if.val ),
.i_ctl ( mult_in_if.ctl ),
.o_rdy ( mult_in_if.rdy ),
.o_dat ( mult_out_if.dat ),
.i_rdy ( mult_out_if.rdy ),
.o_val ( mult_out_if.val ),
.o_ctl ( mult_out_if.ctl )
.i_clk( clk ),
.i_rst( rst ),
.i_mul ( mult_in_if ),
.o_mul ( mult_out_if )
);
adder_pipe # (
@ -119,15 +112,8 @@ adder_pipe # (
adder_pipe (
.i_clk ( clk ),
.i_rst ( rst ),
.i_dat_a ( add_in_if.dat[0 +: bls12_381_pkg::DAT_BITS] ),
.i_dat_b ( add_in_if.dat[bls12_381_pkg::DAT_BITS +: bls12_381_pkg::DAT_BITS] ),
.i_ctl ( add_in_if.ctl ),
.i_val ( add_in_if.val ),
.o_rdy ( add_in_if.rdy ),
.o_dat ( add_out_if.dat ),
.o_val ( add_out_if.val ),
.o_ctl ( add_out_if.ctl ),
.i_rdy ( add_out_if.rdy )
.i_add ( add_in_if ),
.o_add ( add_out_if )
);
subtractor_pipe # (
@ -139,33 +125,10 @@ subtractor_pipe # (
subtractor_pipe (
.i_clk ( clk ),
.i_rst ( rst ),
.i_dat_a ( sub_in_if.dat[0 +: bls12_381_pkg::DAT_BITS] ),
.i_dat_b ( sub_in_if.dat[bls12_381_pkg::DAT_BITS +: bls12_381_pkg::DAT_BITS] ),
.i_ctl ( sub_in_if.ctl ),
.i_val ( sub_in_if.val ),
.o_rdy ( sub_in_if.rdy ),
.o_dat ( sub_out_if.dat ),
.o_val ( sub_out_if.val ),
.o_ctl ( sub_out_if.ctl ),
.i_rdy ( sub_out_if.rdy )
.i_sub ( sub_in_if ),
.o_sub ( sub_out_if )
);
always_comb begin
mult_out_if.sop = 1;
mult_out_if.eop = 1;
mult_out_if.err = 0;
mult_out_if.mod = 1;
add_out_if.sop = 1;
add_out_if.eop = 1;
add_out_if.err = 0;
add_out_if.mod = 1;
sub_out_if.sop = 1;
sub_out_if.eop = 1;
sub_out_if.err = 0;
sub_out_if.mod = 1;
end
task test_0();
begin

View File

@ -24,18 +24,31 @@ localparam CLK_PERIOD = 1000;
logic clk, rst;
if_axi_stream #(.DAT_BYTS(384*3/8)) in_if(clk);
if_axi_stream #(.DAT_BYTS(384*3/8)) out_if(clk);
parameter type FP_TYPE = bls12_381_pkg::jb_point_t;
parameter type FE_TYPE = bls12_381_pkg::fe_t;
parameter KEY_BITS = bls12_381_pkg::DAT_BITS;
parameter P = bls12_381_pkg::P;
if_axi_stream #(.DAT_BYTS(384*2/8), .CTL_BITS(16)) mult_in_if(clk);
if_axi_stream #(.DAT_BYTS(384/8), .CTL_BITS(16)) mult_out_if(clk);
if_axi_stream #(.DAT_BYTS(384*2/8), .CTL_BITS(16)) add_in_if(clk);
if_axi_stream #(.DAT_BYTS(384/8), .CTL_BITS(16)) add_out_if(clk);
if_axi_stream #(.DAT_BYTS(384*2/8), .CTL_BITS(16)) sub_in_if(clk);
if_axi_stream #(.DAT_BYTS(384/8), .CTL_BITS(16)) sub_out_if(clk);
`define MULT_FUNC(K, IN_POINT) point_mult(K, IN_POINT);
`define PRINT_FUNC(IN_POINT) print_jb_point(IN_POINT);
`define G_POINT bls12_381_pkg::g_point
if_axi_stream #(.DAT_BYTS(($bits(FP_TYPE)+7)/8), .CTL_BITS(KEY_BITS)) in_if(clk);
if_axi_stream #(.DAT_BYTS(($bits(FP_TYPE)+7)/8)) out_if(clk);
logic [DAT_BITS-1:0] k_in;
if_axi_stream #(.DAT_BITS(2*$bits(FP_TYPE))) add_i_if(clk);
if_axi_stream #(.DAT_BITS($bits(FP_TYPE))) add_o_if(clk);
if_axi_stream #(.DAT_BITS($bits(FP_TYPE))) dbl_i_if(clk);
if_axi_stream #(.DAT_BITS($bits(FP_TYPE))) dbl_o_if(clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) mult_in_if [2:0] (clk) ;
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16)) mult_out_if [2:0](clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) add_in_if [2:0] (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16)) add_out_if [2:0] (clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(16)) sub_in_if [2:0] (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(16)) sub_out_if [2:0] (clk);
initial begin
rst = 0;
@ -61,78 +74,172 @@ always_ff @ (posedge clk)
$error(1, "%m %t ERROR: output .err asserted", $time);
end
always_comb begin
mult_out_if.sop = 1;
mult_out_if.eop = 1;
mult_out_if.val = 0;
mult_out_if.mod = 0;
mult_in_if.rdy = 1;
add_out_if.sop = 1;
add_out_if.eop = 1;
add_out_if.val = 0;
add_out_if.mod = 0;
add_in_if.rdy = 1;
sub_out_if.sop = 1;
sub_out_if.eop = 1;
sub_out_if.val = 0;
sub_out_if.mod = 0;
sub_in_if.rdy = 1;
end
ec_fp_point_mult #(
.P ( P ),
.POINT_TYPE ( jb_point_t ),
.DAT_BITS ( DAT_BITS ),
.RESOURCE_SHARE ("NO")
ec_point_mult #(
.P ( P ),
.FP_TYPE ( FP_TYPE )
)
ec_fp_point_mult (
ec_point_mult (
.i_clk ( clk ),
.i_rst ( rst ),
.i_p ( in_if.dat ),
.i_k ( k_in ),
.i_val ( in_if.val ),
.o_rdy ( in_if.rdy ),
.o_p ( out_if.dat ),
.i_rdy ( out_if.rdy ),
.o_val ( out_if.val ),
.o_err ( out_if.err ),
.o_mult_if ( mult_in_if ),
.i_mult_if ( mult_out_if ),
.o_add_if ( add_in_if ),
.i_add_if ( add_out_if ),
.o_sub_if ( sub_in_if ),
.i_sub_if ( sub_out_if ),
.i_p2_val ( 0),
.i_p2 ( 0 )
.o_pt_mult ( out_if ),
.i_pt_mult ( in_if ),
// Interface to point adder / doubler
.o_dbl ( dbl_i_if ),
.i_dbl ( dbl_o_if ),
.o_add ( add_i_if ),
.i_add ( add_o_if )
);
// Point adder instance. The two operand points arrive packed side by side on
// add_i_if.dat (first point in the low $bits(FP_TYPE) bits, second point
// directly above it) and the result leaves on add_o_if.
// Its field-element multiply / add / subtract requests use index [0] of the
// mult/add/sub interface arrays.
// Only dat, val, rdy and err of add_i_if / add_o_if are connected here; the
// remaining interface signals (sop/eop/ctl/mod) are not driven by this instance.
ec_point_add #(
.FP_TYPE ( FP_TYPE ),
.FE_TYPE ( FE_TYPE )
)
ec_point_add (
.i_clk ( clk ),
.i_rst ( rst ),
// Input points
.i_p1 ( add_i_if.dat[0 +: $bits(FP_TYPE)] ),
.i_p2 ( add_i_if.dat[$bits(FP_TYPE) +: $bits(FP_TYPE)] ),
.i_val ( add_i_if.val ),
.o_rdy ( add_i_if.rdy ),
.o_p ( add_o_if.dat ),
.o_err ( add_o_if.err ),
.i_rdy ( add_o_if.rdy ),
.o_val ( add_o_if.val ) ,
// Field-element arithmetic request/response ports (slot 0)
.o_mul_if ( mult_in_if[0] ),
.i_mul_if ( mult_out_if[0] ),
.o_add_if ( add_in_if[0] ),
.i_add_if ( add_out_if[0] ),
.o_sub_if ( sub_in_if[0] ),
.i_sub_if ( sub_out_if[0] )
);
// Point doubler instance. Takes a single point on dbl_i_if.dat and returns the
// result on dbl_o_if.
// Its field-element multiply / add / subtract requests use index [1] of the
// mult/add/sub interface arrays.
// Only dat, val, rdy and err of dbl_i_if / dbl_o_if are connected here.
ec_point_dbl #(
.FP_TYPE ( FP_TYPE ),
.FE_TYPE ( FE_TYPE )
)
ec_point_dbl (
.i_clk ( clk ),
.i_rst ( rst ),
.i_p ( dbl_i_if.dat),
.i_val ( dbl_i_if.val ),
.o_rdy ( dbl_i_if.rdy ),
.o_p ( dbl_o_if.dat ),
.o_err ( dbl_o_if.err ),
.i_rdy ( dbl_o_if.rdy ),
.o_val ( dbl_o_if.val ) ,
// Field-element arithmetic request/response ports (slot 1)
.o_mul_if ( mult_in_if[1] ),
.i_mul_if ( mult_out_if[1] ),
.o_add_if ( add_in_if[1] ),
.i_add_if ( add_out_if[1] ),
.o_sub_if ( sub_in_if[1] ),
.i_sub_if ( sub_out_if[1] )
);
// Multiplier sharing: request slots [1:0] (point adder = 0, point doubler = 1)
// are funnelled onto slot [2], which feeds the single ec_fp_mult_mod instance;
// responses on mult_out_if[2] are handed back on mult_out_if[1:0].
// NOTE(review): OVR_WRT_BIT = 12 lies inside the 16-bit ctl field of these
// interfaces; presumably resource_share tags the requester in ctl from this bit
// upward - confirm against resource_share and against any ctl bits used by
// the attached blocks.
resource_share # (
.NUM_IN ( 2 ),
.OVR_WRT_BIT ( 12 ),
.PIPELINE_IN ( 0 ),
.PIPELINE_OUT ( 0 )
)
resource_share_mul (
.i_clk ( clk ),
.i_rst ( rst ),
.i_axi ( mult_in_if[1:0] ),
.o_res ( mult_in_if[2] ),
.i_res ( mult_out_if[2] ),
.o_axi ( mult_out_if[1:0] )
);
// Subtractor sharing: request slots [1:0] (point adder = 0, point doubler = 1)
// are funnelled onto slot [2], which feeds the single subtractor_pipe instance;
// responses on sub_out_if[2] are handed back on sub_out_if[1:0].
// Same parameterisation as the multiplier and adder sharing blocks.
resource_share # (
.NUM_IN ( 2 ),
.OVR_WRT_BIT ( 12 ),
.PIPELINE_IN ( 0 ),
.PIPELINE_OUT ( 0 )
)
resource_share_sub (
.i_clk ( clk ),
.i_rst ( rst ),
.i_axi ( sub_in_if[1:0] ),
.o_res ( sub_in_if[2] ),
.i_res ( sub_out_if[2] ),
.o_axi ( sub_out_if[1:0] )
);
// Adder sharing: request slots [1:0] (point adder = 0, point doubler = 1)
// are funnelled onto slot [2], which feeds the single adder_pipe instance;
// responses on add_out_if[2] are handed back on add_out_if[1:0].
// Same parameterisation as the multiplier and subtractor sharing blocks.
resource_share # (
.NUM_IN ( 2 ),
.OVR_WRT_BIT ( 12 ),
.PIPELINE_IN ( 0 ),
.PIPELINE_OUT ( 0 )
)
resource_share_add (
.i_clk ( clk ),
.i_rst ( rst ),
.i_axi ( add_in_if[1:0] ),
.o_res ( add_in_if[2] ),
.i_res ( add_out_if[2] ),
.o_axi ( add_out_if[1:0] )
);
// Single shared field multiplier (mod P), attached to slot [2] of the
// multiplier interfaces. CTL_BITS matches the 16-bit ctl width declared on
// mult_in_if / mult_out_if.
ec_fp_mult_mod #(
.P ( P ),
.KARATSUBA_LVL ( 3 ),
.CTL_BITS ( 16 )
)
ec_fp_mult_mod (
.i_clk( clk ),
.i_rst( rst ),
.i_mul ( mult_in_if[2] ),
.o_mul ( mult_out_if[2] )
);
// Single shared modular adder, attached to slot [2] of the adder interfaces.
// LEVEL = 2 selects a two-stage pipeline (per adder_pipe's parameter comment,
// LEVEL == 1 would be a plain add with registered output).
adder_pipe # (
.P ( P ),
.CTL_BITS ( 16 ),
.LEVEL ( 2 )
)
adder_pipe (
.i_clk ( clk ),
.i_rst ( rst ),
.i_add ( add_in_if[2] ),
.o_add ( add_out_if[2] )
);
// Single shared modular subtractor, attached to slot [2] of the subtractor
// interfaces. Parameterised identically to the adder (16 ctl bits, LEVEL = 2).
subtractor_pipe # (
.P ( P ),
.CTL_BITS ( 16 ),
.LEVEL ( 2 )
)
subtractor_pipe (
.i_clk ( clk ),
.i_rst ( rst ),
.i_sub ( sub_in_if[2] ),
.o_sub ( sub_out_if[2] )
);
// Test a point
task test(input logic [DAT_BITS-1:0] k);
task test(input logic [KEY_BITS-1:0] k);
begin
integer signed get_len;
logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat;
integer start_time, finish_time;
jb_point_t p_out, p_exp;
$display("Running test with k= %d ...", k);
p_exp = point_mult(k, g_point);
k_in = k;
FP_TYPE p_out, p_exp;
$display("Running test with k= %d", k);
p_exp = `MULT_FUNC(k, `G_POINT);
start_time = $time;
fork
in_if.put_stream(g_point, 384*3/8);
in_if.put_stream(`G_POINT, ($bits(FP_TYPE)+7)/8, k);
out_if.get_stream(get_dat, get_len);
join
finish_time = $time;
p_out = get_dat;
$display("Expected:");
print_jb_point(p_exp);
`PRINT_FUNC(p_exp);
$display("Was:");
print_jb_point(p_out);
`PRINT_FUNC(p_out);
if (p_exp != p_out) begin
$fatal(1, "%m %t ERROR: output was wrong", $time);
end
@ -141,19 +248,17 @@ begin
end
endtask;
jb_point_t point;
logic [380:0] in_k;
initial begin
out_if.rdy = 0;
in_if.val = 0;
#(40*CLK_PERIOD);
/*test(381'h1);
test(381'h5);
test(381'h10);*/
//test(1 << 380);
test(381'h9f5193de96ab6e65e7c7df8adcec4e82b971dd5f54d1c62103776d3eef0255ae346eba9e29eb08c3a957e9a53afc3ce);
in_k = P-1;
test(381'haaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa);
test(in_k);
#1us $finish();
end
endmodule

View File

@ -90,22 +90,6 @@ ec_point_add (
.i_sub_if ( sub_out_if )
);
always_comb begin
mult_out_if.sop = 1;
mult_out_if.eop = 1;
mult_out_if.err = 0;
mult_out_if.mod = 1;
add_out_if.sop = 1;
add_out_if.eop = 1;
add_out_if.err = 0;
add_out_if.mod = 1;
sub_out_if.sop = 1;
sub_out_if.eop = 1;
sub_out_if.err = 0;
sub_out_if.mod = 1;
end
// Attach a mod reduction unit and multiply - mod unit
@ -117,15 +101,8 @@ ec_fp_mult_mod #(
ec_fp_mult_mod (
.i_clk( clk ),
.i_rst( rst ),
.i_dat_a ( mult_in_if.dat[0 +: bls12_381_pkg::DAT_BITS] ),
.i_dat_b ( mult_in_if.dat[bls12_381_pkg::DAT_BITS +: bls12_381_pkg::DAT_BITS] ),
.i_val ( mult_in_if.val ),
.i_ctl ( mult_in_if.ctl ),
.o_rdy ( mult_in_if.rdy ),
.o_dat ( mult_out_if.dat ),
.i_rdy ( mult_out_if.rdy ),
.o_val ( mult_out_if.val ),
.o_ctl ( mult_out_if.ctl )
.i_mul ( mult_in_if ),
.o_mul ( mult_out_if )
);
adder_pipe # (
@ -137,15 +114,8 @@ adder_pipe # (
adder_pipe (
.i_clk ( clk ),
.i_rst ( rst ),
.i_dat_a ( add_in_if.dat[0 +: bls12_381_pkg::DAT_BITS] ),
.i_dat_b ( add_in_if.dat[bls12_381_pkg::DAT_BITS +: bls12_381_pkg::DAT_BITS] ),
.i_ctl ( add_in_if.ctl ),
.i_val ( add_in_if.val ),
.o_rdy ( add_in_if.rdy ),
.o_dat ( add_out_if.dat ),
.o_val ( add_out_if.val ),
.o_ctl ( add_out_if.ctl ),
.i_rdy ( add_out_if.rdy )
.i_add ( add_in_if ),
.o_add ( add_out_if )
);
subtractor_pipe # (
@ -157,15 +127,8 @@ subtractor_pipe # (
subtractor_pipe (
.i_clk ( clk ),
.i_rst ( rst ),
.i_dat_a ( sub_in_if.dat[0 +: bls12_381_pkg::DAT_BITS] ),
.i_dat_b ( sub_in_if.dat[bls12_381_pkg::DAT_BITS +: bls12_381_pkg::DAT_BITS] ),
.i_ctl ( sub_in_if.ctl ),
.i_val ( sub_in_if.val ),
.o_rdy ( sub_in_if.rdy ),
.o_dat ( sub_out_if.dat ),
.o_val ( sub_out_if.val ),
.o_ctl ( sub_out_if.ctl ),
.i_rdy ( sub_out_if.rdy )
.i_sub ( sub_in_if ),
.o_sub ( sub_out_if )
);
task test();
@ -191,7 +154,7 @@ begin
$display("Was:");
print_jb_point(p_out);
if (p_exp != p_out) begin
if (~(p_exp == p_out)) begin
$fatal(1, "%m %t ERROR: test_0 point was wrong", $time);
end

View File

@ -23,17 +23,10 @@ module adder_pipe # (
parameter CTL_BITS = 8,
parameter LEVEL = 1 // If LEVEL == 1 this is just an add with registered output
) (
input i_clk,
input i_rst,
input [BITS-1:0] i_dat_a,
input [BITS-1:0] i_dat_b,
input i_val,
input [CTL_BITS-1:0] i_ctl,
input i_rdy,
output logic o_rdy,
output logic o_val,
output logic [CTL_BITS-1:0] o_ctl,
output logic [BITS-1:0] o_dat
input i_clk,
input i_rst,
if_axi_stream.sink i_add,
if_axi_stream.source o_add
);
// Internally we want to use a even divisor for BITS of BITS/LEVEL
@ -53,47 +46,43 @@ always_comb begin
P_ = 0;
P_ = P;
carry_neg[0] = 0;
val[0] = i_val;
ctl[0] = i_ctl;
a[0] = i_dat_a;
b[0] = i_dat_b;
val[0] = i_add.val;
ctl[0] = i_add.ctl;
a[0] = 0;
a[0] = i_add.dat[0 +: BITS];
b[0] = 0;
b[0] = i_add.dat[BITS +: BITS];
result0[0] = 0;
result1[0] = 0;
o_val = val[LEVEL];
rdy[LEVEL] = i_rdy;
o_dat = carry_neg[LEVEL] ? result0[LEVEL] : result1[LEVEL];
o_ctl = ctl[LEVEL];
o_rdy = rdy[0];
end
// Output-side handshake and result selection.
// Back-pressure enters the pipeline at stage LEVEL from the sink and is
// presented to the source from stage 0.
// The result is result0 when the final carry_neg flag is set, otherwise
// result1.
always_comb begin
rdy[LEVEL] = o_add.rdy;
i_add.rdy = rdy[0];
// NOTE(review): if copy_if_comb uses the same argument order as the copy_if
// calls elsewhere in this design (dat, val, sop, eop, err, mod, ctl), the
// fifth argument here drives err = 1 on every output beat - confirm.
o_add.copy_if_comb(carry_neg[LEVEL] ? result0[LEVEL] : result1[LEVEL], val[LEVEL], 1, 1, 1, 0, ctl[LEVEL]);
end
generate
genvar g;
for (g = 0; g < LEVEL; g++) begin: ADDER_GEN
logic [BITS_LEVEL:0] add_res0, add_res0_, add_res1;
logic cn;
always_comb begin
rdy[g] = ~val[g+1] || (val[g+1] && rdy[g+1]);
add_res0 = a[g][g*BITS_LEVEL +: BITS_LEVEL] +
b[g][g*BITS_LEVEL +: BITS_LEVEL] +
add_res0 = a[g][g*BITS_LEVEL +: BITS_LEVEL] +
b[g][g*BITS_LEVEL +: BITS_LEVEL] +
result0[g][g*BITS_LEVEL];
add_res0_ = a[g][g*BITS_LEVEL +: BITS_LEVEL] +
b[g][g*BITS_LEVEL +: BITS_LEVEL] +
add_res0_ = a[g][g*BITS_LEVEL +: BITS_LEVEL] +
b[g][g*BITS_LEVEL +: BITS_LEVEL] +
result1[g][g*BITS_LEVEL];
if (add_res0_ < (P_[g*BITS_LEVEL +: BITS_LEVEL] + carry_neg[g])) begin
cn = 1;
add_res1 = add_res0_ - P_[g*BITS_LEVEL +: BITS_LEVEL] + (1 << BITS_LEVEL) - carry_neg[g];
end else begin
cn = 0;
add_res1 = add_res0_ - P_[g*BITS_LEVEL +: BITS_LEVEL] - carry_neg[g];
end
end
end
always_ff @ (posedge i_clk) begin
@ -111,13 +100,13 @@ genvar g;
ctl[g+1] <= ctl[g];
a[g+1] <= a[g];
b[g+1] <= b[g];
result0[g+1] <= result0[g];
result0[g+1][g*BITS_LEVEL +: BITS_LEVEL + 1] <= add_res0;
result1[g+1] <= result1[g];
result1[g+1][g*BITS_LEVEL +: BITS_LEVEL + 1] <= add_res1;
carry_neg[g+1] <= cn;
end
end

View File

@ -23,17 +23,10 @@ module subtractor_pipe # (
parameter CTL_BITS = 8,
parameter LEVEL = 1 // If LEVEL == 1 this is just an add with registered output
) (
input i_clk,
input i_rst,
input [BITS-1:0] i_dat_a,
input [BITS-1:0] i_dat_b,
input i_val,
input [CTL_BITS-1:0] i_ctl,
input i_rdy,
output logic o_rdy,
output logic o_val,
output logic [CTL_BITS-1:0] o_ctl,
output logic [BITS-1:0] o_dat
input i_clk,
input i_rst,
if_axi_stream.sink i_sub,
if_axi_stream.source o_sub
);
// Internally we want to use a even divisor for BITS of BITS/LEVEL
@ -54,21 +47,18 @@ always_comb begin
P_ = P;
carry_neg0[0] = 0;
carry_neg1[0] = 0;
val[0] = i_val;
ctl[0] = i_ctl;
a[0] = i_dat_a;
b[0] = i_dat_b;
val[0] = i_sub.val;
ctl[0] = i_sub.ctl;
a[0] = 0;
a[0] = i_sub.dat[0 +: BITS];
b[0] = 0;
b[0] = i_sub.dat[BITS +: BITS];
result0[0] = 0;
result1[0] = 0;
o_val = val[LEVEL];
rdy[LEVEL] = i_rdy;
o_dat = carry_neg1[LEVEL] ? result0[LEVEL] : result1[LEVEL];
o_ctl = ctl[LEVEL];
o_rdy = rdy[0];
end
// Output-side handshake and result selection.
// Back-pressure enters the pipeline at stage LEVEL from the sink and is
// presented to the source from stage 0.
// The result is result0 when the final carry_neg1 flag is set, otherwise
// result1.
always_comb begin
rdy[LEVEL] = o_sub.rdy;
i_sub.rdy = rdy[0];
// NOTE(review): this explicit dat assignment uses the same expression that is
// passed as the first argument of copy_if_comb on the next line, so it looks
// redundant - confirm copy_if_comb assigns dat and drop one of the two.
o_sub.dat = carry_neg1[LEVEL] ? result0[LEVEL] : result1[LEVEL];
// NOTE(review): if copy_if_comb uses the same argument order as the copy_if
// calls elsewhere in this design (dat, val, sop, eop, err, mod, ctl), the
// fifth argument here drives err = 1 on every output beat - confirm.
o_sub.copy_if_comb(carry_neg1[LEVEL] ? result0[LEVEL] : result1[LEVEL], val[LEVEL], 1, 1, 1, 0, ctl[LEVEL]);
end
generate
@ -80,10 +70,10 @@ genvar g;
always_comb begin
rdy[g] = ~val[g+1] || (val[g+1] && rdy[g+1]);
sub_res0_ = a[g][g*BITS_LEVEL +: BITS_LEVEL] + P_[g*BITS_LEVEL +: BITS_LEVEL] + result0[g][g*BITS_LEVEL];
sub_res0__ = b[g][g*BITS_LEVEL +: BITS_LEVEL] + carry_neg0[g];
if (sub_res0_ < sub_res0__) begin
cn0 = 1;
sub_res0 = sub_res0_ - sub_res0__ + (1 << BITS_LEVEL);
@ -94,7 +84,7 @@ genvar g;
sub_res1_ = a[g][g*BITS_LEVEL +: BITS_LEVEL] + result1[g][g*BITS_LEVEL];
sub_res1__ = b[g][g*BITS_LEVEL +: BITS_LEVEL] + carry_neg1[g];
if (sub_res1_ < sub_res1__) begin
cn1 = 1;
sub_res1 = sub_res1_ - sub_res1__ + (1 << BITS_LEVEL);

View File

@ -44,7 +44,7 @@ package bls12_381_pkg;
} jb_point_t;
typedef struct packed {
fe_t c1, c0;
fe_t c1, c0;
} fe2_t;
fe2_t G2x = '{c0:381'd352701069587466618187139116011060144890029952792775240219908644239793785735715026873347600343865175952761926303160,
@ -136,40 +136,40 @@ package bls12_381_pkg;
add_jb_point.y = fe_sub(A, add_jb_point.y);
endfunction
// Field addition: returns a + b, with P subtracted once when the sum reaches P.
// The sum is formed one bit wider than fe_t so the carry out is not lost.
function fe_t fe_add(fe_t a, b);
  logic [$bits(fe_t):0] sum_;
  sum_ = {1'b0, a} + {1'b0, b};
  if (sum_ >= P)
    fe_add = sum_ - P;
  else
    fe_add = sum_;
endfunction
endfunction
// Fp2 addition: the c0 and c1 components are added independently with fe_add.
function fe2_t fe2_add(fe2_t a, b);
  fe2_add = '{c1: fe_add(a.c1, b.c1),
              c0: fe_add(a.c0, b.c0)};
endfunction
// Field subtraction: returns a - b, with P added back once when b > a so the
// result does not go negative. Computed one bit wider than fe_t.
function fe_t fe_sub(fe_t a, b);
  logic [$bits(fe_t):0] diff_;
  diff_ = {1'b0, a} - {1'b0, b};
  if (b > a)
    diff_ = diff_ + P;
  fe_sub = diff_;
endfunction
endfunction
// Fp2 subtraction: the c0 and c1 components are subtracted independently with
// fe_sub.
function fe2_t fe2_sub(fe2_t a, b);
  fe2_sub = '{c1: fe_sub(a.c1, b.c1),
              c0: fe_sub(a.c0, b.c0)};
endfunction
endfunction
// Field multiplication: returns (a * b) mod P.
// The operands are declared as fe2_t (a packed struct of two fe_t, i.e. twice
// the width of fe_t) even though the calls visible in this package pass fe_t
// values. Because the width of a SystemVerilog multiply is taken from its
// operands and context, the wider operand type lets the full double-width
// product be formed before the % P reduction.
// NOTE(review): presumably this typing is intentional for that reason - if the
// arguments are ever changed to fe_t, an explicit double-width intermediate is
// needed or the product will be truncated.
function fe_t fe_mul(fe2_t a, b);
fe_mul = (a * b) % P;
endfunction
endfunction
// Fp2 multiplication using the four schoolbook partial products:
//   c0 = a.c0*b.c0 - a.c1*b.c1
//   c1 = a.c0*b.c1 + a.c1*b.c0
function fe2_t fe2_mul(fe2_t a, b);
  fe_t p00, p11, p01, p10;
  p00 = fe_mul(a.c0, b.c0);
  p11 = fe_mul(a.c1, b.c1);
  p01 = fe_mul(a.c0, b.c1);
  p10 = fe_mul(a.c1, b.c0);
  fe2_mul.c0 = fe_sub(p00, p11);
  fe2_mul.c1 = fe_add(p01, p10);
endfunction
endfunction
// Function to double point in Jacobian coordinates (for comparison in testbench)
// Here a is 0, and we also mod the result
function jb_point_t dbl_jb_point(input jb_point_t p);
@ -196,12 +196,12 @@ package bls12_381_pkg;
dbl_jb_point.z = Z;
return dbl_jb_point;
endfunction
function fp2_jb_point_t dbl_fp2_jb_point(input fp2_jb_point_t p);
fe2_t I_X, I_Y, I_Z, A, B, C, D, X, Y, Z;
if (p.z == 0) return p;
I_X = p.x;
I_Y = p.y;
I_Z = p.z;
@ -215,52 +215,52 @@ package bls12_381_pkg;
Y = fe2_mul(D, fe2_sub(B, X));
Y = fe2_sub(Y, C);
Z = fe2_mul(fe2_mul(2, I_Y), I_Z);
dbl_fp2_jb_point.x = X;
dbl_fp2_jb_point.y = Y;
dbl_fp2_jb_point.z = Z;
return dbl_fp2_jb_point;
endfunction
endfunction
// Adds two points whose coordinates are fe2_t values, using the x/y/z fields
// of fp2_jb_point_t (reference model for the testbench).
//   - If either input has z == 0 the other input is returned unchanged.
//   - If both x and y match, the doubling routine is used instead.
//   - Otherwise:
//       U1 = x1*z2^2, U2 = x2*z1^2, S1 = y1*z2^3, S2 = y2*z1^3
//       H = U2 - U1,  R = S2 - S1
//       z3 = H*z1*z2
//       x3 = R^2 - H^3 - 2*U1*H^2
//       y3 = R*(U1*H^2 - x3) - S1*H^3
function fp2_jb_point_t add_fp2_jb_point(fp2_jb_point_t p1, p2);
fe2_t A, U1, U2, S1, S2, H, H3, R;
if (p1.z == 0) return p2;
if (p2.z == 0) return p1;
// NOTE(review): this equality test compares raw x and y only and does not
// look at z, so two representations of the same point with different z
// would not be routed to the doubling path - confirm this is acceptable
// for the callers.
if (p1.y == p2.y && p1.x == p2.x)
return (dbl_fp2_jb_point(p1));
// U1 = x1 * z2^2
U1 = fe2_mul(p1.x, p2.z);
U1 = fe2_mul(U1, p2.z);
// U2 = x2 * z1^2
U2 = fe2_mul(p2.x, p1.z);
U2 = fe2_mul(U2, p1.z);
// S1 = y1 * z2^3
S1 = fe2_mul(p1.y, p2.z);
S1 = fe2_mul(fe2_mul(S1, p2.z), p2.z);
// S2 = y2 * z1^3
S2 = fe2_mul(p2.y, p1.z);
S2 = fe2_mul(fe2_mul(S2, p1.z), p1.z);
H = fe2_sub(U2, U1);
R = fe2_sub(S2, S1);
H3 = fe2_mul(fe2_mul(H, H), H);
// A = 2 * U1 * H^2 (the literal 2 is converted to fe2_t for the call)
A = fe2_mul(fe2_mul(fe2_mul(2, U1), H), H);
add_fp2_jb_point.z = fe2_mul(fe2_mul(H, p1.z), p2.z);
// x3 = R^2 - H^3 - 2*U1*H^2
add_fp2_jb_point.x = fe2_mul(R, R);
add_fp2_jb_point.x = fe2_sub(add_fp2_jb_point.x, H3);
add_fp2_jb_point.x = fe2_sub(add_fp2_jb_point.x, A);
// A is reused: A = R * (U1*H^2 - x3)
A = fe2_mul(fe2_mul(U1, H), H);
A = fe2_sub(A, add_fp2_jb_point.x);
A = fe2_mul(A, R);
// y3 = A - S1*H^3
add_fp2_jb_point.y = fe2_mul(S1, H3);
add_fp2_jb_point.y = fe2_sub(A, add_fp2_jb_point.y);
endfunction
function jb_point_t point_mult(logic [DAT_BITS-1:0] c, jb_point_t p);
jb_point_t result, addend;
result = 0;
@ -275,15 +275,29 @@ package bls12_381_pkg;
return result;
endfunction
// Scalar multiplication c * p by double-and-add, scanning c from its least
// significant bit upward. The accumulator starts as all-zero (z == 0), which
// add_fp2_jb_point treats as "return the other operand".
function fp2_jb_point_t fp2_point_mult(logic [DAT_BITS-1:0] c, fp2_jb_point_t p);
  fp2_jb_point_t acc, pow2_p;
  acc = 0;
  pow2_p = p;
  for (int i = 0; i < DAT_BITS; i++) begin
    // Stop as soon as no set bits remain at or above position i.
    if ((c >> i) > 0) begin
      if (c[i])
        acc = add_fp2_jb_point(acc, pow2_p);
      pow2_p = dbl_fp2_jb_point(pow2_p);
    end else begin
      break;
    end
  end
  return acc;
endfunction
// Evaluates y^2 - x^3 - a*x*z^4 - b*z^6 for the given point and returns it.
// NOTE(review): several things here look unintended and should be confirmed:
//   - the a and b constants are taken from secp256k1_pkg although this
//     function lives in bls12_381_pkg (looks like a copy-paste);
//   - the function has no declared return type, so the returned expression
//     is reduced to the implicit 1-bit result;
//   - the products are plain integer multiplies with no reduction mod P.
function on_curve(jb_point_t p);
return (p.y*p.y - p.x*p.x*p.x - secp256k1_pkg::a*p.x*p.z*p.z*p.z*p.z - secp256k1_pkg::b*p.z*p.z*p.z*p.z*p.z*p.z);
endfunction
// Inversion using extended euclidean algorithm
function fe_t fe_inv(fe_t a, b = 1);
fe_t u, v;
logic [$bits(fe_t):0] x1, x2;
u = a; v = P;
x1 = b; x2 = 0;
while (u != 1 && v != 1) begin
@ -299,7 +313,7 @@ package bls12_381_pkg;
if (x2 % 2 == 0)
x2 = x2 / 2;
else
x2 = (x2 + P) / 2;
x2 = (x2 + P) / 2;
end
if (u >= v) begin
u = u - v;
@ -311,15 +325,15 @@ package bls12_381_pkg;
end
if (u == 1)
return x1;
else
else
return x2;
endfunction
endfunction
// This algorithm can also be used for division
// Thin wrapper that forwards both operands to fe_inv, whose optional second
// argument replaces the default starting value of 1.
// NOTE(review): fe_inv seeds x1 with its second argument and u with its first,
// so this appears to compute b * a^-1 (b divided by a) rather than a / b -
// confirm the operand order matches what callers expect.
function fe_t fe_div(fe_t a, b);
return fe_inv(a, b);
endfunction
function fe2_t fe2_inv(fe2_t a);
fe_t factor, t0, t1;
t0 = fe_mul(a.c0, a.c0);
@ -328,7 +342,7 @@ package bls12_381_pkg;
fe2_inv.c0 = fe_mul(a.c0, factor);
fe2_inv.c1 = fe_mul(fe_sub(P, a.c1), factor);
endfunction
function jb_point_t to_affine(jb_point_t p);
fe_t z_;
z_ = fe_mul(p.z, p.z);
@ -337,7 +351,7 @@ package bls12_381_pkg;
z_ = fe_mul(z_, p.z);
to_affine.y = fe_mul(p.y, fe_inv(z_));
endfunction
function fp2_jb_point_t fp2_to_affine(fp2_jb_point_t p);
fe2_t z_;
z_ = fe2_mul(p.z, p.z);
@ -346,19 +360,19 @@ package bls12_381_pkg;
z_ = fe2_mul(z_, p.z);
fp2_to_affine.y = fe2_mul(p.y, fe2_inv(z_));
endfunction
// Prints the x, y and z fields of a jb_point_t in hex, one per line.
// NOTE(review): the function has no declared return type (so it is not void)
// yet uses a bare "return;" - consider declaring it "function void" once all
// callers are confirmed to use it as a statement.
function print_jb_point(jb_point_t p);
$display("x:%h", p.x);
$display("y:%h", p.y);
$display("z:%h", p.z);
return;
endfunction
// Prints the x, y and z fields of a fp2_jb_point_t in hex, one coordinate per
// line, each shown as its (c1, c0) component pair.
// NOTE(review): the function has no declared return type (so it is not void)
// yet uses a bare "return;" - consider declaring it "function void" once all
// callers are confirmed to use it as a statement.
function print_fp2_jb_point(fp2_jb_point_t p);
$display("x:(c1:%h, c0:%h)", p.x.c1, p.x.c0);
$display("y:(c1:%h, c0:%h)", p.y.c1, p.y.c0);
$display("z:(c1:%h, c0:%h)", p.z.c1, p.z.c0);
return;
endfunction
endfunction
endpackage