Enable bls12-381 point multiplication using the Miller loop of the pairing engine instead of a dedicated unit, to save area on the FPGA.
Updated the bls12-381 testbench.
This commit is contained in:
bsdevlin 2019-08-20 21:05:08 +08:00
parent 11bde6e1a8
commit d437d3a165
5 changed files with 211 additions and 186 deletions

View File

@ -40,12 +40,14 @@ module bls12_381_pairing
)(
input i_clk, i_rst,
// Inputs
input i_val,
output logic o_rdy,
input G1_FP_AF_TYPE i_g1_af,
input G2_FP_AF_TYPE i_g2_af,
input i_val,
input i_mode, // 0 == ate pairing, 1 == only point multiplication
input FE_TYPE i_key, // Input key when in mode == 1
output logic o_rdy,
input G1_FP_AF_TYPE i_g1_af,
input G2_FP_AF_TYPE i_g2_af,
if_axi_stream.source o_fe12_if,
output G2_FP_JB_TYPE o_pt_jb,
if_axi_stream.source o_p_jb_if, // Output point if we did a point multiplication
// Interface to FE_TYPE multiplier (mod P)
if_axi_stream.source o_mul_fe_if,
if_axi_stream.sink i_mul_fe_if,
@ -92,7 +94,7 @@ if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) final_exp_fe12_o
logic dbl_i_val, dbl_o_rdy;
logic add_i_val, add_o_rdy;
logic wait_dbl, wait_add;
logic wait_dbl, wait_add, stage_done;
G1_FP_AF_TYPE g1_af_i;
G2_FP_JB_TYPE g2_r_jb_i, add_g2_o, dbl_g2_o;
@ -101,29 +103,26 @@ G2_FP_AF_TYPE g2_af_i;
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_f12_o_if (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) dbl_f12_o_if (i_clk);
logic [$clog2(ATE_X_START)-1:0] ate_loop_cnt;
logic [$clog2($bits(FE_TYPE))-1:0] ate_loop_cnt;
logic [1:0] miller_mult_cnt;
enum {IDLE, POINT_MULT, MILLER_LOOP, FINAL_EXP} pair_state;
enum {IDLE, POINT_MULT_DBL, POINT_MULT_ADD, POINT_MULT_DONE, MILLER_LOOP, FINAL_EXP} pair_state;
FE12_TYPE f;
logic f_val;
logic [3:0] out_cnt;
logic point_mul_mode;
logic point_mul_mode, found_one;
FE_TYPE key;
logic [$bits(FE_TYPE)/32-1:0] key_zero;
always_comb begin
dbl_f12_o_if.rdy = f_val && (~mul_fe12_o_if[0].val || (mul_fe12_o_if[0].val && mul_fe12_o_if[0].rdy)) && ((out_cnt/2 == 0) || (out_cnt/2 == 1) || (out_cnt/2 == 4)); // As this is a sparse f12 using full f12_mul
add_f12_o_if.rdy = f_val && (~mul_fe12_o_if[0].val || (mul_fe12_o_if[0].val && mul_fe12_o_if[0].rdy)) && ((out_cnt/2 == 0) || (out_cnt/2 == 1) || (out_cnt/2 == 4)); // As this is a sparse f12 using full f12_mul
dbl_f12_o_if.rdy = pair_state == POINT_MULT_DBL || (f_val && (~mul_fe12_o_if[0].val || (mul_fe12_o_if[0].val && mul_fe12_o_if[0].rdy)) && ((out_cnt/2 == 0) || (out_cnt/2 == 1) || (out_cnt/2 == 4))); // As this is a sparse f12 using full f12_mul
add_f12_o_if.rdy = pair_state == POINT_MULT_ADD || (f_val && (~mul_fe12_o_if[0].val || (mul_fe12_o_if[0].val && mul_fe12_o_if[0].rdy)) && ((out_cnt/2 == 0) || (out_cnt/2 == 1) || (out_cnt/2 == 4))); // As this is a sparse f12 using full f12_mul
final_exp_fe12_o_if.dat = f[0][0][0];
final_exp_fe12_o_if.err = 0;
final_exp_fe12_o_if.ctl = 0;
final_exp_fe12_o_if.mod = 0;
o_pt_jb = g2_r_jb_i;
end
always_ff @ (posedge i_clk) begin
@ -150,7 +149,10 @@ always_ff @ (posedge i_clk) begin
point_mul_mode <= 0;
key <= 0;
key_zero <= 0;
found_one <= 0;
stage_done <= 0;
o_p_jb_if.reset_source();
end else begin
@ -167,8 +169,7 @@ always_ff @ (posedge i_clk) begin
case(pair_state)
IDLE: begin
ate_loop_cnt <= ATE_X_START-1;
//for (int i = 0key <= ATE_X;
ate_loop_cnt <= i_mode == 0 ? ATE_X_START-1 : $bits(FE_TYPE)-1;
f <= FE12_one;
add_i_val <= 0;
dbl_i_val <= 0;
@ -178,9 +179,12 @@ always_ff @ (posedge i_clk) begin
f_val <= 0;
o_rdy <= 1;
miller_mult_cnt <= 0;
found_one <= 0;
stage_done <= 0;
if (i_val && o_rdy) begin
pair_state <= MILLER_LOOP;
point_mul_mode <= 0;
pair_state <= i_mode == 0 ? MILLER_LOOP : POINT_MULT_DBL;
key <= i_key;
point_mul_mode <= i_mode;
o_rdy <= 0;
g1_af_i <= i_g1_af;
@ -200,7 +204,6 @@ always_ff @ (posedge i_clk) begin
if (wait_dbl && dbl_f12_o_if.val && dbl_f12_o_if.sop && dbl_f12_o_if.rdy) begin
g2_r_jb_i <= dbl_g2_o;
// key[0] == 1?
if (~wait_add && ATE_X[ate_loop_cnt] == 1) begin
add_i_val <= 1;
wait_add <= 1;
@ -243,7 +246,6 @@ always_ff @ (posedge i_clk) begin
f_val <= 0;
out_cnt <= 0;
miller_mult_cnt <= ATE_X[ate_loop_cnt] == 0 ? 3 : 2;
//key[0] == 0 ?
end
end
end
@ -278,10 +280,6 @@ always_ff @ (posedge i_clk) begin
f_val <= 0;
wait_add <= 0;
miller_mult_cnt <= 0;
key <= key >> 1;
if (&key_zero) begin
end
ate_loop_cnt <= ate_loop_cnt - 1;
if (ate_loop_cnt == 0) begin
pair_state <= FINAL_EXP;
@ -303,6 +301,65 @@ always_ff @ (posedge i_clk) begin
end
end
end
// POINT_MULT_DBL: doubling step of the point multiplication (i_mode == 1).
// While found_one is 0 the state only shifts the key left, one bit per clock,
// until a 1 has been seen in the key's MSB. Afterwards each visit doubles
// g2_r_jb_i and then tests the key's MSB to decide whether an addition
// (POINT_MULT_ADD) is required for the current bit.
POINT_MULT_DBL: begin
  if(found_one == 0) begin
    // Still skipping leading zeros: consume one key bit, no curve operation.
    key <= key << 1;
    ate_loop_cnt <= ate_loop_cnt - 1;
    // Non-blocking, like every other assignment to found_one in this
    // always_ff block. The original used a blocking '=' here, which mixes
    // assignment types on one variable (rejected by synthesis / lint tools)
    // and races with any other process sampling found_one. The only read in
    // this state happens before this write, so cycle behaviour is unchanged.
    found_one <= key[$bits(FE_TYPE)-1];
    // NOTE(review): an all-zero key never sets found_one, so this state is
    // never left; and if the only set bit is bit 0, ate_loop_cnt is
    // decremented past 0 here. Confirm callers exclude these keys.
  end else begin
    // Request a doubling once per visit.
    if (~wait_dbl) begin
      wait_dbl <= 1;
      dbl_i_val <= 1;
    end
    // Doubling result available: capture it and decide what to do next.
    if (dbl_f12_o_if.val) begin
      wait_dbl <= 0;
      dbl_i_val <= 0;
      g2_r_jb_i <= dbl_g2_o;
      if (key[$bits(FE_TYPE)-1] == 1) begin
        // Current key bit is set: the add state advances key / counter.
        pair_state <= POINT_MULT_ADD;
      end else if (ate_loop_cnt == 0) begin
        // Last key bit processed and it was 0: result is ready.
        pair_state <= POINT_MULT_DONE;
      end else begin
        // Bit is 0 and more bits remain: move on to the next bit.
        ate_loop_cnt <= ate_loop_cnt - 1;
        key <= key << 1;
      end
    end
  end
end
// POINT_MULT_ADD: addition step of the point multiplication. Entered from
// POINT_MULT_DBL when the key bit under test (the key's MSB) is 1.
POINT_MULT_ADD: begin
// Request an addition once per visit to this state.
if (~wait_add) begin
wait_add <= 1;
add_i_val <= 1;
end
// Addition result available: capture it as the new running point.
if (add_f12_o_if.val) begin
wait_add <= 0;
add_i_val <= 0;
g2_r_jb_i <= add_g2_o;
// Counter at 0 means the key bit just handled was the last one.
if (ate_loop_cnt == 0) begin
pair_state <= POINT_MULT_DONE;
end else begin
// More bits remain: advance to the next key bit and double again.
ate_loop_cnt <= ate_loop_cnt - 1;
key <= key << 1;
pair_state <= POINT_MULT_DBL;
end
end
end
// POINT_MULT_DONE: stream the point held in g2_r_jb_i out on o_p_jb_if, then
// return to IDLE. sop is flagged on the beat loaded at out_cnt == 0 and eop on
// the beat loaded at out_cnt == 5, i.e. six beats per result.
POINT_MULT_DONE: begin
// Load a new beat only when the output register is empty or being accepted.
if (~o_p_jb_if.val || (o_p_jb_if.val && o_p_jb_if.rdy)) begin
o_p_jb_if.val <= 1;
o_p_jb_if.sop <= out_cnt == 0;
o_p_jb_if.eop <= out_cnt == 5;
// NOTE(review): presumably o_p_jb_if.dat is FE_TYPE wide so only the lowest
// element of g2_r_jb_i is taken on each beat - confirm against the interface.
o_p_jb_if.dat <= g2_r_jb_i;
out_cnt <= out_cnt + 1;
// Shift the next element down into the low bits for the following beat.
g2_r_jb_i <= g2_r_jb_i >> $bits(FE_TYPE);
// The eop beat was accepted this cycle: these later non-blocking assignments
// override the ones above, dropping val and resetting the beat counter.
if (o_p_jb_if.val && o_p_jb_if.rdy && o_p_jb_if.eop) begin
pair_state <= IDLE;
out_cnt <= 0;
o_p_jb_if.val <= 0;
end
end
end
endcase
end

View File

@ -36,7 +36,10 @@ module bls12_381_pairing_wrapper
output logic o_rdy,
input G1_FP_AF_TYPE i_g1_af, // G1 input point
input G2_FP_AF_TYPE i_g2_af, // G2 input point
if_axi_stream.source o_fe12_if, // Result fe12 of ate pairing
input i_mode, // 0 == ate pairing, 1 == only point multiplication
input FE_TYPE i_key, // Input key when in mode == 1
if_axi_stream.source o_fe12_if, // Result fe12 of ate pairing (or point mult)
if_axi_stream.source o_p_jb_if, // Result of point multiplication
// Interface to FE_TYPE multiplier (mod P)
if_axi_stream.source o_mul_fe_if,
if_axi_stream.sink i_mul_fe_if,
@ -94,7 +97,10 @@ bls12_381_pairing (
.o_rdy ( o_rdy ),
.i_g1_af ( i_g1_af ),
.i_g2_af ( i_g2_af ),
.i_mode ( i_mode ),
.i_key ( i_key ),
.o_fe12_if ( o_fe12_if ),
.o_p_jb_if ( o_p_jb_if ),
.o_mul_fe2_if ( mul_fe2_o_if[1] ),
.i_mul_fe2_if ( mul_fe2_i_if[1] ),
.o_add_fe_if ( add_fe_o_if[4] ),

View File

@ -147,6 +147,9 @@ package bls12_381_pkg;
fp2_jb_point_t g2_point = '{x:G2x, y:G2y, z:FE2_one};
fp2_jb_point_t g_point_fp2 = '{x:{381'd0, Gx}, y:{381'd0, Gy}, z:FE2_one}; // Fp Generator point used in dual mode point multiplication
fp2_af_point_t g2_af_point_fp2 = '{x:G2x, y:G2y};
fp2_af_point_t g_af_point_fp2 = '{x:{381'd0, Gx}, y:{381'd0, Gy}};
// Instruction codes
typedef enum logic [7:0] {
@ -608,32 +611,57 @@ package bls12_381_pkg;
// P is an affine Fp point in G1
// Q is an affine Fp^2 point in G2 on the twisted curve
// f is a Fp^12 element, the result of the miller loop
task miller_loop(input af_point_t P, input fp2_af_point_t Q, output fe12_t f, output fp2_jb_point_t R);
task miller_loop(input af_point_t P, input fp2_af_point_t Q, output fe12_t f);
fe12_t lv_d, lv_a, f_sq;
fe_t key;
key = ATE_X;
fp2_jb_point_t R;
f = FE12_one;
R.x = Q.x;
R.y = Q.y;
R.z = 1;
for (int i = ATE_X_START-1; i >= 0; i--) begin
f_sq = fe12_sqr(f); // Full multiplication
miller_double_step(R, P, lv_d);
f = fe12_mul(f_sq, lv_d); // Sparse multiplication
if (key[i] == 1) begin
if (ATE_X[i] == 1) begin
miller_add_step(R, Q, P, lv_a);
f = fe12_mul(f, lv_a); // Sparse multiplication
end
end
endtask
// This uses the miller loop functions to do a point multiplication
// Point multiplication R = k * Q built from the Miller loop step functions
// (miller_double_step / miller_add_step). The Fp^12 value those steps produce
// is written to a scratch variable and never used.
//   Q - affine Fp^2 input point
//   k - scalar, scanned from its most significant bit downwards
//   R - resulting Jacobian Fp^2 point
// If k == 0 no bit is ever found and R keeps its initial value (0, 0, 1)
// (presumably this package's encoding of the zero point - TODO confirm).
task miller_loop_point_mult(input fp2_af_point_t Q, input fe_t k, output fp2_jb_point_t R);
  fe12_t f;          // Scratch output of the step functions, discarded
  af_point_t P;      // G1 point argument required by the step functions, held at 0
  logic found_one;   // Set once the most significant 1 of k has been consumed
  P.x = 0;
  P.y = 0;
  found_one = 0;
  R.x = FE2_zero;
  R.y = FE2_zero;
  R.z = FE2_one;
  // The original printed the never-assigned f here (print_fe12(f)); that
  // debug leftover has been removed.
  for (int i = $bits(fe_t)-1; i >= 0; i--) begin
    if (~found_one) begin
      // Skip leading zeros. R is loaded with Q only when the leading 1 is
      // reached; the original reloaded it on every skipped bit, which gave
      // the same result for k != 0 but returned Q for k == 0.
      if (k[i]) begin
        found_one = 1;
        R.x = Q.x;
        R.y = Q.y;
        R.z = FE2_one;
      end
    end else begin
      // Standard double-and-add on the remaining bits.
      miller_double_step(R, P, f);
      if (k[i] == 1) begin
        miller_add_step(R, Q, P, f);
      end
    end
  end
endtask
task automatic ate_pairing(input af_point_t P, input fp2_af_point_t Q, ref fe12_t f);
fp2_jb_point_t R; // This is only used for point multiplication
miller_loop(P, Q, f, R);
miller_loop(P, Q, f);
final_exponent(f);
endtask;

View File

@ -65,15 +65,15 @@ if_axi_stream #(.DAT_BYTS(8)) interrupt_out_if(i_clk);
if_axi_stream #(.DAT_BYTS(3)) idx_in_if(i_clk);
if_axi_stream #(.DAT_BYTS(3)) idx_out_if(i_clk);
// Fp2 point multiplication
if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fp2_jb_point_t)), .CTL_BITS(DAT_BITS)) fp2_pt_mul_in_if(i_clk);
if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fp2_jb_point_t))) fp2_pt_mul_out_if(i_clk);
logic fp_pt_mult_mode;
// Point multiplication
logic pair_mode;
fe_t pair_key;
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mult_pt_if (i_clk);
if_axi_stream #(.DAT_BITS(2*$bits(bls12_381_pkg::fp2_jb_point_t))) add_i_if(i_clk);
if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fp2_jb_point_t))) add_o_if(i_clk);
if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fp2_jb_point_t))) dbl_i_if(i_clk);
if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fp2_jb_point_t))) dbl_o_if(i_clk);
if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fp2_jb_point_t))) add_o_if(i_clk);
if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fp2_jb_point_t))) dbl_i_if(i_clk);
if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fp2_jb_point_t))) dbl_o_if(i_clk);
localparam CTL_BITS = 128;
// Access to shared 381bit multiplier / adder / subtractor
@ -82,7 +82,7 @@ localparam CTL_BITS = 128;
// Fp6 23:16
// Top level muxes 31:24
// 67:32 Pairing engine - TODO consolidate the logic used here with the point multiplication
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_in_if [2:0] (i_clk) ;
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_in_if [2:0] (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_out_if [2:0] (i_clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_in_if (i_clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_out_if (i_clk);
@ -125,8 +125,6 @@ always_ff @ (posedge i_clk) begin
data_ram_sys_if.a <= 0;
data_ram_sys_if.re <= 1;
data_ram_sys_if.en <= 1;
fp2_pt_mul_out_if.rdy <= 0;
fp2_pt_mul_in_if.reset_source();
inst_ram_read <= 0;
data_ram_read <= 0;
cnt <= 0;
@ -137,7 +135,6 @@ always_ff @ (posedge i_clk) begin
inst_state <= NOOP_WAIT;
pt_l <= SCALAR;
new_data <= 0;
fp_pt_mult_mode <= 0;
pt_size <= 0;
idx_in_if.reset_source();
interrupt_in_if.reset_source();
@ -157,6 +154,10 @@ always_ff @ (posedge i_clk) begin
pair_i_val <= 0;
pair_i_g1 <= 0;
pair_i_g2 <= 0;
pair_mode <= 0;
pair_key <= 0;
mult_pt_if.rdy <= 0;
end else begin
@ -178,7 +179,6 @@ always_ff @ (posedge i_clk) begin
data_ram_sys_if.we <= 0;
data_ram_read <= data_ram_read << 1;
if (fp2_pt_mul_in_if.rdy) fp2_pt_mul_in_if.val <= 0;
if (inv_fe_o_if.rdy) inv_fe_o_if.val <= 0;
if (inv_fe2_o_if.rdy) inv_fe2_o_if.val <= 0;
if (add_in_if.rdy) add_in_if.val <= 0;
@ -186,7 +186,7 @@ always_ff @ (posedge i_clk) begin
if (mul_in_if[1].rdy) mul_in_if[1].val <= 0;
if (pair_o_rdy) pair_i_val <= 0;
fp2_pt_mul_out_if.rdy <= 1;
mult_pt_if.rdy <= 1;
if (idx_in_if.val && idx_in_if.rdy) idx_in_if.val <= 0;
if (interrupt_in_if.val && interrupt_in_if.rdy) interrupt_in_if.val <= 0;
@ -299,7 +299,10 @@ bls12_381_pairing_wrapper (
.o_rdy ( pair_o_rdy ),
.i_g1_af ( pair_i_g1 ),
.i_g2_af ( pair_i_g2 ),
.i_mode ( pair_mode ),
.i_key ( pair_key ),
.o_fe12_if ( pair_o_res_if ),
.o_p_jb_if ( mult_pt_if ),
.o_mul_fe_if ( mul_in_if[0] ),
.i_mul_fe_if ( mul_out_if[0] ),
.o_inv_fe2_if ( inv_fe2_i_if ),
@ -314,7 +317,7 @@ resource_share # (
.CTL_BITS ( CTL_BITS ),
.OVR_WRT_BIT ( 120 ),
.PIPELINE_IN ( 1 ),
.PIPELINE_OUT ( 1 )
.PIPELINE_OUT ( 0 )
)
resource_share_mul (
.i_clk ( i_clk ),
@ -709,85 +712,61 @@ task task_inv_element();
endtask
task task_point_mult();
fp2_pt_mul_out_if.rdy <= 0;
pair_mode <= 1;
case(cnt) inside
0: begin
data_ram_sys_if.a <= curr_inst.a;
data_ram_read[0] <= 1;
cnt <= cnt + 1;
end
1: begin
if (|data_ram_read == 0) data_ram_read[0] <= 1;
if (data_ram_read[READ_CYCLE]) begin
cnt <= cnt + 1;
pair_key <= curr_data.dat;
data_ram_sys_if.a <= curr_inst.b;
data_ram_read[0] <= 1;
pt_size <= 0;
fp2_pt_mul_in_if.ctl <= curr_data.dat;
fp2_pt_mul_in_if.dat <= {FE2_one, {DAT_BITS*4{1'd0}}}; // This is in case we use affine coordinates
cnt <= cnt + 1;
end
end
2: begin
1,2,3,4: begin
if (data_ram_read[READ_CYCLE]) begin
fp_pt_mult_mode <= (curr_data.pt == FP_AF) || (curr_data.pt == FP_JB);
if (curr_data.pt == FP2_JB || curr_data.pt == FP2_AF) begin
fp2_pt_mul_in_if.dat[DAT_BITS*pt_size +: DAT_BITS] <= curr_data.dat;
end else begin
fp2_pt_mul_in_if.dat[2*DAT_BITS*pt_size +: 2*DAT_BITS] <= {(DAT_BITS)'(0), curr_data.dat};
end
if (pt_size == get_point_type_size(curr_data.pt)-1) begin
data_ram_read[0] <= 1;
data_ram_sys_if.a <= data_ram_sys_if.a + 1;
if (curr_data.pt == FP_AF && cnt % 2 == 0) data_ram_sys_if.a <= data_ram_sys_if.a;
case(cnt)
1: pair_i_g2.x[0] <= curr_data.dat;
2: pair_i_g2.x[1] <= curr_data.pt == FP_AF ? 0 : curr_data.dat;
3: pair_i_g2.y[0] <= curr_data.dat;
4: pair_i_g2.y[1] <= curr_data.pt == FP_AF ? 0 : curr_data.dat;
endcase
cnt <= cnt + 1;
if (cnt == 1) pt_l <= curr_data.pt;
if (cnt == 4) begin
pair_i_val <= 1;
data_ram_sys_if.a <= curr_inst.c;
if (curr_data.pt == FP2_AF || curr_data.pt == FP2_JB)
cnt <= 6;
else
cnt <= 3;
fp2_pt_mul_in_if.val <= 1;
end else begin
pt_size <= pt_size + 1;
data_ram_sys_if.a <= data_ram_sys_if.a + 1;
data_ram_read[0] <= 1;
end
end
end
// Wait for result of FP_JB
3,4,5: begin
if (fp2_pt_mul_out_if.val) begin
new_data.pt <= FP_JB;
new_data.dat <= fp2_pt_mul_out_if.dat >> ((cnt-3)*2*DAT_BITS);
// Wait for result
5,6,7,8,9,10: begin
mult_pt_if.rdy <= 1;
if (mult_pt_if.val) begin
new_data.pt <= pt_l == FP_AF ? FP_JB : FP2_JB;
new_data.dat <= mult_pt_if.dat;
data_ram_sys_if.we <= 1;
if (cnt > 3) data_ram_sys_if.a <= data_ram_sys_if.a + 1;
cnt <= cnt + 1;
if (cnt == 5) begin
fp2_pt_mul_out_if.rdy <= 1;
cnt <= 12;
if (cnt > 5) data_ram_sys_if.a <= data_ram_sys_if.a + 1;
if (pt_l == FP_AF && cnt % 2 == 0) begin // Even elements will be 0 for FP points
data_ram_sys_if.a <= data_ram_sys_if.a;
data_ram_sys_if.we <= 0;
end
end
end
// Wait for result of FP2_JB
6,7,8,9,10,11: begin
if (fp2_pt_mul_out_if.val) begin
new_data.pt <= FP2_JB;
new_data.dat <= fp2_pt_mul_out_if.dat >> ((cnt-6)*DAT_BITS);
data_ram_sys_if.we <= 1;
if (cnt > 6) data_ram_sys_if.a <= data_ram_sys_if.a + 1;
cnt <= cnt + 1;
if (cnt == 11) begin
fp2_pt_mul_out_if.rdy <= 1;
cnt <= 12;
end
end
end
12: begin
11: begin
pair_mode <= 0;
get_next_inst();
end
endcase
endcase
endtask
task task_fp_fpoint_mult();
fp2_pt_mul_out_if.rdy <= 0;
fp_pt_mult_mode <= 1;
pair_mode <= 1;
case(cnt) inside
0: begin
data_ram_sys_if.a <= curr_inst.a;
@ -797,34 +776,36 @@ task task_fp_fpoint_mult();
1: begin
if (data_ram_read[READ_CYCLE]) begin
data_ram_sys_if.a <= curr_inst.b;
fp2_pt_mul_in_if.ctl <= curr_data.dat;
fp2_pt_mul_in_if.dat <= g_point_fp2;
fp2_pt_mul_in_if.val <= 1;
pair_key <= curr_data.dat;
pair_i_g2 <= bls12_381_pkg::g_af_point_fp2;
pair_i_val <= 1;
cnt <= cnt + 1;
end
end
// Wait for result
2,3,4: begin
if (fp2_pt_mul_out_if.val) begin
2,3,4,5,6,7: begin
mult_pt_if.rdy <= 1;
if (mult_pt_if.val) begin
new_data.pt <= FP_JB;
new_data.dat <= fp2_pt_mul_out_if.dat >> ((cnt-2)*2*DAT_BITS);
new_data.dat <= mult_pt_if.dat;
data_ram_sys_if.we <= 1;
if (cnt > 2) data_ram_sys_if.a <= data_ram_sys_if.a + 1;
cnt <= cnt + 1;
if (cnt == 4) begin
fp2_pt_mul_out_if.rdy <= 1;
if (cnt % 2 == 1) begin // Odd elements will be 0
data_ram_sys_if.a <= data_ram_sys_if.a;
data_ram_sys_if.we <= 0;
end
cnt <= cnt + 1;
end
end
5: begin
8: begin
pair_mode <= 0;
get_next_inst();
end
endcase
endtask
task task_fp2_fpoint_mult();
fp2_pt_mul_out_if.rdy <= 0;
fp_pt_mult_mode <= 0;
pair_mode <= 1;
case(cnt) inside
0: begin
data_ram_sys_if.a <= curr_inst.a;
@ -834,32 +815,32 @@ task task_fp2_fpoint_mult();
1: begin
if (data_ram_read[READ_CYCLE]) begin
data_ram_sys_if.a <= curr_inst.b;
fp2_pt_mul_in_if.ctl <= curr_data.dat;
fp2_pt_mul_in_if.dat <= bls12_381_pkg::g2_point;
fp2_pt_mul_in_if.val <= 1;
pair_key <= curr_data.dat;
pair_i_g2 <= bls12_381_pkg::g2_af_point_fp2;
pair_i_val <= 1;
cnt <= cnt + 1;
end
end
// Wait for result
2,3,4,5,6,7: begin
if (fp2_pt_mul_out_if.val) begin
mult_pt_if.rdy <= 1;
if (mult_pt_if.val) begin
new_data.pt <= FP2_JB;
new_data.dat <= fp2_pt_mul_out_if.dat >> ((cnt-2)*DAT_BITS);
new_data.dat <= mult_pt_if.dat;
data_ram_sys_if.we <= 1;
if (cnt > 2) data_ram_sys_if.a <= data_ram_sys_if.a + 1;
cnt <= cnt + 1;
if (cnt == 7) begin
fp2_pt_mul_out_if.rdy <= 1;
end
end
end
8: begin
pair_mode <= 0;
get_next_inst();
end
endcase
endtask
task task_pairing();
pair_mode <= 0;
case(cnt) inside
0: begin
data_ram_sys_if.a <= curr_inst.a;

View File

@ -63,10 +63,11 @@ begin
jb_point_t out_p, exp_p;
logic [DAT_BITS-1:0] in_k;
bls12_381_interrupt_rpl_t interrupt_rpl;
failed = 0;
in_k = 381'h10;//381'haaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa;
in_k = 381'haaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa;
exp_p = point_mult(in_k, g_point);
$display("Running test_fp_fpoint_mult...");
axi_lite_if.peek(.addr(0), .data(rdata));
@ -95,10 +96,6 @@ begin
fork
begin
out_if.get_stream(get_dat, get_len, 50);
/*while(1) begin
out_if.rdy = ~out_if.rdy;
@(posedge clk);
end*/
interrupt_rpl = get_dat;
assert(interrupt_rpl.hdr.cmd == BLS12_381_INTERRUPT_RPL) else $fatal(1, "ERROR: Received non-interrupt message");
@ -110,7 +107,7 @@ begin
for (int i = 0; i < 3; i++)
out_p[i*381 +: 381] = get_dat[i*(48*8) +: 381];
if (out_p == exp_p) begin
if (to_affine(out_p) == to_affine(exp_p)) begin
$display("INFO: Output point matched expected:");
print_jb_point(out_p);
end else begin
@ -122,7 +119,7 @@ begin
end
end
begin
repeat(80000) @(posedge out_if.i_clk);
repeat(1000000) @(posedge out_if.i_clk);
$fatal("ERROR: Timeout while waiting for result");
end
join_any
@ -182,7 +179,7 @@ begin
for (int i = 0; i < 6; i++)
out_p[i*381 +: 381] = get_dat[i*(48*8) +: 381];
if (out_p == exp_p) begin
if (fp2_to_affine(out_p) == fp2_to_affine(exp_p)) begin
$display("INFO: Output point matched expected:");
print_fp2_jb_point(out_p);
end else begin
@ -194,7 +191,7 @@ begin
end
end
begin
repeat(100000) @(posedge out_if.i_clk);
repeat(1000000) @(posedge out_if.i_clk);
$fatal("ERROR: Timeout while waiting for result");
end
join_any
@ -524,7 +521,7 @@ task test_point_mult();
axi_lite_if.poke(.addr(32'h0), .data(2'b11));
axi_lite_if.poke(.addr(32'h10), .data(0));
for (int i = 0; i < 4; i++) begin
for (int i = 0; i < 2; i++) begin
in_k = random_vector(384/8) % P;
p_in = 0;
p2_in = 0;
@ -558,26 +555,9 @@ task test_point_mult();
p_exp = point_mult(in_k, p_in);
end
// FP_JB
1: begin
p_in.z = random_vector(384/8) % P;
data = '{dat:p_in.x, pt:FP_JB};
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 1*64), .len(48));
data = '{dat:p_in.y, pt:FP_JB};
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 2*64), .len(48));
data = '{dat:p_in.z, pt:FP_JB};
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 3*64), .len(48));
p_exp = point_mult(in_k, p_in);
end
// FP2_AF
2: begin
1: begin
p2_in.z = FE2_one;
@ -596,33 +576,6 @@ task test_point_mult();
p2_exp = fp2_point_mult(in_k, p2_in);
end
// FP2_JB
3: begin
p2_in.z[0] = random_vector(384/8) % P;
p2_in.z[1] = random_vector(384/8) % P;
data = '{dat:p2_in.x[0], pt:FP2_JB};
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 1*64), .len(48));
data = '{dat:p2_in.x[1], pt:FP2_JB};
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 2*64), .len(48));
data = '{dat:p2_in.y[0], pt:FP2_JB};
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 3*64), .len(48));
data = '{dat:p2_in.y[1], pt:FP2_JB};
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 4*64), .len(48));
data = '{dat:p2_in.z[0], pt:FP2_JB};
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 5*64), .len(48));
data = '{dat:p2_in.z[1], pt:FP2_JB};
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 6*64), .len(48));
p2_exp = fp2_point_mult(in_k, p2_in);
end
endcase
inst = '{code:POINT_MULT, a:16'd0, b:16'd1, c:16'd10};
@ -639,13 +592,13 @@ task test_point_mult();
assert(interrupt_rpl.hdr.cmd == BLS12_381_INTERRUPT_RPL) else $fatal(1, "ERROR: Received non-interrupt message");
assert(interrupt_rpl.index == i) else $fatal(1, "ERROR: Received wrong index value in message");
if (i == 0 || i == 1) begin
if (i == 0) begin
assert(interrupt_rpl.data_type == FP_JB) else $fatal(1, "ERROR: Received wrong data type value in message");
p_out = 0;
for (int i = 0; i < 3; i++) p_out[i*381 +: 381] = get_dat[i*(48*8) +: 381];
if (p_out == p_exp) begin
if (to_affine(p_out) == to_affine(p_exp)) begin
$display("INFO: Output element matched expected:");
print_jb_point(p_out);
end else begin
@ -662,7 +615,7 @@ task test_point_mult();
p2_out = 0;
for (int i = 0; i < 6; i++) p2_out[i*381 +: 381] = get_dat[i*(48*8) +: 381];
if (p2_out == p2_exp) begin
if (fp2_to_affine(p2_out) == fp2_to_affine(p2_exp)) begin
$display("INFO: Output element matched expected:");
print_fp2_jb_point(p2_out);
end else begin
@ -675,7 +628,7 @@ task test_point_mult();
end
end
begin
repeat(100000) @(posedge out_if.i_clk);
repeat(1000000) @(posedge out_if.i_clk);
$fatal("ERROR: Timeout while waiting for result");
end
join_any
@ -805,11 +758,11 @@ initial begin
out_if.rdy = 0;
#100ns;
//test_fp_fpoint_mult();
//test_fp2_fpoint_mult();
test_fp_fpoint_mult();
test_fp2_fpoint_mult();
test_inv_element();
test_mul_add_sub_element();
//test_point_mult();
test_point_mult();
test_pairing();