Updates to secp256k1 core

This commit is contained in:
bsdevlin 2019-03-23 18:25:42 -04:00
parent e014bba045
commit f95ffeab0c
8 changed files with 466 additions and 95 deletions

View File

@ -1,7 +1,7 @@
/*
Multiplication using Karatsuba-Ofman algorithm.
Multiple of these can be instantiated, each one takes 2 clocks cycles
Multiple of these can be instantiated, each one takes 3 clock cycles
per level. Fully pipelined so can accept a new input every clock.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
@ -42,17 +42,31 @@ localparam HBITS = BITS/2;
logic [BITS-1:0] m0, m1, m2, dat_a, dat_b;
logic [BITS*2-1:0] q;
logic [HBITS-1:0] a0, a1;
logic sign, sign_;
logic val;
logic [CTL_BITS-1:0] ctl;
logic sign, sign_, sign_1;
logic val, val_, val_1;
logic [CTL_BITS-1:0] ctl, ctl_, ctl_1;
logic [HBITS-1:0] a0_, a1_;
logic [BITS-1:0] m0_, m1_, m2_;
always_ff @ (posedge i_clk) begin
dat_a <= i_dat_a;
dat_b <= i_dat_b;
o_dat <= q;
o_val <= val;
o_ctl <= ctl;
o_val <= val_1;
o_ctl <= ctl_1;
val_ <= val;
val_1 <= val_;
ctl_ <= ctl;
ctl_1 <= ctl_;
a0_ <= a0;
a1_ <= a1;
m0_ <= m0;
m1_ <= m1;
m2_ <= m2;
end
generate
@ -61,27 +75,29 @@ generate
a1 = i_dat_b[HBITS +: HBITS] > i_dat_b[0 +: HBITS] ? i_dat_b[HBITS +: HBITS] - i_dat_b[0 +: HBITS] : i_dat_b[0 +: HBITS] - i_dat_b[HBITS +: HBITS];
sign_ = ((i_dat_a[0 +: HBITS] < i_dat_a[HBITS +: HBITS]) ^
(i_dat_b[HBITS +: HBITS] < i_dat_b[0 +: HBITS]));
q = (m0 << BITS) + ((m0 + m2 + (sign == 1 ? -m1 : m1)) << HBITS) + m2;
q = (m0_ << BITS) + ((m0_ + m2_ + (sign == 1 ? -m1_ : m1_)) << HBITS) + m2_;
end
if (LEVEL == 1) begin: GEN_REC
always_comb begin
m0 = i_dat_a[HBITS +: HBITS] * i_dat_b[HBITS +: HBITS];
m2 = i_dat_a[0 +: HBITS] * i_dat_b[0 +: HBITS];
m1 = (a0 * a1);
sign = sign_;
m0 = dat_a[HBITS +: HBITS] * dat_b[HBITS +: HBITS];
m2 = dat_a[0 +: HBITS] * dat_b[0 +: HBITS];
m1 = (a0_ * a1_);
o_rdy = i_rdy;
val = i_val;
ctl = i_ctl;
end
always_ff @ (posedge i_clk) begin
sign <= sign_1;
sign_1 <= sign_;
end
end else begin
// pipeline the other non-mult values x clock cycles and add them after multipliers
logic [LEVEL-2:0] sign_r;
logic [LEVEL*3-1:0] sign_r;
always_comb begin
sign = sign_r[LEVEL-2];
sign = sign_r[LEVEL*3-2];
end
always_ff @ (posedge i_clk) begin
@ -95,8 +111,8 @@ generate
)
karatsuba_ofman_mult_m0 (
.i_clk ( i_clk ),
.i_dat_a ( i_dat_a[HBITS +: HBITS] ),
.i_dat_b ( i_dat_b[HBITS +: HBITS] ),
.i_dat_a ( dat_a[HBITS +: HBITS] ),
.i_dat_b ( dat_b[HBITS +: HBITS] ),
.i_val ( i_val ),
.o_val ( val ),
.i_ctl ( i_ctl ),
@ -113,8 +129,8 @@ generate
)
karatsuba_ofman_mult_m2 (
.i_clk ( i_clk ),
.i_dat_a ( i_dat_a[0 +: HBITS] ),
.i_dat_b ( i_dat_b[0 +: HBITS] ),
.i_dat_a ( dat_a[0 +: HBITS] ),
.i_dat_b ( dat_b[0 +: HBITS] ),
.i_val ( i_val ),
.o_val (),
.i_ctl ( 1'd0 ),
@ -131,8 +147,8 @@ generate
)
karatsuba_ofman_mult_m1 (
.i_clk ( i_clk ),
.i_dat_a ( a0 ),
.i_dat_b ( a1 ),
.i_dat_a ( a0_ ),
.i_dat_b ( a1_ ),
.i_val ( i_val ),
.o_val (),
.i_ctl ( 1'd0 ),

View File

@ -49,8 +49,8 @@ generate
// Optionally pipeline the input
if (PIPELINE == 0) begin: PIPELINE_GEN
always_comb i_axi[g].rdy = rdy[g];
always_comb begin
i_axi[g].rdy = rdy[g];
val[g] = i_axi[g].val;
eop[g] = i_axi[g].eop;
sop[g] = i_axi[g].sop;

View File

@ -69,6 +69,48 @@ karatsuba_ofman_mult (
.o_dat ( out_if.dat )
);
// Back-to-back pipeline test: drives one operand pair per clock into the
// multiplier (a = b = i for i in 1..max-1) while a parallel thread collects
// each result from out_if and compares it against i*i.
task test_pipeline();
begin
  $display("Running test_pipeline...");
  fork
    // Producer: present a new input word every clock with val held high.
    begin
      logic [255:0] in_a, in_b;
      integer i;
      integer max;
      // Loop bounds are assigned procedurally rather than in the declarations:
      // locals of a static task are initialised only once at time 0, so a
      // declaration initialiser would leave i == max on any later call.
      i = 1;
      max = 10;
      while (i < max) begin
        in_a = i;
        in_b = i;
        //in_if.put_stream({in_b, in_a}, 512/8, i);
        in_if.sop = 1;
        in_if.eop = 1;
        in_if.ctl = i;
        in_if.dat = {in_a, in_b};
        in_if.val = 1;
        @(posedge in_if.i_clk);
        i = i + 1;
      end
      in_if.val = 0;
    end
    // Consumer: fetch each product and check it against the expected square.
    begin
      integer i;
      integer max;
      integer signed get_len;
      logic [common_pkg::MAX_SIM_BYTS*8-1:0] expected, get_dat;
      i = 1;
      max = 10;
      while (i < max) begin
        expected = i*i;
        out_if.get_stream(get_dat, get_len);
        common_pkg::compare_and_print(get_dat, expected);
        $display("test_pipeline PASSED loop %d/%d", i, max);
        i = i + 1;
      end
    end
  join
  $display("test_pipeline PASSED");
end
endtask;
task test_loop();
begin
integer signed get_len;
@ -104,6 +146,7 @@ initial begin
in_if.val = 0;
#(40*CLK_PERIOD);
test_pipeline();
test_loop();
#1us $finish();

View File

@ -65,6 +65,8 @@ package secp256k1_pkg;
function jb_point_t dbl_jb_point(jb_point_t p);
logic signed [512:0] I_X, I_Y, I_Z, A, B, C, D, X, Y, Z;
if (p.z == 0) return p;
I_X = p.x;
I_Y = p.y;
I_Z = p.z;
@ -83,6 +85,51 @@ package secp256k1_pkg;
return dbl_jb_point;
endfunction
// Reference (simulation) model of point addition in Jacobian coordinates.
//   U1 = X1*Z2^2, U2 = X2*Z1^2, S1 = Y1*Z2^3, S2 = Y2*Z1^3
//   H  = U2 - U1, R = S2 - S1
//   X3 = R^2 - H^3 - 2*U1*H^2
//   Y3 = R*(U1*H^2 - X3) - S1*H^3
//   Z3 = H*Z1*Z2
// All arithmetic is reduced modulo p_eq. A point with z == 0 is treated as
// the identity, so the other operand is returned unchanged. When both
// operands describe the same point the doubling routine is used, because the
// generic formula collapses to z == 0 in that case.
function jb_point_t add_jb_point(jb_point_t p1, p2);
  logic signed [512:0] A, U1, U2, S1, S2, H, H3, R;

  if (p1.z == 0) return p2;
  if (p2.z == 0) return p1;

  U1 = p1.x*p2.z % p_eq;
  U1 = U1*p2.z % p_eq;

  U2 = p2.x*p1.z % p_eq;
  U2 = U2 *p1.z % p_eq;

  S1 = p1.y *p2.z % p_eq;
  S1 = (S1*p2.z % p_eq) *p2.z % p_eq;

  S2 = p2.y * p1.z % p_eq;
  S2 = (S2*p1.z % p_eq) *p1.z % p_eq;

  // Equality has to be tested on the Z-normalised values: comparing raw X/Y
  // misses equal points that carry different Z, and wrongly matches distinct
  // points that merely share X/Y with different Z.
  if (U1 == U2 && S1 == S2)
    return (dbl_jb_point(p1));

  // Modular subtraction: add p_eq first whenever the result would go negative.
  H = U2 + (U1 > U2 ? p_eq : 0) -U1;
  R = S2 + (S1 > S2 ? p_eq : 0) -S1;

  H3 = ((H * H %p_eq ) * H ) % p_eq;
  A = (((2*U1 % p_eq) *H % p_eq) * H % p_eq);     // 2*U1*H^2

  add_jb_point.z = ((H * p1.z % p_eq) * p2.z) % p_eq;

  // X3 = R^2 - H^3 - 2*U1*H^2
  add_jb_point.x = R*R % p_eq;
  add_jb_point.x = add_jb_point.x + (H3 > add_jb_point.x ? p_eq : 0) - H3;
  add_jb_point.x = add_jb_point.x + (A > add_jb_point.x ? p_eq : 0) - A;

  // Y3 = R*(U1*H^2 - X3) - S1*H^3
  A = (U1*H % p_eq) * H % p_eq;
  A = A + (add_jb_point.x > A ? p_eq : 0) - add_jb_point.x;
  A = A*R % p_eq;
  add_jb_point.y = S1*H3 % p_eq;
  add_jb_point.y = A + (add_jb_point.y > A ? p_eq : 0) - add_jb_point.y;
endfunction
// Returns 1 when the Jacobian point p satisfies the curve equation
//   Y^2 = X^3 + a*X*Z^4 + b*Z^6  (mod p_eq)
// and 0 otherwise. Every product is reduced modulo p_eq in a 513-bit
// temporary so nothing is truncated; the previous single-expression form
// wrapped at the operand width and only the least significant bit survived
// the implicit 1-bit return type.
function on_curve(jb_point_t p);
  logic [512:0] y2, x3, z2, z4, z6, rhs;

  y2 = p.y * p.y % p_eq;

  x3 = p.x * p.x % p_eq;
  x3 = x3 * p.x % p_eq;

  z2 = p.z * p.z % p_eq;
  z4 = z2 * z2 % p_eq;
  z6 = z4 * z2 % p_eq;

  rhs = (secp256k1_pkg::a * p.x % p_eq) * z4 % p_eq;
  rhs = (rhs + x3) % p_eq;
  rhs = (rhs + (secp256k1_pkg::b * z6 % p_eq)) % p_eq;

  return (y2 == rhs);
endfunction

View File

@ -41,32 +41,54 @@ module secp256k1_point_add
);
/*
* These are the equations that need to be computed, they are issued as variables
* become valid. We have a bitmask to track what equation results are valid which
* will trigger other equations. [] show what equations must be valid before this starts.
* We reuse input points (as they are latched) when possible to reduce register usage.
*
* 0. A = i_p1.y - i_p2.y mod p
* 1. B = i_p1.x - i_p2.x mod p
* 2. o_p.z = B * i_p1.z mod p [eq1]
* 3. i_p1.z = B * B mod p [eq2]
* 4. i_p2.x = A * A mod p [eq0, eq5]
* 5. o_p.x = i_p1.x + i_p2.x mod p
* 6. o_p.x = o_p.x * i_p1.z mod p [eq5, eq3]
* 7. o_p.x = i_p2.x - o_p.x mod p[eq6, eq4]
* 8. o_p.y = i_p1.x*i_p1.z mod p [eq3]
* 9. o_p.y = o_p.y - o_p.x mod p [eq3, eq7, eq8]
* 10. o_p.y = o_p.y * A mod p [eq0, eq9]
* 11. i_p2.y = B * i_p1.z mod p [eq1, eq3, eq0]
* 12. i_p2.y = i_p2.y * i_p1.y [eq11]
* 13. o_p.y = o_p.y - i_p2.y mod p [eq12, eq10]
These are the equations that need to be computed, they are issued as variables
become valid. We have a bitmask to track what equation results are valid which
will trigger other equations. [] show what equations must be valid before this starts.
We reuse input points (as they are latched) when possible to reduce register usage.
Taken from https://en.wikibooks.org/wiki/Cryptography/Prime_Curve/Jacobian_Coordinates
U1 = X1*Z2^2
U2 = X2*Z1^2
S1 = Y1*Z2^3
S2 = Y2*Z1^3
H = U2 - U1
R = S2 - S1
X3 = R^2 - H^3 - 2*U1*H^2
Y3 = R*(U1*H^2 - X3) - S1*H^3
Z3 = H*Z1*Z2
0. A = i_p2.z*i_p2.z mod p
1. i_p1.x = A * i_p1.x mod p [eq0] ..U1
2. C = i_p1.z*i_p1.z mod p
3. i_p2.x = C * i_p2.x mod p [eq2] ... U2
4. A = A * i_p2.z mod p [eq1]
5. A = A * i_p1.y [eq4] ... S1
6. C = C * i_p1.z mod p [eq3]
7. C = C * i_p2.y mod p [eq6] .. S2
8. i_p1.y = i_p2.x - i_p1.x mod p [eq3, eq1, eq5] .. H
9. i_p2.y = C - A mod p [eq5,eq7] ... R
10. o_p.x = i_p2.y * i_p2.y mod p [eq9] ... R^2
11. C = i_p1.y * i_p1.y mod p [eq9] .. H^2
12. i_p2.x = C * i_p1.y mod p [eq8, eq11] ..H^3
13. o_p.x = o_p.x - i_p2.x mod p [eq12, eq10]
14. i_p1.x = i_p1.x*C [eq11, eq8] ..U1*H^2
15. o_p.y = i_p1.x [eq14]
16. i_p1.x = 2* i_p1.x mod p [eq15, eq14]
17. o_p.x = o_p.x - i_p1.x [eq16, eq13]
18. o_p.y = o_p.y - o_p.x mod p [eq17, eq15]
19. o_p.y = o_p.y * i_p2.y mod p [eq18, eq9]
20. i_p2.x = i_p2.x * A [eq5, eq12]
21. o_p.y = o_p.y - i_p2.x [eq20, eq19]
22. o_p.z = i_p1.z * i_p2.z mod p
23. o_p.z = o_p.z * i_p1.y mod p [eq22, eq8]
*/
// We also check in the initial state if one of the inputs is "None" (.z == 0), and set the output to the other point
logic [13:0] eq_val, eq_wait;
logic [23:0] eq_val, eq_wait;
// Temporary variables
logic [255:0] A, B;
logic [255:0] A, C;
jb_point_t i_p1_l, i_p2_l;
always_comb begin
@ -99,7 +121,7 @@ always_ff @ (posedge i_clk) begin
i_p2_l <= 0;
o_err <= 0;
A <= 0;
B <= 0;
C <= 0;
end else begin
if (o_mult_if.rdy) o_mult_if.val <= 0;
@ -115,7 +137,7 @@ always_ff @ (posedge i_clk) begin
i_p1_l <= i_p1;
i_p2_l <= i_p2;
A <= 0;
B <= 0;
C <= 0;
if (i_val && o_rdy) begin
state <= START;
o_rdy <= 0;
@ -145,11 +167,11 @@ always_ff @ (posedge i_clk) begin
i_mod_if.rdy <= 1;
i_mult_if.rdy <= 1;
// Check any results from multiplier
// Check any results from modulo
if (i_mod_if.val && i_mod_if.rdy) begin
eq_val[i_mod_if.ctl] <= 1;
case(i_mod_if.ctl)
5: o_p.x <= i_mod_if.dat;
16: i_p1_l.x <= i_mod_if.dat;
default: o_err <= 1;
endcase
end
@ -158,67 +180,107 @@ always_ff @ (posedge i_clk) begin
if (i_mult_if.val && i_mult_if.rdy) begin
eq_val[i_mult_if.ctl] <= 1;
case(i_mult_if.ctl) inside
2: o_p.z <= i_mult_if.dat;
3: i_p1_l.z <= i_mult_if.dat;
4: i_p2_l.x <= i_mult_if.dat;
6: o_p.x <= i_mult_if.dat;
8: o_p.y <= i_mult_if.dat;
10: o_p.y <= i_mult_if.dat;
11: i_p1_l.y <= i_mult_if.dat;
12: i_p2_l.y <= i_mult_if.dat;
0: A <= i_mult_if.dat;
1: i_p1_l.x <= i_mult_if.dat;
2: C <= i_mult_if.dat;
3: i_p2_l.x <= i_mult_if.dat;
4: A <= i_mult_if.dat;
5: A <= i_mult_if.dat;
6: C <= i_mult_if.dat;
7: C <= i_mult_if.dat;
10: o_p.x <= i_mult_if.dat;
11: C <= i_mult_if.dat;
12: i_p2_l.x <= i_mult_if.dat;
14: i_p1_l.x <= i_mult_if.dat;
19: o_p.y <= i_mult_if.dat;
20: i_p2_l.x <= i_mult_if.dat;
22: o_p.z <= i_mult_if.dat;
23: o_p.z <= i_mult_if.dat;
default: o_err <= 1;
endcase
end
// Issue new multiplies
if (eq_val[1] && ~eq_wait[2]) begin // 2. o_p.z = B * i_p1.z mod p [eq1]
multiply(2, B, i_p1_l.z);
if (~eq_wait[0]) begin // 0. A = i_p2.z*i_p2.z mod p
multiply(0, i_p2_l.z, i_p2_l.z);
end else
if (eq_val[2] && ~eq_wait[3]) begin // 3. i_p1.z = B * B mod p [eq2]
multiply(3, B, B);
if (eq_val[0] && ~eq_wait[1]) begin // 1. i_p1.x = A * i_p1.x mod p [eq0] ..U1
multiply(1, A, i_p1_l.x);
end else
if (eq_val[0] && eq_val[5] && ~eq_wait[4]) begin // 4. i_p2.x = A * A mod p [eq0, eq5]
multiply(4, A, A);
if (~eq_wait[2]) begin // 2. C = i_p1.z*i_p1.z mod p
multiply(2, i_p1_l.z, i_p1_l.z);
end else
if (eq_val[3] && eq_val[5] && ~eq_wait[6]) begin // 6. o_p.x = o_p.x * i_p1.z mod p [eq5, eq3]
multiply(6, o_p.x, i_p1_l.z);
if (eq_val[2] && ~eq_wait[3]) begin // 3. i_p2.x = C * i_p2.x mod p [eq2] ... U2
multiply(3, C, i_p2_l.x);
end else
if (eq_val[3] && ~eq_wait[8]) begin // 8. o_p.y = i_p1.x*i_p1.z mod p [eq3]
multiply(8, i_p1_l.x, i_p1_l.z);
if (eq_val[1] && ~eq_wait[4]) begin // 4. A = A * i_p2.z mod p [eq1]
multiply(4, A, i_p2_l.z);
end else
if (eq_val[0] && eq_val[9] && ~eq_wait[10]) begin // 10. o_p.y = o_p.y * A mod p [eq0, eq9]
multiply(10, o_p.y, A);
if (eq_val[4] && ~eq_wait[5]) begin // 5. A = A * i_p1.y [eq4] ... S1
multiply(5, A, i_p1_l.y);
end else
if (eq_val[0] && eq_val[1] && eq_val[3] && ~eq_wait[11]) begin // 11. i_p2.y = B * i_p1.z mod p [eq1, eq3, eq0]
multiply(11, B, i_p1_l.z);
if (eq_val[3] && ~eq_wait[6]) begin // 6. C = C * i_p1.z mod p [eq3]
multiply(6, C, i_p1_l.z);
end else
if (eq_val[11] && ~eq_wait[12]) begin // 12. i_p2.y = i_p2.y * i_p1.y [eq11]
multiply(12, i_p1_l.y, i_p2_l.y);
if (eq_val[6] && ~eq_wait[7]) begin // 7. C = C * i_p2.y mod p [eq6] .. S2
multiply(7, C, i_p2_l.y);
end else
if (eq_val[9] && ~eq_wait[10]) begin // 10. o_p.x = i_p2.y * i_p2.y mod p [eq9]
multiply(10, i_p2_l.y, i_p2_l.y);
end else
if (eq_val[9] && ~eq_wait[11]) begin // 11. C = i_p1.y * i_p1.y mod p [eq9] .. H^2
multiply(11, i_p1_l.y, i_p1_l.y);
end else
if (eq_val[11] && eq_val[8] && ~eq_wait[12]) begin // 12. i_p2.x = C * i_p1.y mod p [eq8, eq11] ..H^3
multiply(12, C, i_p1_l.y);
end else
if (eq_val[11] && eq_val[8] && ~eq_wait[14]) begin // 14. i_p1.x = i_p1.x*C [eq11, eq8] ..U1*H^2
multiply(14, C, i_p1_l.x);
end else
if (eq_val[18] && eq_val[9] && ~eq_wait[19]) begin // 19. o_p.y = o_p.y * i_p2.y mod p [eq18, eq9]
multiply(19, o_p.y, i_p2_l.y);
end else
if (eq_val[5] && eq_val[12] && ~eq_wait[20]) begin // 20. i_p2.x = i_p2.x * A [eq5, eq12]
multiply(20, i_p2_l.x, A);
end else
if (~eq_wait[22]) begin // 22. o_p.z = i_p1.z * i_p2.z mod p
multiply(22, i_p1_l.z, i_p2_l.z);
end else
if (eq_val[8] && eq_val[22] && ~eq_wait[23]) begin // 23. o_p.z = o_p.z * i_p1.y mod p [eq22, eq8]
multiply(23, o_p.z, i_p1_l.y);
end
// Issue new modulo reductions
if (~eq_wait[5]) begin // 5. o_p.x = i_p1.x + i_p2.x mod p
modulo(5, i_p1.x + i_p2.x);
if (eq_val[15] && eq_val[14] && ~eq_wait[16]) begin // 16. i_p1.x = 2* i_p1.x mod p [eq15, eq14]
modulo(16, 2 * i_p1_l.x);
end
// Subtractions we do in-module
if (~eq_wait[0]) begin //0. A = i_p1.y - i_p2.y mod p
A <= subtract(0, i_p1_l.y, i_p2_l.y);
if (eq_val[1] && eq_val[3] && eq_val[5] && ~eq_wait[8]) begin //8. i_p1.y = i_p2.x - i_p1.x mod p [eq3, eq1, eq5] .. H
i_p1_l.y <= subtract(8, i_p2_l.x, i_p1_l.x);
end
if (~eq_wait[1]) begin //1. B = i_p1.x - i_p2.x mod p
B <= subtract(1, i_p1_l.x, i_p2_l.x);
if (eq_val[5] && eq_val[7] && ~eq_wait[9]) begin //9. i_p2.y = C - A mod p [eq5,eq7] ... R
i_p2_l.y <= subtract(9, C, A);
end
if (~eq_wait[7] && eq_val[6] && eq_val[4]) begin //7. o_p.x = i_p2.x - o_p.x mod p[eq6, eq4]
o_p.x <= subtract(7, i_p2_l.x, o_p.x);
if (eq_val[12] && eq_val[10] && ~eq_wait[13]) begin //13. o_p.x = o_p.x - i_p2.x mod p [eq12, eq10]
o_p.x <= subtract(13, o_p.x, i_p2_l.x);
end
if (~eq_wait[9] && eq_val[3] && eq_val[7] && eq_val[8]) begin //9. o_p.y = o_p.y - o_p.x mod p [eq3, eq7, eq8]
o_p.y <= subtract(9, o_p.y, o_p.x);
if (eq_val[16] && eq_val[13] && ~eq_wait[17]) begin //17. o_p.x = o_p.x - i_p1.x [eq16, eq13]
o_p.x <= subtract(17, o_p.x, i_p1_l.x);
end
if (~eq_wait[13] && eq_val[12] && eq_val[10]) begin //13. o_p.y = o_p.y - i_p2.y mod p [eq12, eq10]
o_p.y <= subtract(13, o_p.y, i_p2_l.y);
if (eq_val[17] && eq_val[15] && ~eq_wait[18]) begin //18. o_p.y = o_p.y - o_p.x mod p [eq17, eq15]
o_p.y <= subtract(18, o_p.y, o_p.x);
end
if (eq_val[20] && eq_val[19] && ~eq_wait[21]) begin //21. o_p.y = o_p.y - i_p2.x [eq20, eq19]
o_p.y <= subtract(21, o_p.y, i_p2_l.x);
end
// Assignments
if (eq_val[14] && ~eq_wait[15]) begin //15. o_p.y = i_p1.x [eq14]
eq_wait[15] <= 1;
eq_val[15] <= 1;
o_p.y <= i_p1_l.x;
end
if (&eq_val) begin
state <= FINISHED;

View File

@ -116,7 +116,11 @@ always_ff @ (posedge i_clk) begin
p_add_done <= 1;
end
// Don't need to double on the final bit
if ((k_l >> 1) != 0)
p_dbl_in_val <= 1;
else
p_dbl_done <= 1;
if (k_l == 0) begin
state <= FINISHED;
@ -189,7 +193,7 @@ packet_arb # (
.DAT_BYTS ( 512/8 ),
.CTL_BITS ( 8 ),
.NUM_IN ( 2 ),
.PIPELINE ( 1 )
.PIPELINE ( 0 )
)
packet_arb_mult (
.i_clk ( i_clk ),
@ -202,7 +206,7 @@ packet_arb # (
.DAT_BYTS ( 512/8 ),
.CTL_BITS ( 8 ),
.NUM_IN ( 2 ),
.PIPELINE ( 1 )
.PIPELINE ( 0 )
)
packet_arb_mod (
.i_clk ( i_clk ),

View File

@ -0,0 +1,187 @@
/*
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
`timescale 1ps/1ps
// Testbench for secp256k1_point_add.
// Two Jacobian points are streamed in on in_if, the DUT result is collected
// from out_if and compared against the add_jb_point() software model from
// secp256k1_pkg. A multiply-mod unit and a modular reduction unit are
// attached to the DUT's multiplier / modulo interfaces.
module secp256k1_point_add_tb ();
import common_pkg::*;
import secp256k1_pkg::*;

localparam CLK_PERIOD = 1000;

logic clk, rst;

if_axi_stream #(.DAT_BYTS(256*6/8)) in_if(clk); // Two points
if_axi_stream #(.DAT_BYTS(256*3/8)) out_if(clk);

if_axi_stream #(.DAT_BYTS(256*2/8), .CTL_BITS(8)) mult_in_if(clk);
if_axi_stream #(.DAT_BYTS(256/8), .CTL_BITS(8)) mult_out_if(clk);

if_axi_stream #(.DAT_BYTS(256*2/8), .CTL_BITS(8)) mod_in_if(clk);
if_axi_stream #(.DAT_BYTS(256/8), .CTL_BITS(8)) mod_out_if(clk);

jb_point_t in_p1, in_p2, out_p;

// Unpack the two input points from the input stream word (p1 in the low
// 768 bits, p2 above it) and pack the result point onto the output stream.
always_comb begin
  in_p1 = in_if.dat[0 +: 256*3];
  in_p2 = in_if.dat[256*3 +: 256*3];
  out_if.dat = out_p;
end

// Reset starts low, is raised at 20*CLK_PERIOD and released at 40*CLK_PERIOD.
initial begin
  rst = 0;
  repeat(2) #(20*CLK_PERIOD) rst = ~rst;
end

// clk toggles every CLK_PERIOD, so one full clock cycle lasts 2*CLK_PERIOD.
initial begin
  clk = 0;
  forever #CLK_PERIOD clk = ~clk;
end

// The DUT does not drive these stream sideband signals; tie them off.
always_comb begin
  out_if.sop = 1;
  out_if.eop = 1;
  out_if.ctl = 0;
  out_if.mod = 0;
end

// Check for errors
// ($error takes no finish number - only $fatal does - so the message is the
// first argument.)
always_ff @ (posedge clk)
  if (out_if.val && out_if.err)
    $error("%m %t ERROR: output .err asserted", $time);

secp256k1_point_add secp256k1_point_add(
  .i_clk ( clk ),
  .i_rst ( rst ),
  // Input points
  .i_p1  ( in_p1      ),
  .i_p2  ( in_p2      ),
  .i_val ( in_if.val  ),
  .o_rdy ( in_if.rdy  ),
  .o_p   ( out_p      ),
  .o_err ( out_if.err ),
  .i_rdy ( out_if.rdy ),
  .o_val ( out_if.val ),
  .o_mult_if ( mult_in_if  ),
  .i_mult_if ( mult_out_if ),
  .o_mod_if  ( mod_in_if   ),
  .i_mod_if  ( mod_out_if  )
);

// Attach a mod reduction unit and multiply - mod unit
// In full design these could use dedicated multipliers or be arbitrated
secp256k1_mult_mod #(
  .CTL_BITS ( 8 )
)
secp256k1_mult_mod (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_dat_a ( mult_in_if.dat[0 +: 256]   ),
  .i_dat_b ( mult_in_if.dat[256 +: 256] ),
  .i_val ( mult_in_if.val ),
  .i_err ( mult_in_if.err ),
  .i_ctl ( mult_in_if.ctl ),
  .o_rdy ( mult_in_if.rdy ),
  .o_dat ( mult_out_if.dat ),
  .i_rdy ( mult_out_if.rdy ),
  .o_val ( mult_out_if.val ),
  .o_ctl ( mult_out_if.ctl ),
  .o_err ( mult_out_if.err )
);

secp256k1_mod #(
  .USE_MULT ( 0 ),
  .CTL_BITS ( 8 )
)
secp256k1_mod (
  .i_clk( clk ),
  .i_rst( rst ),
  .i_dat( mod_in_if.dat ),
  .i_val( mod_in_if.val ),
  .i_err( mod_in_if.err ),
  .i_ctl( mod_in_if.ctl ),
  .o_rdy( mod_in_if.rdy ),
  .o_dat( mod_out_if.dat ),
  .o_ctl( mod_out_if.ctl ),
  .o_err( mod_out_if.err ),
  .i_rdy( mod_out_if.rdy ),
  .o_val( mod_out_if.val )
);

// Adds two fixed points (p1 with z = 1, p2 with a general z), then checks the
// DUT output against the software model. Ends the simulation with $fatal on
// a mismatch.
task test_0();
begin
  integer signed get_len;
  logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat;
  jb_point_t p1, p2, p_exp, p_out;
  $display("Running test_0...");

  // Struct literals use assignment patterns ('{...}) so the member names are
  // bound by the language rather than relying on tool-specific leniency.
  p1 = '{x:256'h79be667ef9dcbbac55a06295ce870b07029bfcdb2dce28d959f2815b16f81798,
         y:256'h483ada7726a3c4655da4fbfc0e1108a8fd17b448a68554199c47d08ffb10d4b8,
         z:256'h1};

  p2 = '{x:256'h7d152c041ea8e1dc2191843d1fa9db55b68f88fef695e2c791d40444b365afc2,
         y:256'h56915849f52cc8f76f5fd7e4bf60db4a43bf633e1b1383f85fe89164bfadcbdb,
         z:256'h9075b4ee4d4788cabb49f7f81c221151fa2f68914d0aa833388fa11ff621a970};

  p_exp = add_jb_point(p1, p2);

  // Drive the input and wait for the result concurrently.
  fork
    in_if.put_stream({p2, p1}, 256*6/8);
    out_if.get_stream(get_dat, get_len);
  join

  p_out = get_dat;

  $display("%d %d %d", on_curve(p1), on_curve(p2), on_curve(p_out));

  if (p_exp != p_out) begin
    $display("Expected:");
    print_jb_point(p_exp);
    $display("Was:");
    print_jb_point(p_out);
    $fatal(1, "%m %t ERROR: test_0 point was wrong", $time);
  end

  $display("test_0 PASSED");
end
endtask;

// Placeholder - currently empty and not called from this testbench.
function compare_point();
endfunction

// Test sequence: hold the stream handshakes idle, wait 40*CLK_PERIOD (the
// time at which reset is released), run the test, then finish.
initial begin
  out_if.rdy = 0;
  in_if.val = 0;
  #(40*CLK_PERIOD);

  test_0();

  #1us $finish();
end
endmodule

View File

@ -79,14 +79,17 @@ begin
integer signed get_len;
logic [common_pkg::MAX_SIM_BYTS*8-1:0] expected, get_dat;
logic [255:0] in_a, in_b;
integer start_time, finish_time;
jb_point_t p_in, p_out;
$display("Running test_0...");
p_in = secp256k1_pkg::G_p;
k_in = k;
start_time = $time;
fork
in_if.put_stream(p_in, 256*3/8);
out_if.get_stream(get_dat, get_len);
join
finish_time = $time;
p_out = get_dat;
@ -98,10 +101,11 @@ begin
$fatal(1, "%m %t ERROR: test with k=%d was wrong", $time, integer'(k));
end
$display("test with k=%d PASSED", integer'(k));
$display("test with k=%d PASSED in %d clocks", integer'(k), (finish_time-start_time)/CLK_PERIOD);
end
endtask;
initial begin
out_if.rdy = 0;
in_if.val = 0;
@ -119,6 +123,14 @@ initial begin
y:256'h435afe76017b8d55d04ff8a98dd60b2ba7eb6f87f6b28182ca4493d7165dd127,
z:256'h9242fa9c0b9f23a3bfea6a0eb6dbcfcbc4853fe9a25ee948105dc66a2a9b5baa});
test(4, {x:256'h9bae2d5bac61e6ea5de635bca754b2564b7d78c45277cad67e45c4cbbea6e706,
y:256'h34fb8147eed1c0fbe29ead4d6c472eb4ef7b2191fde09e494b2a9845fe3f605e,
z:256'hc327b5d2636b32f27b051e4742b1bbd5324432c1000bfedca4368a29f6654152});
test(1514155, {x:256'h759267d17957f567381462db6e240b75c9f6016091a7427cfbef33c398964a9d,
y:256'hd81ce7034647587a9b0ea5b52ac08c91f5cfae30f4eba2ade7fa68856fc0d691,
z:256'h7c9d27fb2de7927c982792630a0c86f411f2de60e8df44c5e9caff976658009c});
#1us $finish();
end
endmodule