Updates to secp256k1 core
This commit is contained in:
parent
e014bba045
commit
f95ffeab0c
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
Multiplication using Karatsuba-Ofman algorithm.
|
||||
|
||||
Multiple of these can be instantiated, each one takes 2 clocks cycles
|
||||
Multiple of these can be instantiated, each one takes 3 clock cycles
|
||||
per level. Fully pipelined so can accept a new input every clock.
|
||||
|
||||
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
|
||||
|
@ -42,17 +42,31 @@ localparam HBITS = BITS/2;
|
|||
logic [BITS-1:0] m0, m1, m2, dat_a, dat_b;
|
||||
logic [BITS*2-1:0] q;
|
||||
logic [HBITS-1:0] a0, a1;
|
||||
logic sign, sign_;
|
||||
logic val;
|
||||
logic [CTL_BITS-1:0] ctl;
|
||||
logic sign, sign_, sign_1;
|
||||
logic val, val_, val_1;
|
||||
logic [CTL_BITS-1:0] ctl, ctl_, ctl_1;
|
||||
logic [HBITS-1:0] a0_, a1_;
|
||||
logic [BITS-1:0] m0_, m1_, m2_;
|
||||
|
||||
always_ff @ (posedge i_clk) begin
|
||||
dat_a <= i_dat_a;
|
||||
dat_b <= i_dat_b;
|
||||
|
||||
o_dat <= q;
|
||||
o_val <= val;
|
||||
o_ctl <= ctl;
|
||||
o_val <= val_1;
|
||||
o_ctl <= ctl_1;
|
||||
|
||||
val_ <= val;
|
||||
val_1 <= val_;
|
||||
ctl_ <= ctl;
|
||||
ctl_1 <= ctl_;
|
||||
|
||||
a0_ <= a0;
|
||||
a1_ <= a1;
|
||||
|
||||
m0_ <= m0;
|
||||
m1_ <= m1;
|
||||
m2_ <= m2;
|
||||
end
|
||||
|
||||
generate
|
||||
|
@ -61,27 +75,29 @@ generate
|
|||
a1 = i_dat_b[HBITS +: HBITS] > i_dat_b[0 +: HBITS] ? i_dat_b[HBITS +: HBITS] - i_dat_b[0 +: HBITS] : i_dat_b[0 +: HBITS] - i_dat_b[HBITS +: HBITS];
|
||||
sign_ = ((i_dat_a[0 +: HBITS] < i_dat_a[HBITS +: HBITS]) ^
|
||||
(i_dat_b[HBITS +: HBITS] < i_dat_b[0 +: HBITS]));
|
||||
q = (m0 << BITS) + ((m0 + m2 + (sign == 1 ? -m1 : m1)) << HBITS) + m2;
|
||||
q = (m0_ << BITS) + ((m0_ + m2_ + (sign == 1 ? -m1_ : m1_)) << HBITS) + m2_;
|
||||
end
|
||||
|
||||
if (LEVEL == 1) begin: GEN_REC
|
||||
|
||||
always_comb begin
|
||||
m0 = i_dat_a[HBITS +: HBITS] * i_dat_b[HBITS +: HBITS];
|
||||
m2 = i_dat_a[0 +: HBITS] * i_dat_b[0 +: HBITS];
|
||||
m1 = (a0 * a1);
|
||||
sign = sign_;
|
||||
m0 = dat_a[HBITS +: HBITS] * dat_b[HBITS +: HBITS];
|
||||
m2 = dat_a[0 +: HBITS] * dat_b[0 +: HBITS];
|
||||
m1 = (a0_ * a1_);
|
||||
o_rdy = i_rdy;
|
||||
val = i_val;
|
||||
ctl = i_ctl;
|
||||
end
|
||||
always_ff @ (posedge i_clk) begin
|
||||
sign <= sign_1;
|
||||
sign_1 <= sign_;
|
||||
end
|
||||
|
||||
|
||||
end else begin
|
||||
// pipeline the other non-mult values x clock cycles and add them after multipliers
|
||||
logic [LEVEL-2:0] sign_r;
|
||||
|
||||
logic [LEVEL*3-1:0] sign_r;
|
||||
always_comb begin
|
||||
sign = sign_r[LEVEL-2];
|
||||
sign = sign_r[LEVEL*3-2];
|
||||
end
|
||||
|
||||
always_ff @ (posedge i_clk) begin
|
||||
|
@ -95,8 +111,8 @@ generate
|
|||
)
|
||||
karatsuba_ofman_mult_m0 (
|
||||
.i_clk ( i_clk ),
|
||||
.i_dat_a ( i_dat_a[HBITS +: HBITS] ),
|
||||
.i_dat_b ( i_dat_b[HBITS +: HBITS] ),
|
||||
.i_dat_a ( dat_a[HBITS +: HBITS] ),
|
||||
.i_dat_b ( dat_b[HBITS +: HBITS] ),
|
||||
.i_val ( i_val ),
|
||||
.o_val ( val ),
|
||||
.i_ctl ( i_ctl ),
|
||||
|
@ -113,8 +129,8 @@ generate
|
|||
)
|
||||
karatsuba_ofman_mult_m2 (
|
||||
.i_clk ( i_clk ),
|
||||
.i_dat_a ( i_dat_a[0 +: HBITS] ),
|
||||
.i_dat_b ( i_dat_b[0 +: HBITS] ),
|
||||
.i_dat_a ( dat_a[0 +: HBITS] ),
|
||||
.i_dat_b ( dat_b[0 +: HBITS] ),
|
||||
.i_val ( i_val ),
|
||||
.o_val (),
|
||||
.i_ctl ( 1'd0 ),
|
||||
|
@ -131,8 +147,8 @@ generate
|
|||
)
|
||||
karatsuba_ofman_mult_m1 (
|
||||
.i_clk ( i_clk ),
|
||||
.i_dat_a ( a0 ),
|
||||
.i_dat_b ( a1 ),
|
||||
.i_dat_a ( a0_ ),
|
||||
.i_dat_b ( a1_ ),
|
||||
.i_val ( i_val ),
|
||||
.o_val (),
|
||||
.i_ctl ( 1'd0 ),
|
||||
|
|
|
@ -49,8 +49,8 @@ generate
|
|||
// Optionally pipeline the input
|
||||
if (PIPELINE == 0) begin: PIPELINE_GEN
|
||||
|
||||
always_comb i_axi[g].rdy = rdy[g];
|
||||
always_comb begin
|
||||
i_axi[g].rdy = rdy[g];
|
||||
val[g] = i_axi[g].val;
|
||||
eop[g] = i_axi[g].eop;
|
||||
sop[g] = i_axi[g].sop;
|
||||
|
|
|
@ -69,6 +69,48 @@ karatsuba_ofman_mult (
|
|||
.o_dat ( out_if.dat )
|
||||
);
|
||||
|
||||
// Pipeline test: presents a new multiplier input on consecutive clock edges
// (operands i and i, control value i, for i = 1..9) while a second thread
// collects each result from out_if and checks it against i*i.
task test_pipeline();
begin

  $display("Running test_pipeline...");
  fork
    // Driver thread.
    begin
      logic [255:0] in_a, in_b;
      integer i;
      integer max;
      // Assigned procedurally (not as declaration initializers) so the values
      // are re-applied on every call; initializers on static task variables
      // execute only once at the start of simulation.
      i = 1;
      max = 10;
      while (i < max) begin
        in_a = i;
        in_b = i;
        // Interface signals are driven directly, one new input per clock edge
        // (the put_stream helper is not used here).
        in_if.sop = 1;
        in_if.eop = 1;
        in_if.ctl = i;
        in_if.dat = {in_a, in_b};
        in_if.val = 1;
        @(posedge in_if.i_clk);
        i = i + 1;
      end
      in_if.val = 0;
    end
    // Checker thread.
    begin
      integer i;
      integer max;
      integer signed get_len;
      logic [common_pkg::MAX_SIM_BYTS*8-1:0] expected, get_dat;
      i = 1;
      max = 10;
      while (i < max) begin
        expected = i*i;
        out_if.get_stream(get_dat, get_len);
        common_pkg::compare_and_print(get_dat, expected);
        $display("test_pipeline PASSED loop %d/%d", i, max);
        i = i + 1;
      end
    end
  join

  $display("test_pipeline PASSED");
end
endtask
|
||||
|
||||
task test_loop();
|
||||
begin
|
||||
integer signed get_len;
|
||||
|
@ -104,6 +146,7 @@ initial begin
|
|||
in_if.val = 0;
|
||||
#(40*CLK_PERIOD);
|
||||
|
||||
test_pipeline();
|
||||
test_loop();
|
||||
|
||||
#1us $finish();
|
||||
|
|
|
@ -65,6 +65,8 @@ package secp256k1_pkg;
|
|||
function jb_point_t dbl_jb_point(jb_point_t p);
|
||||
logic signed [512:0] I_X, I_Y, I_Z, A, B, C, D, X, Y, Z;
|
||||
|
||||
if (p.z == 0) return p;
|
||||
|
||||
I_X = p.x;
|
||||
I_Y = p.y;
|
||||
I_Z = p.z;
|
||||
|
@ -83,6 +85,51 @@ package secp256k1_pkg;
|
|||
return dbl_jb_point;
|
||||
endfunction
|
||||
|
||||
// Reference-model addition of two points given in Jacobian coordinates.
// Returns p1 + p2. An input with z == 0 is treated as the identity and the
// other input is returned unchanged. All arithmetic is reduced modulo p_eq.
//
//   U1 = X1*Z2^2      U2 = X2*Z1^2
//   S1 = Y1*Z2^3      S2 = Y2*Z1^3
//   H  = U2 - U1      R  = S2 - S1
//   X3 = R^2 - H^3 - 2*U1*H^2
//   Y3 = R*(U1*H^2 - X3) - S1*H^3
//   Z3 = H*Z1*Z2
function jb_point_t add_jb_point(jb_point_t p1, p2);
  logic signed [512:0] A, U1, U2, S1, S2, H, H3, R;

  if (p1.z == 0) return p2;
  if (p2.z == 0) return p1;

  U1 = p1.x*p2.z % p_eq;
  U1 = U1*p2.z % p_eq;

  U2 = p2.x*p1.z % p_eq;
  U2 = U2 *p1.z % p_eq;

  S1 = p1.y *p2.z % p_eq;
  S1 = (S1*p2.z % p_eq) *p2.z % p_eq;

  S2 = p2.y * p1.z % p_eq;
  S2 = (S2*p1.z % p_eq) *p1.z % p_eq;

  // Both inputs describe the same point when their Z-normalised coordinates
  // agree. Comparing raw x/y is not sufficient because one point has many
  // Jacobian representations. The addition formulas degenerate (H = R = 0)
  // in this case, so use the doubling routine instead.
  if (U1 == U2 && S1 == S2)
    return (dbl_jb_point(p1));

  // Modular subtraction: add p_eq first when the plain difference would be negative.
  H = U2 + (U1 > U2 ? p_eq : 0) -U1;
  R = S2 + (S1 > S2 ? p_eq : 0) -S1;

  H3 = ((H * H %p_eq ) * H ) % p_eq;             // H^3
  A = (((2*U1 % p_eq) *H % p_eq) * H % p_eq);    // 2*U1*H^2

  // When U1 == U2 but S1 != S2, H is 0 and so the returned z is 0.
  add_jb_point.z = ((H * p1.z % p_eq) * p2.z) % p_eq;
  add_jb_point.x = R*R % p_eq;

  add_jb_point.x = add_jb_point.x + (H3 > add_jb_point.x ? p_eq : 0) - H3;
  add_jb_point.x = add_jb_point.x + (A > add_jb_point.x ? p_eq : 0) - A;

  A = (U1*H % p_eq) * H % p_eq;                  // U1*H^2
  A = A + (add_jb_point.x > A ? p_eq : 0) - add_jb_point.x;
  A = A*R % p_eq;                                // R*(U1*H^2 - X3)
  add_jb_point.y = S1*H3 % p_eq;                 // S1*H^3

  add_jb_point.y = A + (add_jb_point.y > A ? p_eq : 0) - add_jb_point.y;

endfunction
|
||||
|
||||
// Checks whether a Jacobian point satisfies the curve equation
//   Y^2 = X^3 + a*X*Z^4 + b*Z^6   (mod p_eq)
// using the package constants a and b. Returns 1 when the equation holds,
// 0 otherwise. Intermediates are 513 bits wide and reduced after every
// multiply so that no product is truncated.
function on_curve(jb_point_t p);
  logic [512:0] z2, z4, z6, lhs, rhs, t;

  z2  = (p.z * p.z) % p_eq;
  z4  = (z2 * z2) % p_eq;
  z6  = (z4 * z2) % p_eq;

  lhs = (p.y * p.y) % p_eq;                 // Y^2

  rhs = (p.x * p.x) % p_eq;
  rhs = (rhs * p.x) % p_eq;                 // X^3

  t   = (secp256k1_pkg::a * p.x) % p_eq;
  t   = (t * z4) % p_eq;                    // a*X*Z^4
  rhs = (rhs + t) % p_eq;

  t   = (secp256k1_pkg::b * z6) % p_eq;     // b*Z^6
  rhs = (rhs + t) % p_eq;

  return (lhs == rhs);
endfunction
|
||||
|
|
|
@ -41,32 +41,54 @@ module secp256k1_point_add
|
|||
);
|
||||
|
||||
/*
|
||||
* These are the equations that need to be computed, they are issued as variables
|
||||
* become valid. We have a bitmask to track what equation results are valid which
|
||||
* will trigger other equations. [] show what equations must be valid before this starts.
|
||||
* We reuse input points (as they are latched) when possible to reduce register usage.
|
||||
*
|
||||
* 0. A = i_p1.y - i_p2.y mod p
|
||||
* 1. B = i_p1.x - i_p2.x mod p
|
||||
* 2. o_p.z = B * i_p1.z mod p [eq1]
|
||||
* 3. i_p1.z = B * B mod p [eq2]
|
||||
* 4. i_p2.x = A * A mod p [eq0, eq5]
|
||||
* 5. o_p.x = i_p1.x + i_p2.x mod p
|
||||
* 6. o_p.x = o_p.x * i_p1.z mod p [eq5, eq3]
|
||||
* 7. o_p.x = i_p2.x - o_p.x mod p[eq6, eq4]
|
||||
* 8. o_p.y = i_p1.x*i_p1.z mod p [eq3]
|
||||
* 9. o_p.y = o_p.y - o_p.x mod p [eq3, eq7, eq8]
|
||||
* 10. o_p.y = o_p.y * A mod p [eq0, eq9]
|
||||
* 11. i_p2.y = B * i_p1.z mod p [eq1, eq3, eq0]
|
||||
* 12. i_p2.y = i_p2.y * i_p1.y [eq11]
|
||||
* 13. o_p.y = o_p.y - i_p2.y mod p [eq12, eq10]
|
||||
*/
|
||||
These are the equations that need to be computed, they are issued as variables
|
||||
become valid. We have a bitmask to track what equation results are valid which
|
||||
will trigger other equations. [] show what equations must be valid before this starts.
|
||||
We reuse input points (as they are latched) when possible to reduce register usage.
|
||||
Taken from https://en.wikibooks.org/wiki/Cryptography/Prime_Curve/Jacobian_Coordinates
|
||||
|
||||
U1 = X1*Z2^2
|
||||
U2 = X2*Z1^2
|
||||
S1 = Y1*Z2^3
|
||||
S2 = Y2*Z1^3
|
||||
H = U2 - U1
|
||||
R = S2 - S1
|
||||
X3 = R^2 - H^3 - 2*U1*H^2
|
||||
Y3 = R*(U1*H^2 - X3) - S1*H^3
|
||||
Z3 = H*Z1*Z2
|
||||
|
||||
|
||||
0. A = i_p2.z*i_p2.z mod p
|
||||
1. i_p1.x = A * i_p1.x mod p [eq0] ..U1
|
||||
2. C = i_p1.z*i_p1.z mod p
|
||||
3. i_p2.x = C * i_p2.x mod p [eq2] ... U2
|
||||
4. A = A * i_p2.z mod p [eq1]
|
||||
5. A = A * i_p1.y [eq4] ... S1
|
||||
6. C = C * i_p1.z mod p [eq3]
|
||||
7. C = C * i_p2.y mod p [eq6] .. S2
|
||||
8. i_p1.y = i_p2.x - i_p1.x mod p [eq3, eq1, eq5] .. H
|
||||
9. i_p2.y = C - A mod p [eq5,eq7] ... R
|
||||
10. o_p.x = i_p2.y * i_p2.y mod p [eq9] ... R^2
|
||||
11. C = i_p1.y * i_p1.y mod p [eq9] .. H^2
|
||||
12. i_p2.x = C * i_p1.y mod p [eq8, eq11] ..H^3
|
||||
13. o_p.x = o_p.x - i_p2.x mod p [eq12, eq10]
|
||||
14. i_p1.x = i_p1.x*C [eq11, eq8] ..U1*H^2
|
||||
15. o_p.y = i_p1.x [eq14]
|
||||
16. i_p1.x = 2* i_p1.x mod p [eq15, eq14]
|
||||
17. o_p.x = o_p.x - i_p1.x [eq16, eq13]
|
||||
18. o_p.y = o_p.y - o_p.x mod p [eq17, eq15]
|
||||
19. o_p.y = o_p.y * i_p2.y mod p [eq18, eq9]
|
||||
20. i_p2.x = i_p2.x * A [eq5, eq12]
|
||||
21. o_p.y = o_p.y - i_p2.x [eq20, eq19]
|
||||
22. o_p.z = i_p1.z * i_p2.z mod p
|
||||
23. o_p.z = o_p.z * i_p1.y mod p [eq22, eq8]
|
||||
*/
|
||||
|
||||
// We also check in the initial state if one of the inputs is "None" (.z == 0), and set the output to the other point
|
||||
logic [13:0] eq_val, eq_wait;
|
||||
logic [23:0] eq_val, eq_wait;
|
||||
|
||||
// Temporary variables
|
||||
logic [255:0] A, B;
|
||||
logic [255:0] A, C;
|
||||
jb_point_t i_p1_l, i_p2_l;
|
||||
|
||||
always_comb begin
|
||||
|
@ -99,7 +121,7 @@ always_ff @ (posedge i_clk) begin
|
|||
i_p2_l <= 0;
|
||||
o_err <= 0;
|
||||
A <= 0;
|
||||
B <= 0;
|
||||
C <= 0;
|
||||
end else begin
|
||||
|
||||
if (o_mult_if.rdy) o_mult_if.val <= 0;
|
||||
|
@ -115,7 +137,7 @@ always_ff @ (posedge i_clk) begin
|
|||
i_p1_l <= i_p1;
|
||||
i_p2_l <= i_p2;
|
||||
A <= 0;
|
||||
B <= 0;
|
||||
C <= 0;
|
||||
if (i_val && o_rdy) begin
|
||||
state <= START;
|
||||
o_rdy <= 0;
|
||||
|
@ -145,11 +167,11 @@ always_ff @ (posedge i_clk) begin
|
|||
i_mod_if.rdy <= 1;
|
||||
i_mult_if.rdy <= 1;
|
||||
|
||||
// Check any results from multiplier
|
||||
// Check any results from modulo
|
||||
if (i_mod_if.val && i_mod_if.rdy) begin
|
||||
eq_val[i_mod_if.ctl] <= 1;
|
||||
case(i_mod_if.ctl)
|
||||
5: o_p.x <= i_mod_if.dat;
|
||||
16: i_p1_l.x <= i_mod_if.dat;
|
||||
default: o_err <= 1;
|
||||
endcase
|
||||
end
|
||||
|
@ -158,67 +180,107 @@ always_ff @ (posedge i_clk) begin
|
|||
if (i_mult_if.val && i_mult_if.rdy) begin
|
||||
eq_val[i_mult_if.ctl] <= 1;
|
||||
case(i_mult_if.ctl) inside
|
||||
2: o_p.z <= i_mult_if.dat;
|
||||
3: i_p1_l.z <= i_mult_if.dat;
|
||||
4: i_p2_l.x <= i_mult_if.dat;
|
||||
6: o_p.x <= i_mult_if.dat;
|
||||
8: o_p.y <= i_mult_if.dat;
|
||||
10: o_p.y <= i_mult_if.dat;
|
||||
11: i_p1_l.y <= i_mult_if.dat;
|
||||
12: i_p2_l.y <= i_mult_if.dat;
|
||||
0: A <= i_mult_if.dat;
|
||||
1: i_p1_l.x <= i_mult_if.dat;
|
||||
2: C <= i_mult_if.dat;
|
||||
3: i_p2_l.x <= i_mult_if.dat;
|
||||
4: A <= i_mult_if.dat;
|
||||
5: A <= i_mult_if.dat;
|
||||
6: C <= i_mult_if.dat;
|
||||
7: C <= i_mult_if.dat;
|
||||
10: o_p.x <= i_mult_if.dat;
|
||||
11: C <= i_mult_if.dat;
|
||||
12: i_p2_l.x <= i_mult_if.dat;
|
||||
14: i_p1_l.x <= i_mult_if.dat;
|
||||
19: o_p.y <= i_mult_if.dat;
|
||||
20: i_p2_l.x <= i_mult_if.dat;
|
||||
22: o_p.z <= i_mult_if.dat;
|
||||
23: o_p.z <= i_mult_if.dat;
|
||||
default: o_err <= 1;
|
||||
endcase
|
||||
end
|
||||
|
||||
// Issue new multiplies
|
||||
if (eq_val[1] && ~eq_wait[2]) begin // 2. o_p.z = B * i_p1.z mod p [eq1]
|
||||
multiply(2, B, i_p1_l.z);
|
||||
if (~eq_wait[0]) begin // 0. A = i_p2.z*i_p2.z mod p
|
||||
multiply(0, i_p2_l.z, i_p2_l.z);
|
||||
end else
|
||||
if (eq_val[2] && ~eq_wait[3]) begin // 3. i_p1.z = B * B mod p [eq2]
|
||||
multiply(3, B, B);
|
||||
if (eq_val[0] && ~eq_wait[1]) begin // 1. i_p1.x = A * i_p1.x mod p [eq0] ..U1
|
||||
multiply(1, A, i_p1_l.x);
|
||||
end else
|
||||
if (eq_val[0] && eq_val[5] && ~eq_wait[4]) begin // 4. i_p2.x = A * A mod p [eq0, eq5]
|
||||
multiply(4, A, A);
|
||||
if (~eq_wait[2]) begin // 2. C = i_p1.z*i_p1.z mod p
|
||||
multiply(2, i_p1_l.z, i_p1_l.z);
|
||||
end else
|
||||
if (eq_val[3] && eq_val[5] && ~eq_wait[6]) begin // 6. o_p.x = o_p.x * i_p1.z mod p [eq5, eq3]
|
||||
multiply(6, o_p.x, i_p1_l.z);
|
||||
if (eq_val[2] && ~eq_wait[3]) begin // 3. i_p2.x = C * i_p2.x mod p [eq2] ... U2
|
||||
multiply(3, C, i_p2_l.x);
|
||||
end else
|
||||
if (eq_val[3] && ~eq_wait[8]) begin // 8. o_p.y = i_p1.x*i_p1.z mod p [eq3]
|
||||
multiply(8, i_p1_l.x, i_p1_l.z);
|
||||
if (eq_val[1] && ~eq_wait[4]) begin // 4. A = A * i_p2.z mod p [eq1]
|
||||
multiply(4, A, i_p2_l.z);
|
||||
end else
|
||||
if (eq_val[0] && eq_val[9] && ~eq_wait[10]) begin // 10. o_p.y = o_p.y * A mod p [eq0, eq9]
|
||||
multiply(10, o_p.y, A);
|
||||
if (eq_val[4] && ~eq_wait[5]) begin // 5. A = A * i_p1.y [eq4] ... S1
|
||||
multiply(5, A, i_p1_l.y);
|
||||
end else
|
||||
if (eq_val[0] && eq_val[1] && eq_val[3] && ~eq_wait[11]) begin // 11. i_p2.y = B * i_p1.z mod p [eq1, eq3, eq0]
|
||||
multiply(11, B, i_p1_l.z);
|
||||
if (eq_val[3] && ~eq_wait[6]) begin // 6. C = C * i_p1.z mod p [eq3]
|
||||
multiply(6, C, i_p1_l.z);
|
||||
end else
|
||||
if (eq_val[11] && ~eq_wait[12]) begin // 12. i_p2.y = i_p2.y * i_p1.y [eq11]
|
||||
multiply(12, i_p1_l.y, i_p2_l.y);
|
||||
if (eq_val[6] && ~eq_wait[7]) begin // 7. C = C * i_p2.y mod p [eq6] .. S2
|
||||
multiply(7, C, i_p2_l.y);
|
||||
end else
|
||||
if (eq_val[9] && ~eq_wait[10]) begin // 10. o_p.x = i_p2.y * i_p2.y mod p [eq9]
|
||||
multiply(10, i_p2_l.y, i_p2_l.y);
|
||||
end else
|
||||
if (eq_val[9] && ~eq_wait[11]) begin // 11. C = i_p1.y * i_p1.y mod p [eq9] .. H^2
|
||||
multiply(11, i_p1_l.y, i_p1_l.y);
|
||||
end else
|
||||
if (eq_val[11] && eq_val[8] && ~eq_wait[12]) begin // 12. i_p2.x = C * i_p1.y mod p [eq8, eq11] ..H^3
|
||||
multiply(12, C, i_p1_l.y);
|
||||
end else
|
||||
if (eq_val[11] && eq_val[8] && ~eq_wait[14]) begin // 14. i_p1.x = i_p1.x*C [eq11, eq8] ..U1*H^2
|
||||
multiply(14, C, i_p1_l.x);
|
||||
end else
|
||||
if (eq_val[18] && eq_val[9] && ~eq_wait[19]) begin // 19. o_p.y = o_p.y * i_p2.y mod p [eq18, eq9]
|
||||
multiply(19, o_p.y, i_p2_l.y);
|
||||
end else
|
||||
if (eq_val[5] && eq_val[12] && ~eq_wait[20]) begin // 20. i_p2.x = i_p2.x * A [eq5, eq12]
|
||||
multiply(20, i_p2_l.x, A);
|
||||
end else
|
||||
if (~eq_wait[22]) begin // 22. o_p.z = i_p1.z * i_p2.z mod p
|
||||
multiply(22, i_p1_l.z, i_p2_l.z);
|
||||
end else
|
||||
if (eq_val[8] && eq_val[22] && ~eq_wait[23]) begin // 23. o_p.z = o_p.z * i_p1.y mod p [eq22, eq8]
|
||||
multiply(23, o_p.z, i_p1_l.y);
|
||||
end
|
||||
|
||||
|
||||
// Issue new modulo reductions
|
||||
if (~eq_wait[5]) begin // 5. o_p.x = i_p1.x + i_p2.x mod p
|
||||
modulo(5, i_p1.x + i_p2.x);
|
||||
if (eq_val[15] && eq_val[14] && ~eq_wait[16]) begin // 16. i_p1.x = 2* i_p1.x mod p [eq15, eq14]
|
||||
modulo(16, 2 * i_p1_l.x);
|
||||
end
|
||||
|
||||
// Subtractions we do in-module
|
||||
if (~eq_wait[0]) begin //0. A = i_p1.y - i_p2.y mod p
|
||||
A <= subtract(0, i_p1_l.y, i_p2_l.y);
|
||||
if (eq_val[1] && eq_val[3] && eq_val[5] && ~eq_wait[8]) begin //8. i_p1.y = i_p2.x - i_p1.x mod p [eq3, eq1, eq5] .. H
|
||||
i_p1_l.y <= subtract(8, i_p2_l.x, i_p1_l.x);
|
||||
end
|
||||
if (~eq_wait[1]) begin //1. B = i_p1.x - i_p2.x mod p
|
||||
B <= subtract(1, i_p1_l.x, i_p2_l.x);
|
||||
end
|
||||
if (~eq_wait[7] && eq_val[6] && eq_val[4]) begin //7. o_p.x = i_p2.x - o_p.x mod p[eq6, eq4]
|
||||
o_p.x <= subtract(7, i_p2_l.x, o_p.x);
|
||||
end
|
||||
if (~eq_wait[9] && eq_val[3] && eq_val[7] && eq_val[8]) begin //9. o_p.y = o_p.y - o_p.x mod p [eq3, eq7, eq8]
|
||||
o_p.y <= subtract(9, o_p.y, o_p.x);
|
||||
end
|
||||
if (~eq_wait[13] && eq_val[12] && eq_val[10]) begin //13. o_p.y = o_p.y - i_p2.y mod p [eq12, eq10]
|
||||
o_p.y <= subtract(13, o_p.y, i_p2_l.y);
|
||||
if (eq_val[5] && eq_val[7] && ~eq_wait[9]) begin //9. i_p2.y = C - A mod p [eq5,eq7] ... R
|
||||
i_p2_l.y <= subtract(9, C, A);
|
||||
end
|
||||
if (eq_val[12] && eq_val[10] && ~eq_wait[13]) begin //13. o_p.x = o_p.x - i_p2.x mod p [eq12, eq10]
|
||||
o_p.x <= subtract(13, o_p.x, i_p2_l.x);
|
||||
end
|
||||
if (eq_val[16] && eq_val[13] && ~eq_wait[17]) begin //17. o_p.x = o_p.x - i_p1.x [eq16, eq13]
|
||||
o_p.x <= subtract(17, o_p.x, i_p1_l.x);
|
||||
end
|
||||
|
||||
|
||||
if (eq_val[17] && eq_val[15] && ~eq_wait[18]) begin //18. o_p.y = o_p.y - o_p.x mod p [eq17, eq15]
|
||||
o_p.y <= subtract(18, o_p.y, o_p.x);
|
||||
end
|
||||
if (eq_val[20] && eq_val[19] && ~eq_wait[21]) begin //21. o_p.y = o_p.y - i_p2.x [eq20, eq19]
|
||||
o_p.y <= subtract(21, o_p.y, i_p2_l.x);
|
||||
end
|
||||
|
||||
// Assignments
|
||||
if (eq_val[14] && ~eq_wait[15]) begin //15. o_p.y = i_p1.x [eq14]
|
||||
eq_wait[15] <= 1;
|
||||
eq_val[15] <= 1;
|
||||
o_p.y <= i_p1_l.x;
|
||||
end
|
||||
|
||||
if (&eq_val) begin
|
||||
state <= FINISHED;
|
||||
|
|
|
@ -116,7 +116,11 @@ always_ff @ (posedge i_clk) begin
|
|||
p_add_done <= 1;
|
||||
end
|
||||
|
||||
p_dbl_in_val <= 1;
|
||||
// Don't need to double on the final bit
|
||||
if ((k_l >> 1) != 0)
|
||||
p_dbl_in_val <= 1;
|
||||
else
|
||||
p_dbl_done <= 1;
|
||||
|
||||
if (k_l == 0) begin
|
||||
state <= FINISHED;
|
||||
|
@ -189,7 +193,7 @@ packet_arb # (
|
|||
.DAT_BYTS ( 512/8 ),
|
||||
.CTL_BITS ( 8 ),
|
||||
.NUM_IN ( 2 ),
|
||||
.PIPELINE ( 1 )
|
||||
.PIPELINE ( 0 )
|
||||
)
|
||||
packet_arb_mult (
|
||||
.i_clk ( i_clk ),
|
||||
|
@ -202,7 +206,7 @@ packet_arb # (
|
|||
.DAT_BYTS ( 512/8 ),
|
||||
.CTL_BITS ( 8 ),
|
||||
.NUM_IN ( 2 ),
|
||||
.PIPELINE ( 1 )
|
||||
.PIPELINE ( 0 )
|
||||
)
|
||||
packet_arb_mod (
|
||||
.i_clk ( i_clk ),
|
||||
|
|
|
@ -0,0 +1,187 @@
|
|||
/*
|
||||
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
`timescale 1ps/1ps
|
||||
|
||||
// Testbench for secp256k1_point_add. Two Jacobian points are streamed in on
// in_if, the DUT result is read back from out_if and compared against the
// add_jb_point() model imported from secp256k1_pkg.
module secp256k1_point_add_tb ();
import common_pkg::*;
import secp256k1_pkg::*;

// clk toggles every CLK_PERIOD time units (see the clock generator below).
localparam CLK_PERIOD = 1000;

logic clk, rst;

if_axi_stream #(.DAT_BYTS(256*6/8)) in_if(clk); // Two points
if_axi_stream #(.DAT_BYTS(256*3/8)) out_if(clk);

// Links between the DUT and the multiply-mod unit.
if_axi_stream #(.DAT_BYTS(256*2/8), .CTL_BITS(8)) mult_in_if(clk);
if_axi_stream #(.DAT_BYTS(256/8), .CTL_BITS(8)) mult_out_if(clk);

// Links between the DUT and the mod reduction unit.
if_axi_stream #(.DAT_BYTS(256*2/8), .CTL_BITS(8)) mod_in_if(clk);
if_axi_stream #(.DAT_BYTS(256/8), .CTL_BITS(8)) mod_out_if(clk);

jb_point_t in_p1, in_p2, out_p;

// The input stream carries both points: the low 768 bits are p1 and the
// high 768 bits are p2. The DUT output point drives the output stream data.
always_comb begin
  in_p1 = in_if.dat[0 +: 256*3];
  in_p2 = in_if.dat[256*3 +: 256*3];
  out_if.dat = out_p;
end

// Reset is asserted from 20*CLK_PERIOD until 40*CLK_PERIOD.
initial begin
  rst = 0;
  repeat(2) #(20*CLK_PERIOD) rst = ~rst;
end

initial begin
  clk = 0;
  forever #CLK_PERIOD clk = ~clk;
end

// Output stream sideband signals are tied off; every output is a single beat.
always_comb begin
  out_if.sop = 1;
  out_if.eop = 1;
  out_if.ctl = 0;
  out_if.mod = 0;
end

// Check for errors
always_ff @ (posedge clk)
  if (out_if.val && out_if.err)
    // $error takes only a message argument list (no finish number, unlike $fatal).
    $error("%m %t ERROR: output .err asserted", $time);

secp256k1_point_add secp256k1_point_add(
  .i_clk ( clk ),
  .i_rst ( rst ),
  // Input points
  .i_p1  ( in_p1 ),
  .i_p2  ( in_p2 ),
  .i_val ( in_if.val ),
  .o_rdy ( in_if.rdy ),
  .o_p   ( out_p ),
  .o_err ( out_if.err ),
  .i_rdy ( out_if.rdy ),
  .o_val ( out_if.val ) ,
  .o_mult_if ( mult_in_if ),
  .i_mult_if ( mult_out_if ),
  .o_mod_if ( mod_in_if ),
  .i_mod_if ( mod_out_if )
);

// Attach a mod reduction unit and multiply - mod unit
// In full design these could use dedicated multipliers or be arbitrated
secp256k1_mult_mod #(
  .CTL_BITS ( 8 )
)
secp256k1_mult_mod (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_dat_a ( mult_in_if.dat[0 +: 256] ),
  .i_dat_b ( mult_in_if.dat[256 +: 256] ),
  .i_val ( mult_in_if.val ),
  .i_err ( mult_in_if.err ),
  .i_ctl ( mult_in_if.ctl ),
  .o_rdy ( mult_in_if.rdy ),
  .o_dat ( mult_out_if.dat ),
  .i_rdy ( mult_out_if.rdy ),
  .o_val ( mult_out_if.val ),
  .o_ctl ( mult_out_if.ctl ),
  .o_err ( mult_out_if.err )
);

secp256k1_mod #(
  .USE_MULT ( 0 ),
  .CTL_BITS ( 8 )
)
secp256k1_mod (
  .i_clk( clk ),
  .i_rst( rst ),
  .i_dat( mod_in_if.dat ),
  .i_val( mod_in_if.val ),
  .i_err( mod_in_if.err ),
  .i_ctl( mod_in_if.ctl ),
  .o_rdy( mod_in_if.rdy ),
  .o_dat( mod_out_if.dat ),
  .o_ctl( mod_out_if.ctl ),
  .o_err( mod_out_if.err ),
  .i_rdy( mod_out_if.rdy ),
  .o_val( mod_out_if.val )
);

// Adds two fixed points (p1 has z == 1, p2 has a non-trivial z) and requires
// the DUT output to match add_jb_point(p1, p2) exactly; any mismatch is fatal.
task test_0();
begin
  integer signed get_len;
  logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat;
  jb_point_t p1, p2, p_exp, p_out;
  $display("Running test_0...");

  p1 = '{x:256'h79be667ef9dcbbac55a06295ce870b07029bfcdb2dce28d959f2815b16f81798,
         y:256'h483ada7726a3c4655da4fbfc0e1108a8fd17b448a68554199c47d08ffb10d4b8,
         z:256'h1};
  p2 = '{x:256'h7d152c041ea8e1dc2191843d1fa9db55b68f88fef695e2c791d40444b365afc2,
         y:256'h56915849f52cc8f76f5fd7e4bf60db4a43bf633e1b1383f85fe89164bfadcbdb,
         z:256'h9075b4ee4d4788cabb49f7f81c221151fa2f68914d0aa833388fa11ff621a970};

  p_exp = add_jb_point(p1, p2);

  // p1 is placed in the low half of the stream word, matching the in_p1/in_p2 slicing.
  fork
    in_if.put_stream({p2, p1}, 256*6/8);
    out_if.get_stream(get_dat, get_len);
  join

  p_out = get_dat;

  // Informational only: the on_curve() results are printed but not checked.
  $display("%d %d %d", on_curve(p1), on_curve(p2), on_curve(p_out));

  if (p_exp != p_out) begin
    $display("Expected:");
    print_jb_point(p_exp);
    $display("Was:");
    print_jb_point(p_out);
    $fatal(1, "%m %t ERROR: test_0 point was wrong", $time);
  end

  $display("test_0 PASSED");
end
endtask

// Placeholder: currently empty and not called anywhere in this module.
function compare_point();

endfunction

// Main stimulus: waits 40*CLK_PERIOD (until reset has been released), runs
// the test, then finishes.
initial begin
  out_if.rdy = 0;
  in_if.val = 0;
  #(40*CLK_PERIOD);

  test_0();

  #1us $finish();
end
endmodule
|
|
@ -79,14 +79,17 @@ begin
|
|||
integer signed get_len;
|
||||
logic [common_pkg::MAX_SIM_BYTS*8-1:0] expected, get_dat;
|
||||
logic [255:0] in_a, in_b;
|
||||
integer start_time, finish_time;
|
||||
jb_point_t p_in, p_out;
|
||||
$display("Running test_0...");
|
||||
p_in = secp256k1_pkg::G_p;
|
||||
k_in = k;
|
||||
start_time = $time;
|
||||
fork
|
||||
in_if.put_stream(p_in, 256*3/8);
|
||||
out_if.get_stream(get_dat, get_len);
|
||||
join
|
||||
finish_time = $time;
|
||||
|
||||
p_out = get_dat;
|
||||
|
||||
|
@ -98,10 +101,11 @@ begin
|
|||
$fatal(1, "%m %t ERROR: test with k=%d was wrong", $time, integer'(k));
|
||||
end
|
||||
|
||||
$display("test with k=%d PASSED", integer'(k));
|
||||
$display("test with k=%d PASSED in %d clocks", integer'(k), (finish_time-start_time)/CLK_PERIOD);
|
||||
end
|
||||
endtask;
|
||||
|
||||
|
||||
initial begin
|
||||
out_if.rdy = 0;
|
||||
in_if.val = 0;
|
||||
|
@ -117,7 +121,15 @@ initial begin
|
|||
|
||||
test(3, {x:256'hca90ef9b06d7eb51d650e9145e3083cbd8df8759168862036f97a358f089848,
|
||||
y:256'h435afe76017b8d55d04ff8a98dd60b2ba7eb6f87f6b28182ca4493d7165dd127,
|
||||
z:256'h9242fa9c0b9f23a3bfea6a0eb6dbcfcbc4853fe9a25ee948105dc66a2a9b5baa});
|
||||
z:256'h9242fa9c0b9f23a3bfea6a0eb6dbcfcbc4853fe9a25ee948105dc66a2a9b5baa});
|
||||
|
||||
test(4, {x:256'h9bae2d5bac61e6ea5de635bca754b2564b7d78c45277cad67e45c4cbbea6e706,
|
||||
y:256'h34fb8147eed1c0fbe29ead4d6c472eb4ef7b2191fde09e494b2a9845fe3f605e,
|
||||
z:256'hc327b5d2636b32f27b051e4742b1bbd5324432c1000bfedca4368a29f6654152});
|
||||
|
||||
test(1514155, {x:256'h759267d17957f567381462db6e240b75c9f6016091a7427cfbef33c398964a9d,
|
||||
y:256'hd81ce7034647587a9b0ea5b52ac08c91f5cfae30f4eba2ade7fa68856fc0d691,
|
||||
z:256'h7c9d27fb2de7927c982792630a0c86f411f2de60e8df44c5e9caff976658009c});
|
||||
|
||||
#1us $finish();
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue