added temp variables to improve performance

This commit is contained in:
bsdevlin 2019-04-10 17:19:23 -04:00
parent b4df9a43f4
commit 64cd0c6019
1 changed files with 43 additions and 38 deletions

View File

@ -1,6 +1,6 @@
/* /*
This performs point addition. This performs point addition.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
@ -17,7 +17,7 @@
along with this program. If not, see <https://www.gnu.org/licenses/>. along with this program. If not, see <https://www.gnu.org/licenses/>.
*/ */
module secp256k1_point_add module secp256k1_point_add3
import secp256k1_pkg::*; import secp256k1_pkg::*;
#( #(
)( )(
@ -46,7 +46,7 @@ module secp256k1_point_add
will trigger other equations. [] show what equations must be valid before this starts. will trigger other equations. [] show what equations must be valid before this starts.
We reuse input points (as they are latched) when possible to reduce register usage. We reuse input points (as they are latched) when possible to reduce register usage.
Taken from https://en.wikibooks.org/wiki/Cryptography/Prime_Curve/Jacobian_Coordinates Taken from https://en.wikibooks.org/wiki/Cryptography/Prime_Curve/Jacobian_Coordinates
U1 = X1*Z2^2 U1 = X1*Z2^2
U2 = X2*Z1^2 U2 = X2*Z1^2
S1 = Y1*Z2^3 S1 = Y1*Z2^3
@ -66,13 +66,13 @@ module secp256k1_point_add
5. A = A * i_p1.y [eq4] ... S1 5. A = A * i_p1.y [eq4] ... S1
6. C = C * i_p1.z mod p [eq3] 6. C = C * i_p1.z mod p [eq3]
7. C = C * i_p2.y mod p [eq6] .. S2 7. C = C * i_p2.y mod p [eq6] .. S2
8. i_p1.y = i_p2.x - i_p1.x mod p [eq3, eq1, eq5] .. H 8. B = i_p2.x - i_p1.x mod p [eq3, eq1] .. H
9. i_p2.y = C - A mod p [eq5,eq7] ... R 9. i_p2.y = C - A mod p [eq5,eq7] ... R
10. o_p.x = i_p2.y * i_p2.y mod p [eq9] ... R^2 10. o_p.x = i_p2.y * i_p2.y mod p [eq9] ... R^2
11. C = i_p1.y * i_p1.y mod p [eq9] .. H^2 11. D = B * B mod p [eq8] .. H^2
12. i_p2.x = C * i_p1.y mod p [eq8, eq11] ..H^3 12. i_p2.x = D * B mod p [eq8, eq11] ..H^3
13. o_p.x = o_p.x - i_p2.x mod p [eq12, eq10] 13. o_p.x = o_p.x - i_p2.x mod p [eq12, eq10]
14. i_p1.x = i_p1.x*C [eq11, eq8] ..U1*H^2 14. i_p1.x = i_p1.x*D [eq1, eq11] ..U1*H^2
15. o_p.y = i_p1.x [eq14] 15. o_p.y = i_p1.x [eq14]
16. i_p1.x = 2* i_p1.x mod p [eq15, eq14] 16. i_p1.x = 2* i_p1.x mod p [eq15, eq14]
17. o_p.x = o_p.x - i_p1.x [eq16, eq13] 17. o_p.x = o_p.x - i_p1.x [eq16, eq13]
@ -81,16 +81,17 @@ module secp256k1_point_add
20. i_p2.x = i_p2.x * A [eq5, eq12] 20. i_p2.x = i_p2.x * A [eq5, eq12]
21. o_p.y = o_p.y - i_p2.x [eq20, eq19] 21. o_p.y = o_p.y - i_p2.x [eq20, eq19]
22. o_p.z = i_p1.z * i_p2.z mod p 22. o_p.z = i_p1.z * i_p2.z mod p
23. o_p.z = o_p.z * i_p1.y mod p [eq22, eq8] 23. o_p.z = o_p.z * B mod p [eq22, eq8]
*/ */
// We also check in the initial state if one of the inputs is "None" (.z == 0), and set the output to the other point // We also check in the initial state if one of the inputs is "None" (.z == 0), and set the output to the other point
logic [23:0] eq_val, eq_wait; logic [23:0] eq_val, eq_wait;
// Temporary variables // Temporary variables
logic [255:0] A, C; logic [255:0] A, B, C, D;
jb_point_t i_p1_l, i_p2_l; jb_point_t i_p1_l, i_p2_l;
always_comb begin always_comb begin
o_mult_if.sop = 1; o_mult_if.sop = 1;
o_mult_if.eop = 1; o_mult_if.eop = 1;
@ -123,14 +124,16 @@ always_ff @ (posedge i_clk) begin
i_p2_l <= 0; i_p2_l <= 0;
o_err <= 0; o_err <= 0;
A <= 0; A <= 0;
B <= 0;
C <= 0; C <= 0;
D <= 0;
end else begin end else begin
o_mult_if.ctl[7:6] <= 0; // All operations are mod p o_mult_if.ctl[7:6] <= 0; // All operations are mod p
if (o_mult_if.rdy) o_mult_if.val <= 0; if (o_mult_if.rdy) o_mult_if.val <= 0;
if (o_mod_if.rdy) o_mod_if.val <= 0; if (o_mod_if.rdy) o_mod_if.val <= 0;
case(state) case(state)
{IDLE}: begin {IDLE}: begin
o_rdy <= 1; o_rdy <= 1;
@ -141,7 +144,9 @@ always_ff @ (posedge i_clk) begin
i_p1_l <= i_p1; i_p1_l <= i_p1;
i_p2_l <= i_p2; i_p2_l <= i_p2;
A <= 0; A <= 0;
B <= 0;
C <= 0; C <= 0;
D <= 0;
if (i_val && o_rdy) begin if (i_val && o_rdy) begin
state <= START; state <= START;
o_rdy <= 0; o_rdy <= 0;
@ -179,7 +184,7 @@ always_ff @ (posedge i_clk) begin
default: o_err <= 1; default: o_err <= 1;
endcase endcase
end end
// Check any results from multiplier // Check any results from multiplier
if (i_mult_if.val && i_mult_if.rdy) begin if (i_mult_if.val && i_mult_if.rdy) begin
eq_val[i_mult_if.ctl[5:0]] <= 1; eq_val[i_mult_if.ctl[5:0]] <= 1;
@ -193,7 +198,7 @@ always_ff @ (posedge i_clk) begin
6: C <= i_mult_if.dat; 6: C <= i_mult_if.dat;
7: C <= i_mult_if.dat; 7: C <= i_mult_if.dat;
10: o_p.x <= i_mult_if.dat; 10: o_p.x <= i_mult_if.dat;
11: C <= i_mult_if.dat; 11: D <= i_mult_if.dat;
12: i_p2_l.x <= i_mult_if.dat; 12: i_p2_l.x <= i_mult_if.dat;
14: i_p1_l.x <= i_mult_if.dat; 14: i_p1_l.x <= i_mult_if.dat;
19: o_p.y <= i_mult_if.dat; 19: o_p.y <= i_mult_if.dat;
@ -202,8 +207,8 @@ always_ff @ (posedge i_clk) begin
23: o_p.z <= i_mult_if.dat; 23: o_p.z <= i_mult_if.dat;
default: o_err <= 1; default: o_err <= 1;
endcase endcase
end end
// Issue new multiplies // Issue new multiplies
if (~eq_wait[0]) begin // 0. A = i_p2.z*i_p2.z mod p if (~eq_wait[0]) begin // 0. A = i_p2.z*i_p2.z mod p
multiply(0, i_p2_l.z, i_p2_l.z); multiply(0, i_p2_l.z, i_p2_l.z);
@ -232,14 +237,14 @@ always_ff @ (posedge i_clk) begin
if (eq_val[9] && ~eq_wait[10]) begin // 10. o_p.x = i_p2.y * i_p2.y mod p [eq9] if (eq_val[9] && ~eq_wait[10]) begin // 10. o_p.x = i_p2.y * i_p2.y mod p [eq9]
multiply(10, i_p2_l.y, i_p2_l.y); multiply(10, i_p2_l.y, i_p2_l.y);
end else end else
if (eq_val[9] && ~eq_wait[11]) begin // 11. C = i_p1.y * i_p1.y mod p [eq9] .. H^2 if (eq_val[8] && ~eq_wait[11]) begin // 11. D = B * B mod p [eq8] .. H^2
multiply(11, i_p1_l.y, i_p1_l.y); multiply(11, B, B);
end else end else
if (eq_val[11] && eq_val[8] && ~eq_wait[12]) begin // 12. i_p2.x = C * i_p1.y mod p [eq8, eq11] ..H^3 if (eq_val[11] && eq_val[8] && ~eq_wait[12]) begin // 12. i_p2.x = D * B mod p [eq8, eq11] ..H^3
multiply(12, C, i_p1_l.y); multiply(12, D, B);
end else end else
if (eq_val[11] && eq_val[8] && ~eq_wait[14]) begin // 14. i_p1.x = i_p1.x*C [eq11, eq8] ..U1*H^2 if (eq_val[1] && eq_val[11] && ~eq_wait[14]) begin // 14. i_p1.x = i_p1.x*D [eq1, eq11] ..U1*H^2
multiply(14, C, i_p1_l.x); multiply(14, D, i_p1_l.x);
end else end else
if (eq_val[18] && eq_val[9] && ~eq_wait[19]) begin // 19. o_p.y = o_p.y * i_p2.y mod p [eq18, eq9] if (eq_val[18] && eq_val[9] && ~eq_wait[19]) begin // 19. o_p.y = o_p.y * i_p2.y mod p [eq18, eq9]
multiply(19, o_p.y, i_p2_l.y); multiply(19, o_p.y, i_p2_l.y);
@ -250,42 +255,42 @@ always_ff @ (posedge i_clk) begin
if (~eq_wait[22]) begin // 22. o_p.z = i_p1.z * i_p2.z mod p if (~eq_wait[22]) begin // 22. o_p.z = i_p1.z * i_p2.z mod p
multiply(22, i_p1_l.z, i_p2_l.z); multiply(22, i_p1_l.z, i_p2_l.z);
end else end else
if (eq_val[8] && eq_val[22] && ~eq_wait[23]) begin // 23. o_p.z = o_p.z * i_p1.y mod p [eq22, eq8] if (eq_val[8] && eq_val[22] && ~eq_wait[23]) begin // 23. o_p.z = o_p.z * B mod p [eq22, eq8]
multiply(23, o_p.z, i_p1_l.y); multiply(23, o_p.z, B);
end end
// Issue new modulo reductions // Issue new modulo reductions
if (eq_val[15] && eq_val[14] && ~eq_wait[16]) begin // 16. i_p1.x = 2* i_p1.x mod p [eq15, eq14] if (eq_val[15] && eq_val[14] && ~eq_wait[16]) begin // 16. i_p1.x = 2* i_p1.x mod p [eq15, eq14]
modulo(16, 2 * i_p1_l.x); modulo(16, 2 * i_p1_l.x);
end end
// Subtractions we do in-module // Subtractions we do in-module
if (eq_val[1] && eq_val[3] && eq_val[5] && ~eq_wait[8]) begin //8. i_p1.y = i_p2.x - i_p1.x mod p [eq3, eq1, eq5] .. H if (eq_val[1] && eq_val[3] && ~eq_wait[8]) begin //8. B = i_p2.x - i_p1.x mod p [eq3, eq1] .. H
i_p1_l.y <= subtract(8, i_p2_l.x, i_p1_l.x); B <= subtract(8, i_p2_l.x, i_p1_l.x);
end end
if (eq_val[5] && eq_val[7] && ~eq_wait[9]) begin //9. i_p2.y = C - A mod p [eq5,eq7] ... R if (eq_val[5] && eq_val[7] && ~eq_wait[9]) begin //9. i_p2.y = C - A mod p [eq5,eq7] ... R
i_p2_l.y <= subtract(9, C, A); i_p2_l.y <= subtract(9, C, A);
end end
if (eq_val[12] && eq_val[10] && ~eq_wait[13]) begin //13. o_p.x = o_p.x - i_p2.x mod p [eq12, eq10] if (eq_val[12] && eq_val[10] && ~eq_wait[13]) begin //13. o_p.x = o_p.x - i_p2.x mod p [eq12, eq10]
o_p.x <= subtract(13, o_p.x, i_p2_l.x); o_p.x <= subtract(13, o_p.x, i_p2_l.x);
end end
if (eq_val[16] && eq_val[13] && ~eq_wait[17]) begin //17. o_p.x = o_p.x - i_p1.x [eq16, eq13] if (eq_val[16] && eq_val[13] && ~eq_wait[17]) begin //17. o_p.x = o_p.x - i_p1.x [eq16, eq13]
o_p.x <= subtract(17, o_p.x, i_p1_l.x); o_p.x <= subtract(17, o_p.x, i_p1_l.x);
end end
if (eq_val[17] && eq_val[15] && ~eq_wait[18]) begin //18. o_p.y = o_p.y - o_p.x mod p [eq17, eq15] if (eq_val[17] && eq_val[15] && ~eq_wait[18]) begin //18. o_p.y = o_p.y - o_p.x mod p [eq17, eq15]
o_p.y <= subtract(18, o_p.y, o_p.x); o_p.y <= subtract(18, o_p.y, o_p.x);
end end
if (eq_val[20] && eq_val[19] && ~eq_wait[21]) begin //21. o_p.y = o_p.y - i_p2.x [eq20, eq19] if (eq_val[20] && eq_val[19] && ~eq_wait[21]) begin //21. o_p.y = o_p.y - i_p2.x [eq20, eq19]
o_p.y <= subtract(21, o_p.y, i_p2_l.x); o_p.y <= subtract(21, o_p.y, i_p2_l.x);
end end
// Assignments // Assignments
if (eq_val[14] && ~eq_wait[15]) begin //15. o_p.y = i_p1.x [eq14] if (eq_val[14] && ~eq_wait[15]) begin //15. o_p.y = i_p1.x [eq14]
eq_wait[15] <= 1; eq_wait[15] <= 1;
eq_val[15] <= 1; eq_val[15] <= 1;
o_p.y <= i_p1_l.x; o_p.y <= i_p1_l.x;
end end
if (&eq_val) begin if (&eq_val) begin
state <= FINISHED; state <= FINISHED;
o_val <= 1; o_val <= 1;
@ -299,7 +304,7 @@ always_ff @ (posedge i_clk) begin
end end
end end
endcase endcase
if (o_err) begin if (o_err) begin
o_val <= 1; o_val <= 1;
if (o_val && i_rdy) begin if (o_val && i_rdy) begin
@ -307,7 +312,7 @@ always_ff @ (posedge i_clk) begin
state <= IDLE; state <= IDLE;
end end
end end
end end
end end