added temp variables to improve performance

This commit is contained in:
bsdevlin 2019-04-10 17:19:23 -04:00
parent b4df9a43f4
commit 64cd0c6019
1 changed files with 43 additions and 38 deletions

View File

@ -17,7 +17,7 @@
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
module secp256k1_point_add
module secp256k1_point_add3
import secp256k1_pkg::*;
#(
)(
@ -66,13 +66,13 @@ module secp256k1_point_add
5. A = A * i_p1.y [eq4] ... S1
6. C = C * i_p1.z mod p [eq3]
7. C = C * i_p2.y mod p [eq6] .. S2
8. i_p1.y = i_p2.x - i_p1.x mod p [eq3, eq1, eq5] .. H
8. B = i_p2.x - i_p1.x mod p [eq3, eq1] .. H
9. i_p2.y = C - A mod p [eq5,eq7] ... R
10. o_p.x = i_p2.y * i_p2.y mod p [eq9] ... R^2
11. C = i_p1.y * i_p1.y mod p [eq9] .. H^2
12. i_p2.x = C * i_p1.y mod p [eq8, eq11] ..H^3
11. D = B * B mod p [eq8] .. H^2
12. i_p2.x = D * B mod p [eq8, eq11] ..H^3
13. o_p.x = o_p.x - i_p2.x mod p [eq12, eq10]
14. i_p1.x = i_p1.x*C [eq11, eq8] ..U1*H^2
14. i_p1.x = i_p1.x*D [eq1, eq11] ..U1*H^2
15. o_p.y = i_p1.x [eq14]
16. i_p1.x = 2* i_p1.x mod p [eq15, eq14]
17. o_p.x = o_p.x - i_p1.x [eq16, eq13]
@ -81,16 +81,17 @@ module secp256k1_point_add
20. i_p2.x = i_p2.x * A [eq5, eq12]
21. o_p.y = o_p.y - i_p2.x [eq20, eq19]
22. o_p.z = i_p1.z * i_p2.z mod p
23. o_p.z = o_p.z * i_p1.y mod p [eq22, eq8]
23. o_p.z = o_p.z * B mod p [eq22, eq8]
*/
// We also check in the initial state if one of the inputs is "None" (.z == 0), and set the output to the other point
logic [23:0] eq_val, eq_wait;
// Temporary variables
logic [255:0] A, C;
logic [255:0] A, B, C, D;
jb_point_t i_p1_l, i_p2_l;
always_comb begin
o_mult_if.sop = 1;
o_mult_if.eop = 1;
@ -123,7 +124,9 @@ always_ff @ (posedge i_clk) begin
i_p2_l <= 0;
o_err <= 0;
A <= 0;
B <= 0;
C <= 0;
D <= 0;
end else begin
o_mult_if.ctl[7:6] <= 0; // All operations are mod p
@ -141,7 +144,9 @@ always_ff @ (posedge i_clk) begin
i_p1_l <= i_p1;
i_p2_l <= i_p2;
A <= 0;
B <= 0;
C <= 0;
D <= 0;
if (i_val && o_rdy) begin
state <= START;
o_rdy <= 0;
@ -193,7 +198,7 @@ always_ff @ (posedge i_clk) begin
6: C <= i_mult_if.dat;
7: C <= i_mult_if.dat;
10: o_p.x <= i_mult_if.dat;
11: C <= i_mult_if.dat;
11: D <= i_mult_if.dat;
12: i_p2_l.x <= i_mult_if.dat;
14: i_p1_l.x <= i_mult_if.dat;
19: o_p.y <= i_mult_if.dat;
@ -232,14 +237,14 @@ always_ff @ (posedge i_clk) begin
if (eq_val[9] && ~eq_wait[10]) begin // 10. o_p.x = i_p2.y * i_p2.y mod p [eq9]
multiply(10, i_p2_l.y, i_p2_l.y);
end else
if (eq_val[9] && ~eq_wait[11]) begin // 11. C = i_p1.y * i_p1.y mod p [eq9] .. H^2
multiply(11, i_p1_l.y, i_p1_l.y);
if (eq_val[8] && ~eq_wait[11]) begin // 11. D = B * B mod p [eq8] .. H^2
multiply(11, B, B);
end else
if (eq_val[11] && eq_val[8] && ~eq_wait[12]) begin // 12. i_p2.x = C * i_p1.y mod p [eq8, eq11] ..H^3
multiply(12, C, i_p1_l.y);
if (eq_val[11] && eq_val[8] && ~eq_wait[12]) begin // 12. i_p2.x = D * B mod p [eq8, eq11] ..H^3
multiply(12, D, B);
end else
if (eq_val[11] && eq_val[8] && ~eq_wait[14]) begin // 14. i_p1.x = i_p1.x*C [eq11, eq8] ..U1*H^2
multiply(14, C, i_p1_l.x);
if (eq_val[1] && eq_val[11] && ~eq_wait[14]) begin // 14. i_p1.x = i_p1.x*D [eq1, eq11] ..U1*H^2
multiply(14, D, i_p1_l.x);
end else
if (eq_val[18] && eq_val[9] && ~eq_wait[19]) begin // 19. o_p.y = o_p.y * i_p2.y mod p [eq18, eq9]
multiply(19, o_p.y, i_p2_l.y);
@ -250,8 +255,8 @@ always_ff @ (posedge i_clk) begin
if (~eq_wait[22]) begin // 22. o_p.z = i_p1.z * i_p2.z mod p
multiply(22, i_p1_l.z, i_p2_l.z);
end else
if (eq_val[8] && eq_val[22] && ~eq_wait[23]) begin // 23. o_p.z = o_p.z * i_p1.y mod p [eq22, eq8]
multiply(23, o_p.z, i_p1_l.y);
if (eq_val[8] && eq_val[22] && ~eq_wait[23]) begin // 23. o_p.z = o_p.z * B mod p [eq22, eq8]
multiply(23, o_p.z, B);
end
// Issue new modulo reductions
@ -260,8 +265,8 @@ always_ff @ (posedge i_clk) begin
end
// Subtractions we do in-module
if (eq_val[1] && eq_val[3] && eq_val[5] && ~eq_wait[8]) begin //8. i_p1.y = i_p2.x - i_p1.x mod p [eq3, eq1, eq5] .. H
i_p1_l.y <= subtract(8, i_p2_l.x, i_p1_l.x);
if (eq_val[1] && eq_val[3] && ~eq_wait[8]) begin //8. B = i_p2.x - i_p1.x mod p [eq3, eq1] .. H
B <= subtract(8, i_p2_l.x, i_p1_l.x);
end
if (eq_val[5] && eq_val[7] && ~eq_wait[9]) begin //9. i_p2.y = C - A mod p [eq5,eq7] ... R
i_p2_l.y <= subtract(9, C, A);