added temp variables to improve performance

This commit is contained in:
bsdevlin 2019-04-10 17:19:23 -04:00
parent b4df9a43f4
commit 64cd0c6019
1 changed files with 43 additions and 38 deletions

View File

@ -1,6 +1,6 @@
/*
This performs point addition.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
@ -17,7 +17,7 @@
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
module secp256k1_point_add
module secp256k1_point_add3
import secp256k1_pkg::*;
#(
)(
@ -46,7 +46,7 @@ module secp256k1_point_add
will trigger other equations. [] show what equations must be valid before this starts.
We reuse input points (as they are latched) when possible to reduce register usage.
Taken from https://en.wikibooks.org/wiki/Cryptography/Prime_Curve/Jacobian_Coordinates
U1 = X1*Z2^2
U2 = X2*Z1^2
S1 = Y1*Z2^3
@ -66,13 +66,13 @@ module secp256k1_point_add
5. A = A * i_p1.y [eq4] ... S1
6. C = C * i_p1.z mod p [eq3]
7. C = C * i_p2.y mod p [eq6] .. S2
8. i_p1.y = i_p2.x - i_p1.x mod p [eq3, eq1, eq5] .. H
8. B = i_p2.x - i_p1.x mod p [eq3, eq1] .. H
9. i_p2.y = C - A mod p [eq5,eq7] ... R
10. o_p.x = i_p2.y * i_p2.y mod p [eq9] ... R^2
11. C = i_p1.y * i_p1.y mod p [eq9] .. H^2
12. i_p2.x = C * i_p1.y mod p [eq8, eq11] ..H^3
11. D = B * B mod p [eq8] .. H^2
12. i_p2.x = D * B mod p [eq8, eq11] ..H^3
13. o_p.x = o_p.x - i_p2.x mod p [eq12, eq10]
14. i_p1.x = i_p1.x*C [eq11, eq8] ..U1*H^2
14. i_p1.x = i_p1.x*D [eq1, eq11] ..U1*H^2
15. o_p.y = i_p1.x [eq14]
16. i_p1.x = 2* i_p1.x mod p [eq15, eq14]
17. o_p.x = o_p.x - i_p1.x [eq16, eq13]
@ -81,16 +81,17 @@ module secp256k1_point_add
20. i_p2.x = i_p2.x * A [eq5, eq12]
21. o_p.y = o_p.y - i_p2.x [eq20, eq19]
22. o_p.z = i_p1.z * i_p2.z mod p
23. o_p.z = o_p.z * i_p1.y mod p [eq22, eq8]
*/
23. o_p.z = o_p.z * B mod p [eq22, eq8]
*/
// We also check in the initial state if one of the inputs is "None" (.z == 0), and set the output to the other point
logic [23:0] eq_val, eq_wait;
// Temporary variables
logic [255:0] A, C;
logic [255:0] A, B, C, D;
jb_point_t i_p1_l, i_p2_l;
always_comb begin
o_mult_if.sop = 1;
o_mult_if.eop = 1;
@ -123,14 +124,16 @@ always_ff @ (posedge i_clk) begin
i_p2_l <= 0;
o_err <= 0;
A <= 0;
B <= 0;
C <= 0;
D <= 0;
end else begin
o_mult_if.ctl[7:6] <= 0; // All operations are mod p
if (o_mult_if.rdy) o_mult_if.val <= 0;
if (o_mod_if.rdy) o_mod_if.val <= 0;
case(state)
{IDLE}: begin
o_rdy <= 1;
@ -141,7 +144,9 @@ always_ff @ (posedge i_clk) begin
i_p1_l <= i_p1;
i_p2_l <= i_p2;
A <= 0;
B <= 0;
C <= 0;
D <= 0;
if (i_val && o_rdy) begin
state <= START;
o_rdy <= 0;
@ -179,7 +184,7 @@ always_ff @ (posedge i_clk) begin
default: o_err <= 1;
endcase
end
// Check any results from multiplier
if (i_mult_if.val && i_mult_if.rdy) begin
eq_val[i_mult_if.ctl[5:0]] <= 1;
@ -193,7 +198,7 @@ always_ff @ (posedge i_clk) begin
6: C <= i_mult_if.dat;
7: C <= i_mult_if.dat;
10: o_p.x <= i_mult_if.dat;
11: C <= i_mult_if.dat;
11: D <= i_mult_if.dat;
12: i_p2_l.x <= i_mult_if.dat;
14: i_p1_l.x <= i_mult_if.dat;
19: o_p.y <= i_mult_if.dat;
@ -202,8 +207,8 @@ always_ff @ (posedge i_clk) begin
23: o_p.z <= i_mult_if.dat;
default: o_err <= 1;
endcase
end
end
// Issue new multiplies
if (~eq_wait[0]) begin // 0. A = i_p2.z*i_p2.z mod p
multiply(0, i_p2_l.z, i_p2_l.z);
@ -232,14 +237,14 @@ always_ff @ (posedge i_clk) begin
if (eq_val[9] && ~eq_wait[10]) begin // 10. o_p.x = i_p2.y * i_p2.y mod p [eq9]
multiply(10, i_p2_l.y, i_p2_l.y);
end else
if (eq_val[9] && ~eq_wait[11]) begin // 11. C = i_p1.y * i_p1.y mod p [eq9] .. H^2
multiply(11, i_p1_l.y, i_p1_l.y);
if (eq_val[8] && ~eq_wait[11]) begin // 11. D = B * B mod p [eq8] .. H^2
multiply(11, B, B);
end else
if (eq_val[11] && eq_val[8] && ~eq_wait[12]) begin // 12. i_p2.x = C * i_p1.y mod p [eq8, eq11] ..H^3
multiply(12, C, i_p1_l.y);
if (eq_val[11] && eq_val[8] && ~eq_wait[12]) begin // 12. i_p2.x = D * B mod p [eq8, eq11] ..H^3
multiply(12, D, B);
end else
if (eq_val[11] && eq_val[8] && ~eq_wait[14]) begin // 14. i_p1.x = i_p1.x*C [eq11, eq8] ..U1*H^2
multiply(14, C, i_p1_l.x);
if (eq_val[1] && eq_val[11] && ~eq_wait[14]) begin // 14. i_p1.x = i_p1.x*D [eq1, eq11] ..U1*H^2
multiply(14, D, i_p1_l.x);
end else
if (eq_val[18] && eq_val[9] && ~eq_wait[19]) begin // 19. o_p.y = o_p.y * i_p2.y mod p [eq18, eq9]
multiply(19, o_p.y, i_p2_l.y);
@ -250,42 +255,42 @@ always_ff @ (posedge i_clk) begin
if (~eq_wait[22]) begin // 22. o_p.z = i_p1.z * i_p2.z mod p
multiply(22, i_p1_l.z, i_p2_l.z);
end else
if (eq_val[8] && eq_val[22] && ~eq_wait[23]) begin // 23. o_p.z = o_p.z * i_p1.y mod p [eq22, eq8]
multiply(23, o_p.z, i_p1_l.y);
if (eq_val[8] && eq_val[22] && ~eq_wait[23]) begin // 23. o_p.z = o_p.z * B mod p [eq22, eq8]
multiply(23, o_p.z, B);
end
// Issue new modulo reductions
if (eq_val[15] && eq_val[14] && ~eq_wait[16]) begin // 16. i_p1.x = 2* i_p1.x mod p [eq15, eq14]
modulo(16, 2 * i_p1_l.x);
end
// Subtractions we do in-module
if (eq_val[1] && eq_val[3] && eq_val[5] && ~eq_wait[8]) begin //8. i_p1.y = i_p2.x - i_p1.x mod p [eq3, eq1, eq5] .. H
i_p1_l.y <= subtract(8, i_p2_l.x, i_p1_l.x);
if (eq_val[1] && eq_val[3] && ~eq_wait[8]) begin //8. B = i_p2.x - i_p1.x mod p [eq3, eq1] .. H
B <= subtract(8, i_p2_l.x, i_p1_l.x);
end
if (eq_val[5] && eq_val[7] && ~eq_wait[9]) begin //9. i_p2.y = C - A mod p [eq5,eq7] ... R
i_p2_l.y <= subtract(9, C, A);
end
end
if (eq_val[12] && eq_val[10] && ~eq_wait[13]) begin //13. o_p.x = o_p.x - i_p2.x mod p [eq12, eq10]
o_p.x <= subtract(13, o_p.x, i_p2_l.x);
end
end
if (eq_val[16] && eq_val[13] && ~eq_wait[17]) begin //17. o_p.x = o_p.x - i_p1.x [eq16, eq13]
o_p.x <= subtract(17, o_p.x, i_p1_l.x);
end
end
if (eq_val[17] && eq_val[15] && ~eq_wait[18]) begin //18. o_p.y = o_p.y - o_p.x mod p [eq17, eq15]
o_p.y <= subtract(18, o_p.y, o_p.x);
end
end
if (eq_val[20] && eq_val[19] && ~eq_wait[21]) begin //21. o_p.y = o_p.y - i_p2.x [eq20, eq19]
o_p.y <= subtract(21, o_p.y, i_p2_l.x);
end
end
// Assignments
if (eq_val[14] && ~eq_wait[15]) begin //15. o_p.y = i_p1.x [eq14]
eq_wait[15] <= 1;
eq_val[15] <= 1;
o_p.y <= i_p1_l.x;
end
end
if (&eq_val) begin
state <= FINISHED;
o_val <= 1;
@ -299,7 +304,7 @@ always_ff @ (posedge i_clk) begin
end
end
endcase
if (o_err) begin
o_val <= 1;
if (o_val && i_rdy) begin
@ -307,7 +312,7 @@ always_ff @ (posedge i_clk) begin
state <= IDLE;
end
end
end
end