From 64cd0c6019e9ded2d911b5cd7f334bac05129b44 Mon Sep 17 00:00:00 2001 From: bsdevlin Date: Wed, 10 Apr 2019 17:19:23 -0400 Subject: [PATCH] added temp variables to improve performance --- .../src/rtl/secp256k1/secp256k1_point_add.sv | 81 ++++++++++--------- 1 file changed, 43 insertions(+), 38 deletions(-) diff --git a/zcash_fpga/src/rtl/secp256k1/secp256k1_point_add.sv b/zcash_fpga/src/rtl/secp256k1/secp256k1_point_add.sv index 30e8a10..6d02ae0 100644 --- a/zcash_fpga/src/rtl/secp256k1/secp256k1_point_add.sv +++ b/zcash_fpga/src/rtl/secp256k1/secp256k1_point_add.sv @@ -1,6 +1,6 @@ /* This performs point addition. - + Copyright (C) 2019 Benjamin Devlin and Zcash Foundation This program is free software: you can redistribute it and/or modify @@ -17,7 +17,7 @@ along with this program. If not, see . */ -module secp256k1_point_add +module secp256k1_point_add3 import secp256k1_pkg::*; #( )( @@ -46,7 +46,7 @@ module secp256k1_point_add will trigger other equations. [] show what equations must be valid before this starts. We reuse input points (as they are latched) when possible to reduce register usage. Taken from https://en.wikibooks.org/wiki/Cryptography/Prime_Curve/Jacobian_Coordinates - + U1 = X1*Z2^2 U2 = X2*Z1^2 S1 = Y1*Z2^3 @@ -66,13 +66,13 @@ module secp256k1_point_add 5. A = A * i_p1.y [eq4] ... S1 6. C = C * i_p1.z mod p [eq3] 7. C = C * i_p2.y mod p [eq6] .. S2 - 8. i_p1.y = i_p2.x - i_p1.x mod p [eq3, eq1, eq5] .. H + 8. B = i_p2.x - i_p1.x mod p [eq3, eq1] .. H 9. i_p2.y = C - A mod p [eq5,eq7] ... R 10. o_p.x = i_p2.y * i_p2.y mod p [eq9] ... R^2 - 11. C = i_p1.y * i_p1.y mod p [eq9] .. H^2 - 12. i_p2.x = C * i_p1.y mod p [eq8, eq11] ..H^3 + 11. D = B * B mod p [eq8] .. H^2 + 12. i_p2.x = D * B mod p [eq8, eq11] ..H^3 13. o_p.x = o_p.x - i_p2.x mod p [eq12, eq10] - 14. i_p1.x = i_p1.x*C [eq11, eq8] ..U1*H^2 + 14. i_p1.x = i_p1.x*D [eq1, eq11] ..U1*H^2 15. o_p.y = i_p1.x [eq14] 16. i_p1.x = 2* i_p1.x mod p [eq15, eq14] 17. 
o_p.x = o_p.x - i_p1.x [eq16, eq13] @@ -81,16 +81,17 @@ module secp256k1_point_add 20. i_p2.x = i_p2.x * A [eq5, eq12] 21. o_p.y = o_p.y - i_p2.x [eq20, eq19] 22. o_p.z = i_p1.z * i_p2.z mod p - 23. o_p.z = o_p.z * i_p1.y mod p [eq22, eq8] - */ - + 23. o_p.z = o_p.z * B mod p [eq22, eq8] + */ + // We also check in the inital state if one of the inputs is "None" (.z == 0), and set the output to the other point logic [23:0] eq_val, eq_wait; // Temporary variables -logic [255:0] A, C; +logic [255:0] A, B, C, D; jb_point_t i_p1_l, i_p2_l; + always_comb begin o_mult_if.sop = 1; o_mult_if.eop = 1; @@ -123,14 +124,16 @@ always_ff @ (posedge i_clk) begin i_p2_l <= 0; o_err <= 0; A <= 0; + B <= 0; C <= 0; + D <= 0; end else begin - + o_mult_if.ctl[7:6] <= 0; // All operations are mod p if (o_mult_if.rdy) o_mult_if.val <= 0; if (o_mod_if.rdy) o_mod_if.val <= 0; - + case(state) {IDLE}: begin o_rdy <= 1; @@ -141,7 +144,9 @@ always_ff @ (posedge i_clk) begin i_p1_l <= i_p1; i_p2_l <= i_p2; A <= 0; + B <= 0; C <= 0; + D <= 0; if (i_val && o_rdy) begin state <= START; o_rdy <= 0; @@ -179,7 +184,7 @@ always_ff @ (posedge i_clk) begin default: o_err <= 1; endcase end - + // Check any results from multiplier if (i_mult_if.val && i_mult_if.rdy) begin eq_val[i_mult_if.ctl[5:0]] <= 1; @@ -193,7 +198,7 @@ always_ff @ (posedge i_clk) begin 6: C <= i_mult_if.dat; 7: C <= i_mult_if.dat; 10: o_p.x <= i_mult_if.dat; - 11: C <= i_mult_if.dat; + 11: D <= i_mult_if.dat; 12: i_p2_l.x <= i_mult_if.dat; 14: i_p1_l.x <= i_mult_if.dat; 19: o_p.y <= i_mult_if.dat; @@ -202,8 +207,8 @@ always_ff @ (posedge i_clk) begin 23: o_p.z <= i_mult_if.dat; default: o_err <= 1; endcase - end - + end + // Issue new multiplies if (~eq_wait[0]) begin // 0. A = i_p2.z*i_p2.z mod p multiply(0, i_p2_l.z, i_p2_l.z); @@ -232,14 +237,14 @@ always_ff @ (posedge i_clk) begin if (eq_val[9] && ~eq_wait[10]) begin // 10. 
o_p.x = i_p2.y * i_p2.y mod p [eq9] multiply(10, i_p2_l.y, i_p2_l.y); end else - if (eq_val[9] && ~eq_wait[11]) begin // 11. C = i_p1.y * i_p1.y mod p [eq9] .. H^2 - multiply(11, i_p1_l.y, i_p1_l.y); + if (eq_val[8] && ~eq_wait[11]) begin // 11. D = B * B mod p [eq8] .. H^2 + multiply(11, B, B); end else - if (eq_val[11] && eq_val[8] && ~eq_wait[12]) begin // 12. i_p2.x = C * i_p1.y mod p [eq8, eq11] ..H^3 - multiply(12, C, i_p1_l.y); + if (eq_val[11] && eq_val[8] && ~eq_wait[12]) begin // 12. i_p2.x = D * B mod p [eq8, eq11] ..H^3 + multiply(12, D, B); end else - if (eq_val[11] && eq_val[8] && ~eq_wait[14]) begin // 14. i_p1.x = i_p1.x*C [eq11, eq8] ..U1*H^2 - multiply(14, C, i_p1_l.x); + if (eq_val[1] && eq_val[11] && ~eq_wait[14]) begin // 14. i_p1.x = i_p1.x*D [eq1, eq11] ..U1*H^2 + multiply(14, D, i_p1_l.x); end else if (eq_val[18] && eq_val[9] && ~eq_wait[19]) begin // 19. o_p.y = o_p.y * i_p2.y mod p [eq18, eq9] multiply(19, o_p.y, i_p2_l.y); @@ -250,42 +255,42 @@ always_ff @ (posedge i_clk) begin if (~eq_wait[22]) begin // 22. o_p.z = i_p1.z * i_p2.z mod p multiply(22, i_p1_l.z, i_p2_l.z); end else - if (eq_val[8] && eq_val[22] && ~eq_wait[23]) begin // 23. o_p.z = o_p.z * i_p1.y mod p [eq22, eq8] - multiply(23, o_p.z, i_p1_l.y); + if (eq_val[8] && eq_val[22] && ~eq_wait[23]) begin // 23. o_p.z = o_p.z * B mod p [eq22, eq8] + multiply(23, o_p.z, B); end - + // Issue new modulo reductions if (eq_val[15] && eq_val[14] && ~eq_wait[16]) begin // 16. i_p1.x = 2* i_p1.x mod p [eq15, eq14] modulo(16, 2 * i_p1_l.x); end - + // Subtractions we do in-module - if (eq_val[1] && eq_val[3] && eq_val[5] && ~eq_wait[8]) begin //8. i_p1.y = i_p2.x - i_p1.x mod p [eq3, eq1, eq5] .. H - i_p1_l.y <= subtract(8, i_p2_l.x, i_p1_l.x); + if (eq_val[1] && eq_val[3] && ~eq_wait[8]) begin //8. B = i_p2.x - i_p1.x mod p [eq3, eq1] .. H + B <= subtract(8, i_p2_l.x, i_p1_l.x); end if (eq_val[5] && eq_val[7] && ~eq_wait[9]) begin //9. i_p2.y = C - A mod p [eq5,eq7] ... 
R i_p2_l.y <= subtract(9, C, A); - end + end if (eq_val[12] && eq_val[10] && ~eq_wait[13]) begin //13. o_p.x = o_p.x - i_p2.x mod p [eq12, eq10] o_p.x <= subtract(13, o_p.x, i_p2_l.x); - end + end if (eq_val[16] && eq_val[13] && ~eq_wait[17]) begin //17. o_p.x = o_p.x - i_p1.x [eq16, eq13] o_p.x <= subtract(17, o_p.x, i_p1_l.x); - end + end if (eq_val[17] && eq_val[15] && ~eq_wait[18]) begin //18. o_p.y = o_p.y - o_p.x mod p [eq17, eq15] o_p.y <= subtract(18, o_p.y, o_p.x); - end + end if (eq_val[20] && eq_val[19] && ~eq_wait[21]) begin //21. o_p.y = o_p.y - i_p2.x [eq20, eq19] o_p.y <= subtract(21, o_p.y, i_p2_l.x); - end + end // Assignments if (eq_val[14] && ~eq_wait[15]) begin //15. o_p.y = i_p1.x [eq14] eq_wait[15] <= 1; eq_val[15] <= 1; o_p.y <= i_p1_l.x; - end - + end + if (&eq_val) begin state <= FINISHED; o_val <= 1; @@ -299,7 +304,7 @@ always_ff @ (posedge i_clk) begin end end endcase - + if (o_err) begin o_val <= 1; if (o_val && i_rdy) begin @@ -307,7 +312,7 @@ always_ff @ (posedge i_clk) begin state <= IDLE; end end - + end end