diff --git a/ip_cores/util/src/rtl/karatsuba_ofman_mult.sv b/ip_cores/util/src/rtl/karatsuba_ofman_mult.sv
new file mode 100644
index 0000000..9ece9e5
--- /dev/null
+++ b/ip_cores/util/src/rtl/karatsuba_ofman_mult.sv
@@ -0,0 +1,110 @@
+/*
+  Multiplication using Karatsuba-Ofman algorithm.
+
+  Multiple of these can be instantiated; each level of recursion adds
+  1 clock cycle of latency (the o_dat output register at that level).
+
+  Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+module karatsuba_ofman_mult # (
+  parameter BITS = 256,
+  parameter LEVEL = 1
+) (
+  input i_clk,
+  input [BITS-1:0] i_dat_a,
+  input [BITS-1:0] i_dat_b,
+  output logic [BITS*2-1:0] o_dat
+);
+
+localparam HBITS = BITS/2; // width of each operand half (integer division, so BITS is expected to be even)
+
+logic [BITS-1:0] m0, m1, m2; // m0 = a_hi*b_hi, m1 = a0*a1, m2 = a_lo*b_lo
+logic [BITS*2-1:0] q; // recombined product, registered into o_dat
+logic [HBITS-1:0] a0, a1; // absolute differences of the operand halves
+logic sign, sign_; // sign_ is computed from the inputs, sign is the copy aligned with m1
+
+generate
+  always_comb begin
+    a0 = i_dat_a[0 +: HBITS] > i_dat_a[HBITS +: HBITS] ? i_dat_a[0 +: HBITS] - i_dat_a[HBITS +: HBITS] : i_dat_a[HBITS +: HBITS] - i_dat_a[0 +: HBITS]; // a0 = |a_lo - a_hi|
+    a1 = i_dat_b[HBITS +: HBITS] > i_dat_b[0 +: HBITS] ? i_dat_b[HBITS +: HBITS] - i_dat_b[0 +: HBITS] : i_dat_b[0 +: HBITS] - i_dat_b[HBITS +: HBITS]; // a1 = |b_hi - b_lo|
+    sign_ = ((i_dat_a[0 +: HBITS] < i_dat_a[HBITS +: HBITS]) ^
+             (i_dat_b[HBITS +: HBITS] < i_dat_b[0 +: HBITS])); // 1 when exactly one of (a_lo - a_hi), (b_hi - b_lo) is negative
+    q = (m0 << BITS) + ((m0 + m2 + (sign == 1 ? -m1 : m1)) << HBITS) + m2; // m0*2^BITS + (m0 + m2 +/- m1)*2^HBITS + m2
+  end
+
+  if (LEVEL == 1) begin: GEN_REC
+    always_comb begin
+      m0 = i_dat_a[HBITS +: HBITS] * i_dat_b[HBITS +: HBITS]; // base case: the three products use the '*' operator directly
+      m2 = i_dat_a[0 +: HBITS] * i_dat_b[0 +: HBITS];
+      m1 = (a0 * a1);
+      sign = sign_; // products are combinational here, so sign_ needs no delay
+    end
+
+  end else begin
+    // pipeline the other non-mult values x clock cycles and add them after multipliers
+    logic [LEVEL-2:0] sign_r; // LEVEL-1 bit shift register holding delayed copies of sign_
+
+    always_comb begin
+      sign = sign_r[LEVEL-2]; // oldest entry, i.e. sign_ from LEVEL-1 clocks ago
+    end
+
+    always_ff @ (posedge i_clk) begin
+      sign_r <= {sign_r, sign_}; // shift in the newest sign_; the top bit of the concatenation is truncated
+    end
+
+    karatsuba_ofman_mult # (
+      .BITS ( HBITS ),
+      .LEVEL( LEVEL-1 )
+    )
+    karatsuba_ofman_mult_m0 (
+      .i_clk ( i_clk ),
+      .i_dat_a ( i_dat_a[HBITS +: HBITS] ),
+      .i_dat_b ( i_dat_b[HBITS +: HBITS] ),
+      .o_dat ( m0 )
+    );
+
+    karatsuba_ofman_mult # (
+      .BITS ( HBITS ),
+      .LEVEL( LEVEL-1 )
+    )
+    karatsuba_ofman_mult_m2 (
+      .i_clk ( i_clk ),
+      .i_dat_a ( i_dat_a[0 +: HBITS] ),
+      .i_dat_b ( i_dat_b[0 +: HBITS] ),
+      .o_dat ( m2 )
+    );
+
+    karatsuba_ofman_mult # (
+      .BITS ( HBITS ),
+      .LEVEL( LEVEL-1 )
+    )
+    karatsuba_ofman_mult_m1 (
+      .i_clk ( i_clk ),
+      .i_dat_a ( a0 ),
+      .i_dat_b ( a1 ),
+      .o_dat ( m1 )
+    );
+
+
+  end
+endgenerate
+
+always_ff @ (posedge i_clk) begin
+  o_dat <= q; // the only register in the data path at this level
+end
+
+endmodule
\ No newline at end of file
diff --git a/ip_cores/util/src/tb/karatsuba_ofman_mult_tb.sv b/ip_cores/util/src/tb/karatsuba_ofman_mult_tb.sv
new file mode 100644
index 0000000..a8c5488
--- /dev/null
+++ b/ip_cores/util/src/tb/karatsuba_ofman_mult_tb.sv
@@ -0,0 +1,119 @@
+/*
+  Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+`timescale 1ps/1ps
+
+module karatsuba_ofman_mult_tb ();
+import common_pkg::*;
+import secp256k1_pkg::*;
+
+localparam CLK_PERIOD = 100;
+
+logic clk, rst;
+
+if_axi_stream #(.DAT_BYTS(512/8)) in_if(clk);
+if_axi_stream #(.DAT_BYTS(512/8)) out_if(clk);
+
+logic [511:0] test; // NOTE(review): not referenced anywhere in this testbench
+initial begin
+  rst = 0;
+  repeat(2) #(20*CLK_PERIOD) rst = ~rst; // rst goes high after 20*CLK_PERIOD and low again after 40*CLK_PERIOD
+end
+
+initial begin
+  clk = 0;
+  forever #CLK_PERIOD clk = ~clk; // toggles every CLK_PERIOD, so one full clock cycle is 2*CLK_PERIOD
+end
+
+always_comb begin
+  out_if.sop = 1;
+  out_if.eop = 1;
+  out_if.ctl = 0;
+  out_if.mod = 0;
+end
+
+// Check for errors
+always_ff @ (posedge clk)
+  if (out_if.val && out_if.err)
+    $error(1, "%m %t ERROR: output .err asserted", $time); // NOTE(review): $error takes no finish_number (only $fatal does), so the leading 1 is treated as a message argument - confirm intent
+
+localparam LEVEL = 3;
+logic [LEVEL-1:0] val; // shift register delaying in_if.val by LEVEL clocks
+
+karatsuba_ofman_mult # (
+  .BITS (256),
+  .LEVEL (LEVEL)
+)
+karatsuba_ofman_mult (
+  .i_clk ( clk ),
+  .i_dat_a( in_if.dat[0 +: 256] ),
+  .i_dat_b( in_if.dat[256 +: 256] ),
+  .o_dat ( out_if.dat )
+);
+
+always_ff @ (posedge clk) begin
+  if (rst) begin
+    val <= 0;
+  end else begin
+    val <= {val, in_if.val}; // shift in the newest input valid each clock
+  end
+end
+
+always_comb begin
+  out_if.val = val[LEVEL-1]; // valid delayed by LEVEL clocks, matching one register per multiplier level
+  in_if.rdy = out_if.rdy; // ready is passed straight through; the multiplier itself has no handshake ports
+end
+
+task test_loop();
+begin
+  integer signed get_len;
+  logic [common_pkg::MAX_SIM_BYTS*8-1:0] expected, get_dat;
+  logic [255:0] in_a, in_b;
+  integer i, max;
+
+  $display("Running test_loop...");
+  i = 0;
+  max = 10000;
+
+  while (i < max) begin
+    in_a = random_vector(256/8);
+    in_b = random_vector(256/8);
+    expected = (in_a * in_b); // holds the full product assuming MAX_SIM_BYTS*8 >= 512 - TODO confirm in common_pkg
+
+    fork
+      in_if.put_stream({in_b, in_a}, 512/8); // in_a lands in dat[0 +: 256], in_b in dat[256 +: 256]
+      out_if.get_stream(get_dat, get_len);
+    join
+
+    common_pkg::compare_and_print(get_dat, expected);
+    $display("test_loop PASSED loop %d/%d", i, max);
+    i = i + 1;
+  end
+
+  $display("test_loop PASSED");
+end
+endtask;
+
+initial begin
+  out_if.rdy = 0;
+  in_if.val = 0;
+  #(40*CLK_PERIOD); // wait until the reset pulse generated above has finished
+
+  test_loop();
+
+  #1us $finish();
+end
+endmodule
\ No newline at end of file
diff --git a/zcash_fpga/src/rtl/secp256k1/secp256k1_mod.sv b/zcash_fpga/src/rtl/secp256k1/secp256k1_mod.sv
index 76d15e9..f1364a4 100644
--- a/zcash_fpga/src/rtl/secp256k1/secp256k1_mod.sv
+++ b/zcash_fpga/src/rtl/secp256k1/secp256k1_mod.sv
@@ -5,9 +5,6 @@
 
   p = 2^256 - 2^32 - 2^9 - 2^8 - 2^7 - 2^6 - 2^4 - 1
 
-  Implemented with 2 stages of 8x 256b adds and one final optional
-  subtract in the case we are >= p.
-
   returns o_dat = i_dat % p, where i_dat < p^2
 
   Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
@@ -26,11 +23,14 @@
   along with this program. If not, see .
  */
 
-module secp256k1_mod (
+module secp256k1_mod #(
+  parameter USE_MULT = 0 // Set to 1 to use a multiply operation (should infer DSPs and use fewer LUTs)
+)(
   input i_clk, i_rst,
   // Input value
   input [256*2-1:0] i_dat,
   input i_val,
+  input i_err,
   output logic o_rdy,
   // output
   output logic [255:0] o_dat,
@@ -41,53 +41,49 @@ module secp256k1_mod (
 
 import secp256k1_pkg::*;
 
-logic [256*2-1:0] b, a, a_;
+logic [256*2-1:0] res0, res1; // registered results of the first and second reduction stage
+logic [1:0] val, err; // valid/error flags delayed alongside res0 and res1
 
-always_comb begin
-  a_ = (a << 32) + (a << 9) + (a << 8) + (a << 7) + (a << 6) + (a << 4) + a + b;
-end
+generate
+  if (USE_MULT == 1) begin: GEN_MULT
+    logic [256*2-1:0] c;
+    always_comb begin
+      c = (1 << 32) + (1 << 9) + (1 << 8) + (1 << 7) + (1 << 6) + (1 << 4) + 1; // c = 2^256 - p, using p as defined in the file header
+    end
+    always_ff @ (posedge i_clk) begin
+      res0 <= i_dat[511:256]*c + i_dat[255:0]; // stage 1: upper half times c, plus lower half
+      res1 <= res0[511:256]*c + res0[255:0]; // stage 2: same fold applied to the stage 1 result
+    end
+  end else begin
+    logic [256*2-1:0] res0_, res1_;
+    always_comb begin
+      res0_ = (i_dat[511:256] << 32) + (i_dat[511:256] << 9) + (i_dat[511:256] << 8) + (i_dat[511:256] << 7) + (i_dat[511:256] << 6) + (i_dat[511:256] << 4) + i_dat[511:256]+ i_dat[255:0]; // stage 1 fold written as shifts and adds
+      res1_ = (res0[511:256] << 32) + (res0[511:256] << 9) + (res0[511:256] << 8) + (res0[511:256] << 7) + (res0[511:256] << 6) + (res0[511:256] << 4) + res0[511:256]+ res0[255:0]; // stage 2 fold of the registered stage 1 result
+    end
+    always_ff @ (posedge i_clk) begin
+      res0 <= res0_;
+      res1 <= res1_;
+    end
+  end
+endgenerate
 
-enum {IDLE, S1, S2} state;
+always_comb o_rdy = i_rdy; // NOTE(review): ready is passed straight through, but no register below is stalled by i_rdy - confirm downstream is ready whenever o_val pulses
 
 always_ff @ (posedge i_clk) begin
   if (i_rst) begin
-    a <= 0;
-    b <= 0;
-    state <= IDLE;
+    val <= 0;
+    err <= 0;
     o_val <= 0;
-    o_rdy <= 0;
     o_err <= 0;
   end else begin
-    o_rdy <= 0;
-    o_dat <= a_ >= p_eq ? (a_ - p_eq) : a_;
-
-    case(state)
-      IDLE: begin
-        o_rdy <= 1;
-        o_val <= 0;
-        if (i_val && o_rdy) begin
-          a <= i_dat[511:256];
-          b <= i_dat[255:0];
-          o_rdy <= 0;
-          state <= S1;
-        end
-      end
-      S1: begin
-        a <= a_[511:256];
-        b <= a_[255:0];
-        state <= S2;
-      end
-      S2: begin
-        o_val <= 1;
-        o_err <= a_ >= 2* p_eq;
-        if (o_val && i_rdy) begin
-          state <=IDLE;
-          o_rdy <= 1;
-          o_val <= 0;
-          o_err <= 0;
-        end
-      end
-    endcase
+    o_val <= 0; // overridden by the o_val assignment at the end of this branch
+    val <= val << 1;
+    err <= err << 1;
+    val[0] <= i_val; // later assignment to bit 0 replaces the 0 shifted in above
+    err[0] <= i_err;
+    o_dat <= res1 >= p_eq ? res1 - p_eq : res1; // one conditional subtract of p_eq (from secp256k1_pkg, presumably the prime p)
+    o_err <= err[1] || (res1 >= 2*p_eq); // error if flagged upstream or if one subtract cannot bring res1 below p_eq
+    o_val <= val[1]; // i_val delayed to line up with o_dat
   end
 end
 
diff --git a/zcash_fpga/src/rtl/secp256k1/secp256k1_mult_mod.sv b/zcash_fpga/src/rtl/secp256k1/secp256k1_mult_mod.sv
new file mode 100644
index 0000000..26d6f06
--- /dev/null
+++ b/zcash_fpga/src/rtl/secp256k1/secp256k1_mult_mod.sv
@@ -0,0 +1,99 @@
+/*
+  This performs a 256 bit multiplication followed by modulus
+  operation.
+
+  Using Karatsuba-Ofman multiplication, where the factor of splitting
+  is parameterized.
+
+  Each level in Karatsuba-Ofman multiplication adds 1 clock cycle.
+  The modulus reduction takes 3 clock cycles.
+
+  Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+module secp256k1_mult_mod (
+  input i_clk, i_rst,
+  // Input value
+  input [255:0] i_dat_a,
+  input [255:0] i_dat_b,
+  input i_val,
+  input i_err,
+  output logic o_rdy,
+  // output
+  output logic [255:0] o_dat,
+  input i_rdy,
+  output logic o_val,
+  output logic o_err
+);
+
+import secp256k1_pkg::*;
+import common_pkg::*;
+
+localparam KARATSUBA_LEVEL = 3; // LEVEL passed to the multiplier; also the length of the val/err delay lines below
+if_axi_stream #(.DAT_BYTS(512/8)) int_if(i_clk); // internal link carrying the multiplier output into secp256k1_mod
+
+always_comb o_rdy = int_if.rdy; // ready as reported by secp256k1_mod
+
+logic [KARATSUBA_LEVEL-1:0] val, err; // i_val / i_err delayed while the multiplier computes
+
+karatsuba_ofman_mult # (
+  .BITS ( 256 ),
+  .LEVEL ( KARATSUBA_LEVEL )
+)
+karatsuba_ofman_mult (
+  .i_clk ( i_clk ),
+  .i_dat_a( i_dat_a ),
+  .i_dat_b( i_dat_b ),
+  .o_dat ( int_if.dat )
+);
+
+always_ff @ (posedge i_clk) begin
+  if (i_rst) begin
+    val <= 0;
+    err <= 0;
+  end else begin
+    val <= {val, i_val}; // shift in the newest flag each clock
+    err <= {err, i_err};
+  end
+end
+
+always_comb begin
+  int_if.val = val[KARATSUBA_LEVEL-1]; // flags delayed by KARATSUBA_LEVEL clocks
+  int_if.err = err[KARATSUBA_LEVEL-1];
+  int_if.mod = 0;
+  int_if.sop = 0;
+  int_if.eop = 0;
+  int_if.ctl = 0;
+end
+
+secp256k1_mod #(
+  .USE_MULT ( 0 )
+)
+secp256k1_mod (
+  .i_clk( i_clk ),
+  .i_rst( i_rst ),
+  .i_dat( int_if.dat ),
+  .i_val( int_if.val ),
+  .i_err( int_if.err ),
+  .o_rdy( int_if.rdy ),
+  .o_dat( o_dat ),
+  .o_err( o_err ),
+  .i_rdy( i_rdy ),
+  .o_val( o_val )
+);
+
+
+endmodule
diff --git a/zcash_fpga/src/tb/secp256k1_mod_tb.sv b/zcash_fpga/src/tb/secp256k1_mod_tb.sv
index f95e88d..a19d5a2 100644
--- a/zcash_fpga/src/tb/secp256k1_mod_tb.sv
+++ b/zcash_fpga/src/tb/secp256k1_mod_tb.sv
@@ -49,12 +49,15 @@ always_ff @ (posedge clk)
   if (out_if.val && out_if.err)
     $error(1, "%m %t ERROR: output .err asserted", $time);
 
-secp256k1_mod secp256k1_mod
-(
+secp256k1_mod #(
+  .USE_MULT ( 0 )
+)
+secp256k1_mod (
   .i_clk( clk ),
   .i_rst( rst ),
   .i_dat( in_if.dat ),
   .i_val( in_if.val ),
+  .i_err( in_if.err ),
   .o_rdy( in_if.rdy ),
   .o_dat( out_if.dat ),
   .o_err( out_if.err ),
@@ -88,7 +91,7 @@ begin
   in_dat = 1 << 433;
   expected = 256'd822752465816620949324161418291805943222876982255305228346720256;
   i = 0;
-  max = 1000;
+  max = 10000;
 
   while (i < max) begin
     in_dat = random_vector(512);
diff --git a/zcash_fpga/src/tb/secp256k1_mult_mod_tb.sv b/zcash_fpga/src/tb/secp256k1_mult_mod_tb.sv
new file mode 100644
index 0000000..a5f30ee
--- /dev/null
+++ b/zcash_fpga/src/tb/secp256k1_mult_mod_tb.sv
@@ -0,0 +1,105 @@
+/*
+  Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
+
+  This program is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+`timescale 1ps/1ps
+
+module secp256k1_mult_mod_tb ();
+import common_pkg::*;
+import secp256k1_pkg::*;
+
+localparam CLK_PERIOD = 100;
+
+logic clk, rst;
+
+if_axi_stream #(.DAT_BYTS(512/8)) in_if(clk);
+if_axi_stream #(.DAT_BYTS(256/8)) out_if(clk);
+
+initial begin
+  rst = 0;
+  repeat(2) #(20*CLK_PERIOD) rst = ~rst; // rst goes high after 20*CLK_PERIOD and low again after 40*CLK_PERIOD
+end
+
+initial begin
+  clk = 0;
+  forever #CLK_PERIOD clk = ~clk; // toggles every CLK_PERIOD, so one full clock cycle is 2*CLK_PERIOD
+end
+
+always_comb begin
+  out_if.sop = 1;
+  out_if.eop = 1;
+  out_if.ctl = 0;
+  out_if.mod = 0;
+end
+
+// Check for errors
+always_ff @ (posedge clk)
+  if (out_if.val && out_if.err)
+    $error(1, "%m %t ERROR: output .err asserted", $time); // NOTE(review): $error takes no finish_number (only $fatal does), so the leading 1 is treated as a message argument - confirm intent
+
+secp256k1_mult_mod secp256k1_mult_mod (
+  .i_clk( clk ),
+  .i_rst( rst ),
+  .i_dat_a( in_if.dat[0 +: 256] ),
+  .i_dat_b( in_if.dat[256 +: 256] ),
+  .i_val( in_if.val ),
+  .i_err( in_if.err ),
+  .o_rdy( in_if.rdy ),
+  .o_dat( out_if.dat ),
+  .o_err( out_if.err ),
+  .i_rdy( out_if.rdy ),
+  .o_val( out_if.val )
+);
+
+task test_loop();
+begin
+  integer signed get_len;
+  logic [common_pkg::MAX_SIM_BYTS*8-1:0] expected, get_dat;
+  logic [255:0] in_a, in_b;
+  integer i, max;
+
+  $display("Running test_loop...");
+  i = 0;
+  max = 10000;
+
+  while (i < max) begin
+    in_a = random_vector(256/8) % p_eq; // operands are reduced below p_eq before being multiplied
+    in_b = random_vector(256/8) % p_eq;
+    expected = (in_a * in_b) % p_eq; // reference result; full-width product assumes MAX_SIM_BYTS*8 >= 512 - TODO confirm in common_pkg
+
+    fork
+      in_if.put_stream({in_b, in_a}, 512/8); // in_a lands in dat[0 +: 256], in_b in dat[256 +: 256]
+      out_if.get_stream(get_dat, get_len);
+    join
+
+    common_pkg::compare_and_print(get_dat, expected);
+    $display("test_loop PASSED loop %d/%d", i, max);
+    i = i + 1;
+  end
+
+  $display("test_loop PASSED");
+end
+endtask;
+
+initial begin
+  out_if.rdy = 0;
+  in_if.val = 0;
+  #(40*CLK_PERIOD); // wait until the reset pulse generated above has finished
+
+  test_loop();
+
+  #1us $finish();
+end
+endmodule
\ No newline at end of file