diff --git a/ip_cores/util/src/rtl/karatsuba_ofman_mult.sv b/ip_cores/util/src/rtl/karatsuba_ofman_mult.sv
new file mode 100644
index 0000000..9ece9e5
--- /dev/null
+++ b/ip_cores/util/src/rtl/karatsuba_ofman_mult.sv
@@ -0,0 +1,110 @@
+/*
+ Multiplication using Karatsuba-Ofman algorithm.
+
+ Multiple of these can be instantiated, each one takes 1 clock cycle
+ per level (LEVEL clock cycles of latency in total).
+
+ Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+module karatsuba_ofman_mult # (
+ parameter BITS = 256, // Operand width in bits; operands are split into two BITS/2 halves
+ parameter LEVEL = 1 // Recursion depth; LEVEL == 1 multiplies the halves directly with *
+) (
+ input i_clk,
+ input [BITS-1:0] i_dat_a, // First operand
+ input [BITS-1:0] i_dat_b, // Second operand
+ output logic [BITS*2-1:0] o_dat // Product i_dat_a * i_dat_b, registered on i_clk
+);
+// NOTE(review): BITS/2 truncates for odd BITS, so the HBITS-wide slices below would miss the top operand bit - confirm BITS is even at every level
+localparam HBITS = BITS/2;
+
+logic [BITS-1:0] m0, m1, m2; // m0 = a_hi*b_hi, m2 = a_lo*b_lo, m1 = a0*a1 (magnitude of the cross term)
+logic [BITS*2-1:0] q; // Recombined product before the output register
+logic [HBITS-1:0] a0, a1; // Absolute differences of the operand halves
+logic sign, sign_; // sign_: sign computed from the current inputs; sign: copy used together with m1
+
+generate
+ always_comb begin
+ a0 = i_dat_a[0 +: HBITS] > i_dat_a[HBITS +: HBITS] ? i_dat_a[0 +: HBITS] - i_dat_a[HBITS +: HBITS] : i_dat_a[HBITS +: HBITS] - i_dat_a[0 +: HBITS]; // a0 = |a_lo - a_hi|
+ a1 = i_dat_b[HBITS +: HBITS] > i_dat_b[0 +: HBITS] ? i_dat_b[HBITS +: HBITS] - i_dat_b[0 +: HBITS] : i_dat_b[0 +: HBITS] - i_dat_b[HBITS +: HBITS]; // a1 = |b_hi - b_lo|
+ sign_ = ((i_dat_a[0 +: HBITS] < i_dat_a[HBITS +: HBITS]) ^ // Set when exactly one of (a_lo - a_hi), (b_hi - b_lo) is negative
+ (i_dat_b[HBITS +: HBITS] < i_dat_b[0 +: HBITS]));
+ q = (m0 << BITS) + ((m0 + m2 + (sign == 1 ? -m1 : m1)) << HBITS) + m2; // m0*2^BITS + (m0 + m2 +/- m1)*2^HBITS + m2, m1 subtracted when sign is set
+ end
+ // Two ways of producing m0, m1, m2 and sign follow, selected by LEVEL
+ if (LEVEL == 1) begin: GEN_REC // Base case: the three half-width products use the * operator, no extra registers
+ always_comb begin
+ m0 = i_dat_a[HBITS +: HBITS] * i_dat_b[HBITS +: HBITS];
+ m2 = i_dat_a[0 +: HBITS] * i_dat_b[0 +: HBITS];
+ m1 = (a0 * a1);
+ sign = sign_; // Products are combinational here, so the sign needs no delay
+ end
+
+ end else begin
+ // Delay sign_ by LEVEL-1 clock cycles so it is used together with the m1 coming out of the LEVEL-1 sub-multipliers
+ logic [LEVEL-2:0] sign_r;
+
+ always_comb begin
+ sign = sign_r[LEVEL-2]; // Oldest entry of the delay line
+ end
+
+ always_ff @ (posedge i_clk) begin
+ sign_r <= {sign_r, sign_}; // Shift left by one; the newest sign_ enters at bit 0
+ end
+ // m0: product of the high halves
+ karatsuba_ofman_mult # (
+ .BITS ( HBITS ),
+ .LEVEL( LEVEL-1 )
+ )
+ karatsuba_ofman_mult_m0 (
+ .i_clk ( i_clk ),
+ .i_dat_a ( i_dat_a[HBITS +: HBITS] ),
+ .i_dat_b ( i_dat_b[HBITS +: HBITS] ),
+ .o_dat ( m0 )
+ );
+ // m2: product of the low halves
+ karatsuba_ofman_mult # (
+ .BITS ( HBITS ),
+ .LEVEL( LEVEL-1 )
+ )
+ karatsuba_ofman_mult_m2 (
+ .i_clk ( i_clk ),
+ .i_dat_a ( i_dat_a[0 +: HBITS] ),
+ .i_dat_b ( i_dat_b[0 +: HBITS] ),
+ .o_dat ( m2 )
+ );
+ // m1: product of the absolute differences a0 and a1
+ karatsuba_ofman_mult # (
+ .BITS ( HBITS ),
+ .LEVEL( LEVEL-1 )
+ )
+ karatsuba_ofman_mult_m1 (
+ .i_clk ( i_clk ),
+ .i_dat_a ( a0 ),
+ .i_dat_b ( a1 ),
+ .o_dat ( m1 )
+ );
+
+
+ end
+endgenerate
+
+always_ff @ (posedge i_clk) begin
+ o_dat <= q; // The only register stage of this level (no reset)
+end
+
+endmodule
\ No newline at end of file
diff --git a/ip_cores/util/src/tb/karatsuba_ofman_mult_tb.sv b/ip_cores/util/src/tb/karatsuba_ofman_mult_tb.sv
new file mode 100644
index 0000000..a8c5488
--- /dev/null
+++ b/ip_cores/util/src/tb/karatsuba_ofman_mult_tb.sv
@@ -0,0 +1,119 @@
+/*
+ Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+`timescale 1ps/1ps
+
+module karatsuba_ofman_mult_tb ();
+import common_pkg::*;
+import secp256k1_pkg::*;
+// Testbench: feeds random 256 bit operand pairs into karatsuba_ofman_mult and compares each output with in_a * in_b
+localparam CLK_PERIOD = 100; // Delay between clock toggles in timescale units (the full clock period is twice this)
+
+logic clk, rst;
+
+if_axi_stream #(.DAT_BYTS(512/8)) in_if(clk); // Carries {in_b, in_a}
+if_axi_stream #(.DAT_BYTS(512/8)) out_if(clk); // Carries the 512 bit product
+
+// Reset pulse: rst rises at 20*CLK_PERIOD and falls again at 40*CLK_PERIOD
+initial begin
+ rst = 0;
+ repeat(2) #(20*CLK_PERIOD) rst = ~rst;
+end
+
+initial begin
+ clk = 0;
+ forever #CLK_PERIOD clk = ~clk;
+end
+// The multiplier has no framing outputs, so the remaining out_if fields are tied to constants
+always_comb begin
+ out_if.sop = 1;
+ out_if.eop = 1;
+ out_if.ctl = 0;
+ out_if.mod = 0;
+end
+
+// Check for errors ($error takes display arguments only; a leading finish number is a $fatal argument and was printed as text)
+always_ff @ (posedge clk)
+ if (out_if.val && out_if.err)
+ $error("%m %t ERROR: output .err asserted", $time);
+
+localparam LEVEL = 3;
+logic [LEVEL-1:0] val; // Delay line for in_if.val, one bit per multiplier level
+
+karatsuba_ofman_mult # (
+ .BITS (256),
+ .LEVEL (LEVEL)
+)
+karatsuba_ofman_mult (
+ .i_clk ( clk ),
+ .i_dat_a( in_if.dat[0 +: 256] ),
+ .i_dat_b( in_if.dat[256 +: 256] ),
+ .o_dat ( out_if.dat )
+);
+// Delay in_if.val by LEVEL clock cycles, matching the one register stage per level inside the multiplier
+always_ff @ (posedge clk) begin
+ if (rst) begin
+ val <= 0;
+ end else begin
+ val <= {val, in_if.val};
+ end
+end
+
+always_comb begin
+ out_if.val = val[LEVEL-1];
+ in_if.rdy = out_if.rdy;
+end
+// Runs max random multiplications; presumably put_stream/get_stream drive and sample the interfaces - see if_axi_stream
+task test_loop();
+begin
+ integer signed get_len;
+ logic [common_pkg::MAX_SIM_BYTS*8-1:0] expected, get_dat;
+ logic [255:0] in_a, in_b;
+ integer i, max;
+
+ $display("Running test_loop...");
+ i = 0;
+ max = 10000;
+
+ while (i < max) begin
+ in_a = random_vector(256/8);
+ in_b = random_vector(256/8);
+ expected = (in_a * in_b); // NOTE(review): assumes expected is at least 512 bits wide so the product is not truncated - confirm MAX_SIM_BYTS
+
+ fork
+ in_if.put_stream({in_b, in_a}, 512/8);
+ out_if.get_stream(get_dat, get_len);
+ join
+
+ common_pkg::compare_and_print(get_dat, expected);
+ $display("test_loop PASSED loop %d/%d", i, max);
+ i = i + 1;
+ end
+
+ $display("test_loop PASSED");
+end
+endtask;
+// Main sequence: idle the interfaces, wait 40*CLK_PERIOD (rst has been released by then), run the test, finish
+initial begin
+ out_if.rdy = 0;
+ in_if.val = 0;
+ #(40*CLK_PERIOD);
+
+ test_loop();
+
+ #1us $finish();
+end
+endmodule
\ No newline at end of file
diff --git a/zcash_fpga/src/rtl/secp256k1/secp256k1_mod.sv b/zcash_fpga/src/rtl/secp256k1/secp256k1_mod.sv
index 76d15e9..f1364a4 100644
--- a/zcash_fpga/src/rtl/secp256k1/secp256k1_mod.sv
+++ b/zcash_fpga/src/rtl/secp256k1/secp256k1_mod.sv
@@ -5,9 +5,6 @@
p = 2^256 - 2^32 - 2^9 - 2^8 - 2^7 - 2^6 - 2^4 - 1
- Implemented with 2 stages of 8x 256b adds and one final optional
- subtract in the case we are >= p.
-
returns o_dat = i_dat % p, where i_dat < p^2
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
@@ -26,11 +23,14 @@
along with this program. If not, see .
*/
-module secp256k1_mod (
+module secp256k1_mod #(
+ parameter USE_MULT = 0 // Set to 1 to use multiple operation (should infer DSP and use less LUTs)
+)(
input i_clk, i_rst,
// Input value
input [256*2-1:0] i_dat,
input i_val,
+ input i_err,
output logic o_rdy,
// output
output logic [255:0] o_dat,
@@ -41,53 +41,49 @@ module secp256k1_mod (
import secp256k1_pkg::*;
-logic [256*2-1:0] b, a, a_;
+logic [256*2-1:0] res0, res1;
+logic [1:0] val, err;
-always_comb begin
- a_ = (a << 32) + (a << 9) + (a << 8) + (a << 7) + (a << 6) + (a << 4) + a + b;
-end
+generate
+ if (USE_MULT == 1) begin: GEN_MULT
+ logic [256*2-1:0] c;
+ always_comb begin
+ c = (1 << 32) + (1 << 9) + (1 << 8) + (1 << 7) + (1 << 6) + (1 << 4) + 1;
+ end
+ always_ff @ (posedge i_clk) begin
+ res0 <= i_dat[511:256]*c + i_dat[255:0];
+ res1 <= res0[511:256]*c + res0[255:0];
+ end
+ end else begin
+ logic [256*2-1:0] res0_, res1_;
+ always_comb begin
+ res0_ = (i_dat[511:256] << 32) + (i_dat[511:256] << 9) + (i_dat[511:256] << 8) + (i_dat[511:256] << 7) + (i_dat[511:256] << 6) + (i_dat[511:256] << 4) + i_dat[511:256]+ i_dat[255:0];
+ res1_ = (res0[511:256] << 32) + (res0[511:256] << 9) + (res0[511:256] << 8) + (res0[511:256] << 7) + (res0[511:256] << 6) + (res0[511:256] << 4) + res0[511:256]+ res0[255:0];
+ end
+ always_ff @ (posedge i_clk) begin
+ res0 <= res0_;
+ res1 <= res1_;
+ end
+ end
+endgenerate
-enum {IDLE, S1, S2} state;
+always_comb o_rdy = i_rdy;
always_ff @ (posedge i_clk) begin
if (i_rst) begin
- a <= 0;
- b <= 0;
- state <= IDLE;
+ val <= 0;
+ err <= 0;
o_val <= 0;
- o_rdy <= 0;
o_err <= 0;
end else begin
- o_rdy <= 0;
- o_dat <= a_ >= p_eq ? (a_ - p_eq) : a_;
-
- case(state)
- IDLE: begin
- o_rdy <= 1;
- o_val <= 0;
- if (i_val && o_rdy) begin
- a <= i_dat[511:256];
- b <= i_dat[255:0];
- o_rdy <= 0;
- state <= S1;
- end
- end
- S1: begin
- a <= a_[511:256];
- b <= a_[255:0];
- state <= S2;
- end
- S2: begin
- o_val <= 1;
- o_err <= a_ >= 2* p_eq;
- if (o_val && i_rdy) begin
- state <=IDLE;
- o_rdy <= 1;
- o_val <= 0;
- o_err <= 0;
- end
- end
- endcase
+ o_val <= 0;
+ val <= val << 1;
+ err <= err << 1;
+ val[0] <= i_val;
+ err[0] <= i_err;
+ o_dat <= res1 >= p_eq ? res1 - p_eq : res1;
+ o_err <= err[1] || (res1 >= 2*p_eq);
+ o_val <= val[1];
end
end
diff --git a/zcash_fpga/src/rtl/secp256k1/secp256k1_mult_mod.sv b/zcash_fpga/src/rtl/secp256k1/secp256k1_mult_mod.sv
new file mode 100644
index 0000000..26d6f06
--- /dev/null
+++ b/zcash_fpga/src/rtl/secp256k1/secp256k1_mult_mod.sv
@@ -0,0 +1,99 @@
+/*
+ This performs a 256 bit multiplication followed by modulus
+ operation.
+
+ Using Karatsuba-Ofman multiplication, where the factor of splitting
+ is parameterized.
+
+ Each level in Karatsuba-Ofman multiplication adds 1 clock cycle.
+ The modulus reduction takes 3 clock cycles.
+
+ Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+module secp256k1_mult_mod (
+ input i_clk, i_rst,
+ // Input value
+ input [255:0] i_dat_a,
+ input [255:0] i_dat_b,
+ input i_val, // Marks i_dat_a / i_dat_b as valid
+ input i_err, // Error flag carried along with the data
+ output logic o_rdy, // Follows the rdy signal returned by secp256k1_mod
+ // output
+ output logic [255:0] o_dat,
+ input i_rdy,
+ output logic o_val,
+ output logic o_err
+);
+
+import secp256k1_pkg::*;
+import common_pkg::*;
+
+localparam KARATSUBA_LEVEL = 3; // LEVEL passed to the multiplier; also the depth of the val/err delay lines
+if_axi_stream #(.DAT_BYTS(512/8)) int_if(i_clk); // 512 bit product between the multiplier and secp256k1_mod
+
+always_comb o_rdy = int_if.rdy;
+
+logic [KARATSUBA_LEVEL-1:0] val, err; // i_val / i_err delayed alongside the multiplier
+// 256 x 256 bit multiply; the full 512 bit product is placed on int_if.dat
+karatsuba_ofman_mult # (
+ .BITS ( 256 ),
+ .LEVEL ( KARATSUBA_LEVEL )
+)
+karatsuba_ofman_mult (
+ .i_clk ( i_clk ),
+ .i_dat_a( i_dat_a ),
+ .i_dat_b( i_dat_b ),
+ .o_dat ( int_if.dat )
+);
+// NOTE(review): val/err shift on every clock and are not gated by o_rdy or i_rdy, so a stalled consumer does not appear to pause the pipeline - confirm
+always_ff @ (posedge i_clk) begin
+ if (i_rst) begin
+ val <= 0;
+ err <= 0;
+ end else begin
+ val <= {val, i_val}; // Shift left by one, newest i_val enters at bit 0
+ err <= {err, i_err};
+ end
+end
+// The oldest delay-line entries qualify the product; the remaining int_if fields are tied to 0
+always_comb begin
+ int_if.val = val[KARATSUBA_LEVEL-1];
+ int_if.err = err[KARATSUBA_LEVEL-1];
+ int_if.mod = 0;
+ int_if.sop = 0;
+ int_if.eop = 0;
+ int_if.ctl = 0;
+end
+// Reduction of the 512 bit product to the 256 bit o_dat, instantiated with USE_MULT = 0
+secp256k1_mod #(
+ .USE_MULT ( 0 )
+)
+secp256k1_mod (
+ .i_clk( i_clk ),
+ .i_rst( i_rst ),
+ .i_dat( int_if.dat ),
+ .i_val( int_if.val ),
+ .i_err( int_if.err ),
+ .o_rdy( int_if.rdy ),
+ .o_dat( o_dat ),
+ .o_err( o_err ),
+ .i_rdy( i_rdy ),
+ .o_val( o_val )
+);
+
+
+endmodule
diff --git a/zcash_fpga/src/tb/secp256k1_mod_tb.sv b/zcash_fpga/src/tb/secp256k1_mod_tb.sv
index f95e88d..a19d5a2 100644
--- a/zcash_fpga/src/tb/secp256k1_mod_tb.sv
+++ b/zcash_fpga/src/tb/secp256k1_mod_tb.sv
@@ -49,12 +49,15 @@ always_ff @ (posedge clk)
if (out_if.val && out_if.err)
$error(1, "%m %t ERROR: output .err asserted", $time);
-secp256k1_mod secp256k1_mod
-(
+secp256k1_mod #(
+ .USE_MULT ( 0 )
+)
+secp256k1_mod (
.i_clk( clk ),
.i_rst( rst ),
.i_dat( in_if.dat ),
.i_val( in_if.val ),
+ .i_err( in_if.err ),
.o_rdy( in_if.rdy ),
.o_dat( out_if.dat ),
.o_err( out_if.err ),
@@ -88,7 +91,7 @@ begin
in_dat = 1 << 433;
expected = 256'd822752465816620949324161418291805943222876982255305228346720256;
i = 0;
- max = 1000;
+ max = 10000;
while (i < max) begin
in_dat = random_vector(512);
diff --git a/zcash_fpga/src/tb/secp256k1_mult_mod_tb.sv b/zcash_fpga/src/tb/secp256k1_mult_mod_tb.sv
new file mode 100644
index 0000000..a5f30ee
--- /dev/null
+++ b/zcash_fpga/src/tb/secp256k1_mult_mod_tb.sv
@@ -0,0 +1,105 @@
+/*
+ Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+`timescale 1ps/1ps
+
+module secp256k1_mult_mod_tb ();
+import common_pkg::*;
+import secp256k1_pkg::*;
+// Testbench: feeds random operand pairs reduced modulo p_eq into secp256k1_mult_mod and compares with (in_a * in_b) % p_eq
+localparam CLK_PERIOD = 100; // Delay between clock toggles in timescale units (the full clock period is twice this)
+
+logic clk, rst;
+
+if_axi_stream #(.DAT_BYTS(512/8)) in_if(clk); // Carries {in_b, in_a}
+if_axi_stream #(.DAT_BYTS(256/8)) out_if(clk); // Carries the 256 bit result
+// Reset pulse: rst rises at 20*CLK_PERIOD and falls again at 40*CLK_PERIOD
+initial begin
+ rst = 0;
+ repeat(2) #(20*CLK_PERIOD) rst = ~rst;
+end
+
+initial begin
+ clk = 0;
+ forever #CLK_PERIOD clk = ~clk;
+end
+// The DUT has no framing outputs, so the remaining out_if fields are tied to constants
+always_comb begin
+ out_if.sop = 1;
+ out_if.eop = 1;
+ out_if.ctl = 0;
+ out_if.mod = 0;
+end
+
+// Check for errors ($error takes display arguments only; a leading finish number is a $fatal argument and was printed as text)
+always_ff @ (posedge clk)
+ if (out_if.val && out_if.err)
+ $error("%m %t ERROR: output .err asserted", $time);
+
+secp256k1_mult_mod secp256k1_mult_mod (
+ .i_clk( clk ),
+ .i_rst( rst ),
+ .i_dat_a( in_if.dat[0 +: 256] ),
+ .i_dat_b( in_if.dat[256 +: 256] ),
+ .i_val( in_if.val ),
+ .i_err( in_if.err ),
+ .o_rdy( in_if.rdy ),
+ .o_dat( out_if.dat ),
+ .o_err( out_if.err ),
+ .i_rdy( out_if.rdy ),
+ .o_val( out_if.val )
+);
+// Runs max random multiply-and-reduce checks; presumably put_stream/get_stream drive and sample the interfaces - see if_axi_stream
+task test_loop();
+begin
+ integer signed get_len;
+ logic [common_pkg::MAX_SIM_BYTS*8-1:0] expected, get_dat;
+ logic [255:0] in_a, in_b;
+ integer i, max;
+
+ $display("Running test_loop...");
+ i = 0;
+ max = 10000;
+
+ while (i < max) begin
+ in_a = random_vector(256/8) % p_eq; // Operands are reduced below p_eq before use
+ in_b = random_vector(256/8) % p_eq;
+ expected = (in_a * in_b) % p_eq; // NOTE(review): assumes expected is at least 512 bits wide so the product is not truncated - confirm MAX_SIM_BYTS
+
+ fork
+ in_if.put_stream({in_b, in_a}, 512/8);
+ out_if.get_stream(get_dat, get_len);
+ join
+
+ common_pkg::compare_and_print(get_dat, expected);
+ $display("test_loop PASSED loop %d/%d", i, max);
+ i = i + 1;
+ end
+
+ $display("test_loop PASSED");
+end
+endtask;
+// Main sequence: idle the interfaces, wait 40*CLK_PERIOD (rst has been released by then), run the test, finish
+initial begin
+ out_if.rdy = 0;
+ in_if.val = 0;
+ #(40*CLK_PERIOD);
+
+ test_loop();
+
+ #1us $finish();
+end
+endmodule
\ No newline at end of file