Added 256 bit multiplier and testbenches

This commit is contained in:
bsdevlin 2019-03-18 16:05:55 -04:00
parent e1705a1d5b
commit b94b2a7c5d
6 changed files with 478 additions and 46 deletions

View File

@ -0,0 +1,110 @@
/*
Multiplication using Karatsuba-Ofman algorithm.
Multiple of these can be instantiated, each level adds 1 clock cycle
of latency.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// Pipelined unsigned multiplier, o_dat = i_dat_a * i_dat_b, built with the
// Karatsuba-Ofman decomposition.
//
// Each operand is split into a high and a low half of HBITS = BITS/2 bits and
// three half-width products are formed:
//   m0 = a_hi * b_hi
//   m2 = a_lo * b_lo
//   m1 = |a_lo - a_hi| * |b_hi - b_lo|   (sign tracked separately)
// The full product is then
//   (m0 << BITS) + ((m0 + m2 +/- m1) << HBITS) + m2
//
// Parameters:
//   BITS  - operand width. It is halved at every level, so it must stay even
//           at every level that recurses (i.e. be divisible by 2**LEVEL).
//   LEVEL - number of decomposition levels. At the last level the three
//           products use the native '*' operator. Values below 1 are treated
//           as 1 so that the recursion always terminates.
//
// Latency: the output of every level is registered once, so a result appears
// LEVEL clock cycles after its operands (1 cycle when LEVEL <= 1). There is no
// reset and no stall input; a new pair of operands can be applied every clock.
module karatsuba_ofman_mult # (
  parameter BITS = 256,
  parameter LEVEL = 1
) (
  input                     i_clk,
  input [BITS-1:0]          i_dat_a,
  input [BITS-1:0]          i_dat_b,
  output logic [BITS*2-1:0] o_dat
);

localparam HBITS = BITS/2;

logic [BITS-1:0]   m0, m1, m2;  // Half-width products (each HBITS x HBITS -> BITS bits)
logic [BITS*2-1:0] q;           // Recombined product, before the output register
logic [HBITS-1:0]  a0, a1;      // |a_lo - a_hi| and |b_hi - b_lo|
logic              sign, sign_; // sign_: sign of the m1 term for the current inputs
                                // sign : sign_ delayed to line up with m1

generate
  always_comb begin
    // Absolute differences of the halves, so the middle multiplier stays unsigned
    a0 = i_dat_a[0 +: HBITS] > i_dat_a[HBITS +: HBITS] ? i_dat_a[0 +: HBITS] - i_dat_a[HBITS +: HBITS] : i_dat_a[HBITS +: HBITS] - i_dat_a[0 +: HBITS];
    a1 = i_dat_b[HBITS +: HBITS] > i_dat_b[0 +: HBITS] ? i_dat_b[HBITS +: HBITS] - i_dat_b[0 +: HBITS] : i_dat_b[0 +: HBITS] - i_dat_b[HBITS +: HBITS];
    // The middle term is negative when exactly one of the two differences is negative
    sign_ = ((i_dat_a[0 +: HBITS] < i_dat_a[HBITS +: HBITS]) ^
             (i_dat_b[HBITS +: HBITS] < i_dat_b[0 +: HBITS]));
    // All operands are extended to the 2*BITS width of q before -m1 is taken,
    // so the two's complement negation is exact
    q = (m0 << BITS) + ((m0 + m2 + (sign == 1 ? -m1 : m1)) << HBITS) + m2;
  end

  // Base case. "<=" rather than "==" so that LEVEL values below 1 cannot
  // cause the module to instantiate itself without end.
  if (LEVEL <= 1) begin: GEN_REC
    always_comb begin
      m0 = i_dat_a[HBITS +: HBITS] * i_dat_b[HBITS +: HBITS];
      m2 = i_dat_a[0 +: HBITS] * i_dat_b[0 +: HBITS];
      m1 = (a0 * a1);
      sign = sign_;
    end
  end else begin
    // pipeline the other non-mult values x clock cycles and add them after multipliers
    // The child multipliers take LEVEL-1 cycles, so sign_ is delayed by LEVEL-1 cycles
    logic [LEVEL-2:0] sign_r;

    always_comb begin
      sign = sign_r[LEVEL-2];
    end

    always_ff @ (posedge i_clk) begin
      // Shift left by one; the concatenation is truncated to the width of sign_r
      sign_r <= {sign_r, sign_};
    end

    // m0 = a_hi * b_hi
    karatsuba_ofman_mult # (
      .BITS ( HBITS ),
      .LEVEL( LEVEL-1 )
    )
    karatsuba_ofman_mult_m0 (
      .i_clk ( i_clk ),
      .i_dat_a ( i_dat_a[HBITS +: HBITS] ),
      .i_dat_b ( i_dat_b[HBITS +: HBITS] ),
      .o_dat ( m0 )
    );

    // m2 = a_lo * b_lo
    karatsuba_ofman_mult # (
      .BITS ( HBITS ),
      .LEVEL( LEVEL-1 )
    )
    karatsuba_ofman_mult_m2 (
      .i_clk ( i_clk ),
      .i_dat_a ( i_dat_a[0 +: HBITS] ),
      .i_dat_b ( i_dat_b[0 +: HBITS] ),
      .o_dat ( m2 )
    );

    // m1 = |a_lo - a_hi| * |b_hi - b_lo|
    karatsuba_ofman_mult # (
      .BITS ( HBITS ),
      .LEVEL( LEVEL-1 )
    )
    karatsuba_ofman_mult_m1 (
      .i_clk ( i_clk ),
      .i_dat_a ( a0 ),
      .i_dat_b ( a1 ),
      .o_dat ( m1 )
    );
  end
endgenerate

// One register stage per level
always_ff @ (posedge i_clk) begin
  o_dat <= q;
end

endmodule

View File

@ -0,0 +1,119 @@
/*
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
`timescale 1ps/1ps
// Testbench for karatsuba_ofman_mult.
// Pushes random pairs of 256 bit operands through a 3-level multiplier and
// compares each 512 bit result against the simulator's own '*' operator.
module karatsuba_ofman_mult_tb ();
import common_pkg::*;
import secp256k1_pkg::*;

// clk toggles every CLK_PERIOD time units, so one full cycle is 2*CLK_PERIOD
localparam CLK_PERIOD = 100;
// Number of Karatsuba levels in the DUT; also the depth of the valid pipeline below
localparam LEVEL = 3;

logic clk, rst;

if_axi_stream #(.DAT_BYTS(512/8)) in_if(clk);  // {b, a} operands into the DUT
if_axi_stream #(.DAT_BYTS(512/8)) out_if(clk); // 512 bit product out of the DUT

// rst is low at time 0, goes high at 20*CLK_PERIOD and low again at 40*CLK_PERIOD
initial begin
  rst = 0;
  repeat(2) #(20*CLK_PERIOD) rst = ~rst;
end

initial begin
  clk = 0;
  forever #CLK_PERIOD clk = ~clk;
end

// Every output word is presented as a complete single-beat packet
always_comb begin
  out_if.sop = 1;
  out_if.eop = 1;
  out_if.ctl = 0;
  out_if.mod = 0;
end

// Check for errors
// $error takes a format string first (the finish-number argument belongs to $fatal only)
// NOTE(review): nothing in this module drives out_if.err - confirm whether this check can fire here
always_ff @ (posedge clk)
  if (out_if.val && out_if.err)
    $error("%m %t ERROR: output .err asserted", $time);

logic [LEVEL-1:0] val;

karatsuba_ofman_mult # (
  .BITS (256),
  .LEVEL (LEVEL)
)
karatsuba_ofman_mult (
  .i_clk ( clk ),
  .i_dat_a( in_if.dat[0 +: 256] ),
  .i_dat_b( in_if.dat[256 +: 256] ),
  .o_dat ( out_if.dat )
);

// The multiplier has no valid port, so the input valid is delayed here by
// LEVEL clock cycles to mark when the matching product is on out_if.dat
always_ff @ (posedge clk) begin
  if (rst) begin
    val <= 0;
  end else begin
    val <= {val, in_if.val};
  end
end

always_comb begin
  out_if.val = val[LEVEL-1];
  in_if.rdy = out_if.rdy;
end

// Runs 10000 random multiplications, one at a time, checking each result.
// NOTE(review): random_vector, put_stream, get_stream and compare_and_print are
// defined outside this file; presumably compare_and_print flags a mismatch - confirm.
task test_loop();
begin
  integer signed get_len;
  logic [common_pkg::MAX_SIM_BYTS*8-1:0] expected, get_dat;
  logic [255:0] in_a, in_b;
  integer i, max;

  $display("Running test_loop...");
  i = 0;
  max = 10000;

  while (i < max) begin
    in_a = random_vector(256/8);
    in_b = random_vector(256/8);
    // Evaluated at the width of 'expected', so the product is not truncated to 256 bits
    // (assumes MAX_SIM_BYTS*8 >= 512 - TODO confirm)
    expected = (in_a * in_b);

    // Drive the operands and collect the result concurrently
    fork
      in_if.put_stream({in_b, in_a}, 512/8);
      out_if.get_stream(get_dat, get_len);
    join

    common_pkg::compare_and_print(get_dat, expected);
    $display("test_loop PASSED loop %d/%d", i, max);
    i = i + 1;
  end

  $display("test_loop PASSED");
end
endtask

initial begin
  out_if.rdy = 0;
  in_if.val = 0;
  // Wait until the reset pulse generated above has been released
  #(40*CLK_PERIOD);

  test_loop();

  #1us $finish();
end

endmodule

View File

@ -5,9 +5,6 @@
p = 2^256 - 2^32 - 2^9 - 2^8 - 2^7 - 2^6 - 2^4 - 1
Implemented with 2 stages of 8x 256b adds and one final optional
subtract in the case we are >= p.
returns o_dat = i_dat % p, where i_dat < p^2
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
@ -26,11 +23,14 @@
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
module secp256k1_mod (
module secp256k1_mod #(
parameter USE_MULT = 0 // Set to 1 to use multiple operation (should infer DSP and use less LUTs)
)(
input i_clk, i_rst,
// Input value
input [256*2-1:0] i_dat,
input i_val,
input i_err,
output logic o_rdy,
// output
output logic [255:0] o_dat,
@ -41,53 +41,49 @@ module secp256k1_mod (
import secp256k1_pkg::*;
logic [256*2-1:0] b, a, a_;
logic [256*2-1:0] res0, res1;
logic [1:0] val, err;
always_comb begin
a_ = (a << 32) + (a << 9) + (a << 8) + (a << 7) + (a << 6) + (a << 4) + a + b;
end
generate
if (USE_MULT == 1) begin: GEN_MULT
logic [256*2-1:0] c;
always_comb begin
c = (1 << 32) + (1 << 9) + (1 << 8) + (1 << 7) + (1 << 6) + (1 << 4) + 1;
end
always_ff @ (posedge i_clk) begin
res0 <= i_dat[511:256]*c + i_dat[255:0];
res1 <= res0[511:256]*c + res0[255:0];
end
end else begin
logic [256*2-1:0] res0_, res1_;
always_comb begin
res0_ = (i_dat[511:256] << 32) + (i_dat[511:256] << 9) + (i_dat[511:256] << 8) + (i_dat[511:256] << 7) + (i_dat[511:256] << 6) + (i_dat[511:256] << 4) + i_dat[511:256]+ i_dat[255:0];
res1_ = (res0[511:256] << 32) + (res0[511:256] << 9) + (res0[511:256] << 8) + (res0[511:256] << 7) + (res0[511:256] << 6) + (res0[511:256] << 4) + res0[511:256]+ res0[255:0];
end
always_ff @ (posedge i_clk) begin
res0 <= res0_;
res1 <= res1_;
end
end
endgenerate
enum {IDLE, S1, S2} state;
always_comb o_rdy = i_rdy;
always_ff @ (posedge i_clk) begin
if (i_rst) begin
a <= 0;
b <= 0;
state <= IDLE;
val <= 0;
err <= 0;
o_val <= 0;
o_rdy <= 0;
o_err <= 0;
end else begin
o_rdy <= 0;
o_dat <= a_ >= p_eq ? (a_ - p_eq) : a_;
case(state)
IDLE: begin
o_rdy <= 1;
o_val <= 0;
if (i_val && o_rdy) begin
a <= i_dat[511:256];
b <= i_dat[255:0];
o_rdy <= 0;
state <= S1;
end
end
S1: begin
a <= a_[511:256];
b <= a_[255:0];
state <= S2;
end
S2: begin
o_val <= 1;
o_err <= a_ >= 2* p_eq;
if (o_val && i_rdy) begin
state <=IDLE;
o_rdy <= 1;
o_val <= 0;
o_err <= 0;
end
end
endcase
o_val <= 0;
val <= val << 1;
err <= err << 1;
val[0] <= i_val;
err[0] <= i_err;
o_dat <= res1 >= p_eq ? res1 - p_eq : res1;
o_err <= err[1] || (res1 >= 2*p_eq);
o_val <= val[1];
end
end

View File

@ -0,0 +1,99 @@
/*
This performs a 256 bit multiplication followed by modulus
operation.
Using Karatsuba-Ofman multiplication, where the factor of splitting
is parameterized.
Each level in Karatsuba-Ofman multiplication adds 1 clock cycle.
The modulus reduction takes 3 clock cycles.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
// 256 x 256 bit multiplication followed by the secp256k1_mod reduction.
//
// Parameters:
//   KARATSUBA_LEVEL - number of Karatsuba-Ofman levels used by the multiplier,
//                     which is also its latency in clock cycles. Defaults to 3.
//                     Must be at least 1; 256 has to stay evenly divisible as
//                     it is halved at every level.
//
// Ports:
//   i_dat_a, i_dat_b - operands, qualified by i_val / i_err
//   o_rdy            - ready reported back by the reduction stage
//   o_dat            - result from secp256k1_mod, qualified by o_val / o_err
//   i_rdy            - ready from the downstream consumer
//
// The valid/error shift registers below advance on every clock; there is no
// stall logic in this module.
module secp256k1_mult_mod #(
  parameter KARATSUBA_LEVEL = 3
)(
  input i_clk, i_rst,
  // Input value
  input [255:0] i_dat_a,
  input [255:0] i_dat_b,
  input i_val,
  input i_err,
  output logic o_rdy,
  // output
  output logic [255:0] o_dat,
  input i_rdy,
  output logic o_val,
  output logic o_err
);

import secp256k1_pkg::*;
import common_pkg::*;

// Carries the 512 bit product from the multiplier to the reduction stage
if_axi_stream #(.DAT_BYTS(512/8)) int_if(i_clk);

always_comb o_rdy = int_if.rdy;

// i_val / i_err delayed by KARATSUBA_LEVEL cycles to line up with the product
logic [KARATSUBA_LEVEL-1:0] val, err;

karatsuba_ofman_mult # (
  .BITS ( 256 ),
  .LEVEL ( KARATSUBA_LEVEL )
)
karatsuba_ofman_mult (
  .i_clk ( i_clk ),
  .i_dat_a( i_dat_a ),
  .i_dat_b( i_dat_b ),
  .o_dat ( int_if.dat )
);

always_ff @ (posedge i_clk) begin
  if (i_rst) begin
    val <= 0;
    err <= 0;
  end else begin
    // Shift left by one; the concatenation is truncated to KARATSUBA_LEVEL bits
    val <= {val, i_val};
    err <= {err, i_err};
  end
end

always_comb begin
  int_if.val = val[KARATSUBA_LEVEL-1];
  int_if.err = err[KARATSUBA_LEVEL-1];
  // Remaining stream fields are tied off
  int_if.mod = 0;
  int_if.sop = 0;
  int_if.eop = 0;
  int_if.ctl = 0;
end

secp256k1_mod #(
  .USE_MULT ( 0 )
)
secp256k1_mod (
  .i_clk( i_clk ),
  .i_rst( i_rst ),
  .i_dat( int_if.dat ),
  .i_val( int_if.val ),
  .i_err( int_if.err ),
  .o_rdy( int_if.rdy ),
  .o_dat( o_dat ),
  .o_err( o_err ),
  .i_rdy( i_rdy ),
  .o_val( o_val )
);

endmodule

View File

@ -49,12 +49,15 @@ always_ff @ (posedge clk)
if (out_if.val && out_if.err)
$error(1, "%m %t ERROR: output .err asserted", $time);
secp256k1_mod secp256k1_mod
(
secp256k1_mod #(
.USE_MULT ( 0 )
)
secp256k1_mod (
.i_clk( clk ),
.i_rst( rst ),
.i_dat( in_if.dat ),
.i_val( in_if.val ),
.i_err( in_if.err ),
.o_rdy( in_if.rdy ),
.o_dat( out_if.dat ),
.o_err( out_if.err ),
@ -88,7 +91,7 @@ begin
in_dat = 1 << 433;
expected = 256'd822752465816620949324161418291805943222876982255305228346720256;
i = 0;
max = 1000;
max = 10000;
while (i < max) begin
in_dat = random_vector(512);

View File

@ -0,0 +1,105 @@
/*
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
`timescale 1ps/1ps
// Testbench for secp256k1_mult_mod.
// Pushes random pairs of 256 bit operands (each reduced modulo p_eq) through
// the DUT and compares every result against (a * b) % p_eq.
module secp256k1_mult_mod_tb ();
import common_pkg::*;
import secp256k1_pkg::*;

// clk toggles every CLK_PERIOD time units, so one full cycle is 2*CLK_PERIOD
localparam CLK_PERIOD = 100;

logic clk, rst;

if_axi_stream #(.DAT_BYTS(512/8)) in_if(clk);  // {b, a} operands into the DUT
if_axi_stream #(.DAT_BYTS(256/8)) out_if(clk); // 256 bit reduced result out of the DUT

// rst is low at time 0, goes high at 20*CLK_PERIOD and low again at 40*CLK_PERIOD
initial begin
  rst = 0;
  repeat(2) #(20*CLK_PERIOD) rst = ~rst;
end

initial begin
  clk = 0;
  forever #CLK_PERIOD clk = ~clk;
end

// Every output word is presented as a complete single-beat packet
always_comb begin
  out_if.sop = 1;
  out_if.eop = 1;
  out_if.ctl = 0;
  out_if.mod = 0;
end

// Check for errors
// $error takes a format string first (the finish-number argument belongs to $fatal only)
always_ff @ (posedge clk)
  if (out_if.val && out_if.err)
    $error("%m %t ERROR: output .err asserted", $time);

secp256k1_mult_mod secp256k1_mult_mod (
  .i_clk( clk ),
  .i_rst( rst ),
  .i_dat_a( in_if.dat[0 +: 256] ),
  .i_dat_b( in_if.dat[256 +: 256] ),
  .i_val( in_if.val ),
  .i_err( in_if.err ),
  .o_rdy( in_if.rdy ),
  .o_dat( out_if.dat ),
  .o_err( out_if.err ),
  .i_rdy( out_if.rdy ),
  .o_val( out_if.val )
);

// Runs 10000 random modular multiplications, one at a time, checking each result.
// NOTE(review): random_vector, put_stream, get_stream and compare_and_print are
// defined outside this file; presumably compare_and_print flags a mismatch - confirm.
task test_loop();
begin
  integer signed get_len;
  logic [common_pkg::MAX_SIM_BYTS*8-1:0] expected, get_dat;
  logic [255:0] in_a, in_b;
  integer i, max;

  $display("Running test_loop...");
  i = 0;
  max = 10000;

  while (i < max) begin
    // Operands are reduced first so that both are below p_eq
    in_a = random_vector(256/8) % p_eq;
    in_b = random_vector(256/8) % p_eq;
    // Evaluated at the width of 'expected', so the product is not truncated before the modulo
    // (assumes MAX_SIM_BYTS*8 >= 512 - TODO confirm)
    expected = (in_a * in_b) % p_eq;

    // Drive the operands and collect the result concurrently
    fork
      in_if.put_stream({in_b, in_a}, 512/8);
      out_if.get_stream(get_dat, get_len);
    join

    common_pkg::compare_and_print(get_dat, expected);
    $display("test_loop PASSED loop %d/%d", i, max);
    i = i + 1;
  end

  $display("test_loop PASSED");
end
endtask

initial begin
  out_if.rdy = 0;
  in_if.val = 0;
  // Wait until the reset pulse generated above has been released
  #(40*CLK_PERIOD);

  test_loop();

  #1us $finish();
end

endmodule