New files for point multiplication

This commit is contained in:
bsdevlin 2019-03-20 23:16:13 -04:00
parent 54d09f1744
commit ee603cbf0e
15 changed files with 1165 additions and 104 deletions

View File

@ -97,7 +97,9 @@ interface if_axi_stream # (
endtask
// Task used in simulation to drive data on a source interface
task automatic put_stream(input logic [common_pkg::MAX_SIM_BYTS*8-1:0] data, input integer signed len);
task automatic put_stream(input logic [common_pkg::MAX_SIM_BYTS*8-1:0] data,
input integer signed len,
input logic [CTL_BITS-1:0] ctl_in = 0);
logic sop_l=0;
reset_source();
@ -105,6 +107,7 @@ interface if_axi_stream # (
while (len > 0) begin
sop = ~sop_l;
ctl = ctl_in;
eop = len - DAT_BYTS <= 0;
val = 1;
dat = data;
@ -154,18 +157,29 @@ interface if_axi_mm # (
input i_clk
);
logic [A_BITS-1:0] raddr;
logic [A_BITS-1:0] waddr;
logic [D_BITS-1:0] rdat;
logic [D_BITS-1:0] wdat;
logic rval;
logic wval;
logic rrdy;
logic wrdy;
logic [A_BITS-1:0] addr;
logic [D_BITS-1:0] rd_dat;
logic [D_BITS-1:0] wr_dat;
logic wr;
logic rd;
logic rd_dat_val;
logic wait_rq;
modport sink (input raddr, waddr, wdat, wval, rrdy, i_clk, output rdat, rval, wrdy);
modport source (input rdat, rval, wrdy , i_clk, output raddr, waddr, wdat, wval, rrdy);
modport sink (input addr, wr_dat, wr, rd, i_clk, output rd_dat, rd_dat_val, wait_rq, import task reset_sink());
modport source (input rd_dat, rd_dat_val, wait_rq , i_clk, output addr, wr_dat, wr, rd, import task reset_source());
// Clears every signal that the source modport lists as an output
// (addr, wr_dat, wr, rd) using nonblocking assignments.
task reset_source();
addr <= 0;
wr_dat <= 0;
wr <= 0;
rd <= 0;
endtask
// Clears every signal that the sink modport lists as an output
// (rd_dat, rd_dat_val, wait_rq) using nonblocking assignments.
task reset_sink();
rd_dat <= 0;
rd_dat_val <= 0;
wait_rq <= 0;
endtask
endinterface

View File

@ -2,6 +2,9 @@
Accumulating multiplier. Inputs can be of different bit size and the
level each is accumulated over can be different.
If using Xilinx FPGA it is best to have a 1.5:1 ratio on BITS,
since the multiplier is 27x18
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
@ -35,7 +38,7 @@ module accum_mult # (
output logic o_val,
input i_rdy
);
localparam BITS_A_LVL = BITS_A/LEVEL_A;
localparam BITS_B_LVL = BITS_B/LEVEL_B;

View File

@ -120,12 +120,13 @@ always_ff @ (posedge i_clk) begin
end
end
//To do the multiplications
// Do the multiplications
generate
if (MULTIPLIER == "ACCUM_MULT") begin: MULTIPLIER_GEN
accum_mult # (
.BITS_A ( OUT_BITS +8 ),
.LEVEL_A ( 4 ) // 32 bit multiply
.LEVEL_A ( 6 ), // 32 bit multiply
.LEVEL_B ( 4 )
)
accum_mult (
.i_clk ( i_clk ),

View File

@ -2,7 +2,7 @@
Multiplication using Karatsuba-Ofman algorithm.
Multiple of these can be instantiated, each one takes 2 clock cycles
per level.
per level. Fully pipelined so can accept a new input every clock.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
@ -22,12 +22,19 @@
module karatsuba_ofman_mult # (
parameter BITS = 256,
parameter CTL_BITS = 8,
parameter LEVEL = 1
) (
input i_clk,
input [BITS-1:0] i_dat_a,
input [BITS-1:0] i_dat_b,
output logic [BITS*2-1:0] o_dat
input i_clk,
input [BITS-1:0] i_dat_a,
input [BITS-1:0] i_dat_b,
input i_val,
input [CTL_BITS-1:0] i_ctl,
input i_rdy,
output logic o_rdy,
output logic o_val,
output logic [CTL_BITS-1:0] o_ctl,
output logic [BITS*2-1:0] o_dat
);
localparam HBITS = BITS/2;
@ -36,6 +43,8 @@ logic [BITS-1:0] m0, m1, m2;
logic [BITS*2-1:0] q;
logic [HBITS-1:0] a0, a1;
logic sign, sign_;
logic val;
logic [CTL_BITS-1:0] ctl;
generate
always_comb begin
@ -52,7 +61,11 @@ generate
m2 = i_dat_a[0 +: HBITS] * i_dat_b[0 +: HBITS];
m1 = (a0 * a1);
sign = sign_;
o_rdy = i_rdy;
val = i_val;
ctl = i_ctl;
end
end else begin
// pipeline the other non-mult values x clock cycles and add them after multipliers
@ -67,35 +80,56 @@ generate
end
karatsuba_ofman_mult # (
.BITS ( HBITS ),
.LEVEL( LEVEL-1 )
.BITS ( HBITS ),
.CTL_BITS ( CTL_BITS ),
.LEVEL ( LEVEL-1 )
)
karatsuba_ofman_mult_m0 (
.i_clk ( i_clk ),
.i_dat_a ( i_dat_a[HBITS +: HBITS] ),
.i_dat_b ( i_dat_b[HBITS +: HBITS] ),
.i_val ( i_val ),
.o_val ( val ),
.i_ctl ( i_ctl ),
.o_ctl ( ctl ),
.i_rdy ( i_rdy ),
.o_rdy ( o_rdy ),
.o_dat ( m0 )
);
karatsuba_ofman_mult # (
.BITS ( HBITS ),
.LEVEL( LEVEL-1 )
.BITS ( HBITS ),
.CTL_BITS ( 1 ),
.LEVEL ( LEVEL-1 )
)
karatsuba_ofman_mult_m2 (
.i_clk ( i_clk ),
.i_dat_a ( i_dat_a[0 +: HBITS] ),
.i_dat_b ( i_dat_b[0 +: HBITS] ),
.i_val ( i_val ),
.o_val (),
.i_ctl ( 1'd0 ),
.o_ctl (),
.i_rdy ( i_rdy ),
.o_rdy (),
.o_dat ( m2 )
);
karatsuba_ofman_mult # (
.BITS ( HBITS ),
.LEVEL( LEVEL-1 )
.BITS ( HBITS ),
.CTL_BITS ( 1 ),
.LEVEL ( LEVEL-1 )
)
karatsuba_ofman_mult_m1 (
.i_clk ( i_clk ),
.i_dat_a ( a0 ),
.i_dat_b ( a1 ),
.i_val ( i_val ),
.o_val (),
.i_ctl ( 1'd0 ),
.o_ctl (),
.i_rdy ( i_rdy ),
.o_rdy (),
.o_dat ( m1 )
);
@ -105,6 +139,8 @@ endgenerate
always_ff @ (posedge i_clk) begin
o_dat <= q;
o_val <= val;
o_ctl <= ctl;
end
endmodule

View File

@ -23,8 +23,8 @@ localparam CLK_PERIOD = 100;
logic clk, rst;
if_axi_stream #(.DAT_BYTS(512/8)) in_if(clk);
if_axi_stream #(.DAT_BYTS(512/8)) out_if(clk);
if_axi_stream #(.DAT_BYTS(66)) in_if(clk);
if_axi_stream #(.DAT_BYTS(66)) out_if(clk);
initial begin
rst = 0;
@ -50,15 +50,15 @@ always_ff @ (posedge clk)
$error(1, "%m %t ERROR: output .err asserted", $time);
accum_mult # (
.BITS_A ( 256 ),
.BITS_A ( 264 ),
.LEVEL_A ( 4 ),
.LEVEL_B ( 4 )
.LEVEL_B ( 6 )
)
accum_mult (
.i_clk ( clk ),
.i_rst ( rst ),
.i_dat_a ( in_if.dat[0 +: 256] ),
.i_dat_b ( in_if.dat[256 +: 256] ),
.i_dat_a ( in_if.dat[0 +: 264] ),
.i_dat_b ( in_if.dat[264 +: 264] ),
.i_val ( in_if.val ),
.o_rdy ( in_if.rdy ),
.o_dat ( out_if.dat ),
@ -72,23 +72,23 @@ task test_loop();
begin
integer signed get_len;
logic [common_pkg::MAX_SIM_BYTS*8-1:0] expected, get_dat;
logic [255:0] in_a, in_b;
logic [263:0] in_a, in_b;
integer i, max;
get_dat = 0;
$display("Running test_loop...");
i = 0;
max = 10000;
while (i < max) begin
in_a = random_vector(256/8);
in_b = random_vector(256/8);
in_a = random_vector(264/8);
in_b = random_vector(264/8);
expected = (in_a * in_b);
fork
in_if.put_stream({in_b, in_a}, 512/8);
in_if.put_stream({in_b, in_a}, 528/8);
out_if.get_stream(get_dat, get_len);
join
common_pkg::compare_and_print(get_dat, expected);
$display("test_loop PASSED loop %d/%d", i, max);
i = i + 1;

View File

@ -24,10 +24,9 @@ localparam CLK_PERIOD = 100;
logic clk, rst;
if_axi_stream #(.DAT_BYTS(512/8)) in_if(clk);
if_axi_stream #(.DAT_BYTS(512/8)) out_if(clk);
if_axi_stream #(.DAT_BYTS(512/8), .CTL_BITS(8)) in_if(clk);
if_axi_stream #(.DAT_BYTS(512/8), .CTL_BITS(8)) out_if(clk);
logic [511:0] test;
initial begin
rst = 0;
repeat(2) #(20*CLK_PERIOD) rst = ~rst;
@ -50,33 +49,26 @@ always_ff @ (posedge clk)
if (out_if.val && out_if.err)
$error(1, "%m %t ERROR: output .err asserted", $time);
localparam LEVEL = 3;
logic [LEVEL-1:0] val;
localparam LEVEL = 2;
karatsuba_ofman_mult # (
.BITS (256),
.LEVEL (LEVEL)
.BITS ( 256 ),
.CTL_BITS ( 8 ),
.LEVEL ( LEVEL )
)
karatsuba_ofman_mult (
.i_clk ( clk ),
.i_dat_a( in_if.dat[0 +: 256] ),
.i_dat_b( in_if.dat[256 +: 256] ),
.o_dat ( out_if.dat )
.i_dat_b( in_if.dat[256 +: 256] ),
.i_val ( in_if.val ),
.o_val ( out_if.val ),
.i_ctl ( in_if.ctl ),
.o_ctl ( out_if.ctl ),
.i_rdy ( out_if.rdy ),
.o_rdy ( in_if.rdy ),
.o_dat ( out_if.dat )
);
always_ff @ (posedge clk) begin
if (rst) begin
val <= 0;
end else begin
val <= {val, in_if.val};
end
end
always_comb begin
out_if.val = val[LEVEL-1];
in_if.rdy = out_if.rdy;
end
task test_loop();
begin
integer signed get_len;
@ -94,7 +86,7 @@ begin
expected = (in_a * in_b);
fork
in_if.put_stream({in_b, in_a}, 512/8);
in_if.put_stream({in_b, in_a}, 512/8, i);
out_if.get_stream(get_dat, get_len);
join

View File

@ -24,25 +24,29 @@
*/
module secp256k1_mod #(
parameter USE_MULT = 0 // Set to 1 to use multiple operation (should infer DSP and use less LUTs)
parameter USE_MULT = 0, // Set to 1 to use multiple operation (should infer DSP and use less LUTs)
parameter CTL_BITS = 8
)(
input i_clk, i_rst,
// Input value
input [256*2-1:0] i_dat,
input i_val,
input i_err,
output logic o_rdy,
input [256*2-1:0] i_dat,
input i_val,
input i_err,
input [CTL_BITS-1:0] i_ctl,
output logic o_rdy,
// output
output logic [255:0] o_dat,
input i_rdy,
output logic o_val,
output logic o_err // Will go high if after 1 reduction we are still >= p
output logic [255:0] o_dat,
output logic [CTL_BITS-1:0] o_ctl,
input i_rdy,
output logic o_val,
output logic o_err // Will go high if after 1 reduction we are still >= p
);
import secp256k1_pkg::*;
logic [256*2-1:0] res0, res1;
logic [1:0] val, err;
logic [1:0][CTL_BITS-1:0] ctl;
generate
if (USE_MULT == 1) begin: GEN_MULT
@ -74,16 +78,20 @@ always_ff @ (posedge i_clk) begin
val <= 0;
err <= 0;
o_val <= 0;
ctl <= 0;
o_err <= 0;
end else begin
o_val <= 0;
val <= val << 1;
ctl <= {ctl, i_ctl};
err <= err << 1;
val[0] <= i_val;
err[0] <= i_err;
o_dat <= res1 >= p_eq ? res1 - p_eq : res1;
o_err <= err[1] || (res1 >= 2*p_eq);
o_val <= val[1];
o_ctl <= ctl[1];
end
end

View File

@ -24,19 +24,23 @@
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
module secp256k1_mult_mod (
module secp256k1_mult_mod #(
parameter CTL_BITS = 8
)(
input i_clk, i_rst,
// Input value
input [255:0] i_dat_a,
input [255:0] i_dat_b,
input i_val,
input i_err,
output logic o_rdy,
input [255:0] i_dat_a,
input [255:0] i_dat_b,
input [CTL_BITS-1:0] i_ctl,
input i_val,
input i_err,
output logic o_rdy,
// output
output logic [255:0] o_dat,
input i_rdy,
output logic o_val,
output logic o_err
output logic [255:0] o_dat,
output logic [CTL_BITS-1:0] o_ctl,
input i_rdy,
output logic o_val,
output logic o_err
);
import secp256k1_pkg::*;
@ -45,51 +49,55 @@ import common_pkg::*;
localparam KARATSUBA_LEVEL = 2;
if_axi_stream #(.DAT_BYTS(512/8)) int_if(i_clk);
always_comb o_rdy = int_if.rdy;
logic [KARATSUBA_LEVEL-1:0] val, err;
logic [KARATSUBA_LEVEL-1:0] err;
karatsuba_ofman_mult # (
.BITS ( 256 ),
.LEVEL ( KARATSUBA_LEVEL )
.BITS ( 256 ),
.LEVEL ( KARATSUBA_LEVEL ),
.CTL_BITS ( CTL_BITS )
)
karatsuba_ofman_mult (
.i_clk ( i_clk ),
.i_ctl ( i_ctl ),
.i_dat_a( i_dat_a ),
.i_dat_b( i_dat_b ),
.o_dat ( int_if.dat )
.i_dat_b( i_dat_b ),
.i_val ( i_val ),
.o_rdy ( o_rdy ),
.o_dat ( int_if.dat ),
.o_val ( int_if.val ),
.i_rdy ( int_if.rdy ),
.o_ctl ( int_if.ctl )
);
always_ff @ (posedge i_clk) begin
if (i_rst) begin
val <= 0;
err <= 0;
end else begin
val <= {val, i_val};
err <= {err, i_err};
end
end
always_comb begin
int_if.val = val[KARATSUBA_LEVEL-1];
int_if.err = err[KARATSUBA_LEVEL-1];
int_if.mod = 0;
int_if.sop = 0;
int_if.eop = 0;
int_if.ctl = 0;
end
secp256k1_mod #(
.USE_MULT ( 0 )
.USE_MULT ( 0 ),
.CTL_BITS ( CTL_BITS )
)
secp256k1_mod (
.i_clk( i_clk ),
.i_rst( i_rst ),
.i_dat( int_if.dat ),
.i_val( int_if.val ),
.i_ctl( int_if.ctl ),
.i_err( int_if.err ),
.o_rdy( int_if.rdy ),
.o_dat( o_dat ),
.o_ctl( o_ctl ),
.o_err( o_err ),
.i_rdy( i_rdy ),
.o_val( o_val )

View File

@ -30,4 +30,51 @@ package secp256k1_pkg;
parameter [255:0] p_eq = (1 << 256) - (1 << 32) - (1 << 9) - (1 << 8) - (1 << 7) - (1 << 6) - (1 << 4) - 1;
// Use register map for debug, holds information on current operation
parameter REGISTER_SIZE = 64;
// The mapping to index
parameter CURR_CMD = 0; // What command are we processing
parameter CURR_STATE = 1; // What state are we in
// If it is processing a signature verification, these bits will be populated:
parameter SIG_VER_HASH = 8; // 256 bits
parameter SIG_VER_S = 12; // 256 bits
parameter SIG_VER_R = 16; // 256 bits
parameter SIG_VER_Q = 20; // 512 bits
parameter SIG_VER_W = 28; // 256 bits - Result of invert(s)
// Expected to be in Jacobian coordinates
typedef struct packed {
logic [255:0] x, y, z;
} jb_point_t;
typedef struct packed {
logic [5:0] padding;
logic X_INFINITY_POINT;
logic OUT_OF_RANGE_S;
logic OUT_OF_RANGE_R;
} secp256k1_ver_t;
// Returns 1 when p has the coordinates x == 0, y == 0, z == 1,
// which is the encoding this package tests for as the zero point.
function is_zero(jb_point_t p);
  return (p.z == 1) && (p.x == 0) && (p.y == 0);
endfunction
// Function to double point in Jacobian coordinates (for comparison in testbench)
// Here a is 0, and we also mod p the result.
//
// All intermediates are held in 1024-bit variables so the products of
// already-reduced values cannot overflow. Every subtraction adds a multiple of
// p_eq first, because an unsigned subtraction that goes negative would wrap
// modulo 2^1024 and the following "% p_eq" would then return a wrong residue.
function jb_point_t dbl_jb_point(jb_point_t p);
  logic [1023:0] A, B, C, D, X;
  A = (p.y*p.y) % p_eq;
  B = (4*p.x*A) % p_eq;
  C = (8*A*A) % p_eq;
  D = (3*p.x*p.x) % p_eq;
  // B < p_eq, so 2*p_eq - 2*B is always positive
  X = (D*D + 2*p_eq - 2*B) % p_eq;
  dbl_jb_point.x = X;
  // X < p_eq and C < p_eq, so both differences stay non-negative
  dbl_jb_point.y = (D*(B + p_eq - X) + p_eq - C) % p_eq;
  dbl_jb_point.z = (2*p.y*p.z) % p_eq;
endfunction
// Prints the three coordinates of p in hexadecimal, one per line.
// Declared void: nothing is returned, and callers invoke it as a statement.
function void print_jb_point(jb_point_t p);
  $display("x:%h", p.x);
  $display("y:%h", p.y);
  $display("z:%h", p.z);
endfunction
endpackage

View File

@ -0,0 +1,258 @@
/*
This performs point doubling.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
module secp256k1_point_dbl
  import secp256k1_pkg::*;
#(
)(
  input i_clk, i_rst,
  // Input point
  input jb_point_t i_p,
  input logic      i_val,
  output logic     o_rdy,
  // Output point
  output jb_point_t o_p,
  input logic       i_rdy,
  output logic      o_val,
  output logic      o_err,
  // Interface to 256bit multiplier (mod p).
  // o_* carries requests out of this module, i_* carries results back in, so
  // the result side must be a sink (this module drives .rdy and reads .val/.dat/.ctl).
  if_axi_stream.source o_mult_if,
  if_axi_stream.sink   i_mult_if,
  // Interface to only mod reduction block
  if_axi_stream.source o_mod_if,
  if_axi_stream.sink   i_mod_if
);

/*
 * These are the equations that need to be computed, they are issued as variables
 * become valid. We have a bitmask to track what equation results are valid which
 * will trigger other equations. [] show what equations must be valid before this starts.
 *
 * 0.  A = (i_p.y)^2 mod p
 * 1.  B = (i_p.x)*A mod p [eq0]
 * 2.  B = 4*B mod p [eq1]
 * 3.  C = A^2 mod p [eq0]
 * 4.  C = C*8 mod p [eq3]
 * 5.  D = (i_p.x)^2 mod p
 * 6.  D = 3*D mod p [eq5]
 * 7.  (o_p.x) = D^2 mod p [eq6]
 * 8.  E = 2*B mod p [eq2]
 * 9.  (o_p.x) = o_p.x - E mod p [eq8, eq7]
 * 10. (o_p.y) = B - o_p.x mod p [eq9, eq2]
 * 11. (o_p.y) = D*(o_p.y) mod p [eq10, eq6]
 * 12. (o_p.y) = (o_p.y) - C mod p [eq11]
 * 13. (o_p.z) = 2*(i_p.y) mod p
 * 14. (o_p.z) = o_p.z * i_p.z mod p [eq13]
 *
 * eq_wait[n] is set when equation n has been issued, eq_val[n] when its
 * result has been captured. The equation number travels through the
 * multiplier / reducer in the .ctl field so results can return in any order.
 */
logic [14:0] eq_val, eq_wait;

// Temporary variables
logic [255:0] A, B, C, D, E;
jb_point_t i_p_l;

enum {IDLE, START, FINISHED} state;

always_ff @ (posedge i_clk) begin
  if (i_rst) begin
    o_val <= 0;
    o_rdy <= 0;
    o_p <= 0;
    o_mult_if.reset_source();
    o_mod_if.reset_source();
    i_mult_if.rdy <= 0;
    i_mod_if.rdy <= 0;
    eq_val <= 0;
    state <= IDLE;
    eq_wait <= 0;
    i_p_l <= 0;
    o_err <= 0;
    A <= 0;
    B <= 0;
    C <= 0;
    D <= 0;
    E <= 0;
  end else begin

    // A request is consumed once the downstream block is ready; the
    // multiply()/modulo() tasks may re-assert val later in this same cycle.
    if (o_mult_if.rdy)
      o_mult_if.val <= 0;
    if (o_mod_if.rdy)
      o_mod_if.val <= 0;

    case (state)
      IDLE: begin
        o_rdy <= 1;
        eq_val <= 0;
        eq_wait <= 0;
        o_err <= 0;
        i_mult_if.rdy <= 1;
        i_p_l <= i_p;
        A <= 0;
        B <= 0;
        C <= 0;
        D <= 0;
        E <= 0;
        if (i_val && o_rdy) begin
          o_rdy <= 0;
          if (i_p.z == 0) begin
            // Input with z == 0 is rejected: report it as a completed
            // operation with o_err set and hold it until it is accepted.
            o_err <= 1;
            o_val <= 1;
            state <= FINISHED;
          end else begin
            state <= START;
          end
        end
      end
      // Just a big if tree where we issue equations if the required inputs
      // are valid
      START: begin
        i_mod_if.rdy <= 1;
        i_mult_if.rdy <= 1;

        // Check any results from modulo reduction
        if (i_mod_if.val && i_mod_if.rdy) begin
          eq_val[i_mod_if.ctl] <= 1;
          case(i_mod_if.ctl)
            2: B <= i_mod_if.dat;
            4: C <= i_mod_if.dat;
            8: E <= i_mod_if.dat;
            13: o_p.z <= i_mod_if.dat;
            default: o_err <= 1;
          endcase
        end

        // Check any results from multiplier
        if (i_mult_if.val && i_mult_if.rdy) begin
          eq_val[i_mult_if.ctl] <= 1;
          case(i_mult_if.ctl) inside
            0: A <= i_mult_if.dat;
            1: B <= i_mult_if.dat;
            3: C <= i_mult_if.dat;
            5: D <= i_mult_if.dat;
            6: D <= i_mult_if.dat;
            7: o_p.x <= i_mult_if.dat;
            11: o_p.y <= i_mult_if.dat;
            14: o_p.z <= i_mult_if.dat;
            default: o_err <= 1;
          endcase
        end

        // Issue new multiplies (at most one per cycle, first match wins)
        if (~eq_wait[0]) begin                    //0. A = (i_p.y)^2 mod p
          multiply(0, i_p_l.y, i_p_l.y);
        end else
        if (eq_val[0] && ~eq_wait[1]) begin       //1. B = (i_p.x)*A mod p [eq0]
          multiply(1, i_p_l.x, A);
        end else
        if (eq_val[0] && ~eq_wait[3]) begin       //3. C = A^2 mod p [eq0]
          multiply(3, A, A);
        end else
        if (~eq_wait[5]) begin                    //5. D = (i_p.x)^2 mod p
          multiply(5, i_p_l.x, i_p_l.x);
        end else
        if (eq_val[5] && ~eq_wait[6]) begin       //6. D = 3*D mod p [eq5]
          multiply(6, 256'd3, D);
        end else
        if (eq_val[6] && ~eq_wait[7]) begin       //7. (o_p.x) = D^2 mod p [eq6]
          multiply(7, D, D);
        end else
        if (eq_val[10] && eq_val[6] && ~eq_wait[11]) begin //11. (o_p.y) = D*(o_p.y) [eq10, eq6]
          multiply(11, D, o_p.y);
        end else
        if (eq_val[13] && ~eq_wait[14]) begin     //14. (o_p.z) = o_p.z * i_p.z mod p [eq13]
          multiply(14, i_p_l.z, o_p.z);
        end

        // Issue new modulo reductions (at most one per cycle, first match wins)
        if (eq_val[1] && ~eq_wait[2]) begin       //2. B = 4*B mod p [eq1]
          modulo(2, B*4);
        end else
        if (eq_val[3] && ~eq_wait[4]) begin       //4. C = C*8 mod p [eq3]
          modulo(4, C*8);
        end else
        if (eq_val[2] && ~eq_wait[8]) begin       //8. E = 2*B mod p [eq2]
          modulo(8, B*2);
        end else
        if (~eq_wait[13]) begin                   //13. (o_p.z) = 2*(i_p.y) mod p
          modulo(13, 2*i_p_l.y);
        end

        // Additions / subtractions we do in-module. p is added first when
        // the subtrahend is larger so the unsigned result does not wrap.
        if (eq_val[8] && eq_val[7] && ~eq_wait[9]) begin //9. (o_p.x) = o_p.x - E mod p [eq8, eq7]
          eq_wait[9] <= 1;
          eq_val[9] <= 1;
          o_p.x <= o_p.x + (E > o_p.x ? secp256k1_pkg::p : 0) - E;
        end
        if (eq_val[9] && eq_val[2] && ~eq_wait[10]) begin //10. (o_p.y) = B - o_p.x mod p [eq9, eq2]
          eq_wait[10] <= 1;
          eq_val[10] <= 1;
          o_p.y <= B + (o_p.x > B ? secp256k1_pkg::p : 0) - o_p.x;
        end
        if (eq_val[11] && ~eq_wait[12]) begin     //12. (o_p.y) = (o_p.y) - C mod p [eq11]
          eq_wait[12] <= 1;
          eq_val[12] <= 1;
          o_p.y <= o_p.y + (C > o_p.y ? secp256k1_pkg::p : 0) - C;
        end

        if (&eq_val) begin
          state <= FINISHED;
          o_val <= 1;
        end
      end
      FINISHED: begin
        // Hold o_val (and o_err, if set) until the consumer accepts it
        if (o_val && i_rdy) begin
          state <= IDLE;
          o_val <= 0;
          o_err <= 0;
          o_rdy <= 1;
        end
      end
    endcase

    // An unexpected .ctl value while computing aborts the operation. The
    // error is presented with o_val and released by the FINISHED handshake,
    // so o_val can no longer remain stuck high after an error.
    // NOTE(review): results still in flight when we abort are not flushed
    // here; confirm they cannot be captured by a following operation.
    if (o_err && state == START) begin
      state <= FINISHED;
      o_val <= 1;
    end

  end
end

// Task for using multiplies.
// Issues a*b tagged with equation number ctl, but only when the request
// register is free (idle, or being accepted this cycle); otherwise does
// nothing so the caller retries on a later cycle.
task multiply(input int unsigned ctl, input logic [255:0] a, b);
  if (~o_mult_if.val || (o_mult_if.val && o_mult_if.rdy)) begin
    o_mult_if.val <= 1;
    o_mult_if.dat[0 +: 256] <= a;
    o_mult_if.dat[256 +: 256] <= b;
    o_mult_if.ctl <= ctl;
    eq_wait[ctl] <= 1;
  end
endtask

// Task for using modulo.
// Same handshake rule as multiply(); a is the up-to-512-bit value to reduce.
task modulo(input int unsigned ctl, input logic [511:0] a);
  if (~o_mod_if.val || (o_mod_if.val && o_mod_if.rdy)) begin
    o_mod_if.val <= 1;
    o_mod_if.dat <= a;
    o_mod_if.ctl <= ctl;
    eq_wait[ctl] <= 1;
  end
endtask

endmodule

View File

@ -0,0 +1,180 @@
/*
This performs point multiplication. We use the standard double
and add algorithm.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
module secp256k1_point_mult
  import secp256k1_pkg::*;
#(
)(
  input i_clk, i_rst,
  // Input point and value to multiply
  input jb_point_t    i_p,
  input logic [255:0] i_k,
  input logic         i_val,
  output logic        o_rdy,
  // Output point
  output jb_point_t o_p,
  input logic       i_rdy,
  output logic      o_val,
  output logic      o_err
);

// Request / result streams between the point doubler and the shared
// multiply-mod and mod-only blocks. The 8-bit ctl carries the equation tag.
if_axi_stream #(.DAT_BYTS(256*2/8), .CTL_BITS(8)) mult_in_if(i_clk);
if_axi_stream #(.DAT_BYTS(256/8), .CTL_BITS(8)) mult_out_if(i_clk);
if_axi_stream #(.DAT_BYTS(256*2/8), .CTL_BITS(8)) mod_in_if(i_clk);
if_axi_stream #(.DAT_BYTS(256/8), .CTL_BITS(8)) mod_out_if(i_clk);

logic [255:0] k_l;
jb_point_t p_n, p_q, p_dbl;
logic p_dbl_in_val, p_dbl_in_rdy, p_dbl_out_err, p_dbl_out_val, p_dbl_out_rdy;

enum {IDLE, DOUBLE, ADD, FINISHED} state;

// NOTE(review): the ADD state only copies p_n into p_q and the result is
// taken from p_dbl; no point-addition block is instantiated in this module,
// so this looks like work in progress rather than a complete double-and-add.
always_ff @ (posedge i_clk) begin
  if (i_rst) begin
    o_val <= 0;
    o_err <= 0;
    o_rdy <= 0;
    k_l <= 0;
    p_q <= 0;
    p_dbl_in_val <= 0;
    p_dbl_out_rdy <= 0;
    state <= IDLE;
    o_p <= 0;
    p_n <= 0;
  end else begin

    // Always ready to take the doubler's result
    p_dbl_out_rdy <= 1;

    case (state)
      IDLE: begin
        o_rdy <= 1;
        o_err <= 0;
        p_q <= '{x:0, y:0, z:1}; // p_q starts at 0
        if (o_rdy && i_val) begin
          k_l <= i_k;
          p_n <= i_p;
          // Regardless of i_k[0] we skip the first add since it would set p_q to i_p
          if (i_k[0]) begin
            p_q <= i_p;
          end
          state <= DOUBLE;
          p_dbl_in_val <= 1;
        end
      end
      DOUBLE: begin
        // Keep the request asserted until the doubler accepts it, so it is
        // not lost if the doubler is not ready on the first cycle.
        if (p_dbl_in_val && p_dbl_in_rdy) begin
          p_dbl_in_val <= 0;
        end
        if (p_dbl_out_val && p_dbl_out_rdy) begin
          p_n <= p_dbl;
          k_l <= k_l >> 1;
          if (k_l[1] == 1) begin
            state <= ADD;
          end else if (k_l[255:1] == 0) begin
            state <= FINISHED;
            o_p <= p_dbl;
            o_val <= 1;
          end else begin
            state <= DOUBLE;
            p_dbl_in_val <= 1;
          end
        end
      end
      ADD: begin
        state <= DOUBLE;
        p_q <= p_n;
        p_dbl_in_val <= 1;
      end
      FINISHED: begin
        p_dbl_in_val <= 0;
        if (i_rdy && o_val) begin
          o_val <= 0;
          state <= IDLE;
        end
      end
    endcase

    // An error from the doubler aborts the multiplication. Not re-armed once
    // we are already in FINISHED, otherwise a doubler error that is still
    // asserted would override the output handshake and emit a second beat.
    if (p_dbl_out_err && state != FINISHED) begin
      o_err <= 1;
      o_val <= 1;
      p_dbl_in_val <= 0;
      state <= FINISHED;
    end

  end
end

secp256k1_point_dbl secp256k1_point_dbl(
  .i_clk ( i_clk ),
  .i_rst ( i_rst ),
  // Input point
  .i_p   ( p_n          ),
  .i_val ( p_dbl_in_val ),
  .o_rdy ( p_dbl_in_rdy ),
  // Output point
  .o_p   ( p_dbl         ),
  .o_err ( p_dbl_out_err ),
  .i_rdy ( p_dbl_out_rdy ),
  .o_val ( p_dbl_out_val ),
  // Interfaces to shared multipliers / modulo blocks
  .o_mult_if ( mult_in_if  ),
  .i_mult_if ( mult_out_if ),
  .o_mod_if  ( mod_in_if   ),
  .i_mod_if  ( mod_out_if  )
);

secp256k1_mult_mod #(
  .CTL_BITS ( 8 )
)
secp256k1_mult_mod (
  .i_clk ( i_clk ),
  .i_rst ( i_rst ),
  .i_dat_a ( mult_in_if.dat[0 +: 256]   ),
  .i_dat_b ( mult_in_if.dat[256 +: 256] ),
  .i_val ( mult_in_if.val ),
  .i_err ( mult_in_if.err ),
  .i_ctl ( mult_in_if.ctl ),
  .o_rdy ( mult_in_if.rdy ),
  .o_dat ( mult_out_if.dat ),
  .i_rdy ( mult_out_if.rdy ),
  .o_val ( mult_out_if.val ),
  .o_ctl ( mult_out_if.ctl ),
  .o_err ( mult_out_if.err )
);

secp256k1_mod #(
  .USE_MULT ( 0 ),
  .CTL_BITS ( 8 )
)
secp256k1_mod (
  .i_clk( i_clk ),
  .i_rst( i_rst ),
  .i_dat( mod_in_if.dat ),
  .i_val( mod_in_if.val ),
  .i_err( mod_in_if.err ),
  .i_ctl( mod_in_if.ctl ),
  .o_rdy( mod_in_if.rdy ),
  .o_dat( mod_out_if.dat ),
  .o_ctl( mod_out_if.ctl ),
  .o_err( mod_out_if.err ),
  .i_rdy( mod_out_if.rdy ),
  .o_val( mod_out_if.val )
);

endmodule

View File

@ -1,12 +1,241 @@
module secp256k1_top (
input i_clk,
input i_rst,
input i_val,
output logic o_rdy,
output logic o_val
input i_clk,
input i_rst,
// Command interface
if_axi_stream.sink if_cmd_rx,
if_axi_stream.source if_cmd_tx,
// Memory map interface for debug
if_axi_mm.sink if_axi_mm
);
import secp256k1_pkg::*;
import zcash_fpga_pkg::*;
// Register map is used for storing command data
logic [REGISTER_SIZE/64-1:0][63:0] register_map;
if_ram #(.RAM_WIDTH(64), .RAM_DEPTH(REGISTER_SIZE)) register_file_a (i_clk, i_rst);
if_ram #(.RAM_WIDTH(64), .RAM_DEPTH(REGISTER_SIZE)) register_file_b (i_clk, i_rst);
// 256 multiplier (karatsuba)
logic [255:0] mult_dat_a, mult_dat_b;
logic mult_dat_val;
if_axi_stream #(.DAT_BYTS(512/8)) mult_out_if(i_clk);
// 256 bit inverse calculation
if_axi_stream #(.DAT_BYTS(256/8)) bin_inv_in_if(i_clk);
if_axi_stream #(.DAT_BYTS(256/8)) bin_inv_out_if(i_clk);
// TODO just have one multiplier (unless double & add is parallel)
// one multiplier that Barrett reduction can share?
// Can avoid final inversion converting from projected coord by some check in c++ code
// Controlling state machine
typedef enum {IDLE,
VERIFY_SECP256K1_SIG_PARSE, // Parse inputs
CALC_S_INV,
POINT_DBL,
POINT_ADD,
IGNORE,
FINISHED} secp256k1_state_t;
secp256k1_state_t secp256k1_state;
header_t header, header_l;
secp256k1_ver_t secp256k1_ver;
// Other temporary values
logic [255:0] r, w;
logic [5:0] cnt; // Counter for parsing command inputs
logic if_axi_mm_rd;
always_comb begin
header = if_cmd_rx.dat;
end
always_ff @ (posedge i_clk) begin
if (i_rst) begin
secp256k1_state <= IDLE;
if_cmd_tx.reset_source();
if_cmd_rx.reset_sink();
cnt <= 0;
mult_out_if.rdy <= 0;
register_file_a.reset_source();
mult_dat_a <= 0;
mult_dat_b <= 0;
mult_dat_val <= 0;
w <= 0;
r <= 0;
bin_inv_in_if.reset_source();
bin_inv_out_if.rdy <= 0;
secp256k1_ver <= 0;
end else begin
register_file_a.en <= 1;
register_file_a.wr <= 0;
register_file_a.rd <= 1;
mult_out_if.rdy <= 1;
bin_inv_out_if.rdy <= 1;
mult_dat_val <= 0;
case(secp256k1_state)
{IDLE}: begin
secp256k1_ver <= 0;
if_cmd_rx.rdy <= 1;
header_l <= header;
cnt <= 0;
if (if_cmd_rx.val && if_cmd_rx.rdy) begin
case(header.cmd)
{VERIFY_SECP256K1_SIG}: begin
register_map[CURR_CMD] <= header;
secp256k1_state <= VERIFY_SECP256K1_SIG_PARSE;
end
default: begin
if (~if_cmd_rx.eop) begin
if_cmd_rx.rdy <= 1;
secp256k1_state <= IGNORE;
end
end
endcase
end
end
{VERIFY_SECP256K1_SIG_PARSE}: begin
if_cmd_rx.rdy <= 1;
if (if_cmd_rx.val && if_cmd_rx.rdy) begin
register_file_a.wr <= 1;
cnt <= cnt + 1;
if (cnt == 19) secp256k1_state <= CALC_S_INV;
end
if (bin_inv_in_if.val && bin_inv_in_if.rdy)
bin_inv_in_if.val <= 0;
case(cnt) inside
[0:3]: begin
register_file_a.a <= SIG_VER_S + (cnt % 4);
register_file_a.d <= if_cmd_rx.dat;
// Can start calculating the inverse here
bin_inv_in_if.dat[(cnt % 4)*64 +: 64] <= if_cmd_rx.dat;
if (cnt == 3) begin
bin_inv_in_if.val <= 1;
end
end
[4:7]: begin
// We can load R into the karatsuba multiplier
register_file_a.a <= SIG_VER_R + (cnt % 4);
register_file_a.d <= if_cmd_rx.dat;
mult_dat_a[(cnt % 4)*64 +: 64] <= if_cmd_rx.dat;
end
[8:11]: begin
register_file_a.a <= SIG_VER_HASH + (cnt % 4);
register_file_a.d <= if_cmd_rx.dat;
end
[12:19]: begin
register_file_a.a <= SIG_VER_Q + (cnt % 8);
register_file_a.d <= if_cmd_rx.dat;
end
endcase
end
{CALC_S_INV}: begin
// Wait until bin_inv_out_if.val
if (bin_inv_in_if.dat >= secp256k1_pkg::n) secp256k1_ver.OUT_OF_RANGE_S <= 1;
if (mult_dat_a >= secp256k1_pkg::n) secp256k1_ver.OUT_OF_RANGE_R <= 1;
if (bin_inv_out.val && bin_inv_out.rdy) begin
w <= bin_inv_out.dat;
// TODO also write this to RAM
// need to do 2 multiplications % n to get u1 and u2
end
end
{IGNORE}: begin
if_cmd_rx.rdy <= 1;
if (if_cmd_rx.rdy && if_cmd_rx.val && if_cmd_rx.eop)
secp256k1_state <= IDLE;
end
endcase
end
end
// TODO could provide write access
always_comb begin
end
always_ff @ (posedge i_clk) begin
if (i_rst) begin
if_axi_mm.reset_sink();
register_file_b.reset_source();
end else begin
if_axi_mm.rd_dat_val <= 0;
register_file_b.en <= 1;
register_file_b.rd <= 1;
register_file_b.a <= if_axi_mm.addr/8;
if_axi_mm_rd <= if_axi_mm.rd;
if (if_axi_mm_rd) begin
if_axi_mm.rd_dat_val <= 1;
if_axi_mm.rd_dat <= register_file_b.q;
end
end
end
// BRAM for storing parsed inputs
bram #(
.RAM_WIDTH ( 64 ),
.RAM_DEPTH ( REGISTER_SIZE ),
.RAM_PERFORMANCE ( "HIGH_PERFORMANCE" )
) register_file (
.a ( register_file_a ),
.b ( register_file_b )
);
// Calculate binary inverse mod n
begin: BINARY_INVERSE_MOD_N
bin_inv #(
.BITS ( 256 ),
.P ( secp256k1_pkg::n )
)(
.i_clk ( i_clk ),
.i_rst ( i_rst) ,
.i_dat ( bin_inv_in_if.dat ),
.i_val ( bin_inv_in_if.val ),
.o_rdy ( bin_inv_in_if.rdy ),
.o_dat ( bin_inv_out_if.dat ),
.o_val ( bin_inv_out_if.val ),
.i_rdy ( bin_inv_out_if.rdy )
);
end
// 256 bit Karatsuba_ofman multiplier
begin: KARATSUBA_OFMAN_MULT
localparam KARATSUBA_LEVEL = 2;
logic [KARATSUBA_LEVEL-1:0] val;
karatsuba_ofman_mult # (
.BITS ( 256 ),
.LEVEL ( KARATSUBA_LEVEL )
)
karatsuba_ofman_mult (
.i_clk ( i_clk ),
.i_dat_a( mult_dat_a ),
.i_dat_b( mult_dat_b ),
.o_dat ( mult_out_if.dat )
);
always_ff @ (posedge i_clk) begin
if (i_rst) begin
mult_out_if.val <= 0;
end else begin
val <= {val, mult_dat_val};
end
end
end
// Modulo p reducer (shared with arbitrator)
// Modulo n reducer (output from karatsuba multiplier)
// 256 bit Karatsuba_ofman multiplier (shared with arbitrator)
// Point double module or Point multiply module
endmodule

View File

@ -24,7 +24,7 @@ package zcash_fpga_pkg;
import equihash_pkg::N;
import equihash_pkg::K;
parameter FPGA_VERSION = 32'h0;
parameter FPGA_VERSION = 32'h01_00_00; //v1.0.0
localparam [63:0] FPGA_CMD_CAP = {{62'd0},
(equihash_pkg::N == 144 && equihash_pkg::K == 5), // N = 144, K = 5 for VERIFY_EQUIHASH command
(equihash_pkg::N == 200 && equihash_pkg::K == 9)}; // N = 200, K = 9 for VERIFY_EQUIHASH command
@ -33,9 +33,10 @@ package zcash_fpga_pkg;
// Reply messages from the FPGA to host all have the last
// bit set (start at 0x80000000). Messages with bits [31:16] == 0 are processed by a different state machine
typedef enum logic [31:0] {
RESET_FPGA = 'h0000_00_00,
FPGA_STATUS = 'h0000_00_01,
VERIFY_EQUIHASH = 'h0000_01_00,
RESET_FPGA = 'h0000_00_00,
FPGA_STATUS = 'h0000_00_01,
VERIFY_EQUIHASH = 'h0000_01_00,
VERIFY_SECP256K1_SIG = 'h0000_01_01,
// Replies from the FPGA
RESET_FPGA_RPL = 'h80_00_00_00,

View File

@ -0,0 +1,165 @@
/*
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
`timescale 1ps/1ps
// Testbench for secp256k1_point_dbl. Connects the doubler to a multiply-mod
// block and a mod-only block, sends one point and compares the result against
// the dbl_jb_point() reference function from secp256k1_pkg.
module secp256k1_point_dbl_tb ();
import common_pkg::*;
import secp256k1_pkg::*;

localparam CLK_PERIOD = 100;

logic clk, rst;

if_axi_stream #(.DAT_BYTS(256*3/8)) in_if(clk); // Point is X, Y, Z
if_axi_stream #(.DAT_BYTS(256*3/8)) out_if(clk);

if_axi_stream #(.DAT_BYTS(256*2/8), .CTL_BITS(8)) mult_in_if(clk);
if_axi_stream #(.DAT_BYTS(256/8), .CTL_BITS(8)) mult_out_if(clk);

if_axi_stream #(.DAT_BYTS(256*2/8), .CTL_BITS(8)) mod_in_if(clk);
if_axi_stream #(.DAT_BYTS(256/8), .CTL_BITS(8)) mod_out_if(clk);

jb_point_t in_p, out_p;

// Map the stream data buses onto the point struct
always_comb begin
  in_p = in_if.dat;
  out_if.dat = out_p;
end

initial begin
  rst = 0;
  repeat(2) #(20*CLK_PERIOD) rst = ~rst;
end

// clk toggles every CLK_PERIOD time units
initial begin
  clk = 0;
  forever #CLK_PERIOD clk = ~clk;
end

// The result is a single beat, so sop and eop are both tied high
always_comb begin
  out_if.sop = 1;
  out_if.eop = 1;
  out_if.ctl = 0;
  out_if.mod = 0;
end

// Check for errors
// ($error takes no finish-number argument, unlike $fatal)
always_ff @ (posedge clk)
  if (out_if.val && out_if.err)
    $error("%m %t ERROR: output .err asserted", $time);

secp256k1_point_dbl secp256k1_point_dbl(
  .i_clk ( clk ),
  .i_rst ( rst ),
  // Input point
  .i_p   ( in_p      ),
  .i_val ( in_if.val ),
  .o_rdy ( in_if.rdy ),
  .o_p   ( out_p     ),
  .o_err ( out_if.err ),
  .i_rdy ( out_if.rdy ),
  .o_val ( out_if.val ),
  .o_mult_if ( mult_in_if  ),
  .i_mult_if ( mult_out_if ),
  .o_mod_if  ( mod_in_if   ),
  .i_mod_if  ( mod_out_if  )
);

// Attach a mod reduction unit and multiply - mod unit
// In full design these could use dedicated multipliers or be arbitrated
secp256k1_mult_mod #(
  .CTL_BITS ( 8 )
)
secp256k1_mult_mod (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_dat_a ( mult_in_if.dat[0 +: 256]   ),
  .i_dat_b ( mult_in_if.dat[256 +: 256] ),
  .i_val ( mult_in_if.val ),
  .i_err ( mult_in_if.err ),
  .i_ctl ( mult_in_if.ctl ),
  .o_rdy ( mult_in_if.rdy ),
  .o_dat ( mult_out_if.dat ),
  .i_rdy ( mult_out_if.rdy ),
  .o_val ( mult_out_if.val ),
  .o_ctl ( mult_out_if.ctl ),
  .o_err ( mult_out_if.err )
);

secp256k1_mod #(
  .USE_MULT ( 0 ),
  .CTL_BITS ( 8 )
)
secp256k1_mod (
  .i_clk( clk ),
  .i_rst( rst ),
  .i_dat( mod_in_if.dat ),
  .i_val( mod_in_if.val ),
  .i_err( mod_in_if.err ),
  .i_ctl( mod_in_if.ctl ),
  .o_rdy( mod_in_if.rdy ),
  .o_dat( mod_out_if.dat ),
  .o_ctl( mod_out_if.ctl ),
  .o_err( mod_out_if.err ),
  .i_rdy( mod_out_if.rdy ),
  .o_val( mod_out_if.val )
);

// Doubles the point (x=2, y=3, z=1) and checks it against the reference model
task test_0();
begin
  integer signed get_len;
  logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat;
  jb_point_t p_in, p_exp, p_out;
  $display("Running test_0...");
  p_in = '{z:1, x:2, y:3};
  p_exp = dbl_jb_point(p_in);
  fork
    in_if.put_stream(p_in, 256*3/8);
    out_if.get_stream(get_dat, get_len);
  join

  p_out = get_dat;

  if (p_exp != p_out) begin
    $display("Expected:");
    print_jb_point(p_exp);
    $display("Was:");
    print_jb_point(p_out);
    $fatal(1, "%m %t ERROR: test_0 point was wrong", $time);
  end

  $display("test_0 PASSED");
end
endtask

// Placeholder, currently empty and not called
function compare_point();
endfunction

initial begin
  out_if.rdy = 0;
  in_if.val = 0;
  #(40*CLK_PERIOD);

  test_0();

  #1us $finish();
end
endmodule

View File

@ -0,0 +1,119 @@
/*
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
`timescale 1ps/1ps
// Testbench for secp256k1_point_mult. Sends one point with k = 100 and waits
// for a result beat.
// NOTE(review): the comparison against an expected point is commented out in
// test_0, so the test currently reports PASSED whenever any result arrives.
module secp256k1_point_mult_tb ();
import common_pkg::*;
import secp256k1_pkg::*;

localparam CLK_PERIOD = 100;

logic clk, rst;

if_axi_stream #(.DAT_BYTS(256*3/8)) in_if(clk);
if_axi_stream #(.DAT_BYTS(256*3/8)) out_if(clk);

jb_point_t in_p, out_p;
logic [255:0] k;

// Map the stream data buses onto the point struct
always_comb begin
  in_p = in_if.dat;
  out_if.dat = out_p;
end

initial begin
  rst = 0;
  repeat(2) #(20*CLK_PERIOD) rst = ~rst;
end

// clk toggles every CLK_PERIOD time units
initial begin
  clk = 0;
  forever #CLK_PERIOD clk = ~clk;
end

// The result is a single beat, so sop and eop are both tied high
always_comb begin
  out_if.sop = 1;
  out_if.eop = 1;
  out_if.ctl = 0;
  out_if.mod = 0;
end

// Check for errors
// Plain "always" rather than always_ff: out_if.rdy is also written by the
// initial block below, and a variable assigned in always_ff may not be
// written by any other process.
always @ (posedge clk)
  if (out_if.val && out_if.err) begin
    out_if.rdy = 1;
    $error("%m %t ERROR: output .err asserted", $time);
  end

secp256k1_point_mult secp256k1_point_mult (
  .i_clk ( clk ),
  .i_rst ( rst ),
  .i_p   ( in_p       ),
  .i_k   ( k          ),
  .i_val ( in_if.val  ),
  .o_rdy ( in_if.rdy  ),
  .o_p   ( out_p      ),
  .i_rdy ( out_if.rdy ),
  .o_val ( out_if.val ),
  .o_err ( out_if.err )
);

// Multiplies the point (x=2, y=3, z=1) by 100; result is not yet checked
task test_0();
begin
  integer signed get_len;
  logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat;
  jb_point_t p_in, p_exp, p_out;
  $display("Running test_0...");
  p_in = '{z:1, x:2, y:3};
  k = 100;
  //p_exp = dbl_jb_point(p_in);
  fork
    in_if.put_stream(p_in, 256*3/8);
    out_if.get_stream(get_dat, get_len);
  join
  /*p_out = get_dat;

  if (p_exp != p_out) begin
    $display("Expected:");
    print_jb_point(p_exp);
    $display("Was:");
    print_jb_point(p_out);
    $fatal(1, "%m %t ERROR: test_0 point was wrong", $time);
  end */

  $display("test_0 PASSED");
end
endtask

// Placeholder, currently empty and not called
function compare_point();
endfunction

initial begin
  out_if.rdy = 0;
  in_if.val = 0;
  #(40*CLK_PERIOD);

  test_0();

  #1us $finish();
end
endmodule