updates to structure for eliptical curve blocks

This commit is contained in:
bsdevlin 2019-05-22 14:53:06 +08:00
parent 9145fd8533
commit 9d2c9340a3
6 changed files with 562 additions and 32 deletions

View File

@ -6,16 +6,16 @@ Repo for Zcash FPGA projects code and documents.
These have been designed targetted for Xilinx boards (US+) and therefore contain Xilinx-specific IP.
## zcash_verif
## zcash_fpga_top
This is the top level for the Zcash verification FPGA board. It targets both Xilinx Virtex UltraScale+ FPGA VCU118 Evaluation Kit, and Amazon EC2 F1 Instances.
This is the top level for the Zcash FPGA. It targets both Xilinx Virtex UltraScale+ FPGA VCU118 Evaluation Kit, and Amazon EC2 F1 Instances.
Architecture document is [here]()
Contains the following top-level engine:
### Equihash verification engine
### EC secp256k1 signature verification engine
### EC bls12-381 co-processor
It optionally contains the following top-level engines (you can optionally include in a build via parameters):
* Equihash verification engine
* EC secp256k1 signature verification engine
* EC bls12-381 co-processor
## ip_cores
@ -25,4 +25,7 @@ These contain custom IP cores used in the projects in this repo.
* blake2b - A simple implementation of blake2b and a pipline-unrolled version for high performance.
* common - Packages and interfaces that are shared.
* fifo - Fifo implementations
* parsing - Blocks for parsing/processing streams, as well as testbench files.
* parsing - Blocks for parsing/processing streams, as well as testbench files.
* Karabutsa multiplier
* Barret reduction
* Resource arbitrators

View File

@ -1,7 +1,6 @@
/*
This performs a multiplication followed by modular reduction
using karabusa multiplier and barrets algorithm, for the bls381-12
curve on Fp elements.
using karabusa multiplier and barrets algorithm on Fp elements.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
@ -19,41 +18,48 @@
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
module bls12_381_fp_mult_mod #(
parameter CTL_BITS = 16
module ec_fp_mult_mod #(
parameter DAT_BITS,
parameter [DAT_BITS-1:0] P,
parameter KARATSUBA_LVL,
parameter CTL_BITS = 16
)(
input i_clk, i_rst,
// Input value
input [380:0] i_dat_a,
input [380:0] i_dat_b,
input [DAT_BITS-1:0] i_dat_a,
input [DAT_BITS-1:0] i_dat_b,
input
input i_val,
input i_err,
input [CTL_BITS-1:0] i_ctl,
output logic o_rdy,
// output
output logic [380:0] o_dat,
output logic [DAT_BITS-1:0] o_dat,
output logic [CTL_BITS-1:0] o_ctl,
input i_rdy,
output logic o_val
);
localparam ARB_BIT = 8;
localparam KARATSUBA_LEVEL = 3;
import bls12_381_pkg::*;
// The reduction mod takes DAT_BITS + 1 bits, but we also need to make sure we are a multiple of KARATSUBA_LVL*2
parameter MLT_BITS = DAT_BITS + 1 + (KARATSUBA_LVL*2 - (DAT_BITS + 1) % (KARATSUBA_LVL*2));
if_axi_stream #(.DAT_BYTS(384*2/8), .CTL_BITS(CTL_BITS)) mult_if_in [3:0] (i_clk);
if_axi_stream #(.DAT_BYTS(384*2/8), .CTL_BITS(CTL_BITS)) mult_if_out [3:0] (i_clk);
localparam ARB_BIT = 8;
if_axi_stream #(.DAT_BITS(MLT_BITS*2), .CTL_BITS(CTL_BITS)) mult_if_in [3:0] (i_clk);
if_axi_stream #(.DAT_BITS(MLT_BITS*2), .CTL_BITS(CTL_BITS)) mult_if_out [3:0] (i_clk);
always_comb begin
mult_if_in[0].mod = 0;
mult_if_in[0].sop = 1;
mult_if_in[0].eop = 1;
mult_if_in[0].dat = {3'd0, i_dat_b, 3'd0, i_dat_a};
mult_if_in[0].dat = 0;
mult_if_in[0].dat[0 +: DAT_BITS] = i_dat_a;
mult_if_in[0].dat[MLT_BITS +: DAT_BITS] = i_dat_b;
mult_if_in[0].val = i_val;
mult_if_in[0].err = i_err;
mult_if_in[0].ctl = i_ctl;
o_rdy = mult_if_in[0].rdy;
mult_if_out[3].sop = 1;
mult_if_out[3].eop = 1;
mult_if_out[3].mod = 0;
@ -77,16 +83,16 @@ resource_share_mod (
);
karatsuba_ofman_mult # (
.BITS ( 384 ),
.LEVEL ( KARATSUBA_LEVEL ),
.CTL_BITS ( CTL_BITS )
.BITS ( MLT_BITS ),
.LEVEL ( KARATSUBA_LVL ),
.CTL_BITS ( CTL_BITS )
)
karatsuba_ofman_mult (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
.i_ctl ( mult_if_in[3].ctl ),
.i_dat_a( mult_if_in[3].dat[0 +: 384] ),
.i_dat_b( mult_if_in[3].dat[384 +: 384] ),
.i_dat_a( mult_if_in[3].dat[0 +: MLT_BITS] ),
.i_dat_b( mult_if_in[3].dat[MLT_BITS +: MLT_BITS] ),
.i_val ( mult_if_in[3].val ),
.o_rdy ( mult_if_in[3].rdy ),
.o_dat ( mult_if_out[3].dat ),
@ -95,10 +101,12 @@ karatsuba_ofman_mult (
.o_ctl ( mult_if_out[3].ctl )
);
barret_mod_pipe #(
.DAT_BITS ( 384 ),
barret_mod_pipe #(
.DAT_BITS ( MLT_BITS ),
.CTL_BITS ( CTL_BITS ),
.P ( bls12_381_pkg::P )
.P ( P )
)
barret_mod_pipe (
.i_clk ( i_clk ),

View File

@ -0,0 +1,109 @@
/*
Package for Fp fields
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package ec_fp_pkg;
// Expected to be in Jacobian coordinates
typedef struct packed {
logic [255:0] x, y, z;
} jb_point_t;
function is_zero(jb_point_t p);
is_zero = (p.x == 0 && p.y == 0 && p.z == 1);
return is_zero;
endfunction
// Function to double point in Jacobian coordinates (for comparison in testbench)
// Here a is 0, and we also mod the result
function jb_point_t dbl_jb_point(jb_point_t p, input logic [1023:0] mod);
logic signed [1023:0] I_X, I_Y, I_Z, A, B, C, D, X, Y, Z;
if (p.z == 0) return p;
I_X = p.x;
I_Y = p.y;
I_Z = p.z;
A = (I_Y*I_Y) % mod;
B = (((4*I_X) % mod)*A) % mod;
C = (((8*A) % mod)*A) % mod;
D = (((3*I_X)% mod)*I_X) % mod;
X = (D*D)% mod;
X = X + ((2*B) % mod > X ? mod : 0) - (2*B) % mod;
Y = (D*((B + (X > B ? mod : 0)-X) % mod)) % mod;
Y = Y + (C > Y ? mod : 0) - C;
Z = (((2*I_Y)% mod)*I_Z) % mod;
dbl_jb_point.x = X;
dbl_jb_point.y = Y;
dbl_jb_point.z = Z;
return dbl_jb_point;
endfunction
function jb_point_t add_jb_point(jb_point_t p1, p2, input logic [1023:0] mod);
logic signed [1023:0] A, U1, U2, S1, S2, H, H3, R;
if (p1.z == 0) return p2;
if (p2.z == 0) return p1;
if (p1.y == p2.y && p1.x == p2.x)
return (dbl_jb_point(p1));
U1 = p1.x*p2.z % mod;
U1 = U1*p2.z % mod;
U2 = p2.x*p1.z % mod;
U2 = U2 *p1.z % mod;
S1 = p1.y *p2.z % mod;
S1 = (S1*p2.z % mod) *p2.z % mod;
S2 = p2.y * p1.z % mod;
S2 = (S2*p1.z % mod) *p1.z % mod;
H = U2 + (U1 > U2 ? mod : 0) -U1;
R = S2 + (S1 > S2 ? mod : 0) -S1;
H3 = ((H * H %mod ) * H ) % mod;
A = (((2*U1 % mod) *H % mod) * H % mod);
add_jb_point.z = ((H * p1.z % mod) * p2.z) % mod;
add_jb_point.x = R*R % mod;
add_jb_point.x = add_jb_point.x + (H3 > add_jb_point.x ? mod : 0) - H3;
add_jb_point.x = add_jb_point.x + (A > add_jb_point.x ? mod : 0) - A;
A = (U1*H % mod) * H % mod;
A = A + (add_jb_point.x > A ? mod : 0) - add_jb_point.x;
A = A*R % mod;
add_jb_point.y = S1*H3 % mod;
add_jb_point.y = A + (add_jb_point.y > A ? mod : 0) - add_jb_point.y;
endfunction
function on_curve(jb_point_t p);
return (p.y*p.y - p.x*p.x*p.x - secp256k1_pkg::a*p.x*p.z*p.z*p.z*p.z - secp256k1_pkg::b*p.z*p.z*p.z*p.z*p.z*p.z);
endfunction
function print_jb_point(jb_point_t p);
$display("x:%h", p.x);
$display("y:%h", p.y);
$display("z:%h", p.z);
return;
endfunction
endpackage

View File

@ -0,0 +1,253 @@
/*
This performs point doubling on a prime field Fp using jacobian points.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
module ec_fp_point_dbl
#(
parameter DAT_BITS,
parameter [DAT_BITS-1:0] P
)(
input i_clk, i_rst,
// Input point
input jb_point_t i_p,
input logic i_val,
output logic o_rdy,
// Output point
output jb_point_t o_p,
input logic i_rdy,
output logic o_val,
output logic o_err,
// Interface to multiplier (mod p)
if_axi_stream.source o_mult_if,
if_axi_stream.sink i_mult_if
);
/*
* These are the equations that need to be computed, they are issued as variables
* become valid. We have a bitmask to track what equation results are valid which
* will trigger other equations. [] show what equations must be valid before this starts.
*
* 0. A = (i_p.y)^2 mod p
* 1. B = (i_p.x)*A mod p [eq0]
* 2. B = 4*B mod p [eq1]
* 3. C = A^2 mod p [eq0]
* 4. C = C*8 mod p [eq3]
* 5. D = (i_p.x)^2 mod p
* 6. D = 3*D mod p [eq5]
* 7. (o_p.x) = D^2 mod p[eq6]
* 8. E = 2*B mod p [eq2]
* 9. (o_p.x) = o_p.x - E mod p [eq8, eq7]
* 10 (o_p.y) = B - o_p.x mod p [eq9, eq2]
* 11. (o_p.y) = D*(o_p.y) [eq10, eq6]
* 12. (o_p.y) = (o_p.y) - C mod p [eq11, eq4]
* 13. (o_p.z) = 2*(i_p.y) mod p
* 14. (o_p.z) = o_p.y * i_p.z mod p [eq14]
*/
logic [14:0] eq_val, eq_wait;
// Temporary variables
logic [DAT_BITS-1:0] A, B, C, D, E;
jb_point_t i_p_l;
always_comb begin
o_mult_if.sop = 1;
o_mult_if.eop = 1;
o_mult_if.err = 0;
o_mult_if.mod = 0;
end
enum {IDLE, START, FINISHED} state;
always_ff @ (posedge i_clk) begin
if (i_rst) begin
o_val <= 0;
o_rdy <= 0;
o_p <= 0;
o_mult_if.val <= 0;
o_mult_if.ctl <= 0;
o_mult_if.dat <= 0;
i_mult_if.rdy <= 0;
i_mod_if.rdy <= 0;
eq_val <= 0;
state <= IDLE;
eq_wait <= 0;
i_p_l <= 0;
o_err <= 0;
A <= 0;
B <= 0;
C <= 0;
D <= 0;
E <= 0;
end else begin
if (o_mult_if.rdy) o_mult_if.val <= 0;
case(state)
{IDLE}: begin
o_rdy <= 1;
eq_val <= 0;
eq_wait <= 0;
o_err <= 0;
i_mult_if.rdy <= 1;
i_p_l <= i_p;
A <= 0;
B <= 0;
C <= 0;
D <= 0;
E <= 0;
o_val <= 0;
if (i_val && o_rdy) begin
state <= START;
o_rdy <= 0;
if (i_p.z == 0) begin
o_p <= i_p;
o_val <= 1;
state <= FINISHED;
end
end
end
// Just a big if tree where we issue equations if the required inputs
// are valid
{START}: begin
i_mult_if.rdy <= 1;
// Check any results from multiplier
if (i_mult_if.val && i_mult_if.rdy) begin
eq_val[i_mult_if.ctl[5:0]] <= 1;
case(i_mult_if.ctl[5:0]) inside
0: A <= i_mult_if.dat;
1: B <= i_mult_if.dat;
2: B <= i_mult_if.dat;
3: C <= i_mult_if.dat;
4: C <= i_mult_if.dat;
5: D <= i_mult_if.dat;
6: D <= i_mult_if.dat;
7: o_p.x <= i_mult_if.dat;
11: o_p.y <= i_mult_if.dat;
14: o_p.z <= i_mult_if.dat;
default: o_err <= 1;
endcase
end
// Issue new multiplies
if (~eq_wait[0]) begin //0. A = (i_p.y)^2 mod p
multiply(0, i_p_l.y, i_p_l.y);
end else
if (eq_val[0] && ~eq_wait[1]) begin //1. B = (i_p.x)*A mod p [eq0]
multiply(1, i_p_l.x, A);
end else
if (eq_val[0] && ~eq_wait[3]) begin //3. C = A^2 mod p [eq0]
multiply(3, A, A);
end else
if (~eq_wait[5]) begin //5. D = (i_p.x)^2 mod p
multiply(5, i_p_l.x, i_p_l.x);
end else
if (eq_val[5] && ~eq_wait[6]) begin //6. D = 3*D mod p [eq5]
multiply(6, 256'd3, D);
end else
if (eq_val[6] && ~eq_wait[7]) begin //7. (o_p.x) = D^2 mod p[eq6]
multiply(7, D, D);
end else
if (eq_val[10] && eq_val[6] && ~eq_wait[11]) begin //11. (o_p.y) = D*(o_p.y) [eq10, eq6]
multiply(11, D, o_p.y);
end else
if (eq_val[13] && ~eq_wait[14]) begin //14. (o_p.z) = o_p.z * i_p.z mod p [eq13]
multiply(14, i_p_l.z, o_p.z);
end else
if (eq_val[1] && ~eq_wait[2]) begin //2. B = 4*B mod p [eq1]
multiply(2, B, 4);
end else
if (eq_val[3] && ~eq_wait[4]) begin //4. C = C*8 mod p [eq3]
multiply(4, C, 8);
end
// Additions / subtractions we do in-module
if (eq_val[8] && eq_val[7] && ~eq_wait[9]) begin //9. (o_p.x) = o_p.x - E mod p [eq8, eq7]
o_p.x <= subtract(9, o_p.x, E);
end
if (eq_val[9] && eq_val[2] && ~eq_wait[10]) begin //10. (o_p.y) = B - o_p.x mod p [eq9, eq2]
o_p.y <= subtract(10, B, o_p.x);
end
if (eq_val[2] && ~eq_wait[8]) begin //8. E = 2*B mod p [eq2]
E <= addition(8, B, B);
end
if (~eq_wait[13]) begin //13. (o_p.z) = 2*(i_p.y) mod p
o_p.z <= addition(13, i_p_l.y, i_p_l.y);
end
if (eq_val[4] && eq_val[11] && ~eq_wait[12]) begin //12. (o_p.y) = (o_p.y) - C mod p [eq11, eq4]
o_p.y <= subtract(12, o_p.y, C);
end
if (&eq_val) begin
state <= FINISHED;
o_val <= 1;
end
end
{FINISHED}: begin
if (o_val && i_rdy) begin
state <= IDLE;
o_val <= 0;
o_rdy <= 1;
end
end
endcase
if (o_err) begin
o_val <= 1;
if (o_val && i_rdy) begin
o_err <= 0;
state <= IDLE;
end
end
end
end
// Task for subtractions
function logic [DAT_BITS-1:0] subtract(input int unsigned ctl, input logic [DAT_BITS-1:0] a, b);
eq_wait[ctl] <= 1;
eq_val[ctl] <= 1;
return (a + (b > a ? P : 0) - b);
endfunction
// Task for addition
function logic [DAT_BITS-1:0] addition(input int unsigned ctl, input logic [DAT_BITS-1:0] a, b);
eq_wait[ctl] <= 1;
eq_val[ctl] <= 1;
return (a + b - (b + a > P ? P : 0));
endfunction
// Task for using multiplies
task multiply(input int unsigned ctl, input logic [DAT_BITS-1:0] a, b);
if (~o_mult_if.val || (o_mult_if.val && o_mult_if.rdy)) begin
o_mult_if.val <= 1;
o_mult_if.dat[0 +: DAT_BITS] <= a;
o_mult_if.dat[DAT_BITS +: DAT_BITS] <= b;
o_mult_if.ctl[5:0] <= ctl;
eq_wait[ctl] <= 1;
end
endtask
endmodule

View File

@ -16,8 +16,10 @@
*/
`timescale 1ps/1ps
module bls12_381_fp_mult_mod_tb ();
module ec_fp_mult_mod_tb ();
import common_pkg::*;
import ec_fp_pkg::*;
import bls12_381_pkg::*;
localparam CLK_PERIOD = 100;
@ -53,8 +55,13 @@ logic [380:0] out_dat;
always_comb out_if.dat = {3'd0, out_dat};
bls12_381_fp_mult_mod
bls12_381_fp_mult_mod (
ec_fp_mult_mod #(
.DAT_BITS ( 381 ),
.P ( bls12_381_pkg::P ),
.KARATSUBA_LVL ( 3 ),
.CTL_BITS ( 16 )
)
ec_fp_mult_mod (
.i_clk( clk ),
.i_rst( rst ),
.i_ctl ( 16'd0 ),

View File

@ -0,0 +1,150 @@
/*
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
`timescale 1ps/1ps
module ec_fp_point_dbl_tb ();
import common_pkg::*;
import secp256k1_pkg::*;
import ec_fp_pkg::*;
localparam CLK_PERIOD = 100;
logic clk, rst;
if_axi_stream #(.DAT_BYTS(256*3/8)) in_if(clk); // Point is X, Y, Z
if_axi_stream #(.DAT_BYTS(256*3/8)) out_if(clk);
if_axi_stream #(.DAT_BYTS(256*2/8), .CTL_BITS(8)) mult_in_if(clk);
if_axi_stream #(.DAT_BYTS(256/8), .CTL_BITS(8)) mult_out_if(clk);
jb_point_t in_p, out_p;
always_comb begin
in_p = in_if.dat;
out_if.dat = out_p;
end
initial begin
rst = 0;
repeat(2) #(20*CLK_PERIOD) rst = ~rst;
end
initial begin
clk = 0;
forever #CLK_PERIOD clk = ~clk;
end
always_comb begin
out_if.sop = 1;
out_if.eop = 1;
out_if.ctl = 0;
out_if.mod = 0;
end
// Check for errors
always_ff @ (posedge clk)
if (out_if.val && out_if.err)
$error(1, "%m %t ERROR: output .err asserted", $time);
ec_point_dbl #(
.DAT_BITS (256),
.P(secp256k1_pkg::p)
)
ec_point_dbl(
.i_clk ( clk ),
.i_rst ( rst ),
// Input point
.i_p ( in_p ),
.i_val ( in_if.val ),
.o_rdy ( in_if.rdy ),
.o_p ( out_p ),
.o_err ( out_if.err ),
.i_rdy ( out_if.rdy ),
.o_val ( out_if.val ) ,
.o_mult_if ( mult_in_if ),
.i_mult_if ( mult_out_if )
);
// Attach a mod reduction unit and multiply - mod unit
// In full design these could use dedicated multipliers or be arbitrated
secp256k1_mult_mod #(
.CTL_BITS ( 8 )
)
secp256k1_mult_mod (
.i_clk ( clk ),
.i_rst ( rst ),
.i_dat_a ( mult_in_if.dat[0 +: 256] ),
.i_dat_b ( mult_in_if.dat[256 +: 256] ),
.i_val ( mult_in_if.val ),
.i_err ( mult_in_if.err ),
.i_ctl ( mult_in_if.ctl ),
.i_cmd ( 1'd0 ),
.o_rdy ( mult_in_if.rdy ),
.o_dat ( mult_out_if.dat ),
.i_rdy ( mult_out_if.rdy ),
.o_val ( mult_out_if.val ),
.o_ctl ( mult_out_if.ctl ),
.o_err ( mult_out_if.err )
);
task test_0();
begin
integer signed get_len;
logic [common_pkg::MAX_SIM_BYTS*8-1:0] expected, get_dat;
logic [255:0] in_a, in_b;
jb_point_t p_in, p_exp, p_out;
$display("Running test_0...");
//p_in = {z:1, x:4, y:2};
//p_in = {z:10, x:64, y:23};
p_in = {x:secp256k1_pkg::Gx, y:secp256k1_pkg::Gx, z:1};
p_exp = dbl_jb_point(p_in, secp256k1_pkg::p);
fork
in_if.put_stream(p_in, 256*3/8);
out_if.get_stream(get_dat, get_len);
join
p_out = get_dat;
if (p_exp != p_out) begin
$display("Expected:");
print_jb_point(p_exp);
$display("Was:");
print_jb_point(p_out);
$fatal(1, "%m %t ERROR: test_0 point was wrong", $time);
end
$display("test_0 PASSED");
end
endtask;
function compare_point();
endfunction
initial begin
out_if.rdy = 0;
in_if.val = 0;
#(40*CLK_PERIOD);
test_0();
#1us $finish();
end
endmodule