Modify bls12-381 pairing engine .rdy signal so that it is not
combinatorial, to improve timing. Update version to 1.2.2
This commit is contained in:
parent
dbed8ccb0d
commit
3a8c799a74
14
README.md
14
README.md
|
@ -1,17 +1,21 @@
|
||||||
The work in this repo is the result of a Zcash foundation grant to develop open-source FPGA code that can be used to accelerate various aspects of the network.
|
The work in this repo is the result of a Zcash foundation grant to develop open-source FPGA code that can be used to accelerate various aspects of the network.
|
||||||
**An Architecture document is [here](zcash_fpga_design_doc_v1.1.x.pdf)**.
|
**An Architecture document is [here](zcash_fpga_design_doc_v1.1.x.pdf)**.
|
||||||
|
|
||||||
While mainly developed for Equihash and the secp256k1 and bls12-381 curves, the code used in this repo can also be applied with minimal modification to other curves.
|
While mainly developed for Equihash and the secp256k1 and bls12-381 curves, the code (ip_cores) used in this repo can also be applied to other curves by
|
||||||
|
changing parameters or making minimal modifications to equations.
|
||||||
|
|
||||||
** Currently still a work in progress
|
# Getting started
|
||||||
|
|
||||||
|
The architecture document has instructions for building an AWS image or simulating the top level design. The easiest way is to add all .sv and .xci files to a new Vivado project,
|
||||||
|
and then set the top level _tb.sv file to the module you want to test.
|
||||||
|
|
||||||
# Repo folder structure
|
# Repo folder structure
|
||||||
|
|
||||||
Each top level folder is explained below. Inside each folder is source code written in SystemVerilog, and most blocks have a standalone self-checking testbench.
|
Each top level folder is explained below. Inside each folder is source code written in SystemVerilog, and most blocks have a standalone self-checking testbench.
|
||||||
|
|
||||||
## aws
|
## AWS
|
||||||
|
|
||||||
This contains the top / project files for building on an AWS (Amazon FPGA VU9P w/ 64GB DDR4).
|
This contains the top / project files for building on an AWS F1 instance (Amazon FPGA VU9P w/ 64GB DDR4).
|
||||||
|
|
||||||
* This contains the zcash_fpga library (aws/cl_zcash/software/runtime/zcash_fpga.hpp) that can be used to interface with the FPGA over PCIe.
|
* This contains the zcash_fpga library (aws/cl_zcash/software/runtime/zcash_fpga.hpp) that can be used to interface with the FPGA over PCIe.
|
||||||
* Instructions on how to build are in the architecture document.
|
* Instructions on how to build are in the architecture document.
|
||||||
|
@ -22,7 +26,7 @@ This contains the top / project files for building on the Bittware VVH board (VU
|
||||||
|
|
||||||
## ip_cores
|
## ip_cores
|
||||||
|
|
||||||
These contain shared IP cores used by the projects in this repo. These include many functions, such as:
|
These contain shared IP cores that are used by the projects in this repo. These include many functions, such as:
|
||||||
|
|
||||||
* Hashing
|
* Hashing
|
||||||
- Blake2b - single pipe implementation of blake2b and a pipeline-unrolled version for high performance (single clock hash @ 200MHz after initial 52 clock delay).
|
- Blake2b - single pipe implementation of blake2b and a pipeline-unrolled version for high performance (single clock hash @ 200MHz after initial 52 clock delay).
|
||||||
|
|
|
@ -116,9 +116,6 @@ logic point_mul_mode, found_one;
|
||||||
FE_TYPE key;
|
FE_TYPE key;
|
||||||
|
|
||||||
always_comb begin
|
always_comb begin
|
||||||
dbl_f12_o_if.rdy = pair_state == POINT_MULT_DBL || (f_val && (~mul_fe12_o_if[0].val || (mul_fe12_o_if[0].val && mul_fe12_o_if[0].rdy)) && ((out_cnt/2 == 0) || (out_cnt/2 == 1) || (out_cnt/2 == 4))); // As this is a sparse f12 using full f12_mul
|
|
||||||
add_f12_o_if.rdy = pair_state == POINT_MULT_ADD || (f_val && (~mul_fe12_o_if[0].val || (mul_fe12_o_if[0].val && mul_fe12_o_if[0].rdy)) && ((out_cnt/2 == 0) || (out_cnt/2 == 1) || (out_cnt/2 == 4))); // As this is a sparse f12 using full f12_mul
|
|
||||||
|
|
||||||
final_exp_fe12_o_if.dat = f[0][0][0];
|
final_exp_fe12_o_if.dat = f[0][0][0];
|
||||||
final_exp_fe12_o_if.err = 0;
|
final_exp_fe12_o_if.err = 0;
|
||||||
final_exp_fe12_o_if.ctl = 0;
|
final_exp_fe12_o_if.ctl = 0;
|
||||||
|
@ -153,6 +150,9 @@ always_ff @ (posedge i_clk) begin
|
||||||
stage_done <= 0;
|
stage_done <= 0;
|
||||||
|
|
||||||
o_p_jb_if.reset_source();
|
o_p_jb_if.reset_source();
|
||||||
|
|
||||||
|
dbl_f12_o_if.rdy <= 0;
|
||||||
|
add_f12_o_if.rdy <= 0;
|
||||||
|
|
||||||
end else begin
|
end else begin
|
||||||
|
|
||||||
|
@ -166,6 +166,9 @@ always_ff @ (posedge i_clk) begin
|
||||||
f <= {mul_fe12_i_if[0].dat, f[1], f[0][2:1], f[0][0][1]};
|
f <= {mul_fe12_i_if[0].dat, f[1], f[0][2:1], f[0][0][1]};
|
||||||
f_val <= mul_fe12_i_if[0].eop;
|
f_val <= mul_fe12_i_if[0].eop;
|
||||||
end
|
end
|
||||||
|
|
||||||
|
dbl_f12_o_if.rdy <= 0;
|
||||||
|
add_f12_o_if.rdy <= 0;
|
||||||
|
|
||||||
case(pair_state)
|
case(pair_state)
|
||||||
IDLE: begin
|
IDLE: begin
|
||||||
|
@ -229,36 +232,41 @@ always_ff @ (posedge i_clk) begin
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
1: begin // Multiply by double result
|
1: begin // Multiply by double result
|
||||||
if(~mul_fe12_o_if[0].val || (mul_fe12_o_if[0].val && mul_fe12_o_if[0].rdy)) begin
|
if(~dbl_f12_o_if.rdy && (~mul_fe12_o_if[0].val || (mul_fe12_o_if[0].val && mul_fe12_o_if[0].rdy))) begin
|
||||||
if ((dbl_f12_o_if.val && f_val) || (out_cnt/2 == 5)) begin
|
if ((dbl_f12_o_if.val && f_val) || (out_cnt/2 == 5)) begin
|
||||||
mul_fe12_o_if[0].sop <= out_cnt == 0;
|
mul_fe12_o_if[0].sop <= out_cnt == 0;
|
||||||
mul_fe12_o_if[0].eop <= out_cnt == 11;
|
mul_fe12_o_if[0].eop <= out_cnt == 11;
|
||||||
mul_fe12_o_if[0].val <= 1;
|
mul_fe12_o_if[0].val <= dbl_f12_o_if.val || (out_cnt/2 == 2) || (out_cnt/2 == 3) || (out_cnt/2 == 5);
|
||||||
|
dbl_f12_o_if.rdy <= (out_cnt/2 == 0) || (out_cnt/2 == 1) || (out_cnt/2 == 4);
|
||||||
case (out_cnt/2) inside
|
case (out_cnt/2) inside
|
||||||
0,1,4: mul_fe12_o_if[0].dat <= {dbl_f12_o_if.dat, f[0][0][0]};
|
0,1,4: mul_fe12_o_if[0].dat <= {dbl_f12_o_if.dat, f[0][0][0]};
|
||||||
default: mul_fe12_o_if[0].dat <= {381'd0, f[0][0][0]};
|
default: mul_fe12_o_if[0].dat <= {381'd0, f[0][0][0]};
|
||||||
endcase
|
endcase
|
||||||
|
|
||||||
out_cnt <= out_cnt + 1;
|
out_cnt <= out_cnt + 1;
|
||||||
f <= {mul_fe12_i_if[0].dat, f[1], f[0][2:1], f[0][0][1]};
|
f <= {mul_fe12_i_if[0].dat, f[1], f[0][2:1], f[0][0][1]};
|
||||||
mul_fe12_o_if[0].ctl <= miller_mult_cnt;
|
|
||||||
mul_fe12_o_if[0].ctl[SQ_BIT] <= 0;
|
|
||||||
if (out_cnt == 11) begin
|
if (out_cnt == 11) begin
|
||||||
f_val <= 0;
|
f_val <= 0;
|
||||||
out_cnt <= 0;
|
out_cnt <= 0;
|
||||||
miller_mult_cnt <= ATE_X[ate_loop_cnt] == 0 ? 3 : 2;
|
miller_mult_cnt <= ATE_X[ate_loop_cnt] == 0 ? 3 : 2;
|
||||||
end
|
end
|
||||||
|
|
||||||
|
mul_fe12_o_if[0].ctl <= miller_mult_cnt;
|
||||||
|
mul_fe12_o_if[0].ctl[SQ_BIT] <= 0;
|
||||||
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
2: begin // Multiply by add result
|
2: begin // Multiply by add result
|
||||||
if(~mul_fe12_o_if[0].val || (mul_fe12_o_if[0].val && mul_fe12_o_if[0].rdy)) begin
|
if(~add_f12_o_if.rdy && (~mul_fe12_o_if[0].val || (mul_fe12_o_if[0].val && mul_fe12_o_if[0].rdy))) begin
|
||||||
if ((add_f12_o_if.val && f_val) || (out_cnt/2 == 5)) begin
|
if ((add_f12_o_if.val && f_val) || (out_cnt/2 == 5)) begin
|
||||||
g2_r_jb_i <= add_g2_o;
|
g2_r_jb_i <= add_g2_o;
|
||||||
mul_fe12_o_if[0].ctl <= miller_mult_cnt;
|
mul_fe12_o_if[0].ctl <= miller_mult_cnt;
|
||||||
mul_fe12_o_if[0].ctl[SQ_BIT] <= 0;
|
mul_fe12_o_if[0].ctl[SQ_BIT] <= 0;
|
||||||
mul_fe12_o_if[0].sop <= out_cnt == 0;
|
mul_fe12_o_if[0].sop <= out_cnt == 0;
|
||||||
mul_fe12_o_if[0].eop <= out_cnt == 11;
|
mul_fe12_o_if[0].eop <= out_cnt == 11;
|
||||||
mul_fe12_o_if[0].val <= 1;
|
mul_fe12_o_if[0].val <= add_f12_o_if.val || (out_cnt/2 == 2) || (out_cnt/2 == 3) || (out_cnt/2 == 5);
|
||||||
|
add_f12_o_if.rdy <= (out_cnt/2 == 0) || (out_cnt/2 == 1) || (out_cnt/2 == 4);
|
||||||
out_cnt <= out_cnt + 1;
|
out_cnt <= out_cnt + 1;
|
||||||
case (out_cnt/2) inside
|
case (out_cnt/2) inside
|
||||||
0,1,4: mul_fe12_o_if[0].dat <= {add_f12_o_if.dat, f[0][0][0]};
|
0,1,4: mul_fe12_o_if[0].dat <= {add_f12_o_if.dat, f[0][0][0]};
|
||||||
|
@ -302,6 +310,7 @@ always_ff @ (posedge i_clk) begin
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
POINT_MULT_DBL: begin
|
POINT_MULT_DBL: begin
|
||||||
|
dbl_f12_o_if.rdy <= 1;
|
||||||
if(found_one == 0) begin
|
if(found_one == 0) begin
|
||||||
key <= key << 1;
|
key <= key << 1;
|
||||||
ate_loop_cnt <= ate_loop_cnt - 1;
|
ate_loop_cnt <= ate_loop_cnt - 1;
|
||||||
|
@ -327,6 +336,7 @@ always_ff @ (posedge i_clk) begin
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
POINT_MULT_ADD: begin
|
POINT_MULT_ADD: begin
|
||||||
|
add_f12_o_if.rdy <= 1;
|
||||||
if (~wait_add) begin
|
if (~wait_add) begin
|
||||||
wait_add <= 1;
|
wait_add <= 1;
|
||||||
add_i_val <= 1;
|
add_i_val <= 1;
|
||||||
|
|
|
@ -27,7 +27,7 @@ package zcash_fpga_pkg;
|
||||||
|
|
||||||
import bls12_381_pkg::point_type_t;
|
import bls12_381_pkg::point_type_t;
|
||||||
|
|
||||||
parameter FPGA_VERSION = 32'h01_02_01; //v1.2.1
|
parameter FPGA_VERSION = 32'h01_02_02; //v1.2.2
|
||||||
|
|
||||||
// What features are enabled in this build
|
// What features are enabled in this build
|
||||||
parameter bit ENB_VERIFY_SECP256K1_SIG = 1;
|
parameter bit ENB_VERIFY_SECP256K1_SIG = 1;
|
||||||
|
|
Loading…
Reference in New Issue