Modify bls12-381 pairing engine .rdy signal so that it is not
combinatorial, to improve timing. Update version to 1.2.2
This commit is contained in:
parent
dbed8ccb0d
commit
3a8c799a74
14
README.md
14
README.md
|
@ -1,17 +1,21 @@
|
|||
The work in this repo is the result of a Zcash foundation grant to develop open-source FPGA code that can be used to accelerate various aspects of the network.
|
||||
**An Architecture document is [here](zcash_fpga_design_doc_v1.1.x.pdf)**.
|
||||
|
||||
While mainly developed for Equihash and the secp256k1 and bls12-381 curves, the code used in this repo can also be applied with minimum modification to other curves.
|
||||
While mainly developed for Equihash and the secp256k1 and bls12-381 curves, the code (ip_cores) used in this repo can also be applied to other curves by
|
||||
changing parameters or making minimal modifications to the equations.
|
||||
|
||||
**Currently still a work in progress.**
|
||||
# Getting started
|
||||
|
||||
The architecture document has instructions for building an AWS image or simulating the top level design. The easiest way is to add all .sv and .xci files to a new Vivado project,
|
||||
and then set the top level _tb.sv file to the module you want to test.
|
||||
|
||||
# Repo folder structure
|
||||
|
||||
Each top level folder is explained below. Inside each folder is source code written in SystemVerilog, and most blocks have a standalone self-checking testbench.
|
||||
|
||||
## aws
|
||||
## AWS
|
||||
|
||||
This contains the top / project files for building on an AWS (Amazon FPGA VU9P w/ 64GB DDR4).
|
||||
This contains the top / project files for building on an AWS F1 instance (Amazon FPGA VU9P w/ 64GB DDR4).
|
||||
|
||||
* This contains the zcash_fpga library (aws/cl_zcash/software/runtime/zcash_fpga.hpp) that can be used to interface with the FPGA over PCIe.
|
||||
* Instructions on how to build are in the architecture document.
|
||||
|
@ -22,7 +26,7 @@ This contains the top / project files for building on the Bittware VVH board (VU
|
|||
|
||||
## ip_cores
|
||||
|
||||
These contain shared IP cores used by the projects in this repo. These include many functions, such as:
|
||||
These contain shared IP cores that are used by the projects in this repo. These include many functions, such as:
|
||||
|
||||
* Hashing
|
||||
- Blake2b - single pipe implementation of blake2b and a pipeline-unrolled version for high performance (single clock hash @ 200MHz after initial 52 clock delay).
|
||||
|
|
|
@ -116,9 +116,6 @@ logic point_mul_mode, found_one;
|
|||
FE_TYPE key;
|
||||
|
||||
always_comb begin
|
||||
dbl_f12_o_if.rdy = pair_state == POINT_MULT_DBL || (f_val && (~mul_fe12_o_if[0].val || (mul_fe12_o_if[0].val && mul_fe12_o_if[0].rdy)) && ((out_cnt/2 == 0) || (out_cnt/2 == 1) || (out_cnt/2 == 4))); // As this is a sparse f12 using full f12_mul
|
||||
add_f12_o_if.rdy = pair_state == POINT_MULT_ADD || (f_val && (~mul_fe12_o_if[0].val || (mul_fe12_o_if[0].val && mul_fe12_o_if[0].rdy)) && ((out_cnt/2 == 0) || (out_cnt/2 == 1) || (out_cnt/2 == 4))); // As this is a sparse f12 using full f12_mul
|
||||
|
||||
final_exp_fe12_o_if.dat = f[0][0][0];
|
||||
final_exp_fe12_o_if.err = 0;
|
||||
final_exp_fe12_o_if.ctl = 0;
|
||||
|
@ -153,6 +150,9 @@ always_ff @ (posedge i_clk) begin
|
|||
stage_done <= 0;
|
||||
|
||||
o_p_jb_if.reset_source();
|
||||
|
||||
dbl_f12_o_if.rdy <= 0;
|
||||
add_f12_o_if.rdy <= 0;
|
||||
|
||||
end else begin
|
||||
|
||||
|
@ -166,6 +166,9 @@ always_ff @ (posedge i_clk) begin
|
|||
f <= {mul_fe12_i_if[0].dat, f[1], f[0][2:1], f[0][0][1]};
|
||||
f_val <= mul_fe12_i_if[0].eop;
|
||||
end
|
||||
|
||||
dbl_f12_o_if.rdy <= 0;
|
||||
add_f12_o_if.rdy <= 0;
|
||||
|
||||
case(pair_state)
|
||||
IDLE: begin
|
||||
|
@ -229,36 +232,41 @@ always_ff @ (posedge i_clk) begin
|
|||
end
|
||||
end
|
||||
1: begin // Multiply by double result
|
||||
if(~mul_fe12_o_if[0].val || (mul_fe12_o_if[0].val && mul_fe12_o_if[0].rdy)) begin
|
||||
if(~dbl_f12_o_if.rdy && (~mul_fe12_o_if[0].val || (mul_fe12_o_if[0].val && mul_fe12_o_if[0].rdy))) begin
|
||||
if ((dbl_f12_o_if.val && f_val) || (out_cnt/2 == 5)) begin
|
||||
mul_fe12_o_if[0].sop <= out_cnt == 0;
|
||||
mul_fe12_o_if[0].eop <= out_cnt == 11;
|
||||
mul_fe12_o_if[0].val <= 1;
|
||||
mul_fe12_o_if[0].val <= dbl_f12_o_if.val || (out_cnt/2 == 2) || (out_cnt/2 == 3) || (out_cnt/2 == 5);
|
||||
dbl_f12_o_if.rdy <= (out_cnt/2 == 0) || (out_cnt/2 == 1) || (out_cnt/2 == 4);
|
||||
case (out_cnt/2) inside
|
||||
0,1,4: mul_fe12_o_if[0].dat <= {dbl_f12_o_if.dat, f[0][0][0]};
|
||||
default: mul_fe12_o_if[0].dat <= {381'd0, f[0][0][0]};
|
||||
endcase
|
||||
|
||||
out_cnt <= out_cnt + 1;
|
||||
f <= {mul_fe12_i_if[0].dat, f[1], f[0][2:1], f[0][0][1]};
|
||||
mul_fe12_o_if[0].ctl <= miller_mult_cnt;
|
||||
mul_fe12_o_if[0].ctl[SQ_BIT] <= 0;
|
||||
if (out_cnt == 11) begin
|
||||
f_val <= 0;
|
||||
out_cnt <= 0;
|
||||
miller_mult_cnt <= ATE_X[ate_loop_cnt] == 0 ? 3 : 2;
|
||||
end
|
||||
end
|
||||
|
||||
mul_fe12_o_if[0].ctl <= miller_mult_cnt;
|
||||
mul_fe12_o_if[0].ctl[SQ_BIT] <= 0;
|
||||
|
||||
end
|
||||
end
|
||||
end
|
||||
2: begin // Multiply by add result
|
||||
if(~mul_fe12_o_if[0].val || (mul_fe12_o_if[0].val && mul_fe12_o_if[0].rdy)) begin
|
||||
if(~add_f12_o_if.rdy && (~mul_fe12_o_if[0].val || (mul_fe12_o_if[0].val && mul_fe12_o_if[0].rdy))) begin
|
||||
if ((add_f12_o_if.val && f_val) || (out_cnt/2 == 5)) begin
|
||||
g2_r_jb_i <= add_g2_o;
|
||||
mul_fe12_o_if[0].ctl <= miller_mult_cnt;
|
||||
mul_fe12_o_if[0].ctl[SQ_BIT] <= 0;
|
||||
mul_fe12_o_if[0].sop <= out_cnt == 0;
|
||||
mul_fe12_o_if[0].eop <= out_cnt == 11;
|
||||
mul_fe12_o_if[0].val <= 1;
|
||||
mul_fe12_o_if[0].val <= add_f12_o_if.val || (out_cnt/2 == 2) || (out_cnt/2 == 3) || (out_cnt/2 == 5);
|
||||
add_f12_o_if.rdy <= (out_cnt/2 == 0) || (out_cnt/2 == 1) || (out_cnt/2 == 4);
|
||||
out_cnt <= out_cnt + 1;
|
||||
case (out_cnt/2) inside
|
||||
0,1,4: mul_fe12_o_if[0].dat <= {add_f12_o_if.dat, f[0][0][0]};
|
||||
|
@ -302,6 +310,7 @@ always_ff @ (posedge i_clk) begin
|
|||
end
|
||||
end
|
||||
POINT_MULT_DBL: begin
|
||||
dbl_f12_o_if.rdy <= 1;
|
||||
if(found_one == 0) begin
|
||||
key <= key << 1;
|
||||
ate_loop_cnt <= ate_loop_cnt - 1;
|
||||
|
@ -327,6 +336,7 @@ always_ff @ (posedge i_clk) begin
|
|||
end
|
||||
end
|
||||
POINT_MULT_ADD: begin
|
||||
add_f12_o_if.rdy <= 1;
|
||||
if (~wait_add) begin
|
||||
wait_add <= 1;
|
||||
add_i_val <= 1;
|
||||
|
|
|
@ -27,7 +27,7 @@ package zcash_fpga_pkg;
|
|||
|
||||
import bls12_381_pkg::point_type_t;
|
||||
|
||||
parameter FPGA_VERSION = 32'h01_02_01; //v1.2.1
|
||||
parameter FPGA_VERSION = 32'h01_02_02; //v1.2.2
|
||||
|
||||
// What features are enabled in this build
|
||||
parameter bit ENB_VERIFY_SECP256K1_SIG = 1;
|
||||
|
|
Loading…
Reference in New Issue