Updates to FIFO and bug fix for unrolled pipe version of Blake2b

This commit is contained in:
bsdevlin 2019-02-19 19:28:13 -05:00
parent 05f5913fce
commit 523c1c7388
3 changed files with 71 additions and 35 deletions

View File

@ -10,7 +10,7 @@
Does not support using keys.
Further optimization to save area is fixing part of the input message as constant for
all hashes (just have nonce as input that changes and place this in i_block.ctl).
all hashes (just have nonce as input that changes and place this in i_block.ctl), as well as the message input length.
Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
@ -32,9 +32,10 @@ module blake2b_pipe_top
import blake2b_pkg::*;
// Do we fully unroll the pipeline (uses a lot of resources) or just unroll one pass
parameter FULLY_UNROLL = 0,
parameter FULLY_UNROLL,
// If we fully unroll the pipeline, the message byte length is hard-coded
parameter MSG_LEN = 3,
parameter MSG_LEN,
parameter MSG_VAR_BYTS = MSG_LEN, // Setting this != MSG_LEN will assume only those bytes are changing when fully unrolled
parameter CTL_BITS = 8
@ -49,12 +50,14 @@ module blake2b_pipe_top
localparam NUM_ROUNDS = 12;
localparam NUM_PASSES = 1 + MSG_LEN/128;
localparam NUM_PASSES = 1 + (MSG_LEN - 1)/128;
localparam NUM_PIPE = 2 + NUM_PASSES*(NUM_ROUNDS*2) + 2*NUM_PASSES - 1;
logic [NUM_PIPE-1:0][15:0][63:0] v;
logic [NUM_PIPE-1:0][7:0][63:0] h;
logic [NUM_PIPE-1:0][15:0][63:0] msg;
logic [MSG_LEN*8-1:0] msg_fixed;
logic [7:0] byte_len;
logic [NUM_PIPE-1:0][CTL_BITS-1:0] ctl;
logic [NUM_PIPE-1:0] eop_l, sop_l, valid;
@ -85,12 +88,18 @@ generate
h[0] <= 0;
valid[0] <= 0;
valid[1] <= 0;
msg_fixed <= 0;
byte_len <= 0;
end else begin
if (i_block.rdy) begin
// First stage
// First stage - where the input comes from depends on whether we are fully unrolling or not
h[0] <= i_parameters ^ blake2b_pkg::IV;
v[0] <= 0;
msg[0] <= i_block.dat;
if (i_block.val) begin
msg_fixed <= i_block.dat;
byte_len <= i_byte_len;
ctl[0] <= i_block.ctl;
valid[0] <= i_block.val;
@ -109,7 +118,7 @@ generate
for (g0 = 0; g0 < NUM_PASSES; g0++) begin: GEN_PASS
localparam LAST_BLOCK = (g0 == NUM_PASSES -1);
localparam SR_MSG_BYTS = LAST_BLOCK ? MSG_LEN % 128 : 128;
localparam SR_MSG_BYTS = (LAST_BLOCK && NUM_PASSES > 1) ? (MSG_LEN % 128) : 128;
localparam PIPE_G0 = 2 + NUM_ROUNDS*2 + g0*(NUM_ROUNDS*2 + 2);
// Each pass after 0 has a shift register for storing that part of the message
@ -148,7 +157,7 @@ generate
// Need to pull msg and ctl from the shift register if we have more than one pass
// and are fully unrolled. Otherwise the next message block will be present on the
// input. Assert that the control matches.
msg_out.rdy <= 1;
msg_out.rdy <= valid[PIPE_G0-1];
if (g0 > 0) begin
msg[PIPE_G0+1] <= msg_out.dat;
ctl[PIPE_G0+1] <= msg_out.ctl;
@ -162,24 +171,19 @@ generate
if (g0 > 0 && FULLY_UNROLL != 0) begin: GEN_MSG_FIFO
always_ff @ (posedge i_clk) begin
if (msg_in.val && msg_in.rdy) begin
msg_in.dat <= i_block.dat[128*8*g0 :+ 128*8];
msg_in.sop <= 0;
msg_in.eop <= 0;
msg_in.err <= 0;
msg_in.ctl <= i_block.ctl;
msg_in.mod <= LAST_BLOCK ? i_block.mod : 0;
always_comb begin
if (g0 == 0) i_block.rdy = msg_in.rdy;
msg_in.val = i_block.val;
msg_in.dat = i_block.dat[128*8*g0 :+ 128*8];
msg_in.sop = 0;
msg_in.eop = 0;
msg_in.err = 0;
msg_in.ctl = i_block.ctl;
msg_in.mod = LAST_BLOCK ? i_block.mod : 0;
axi_stream_fifo #(
.A_BITS ( $clog2(NUM_ROUNDS + 2) ),
.DAT_BITS ( 128*8 ),
@ -209,14 +213,26 @@ generate
always_ff @(posedge i_clk) begin
if (o_hash.rdy) begin
msg[PIPE_G2] <= msg[PIPE_G2-1];
//if (PIPE_G2 != PIPE_G0)
h[PIPE_G2] <= h[PIPE_G2-1];
ctl[PIPE_G2] <= ctl[PIPE_G2-1]; // TODO could remove?
ctl[PIPE_G2] <= ctl[PIPE_G2-1];
for (g3 = 0; g3 < 4; g3++) begin: GEN_G_FUNC_COL_DIAG
logic [63:0] msg0, msg1;
logic [16*64-1:0] msg_;
always_comb begin
msg_ = msg[PIPE_G2-1];
if (FULLY_UNROLL == 1)
for (int i = MSG_VAR_BYTS; i < 16*64; i++)
msg_[i*8 +: 8] = msg_fixed[i*8 +: 8];
for (int i = 0; i < 8; i ++) begin
msg0 = msg_[64*blake2b_pkg::SIGMA[16*(g1%10) + g2*8 + g3*2] +: 64];
msg1 = msg_[64*blake2b_pkg::SIGMA[16*(g1%10) + g2*8 + g3*2 + 1] +: 64];
#( .PIPELINES(1) )
blake2b_g (
@ -225,8 +241,8 @@ generate
.i_b(g2 == 0 ? v[PIPE_G2-1][blake2b_pkg::G_MAPPING[(g3*4 + 1)]] : v[PIPE_G2-1][blake2b_pkg::G_MAPPING[16 + (g3*4 + 1)]]),
.i_c(g2 == 0 ? v[PIPE_G2-1][blake2b_pkg::G_MAPPING[(g3*4 + 2)]] : v[PIPE_G2-1][blake2b_pkg::G_MAPPING[16 + (g3*4 + 2)]]),
.i_d(g2 == 0 ? v[PIPE_G2-1][blake2b_pkg::G_MAPPING[(g3*4 + 3)]] : v[PIPE_G2-1][blake2b_pkg::G_MAPPING[16 + (g3*4 + 3)]]),
.i_m0(msg[PIPE_G2-1][blake2b_pkg::SIGMA[16*(g1%10) + g2*8 + g3*2]]),
.i_m1(msg[PIPE_G2-1][blake2b_pkg::SIGMA[16*(g1%10) + g2*8 + g3*2 + 1]]),
.o_a(v[PIPE_G2][g2 == 0 ? blake2b_pkg::G_MAPPING[g3*4 + 0] : blake2b_pkg::G_MAPPING[16 + g3*4 + 0]]),
.o_b(v[PIPE_G2][g2 == 0 ? blake2b_pkg::G_MAPPING[g3*4 + 1] : blake2b_pkg::G_MAPPING[16 + g3*4 + 1]]),
.o_c(v[PIPE_G2][g2 == 0 ? blake2b_pkg::G_MAPPING[g3*4 + 2] : blake2b_pkg::G_MAPPING[16 + g3*4 + 2]]),
@ -246,7 +262,7 @@ begin
case (i) inside
0,1,2,3,4,5,6,7: v[j][i] <= h[j-1][i];
8,9,10,11: v[j][i] <= blake2b_pkg::IV[i%8];
12: v[j][i] <= blake2b_pkg::IV[i%8] ^ (last_block ? (MSG_LEN % 128) : j*128);
12: v[j][i] <= blake2b_pkg::IV[i%8] ^ (last_block ? byte_len : j*128);
13: v[j][i] <= blake2b_pkg::IV[i%8] ^ j*128 >> 64;
14: v[j][i] <= blake2b_pkg::IV[i%8] ^ {64{last_block}};
15: v[j][i] <= blake2b_pkg::IV[i%8];

View File

@ -20,7 +20,8 @@
module blake2b_top_tb();
parameter USE_BLAKE2B_PIPE = 1; // This instantiates the pipelined version instead
parameter USE_BLAKE2B_PIPE_MSG_LEN = 3;
parameter USE_BLAKE2B_PIPE_MSG_LEN = 140;
import blake2b_pkg::*;
import common_pkg::*;
@ -55,7 +56,9 @@ generate if ( USE_BLAKE2B_PIPE == 0 ) begin: DUT_GEN
end else begin
blake2b_pipe_top #(
.MSG_LEN ( 3 ),
.CTL_BITS ( 8 )
@ -114,6 +117,21 @@ begin
// This is a test for hashing a random string of 127 bytes
task test_127_bytes();
integer signed get_len;
logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat;
$display("Running test_127_bytes...");
expected = 'h14aee933634b9fa905fcf52aa64de25a8d9216e3bbb740f09d7b6d4dac498661c50e0cd1eb7e968bfe57f7107cd038e47777c2404229a6413067a008b36cc8da;
i_byte_len = 127;
i_block.put_stream("34h1im4zJ7w4rLLGGARc4FM3UT5JKPNkiLS4ojxRroYjvdzIApWsdVtEP2kzHMc7CKqbWRxOdkLxAb8XnWGHgwU5kmyDQqMvYOFrXf7rVaEXCU3IlZITlJ03sjjI0Jc", i_byte_len);
out_hash.get_stream(get_dat, get_len);
common_pkg::compare_and_print(get_dat, expected);
$display("test_127_bytes PASSED");
// Main testbench calls
initial begin
@ -122,9 +140,10 @@ initial begin
parameters = {32'd0, 8'd1, 8'd1, 8'd0, 8'd64};
// test_140_bytes();
#10us $finish();

View File

@ -20,7 +20,7 @@
module axi_stream_fifo #(
parameter A_BITS,
parameter DEPTH,
parameter DAT_BITS,
parameter CTL_BITS
) (
@ -31,10 +31,10 @@ module axi_stream_fifo #(
localparam MOD_BITS = $clog2(DAT_BITS/8);
logic [$clog2(A_BITS):0] rd_ptr, wr_ptr;
logic [$clog2(DEPTH):0] rd_ptr, wr_ptr;
logic empty, full;
logic [A_BITS-1:0][DAT_BITS + CTL_BITS + MOD_BITS + 3 -1:0] ram;
logic [DEPTH-1:0][DAT_BITS + CTL_BITS + MOD_BITS + 3 -1:0] ram;
// Control for full and empty, and assigning outputs from the ram
always_comb begin
@ -46,13 +46,14 @@ always_comb begin
o_axi.mod = ram[rd_ptr][CTL_BITS+DAT_BITS +: MOD_BITS];
o_axi.sop = ram[rd_ptr][CTL_BITS+DAT_BITS+MOD_BITS +: 1];
o_axi.eop = ram[rd_ptr][CTL_BITS+DAT_BITS+MOD_BITS+1 +: 1];
o_axi.err = ram[rd_ptr][CTL_BITS+DAT_BITS+MOD_BITS+2 +: 1];
o_axi.val = ~empty;
// Logic for writing and reading from ram without reset
always_ff @ (posedge i_clk) begin
if (i_axi.val && i_axi.rdy) begin
ram [wr_ptr] <= {i_axi.eop, i_axi.sop, i_axi.mod, i_axi.ctl, i_axi.dat};
ram [wr_ptr] <= {i_axi.err, i_axi.eop, i_axi.sop, i_axi.mod, i_axi.ctl, i_axi.dat};