diff --git a/ip_cores/blake2b/src/rtl/blake2b_pipe_top.sv b/ip_cores/blake2b/src/rtl/blake2b_pipe_top.sv
index 47b6af6..bde1404 100644
--- a/ip_cores/blake2b/src/rtl/blake2b_pipe_top.sv
+++ b/ip_cores/blake2b/src/rtl/blake2b_pipe_top.sv
@@ -10,7 +10,7 @@
   Does not support using keys.
 
   Futher optimization to save area is fixing part of input message constant for
-  all hashes (just have nonce as input that changes and place this in i_block.ctl).
+  all hashes (just have nonce as input that changes and place this in i_block.ctl), as well as the message input length.
 
   Copyright (C) 2019 Benjamin Devlin and Zcash Foundation
 
@@ -32,9 +32,10 @@ module blake2b_pipe_top
   import blake2b_pkg::*;
 #(
   // Do we fully unroll the pipeline (lot of resources) or just un-roll one pass
-  parameter FULLY_UNROLL = 0,
+  parameter FULLY_UNROLL,
   // If we fully unfold the pipeline, the message byte length is hard-coded
-  parameter MSG_LEN = 3,
+  parameter MSG_LEN,
+  parameter MSG_VAR_BYTS = MSG_LEN, // Setting this != MSG_LEN will assume only those bytes are changing when fully unrolled
   parameter CTL_BITS = 8
 )
 (
@@ -49,12 +50,14 @@ module blake2b_pipe_top
 
 
 localparam NUM_ROUNDS = 12;
-localparam NUM_PASSES = 1 + MSG_LEN/128;
+localparam NUM_PASSES = 1 + (MSG_LEN - 1)/128;
 localparam NUM_PIPE = 2 + NUM_PASSES*(NUM_ROUNDS*2) + 2*NUM_PASSES - 1;
 
 logic [NUM_PIPE-1:0][15:0][63:0] v;
 logic [NUM_PIPE-1:0][7:0][63:0] h;
 logic [NUM_PIPE-1:0][15:0][63:0] msg;
+logic [MSG_LEN*8-1:0] msg_fixed;
+logic [7:0] byte_len;
 logic [NUM_PIPE-1:0][CTL_BITS-1:0] ctl;
 logic [NUM_PIPE-1:0] eop_l, sop_l, valid;
 
@@ -85,12 +88,18 @@ generate
       h[0] <= 0;
       valid[0] <= 0;
       valid[1] <= 0;
+      msg_fixed <= 0;
+      byte_len <= 0;
     end else begin
       if (i_block.rdy) begin
-        // First stage
+        // First stage - depends if we are fully unrolling or not as where input comes from
         h[0] <= i_parameters ^ blake2b_pkg::IV;
         v[0] <= 0;
         msg[0] <= i_block.dat;
+        if (i_block.val) begin
+          msg_fixed <= i_block.dat;
+          byte_len <= i_byte_len;
+        end
         ctl[0] <= i_block.ctl;
         valid[0] <= i_block.val;
       end
@@ -109,7 +118,7 @@ generate
 
   for (g0 = 0; g0 < NUM_PASSES; g0++) begin: GEN_PASS
     localparam LAST_BLOCK = (g0 == NUM_PASSES -1);
-    localparam SR_MSG_BYTS = LAST_BLOCK ? MSG_LEN % 128 : 128;
+    localparam SR_MSG_BYTS = (LAST_BLOCK && NUM_PASSES > 1) ? (((MSG_LEN - 1) % 128) + 1) : 128; // Range 1..128: an exact multiple of 128 keeps a full last block instead of 0
     localparam PIPE_G0 = 2 + NUM_ROUNDS*2 + g0*(NUM_ROUNDS*2 + 2);
 
     // Each pass after 0 has a shift register for storing that part of the message
@@ -148,7 +157,7 @@ generate
         // Need to pull msg and ctl from the shift register if we have more than one pass
         // and we fully unrolled. Otherwise next input will be on input. Assert the control
         // matches.
-        msg_out.rdy <= 1;
+        msg_out.rdy <= valid[PIPE_G0-1];
         if (g0 > 0) begin
           msg[PIPE_G0+1] <= msg_out.dat;
           ctl[PIPE_G0+1] <= msg_out.ctl;
@@ -162,26 +171,21 @@ generate
 
     if (g0 > 0 && FULLY_UNROLL != 0) begin: GEN_MSG_FIFO
 
-      always_ff @ (posedge i_clk) begin
-        if (msg_in.val && msg_in.rdy) begin
-          msg_in.dat <= i_block.dat[128*8*g0 :+ 128*8];
-          msg_in.sop <= 0;
-          msg_in.eop <= 0;
-          msg_in.err <= 0;
-          msg_in.ctl <= i_block.ctl;
-          msg_in.mod <= LAST_BLOCK ? i_block.mod : 0;
-        end
-      end
-
       always_comb begin
         if (g0 == 0) i_block.rdy = msg_in.rdy;
         msg_in.val = i_block.val;
+        msg_in.dat = i_block.dat[128*8*g0 +: 128*8]; // Indexed part-select: the 128-byte block belonging to pass g0
+        msg_in.sop = 0;
+        msg_in.eop = 0;
+        msg_in.err = 0;
+        msg_in.ctl = i_block.ctl;
+        msg_in.mod = LAST_BLOCK ? i_block.mod : 0;
       end
 
       axi_stream_fifo #(
-        .A_BITS ( $clog2(NUM_ROUNDS + 2) ),
-        .DAT_BITS ( 128*8 ),
-        .CTL_BITS ( CTL_BITS )
+        .DEPTH ( NUM_ROUNDS + 2 ),
+        .DAT_BITS ( 128*8 ),
+        .CTL_BITS ( CTL_BITS )
       )
       message_fifo (
         .i_clk ( i_clk ),
@@ -209,14 +213,26 @@ generate
         always_ff @(posedge i_clk) begin
           if (o_hash.rdy) begin
             msg[PIPE_G2] <= msg[PIPE_G2-1];
-            //if (PIPE_G2 != PIPE_G0) h[PIPE_G2] <= h[PIPE_G2-1];
-            ctl[PIPE_G2] <= ctl[PIPE_G2-1]; // TODO could remove?
+            ctl[PIPE_G2] <= ctl[PIPE_G2-1];
           end
         end
 
 
         for (g3 = 0; g3 < 4; g3++) begin: GEN_G_FUNC_COL_DIAG
+          logic [63:0] msg0, msg1;
+          logic [16*64-1:0] msg_;
+          always_comb begin
+            msg_ = msg[PIPE_G2-1];
+            if (FULLY_UNROLL != 0)
+              for (int i = MSG_VAR_BYTS; i < 16*8 && i < MSG_LEN; i++) // i is a byte index: msg_ holds 16*8 bytes, msg_fixed holds MSG_LEN bytes
+                msg_[i*8 +: 8] = msg_fixed[i*8 +: 8];
+            // The two message words for this G function are chosen by SIGMA; the selection does not depend on any loop index
+            msg0 = msg_[64*blake2b_pkg::SIGMA[16*(g1%10) + g2*8 + g3*2] +: 64];
+            msg1 = msg_[64*blake2b_pkg::SIGMA[16*(g1%10) + g2*8 + g3*2 + 1] +: 64];
+            // NOTE(review): msg_fixed is indexed from byte 0 in every pass - confirm whether passes after the first need a 128*g0 byte offset
+          end
+
           blake2b_g #(
             .PIPELINES(1)
           )
           blake2b_g (
@@ -225,8 +241,8 @@ generate
             .i_b(g2 == 0 ? v[PIPE_G2-1][blake2b_pkg::G_MAPPING[(g3*4 + 1)]] : v[PIPE_G2-1][blake2b_pkg::G_MAPPING[16 + (g3*4 + 1)]]),
             .i_c(g2 == 0 ? v[PIPE_G2-1][blake2b_pkg::G_MAPPING[(g3*4 + 2)]] : v[PIPE_G2-1][blake2b_pkg::G_MAPPING[16 + (g3*4 + 2)]]),
             .i_d(g2 == 0 ? v[PIPE_G2-1][blake2b_pkg::G_MAPPING[(g3*4 + 3)]] : v[PIPE_G2-1][blake2b_pkg::G_MAPPING[16 + (g3*4 + 3)]]),
-            .i_m0(msg[PIPE_G2-1][blake2b_pkg::SIGMA[16*(g1%10) + g2*8 + g3*2]]),
-            .i_m1(msg[PIPE_G2-1][blake2b_pkg::SIGMA[16*(g1%10) + g2*8 + g3*2 + 1]]),
+            .i_m0(msg0),
+            .i_m1(msg1),
             .o_a(v[PIPE_G2][g2 == 0 ? blake2b_pkg::G_MAPPING[g3*4 + 0] : blake2b_pkg::G_MAPPING[16 + g3*4 + 0]]),
             .o_b(v[PIPE_G2][g2 == 0 ? blake2b_pkg::G_MAPPING[g3*4 + 1] : blake2b_pkg::G_MAPPING[16 + g3*4 + 1]]),
             .o_c(v[PIPE_G2][g2 == 0 ? blake2b_pkg::G_MAPPING[g3*4 + 2] : blake2b_pkg::G_MAPPING[16 + g3*4 + 2]]),
@@ -246,7 +262,7 @@ begin
     case (i) inside
       0,1,2,3,4,5,6,7: v[j][i] <= h[j-1][i];
       8,9,10,11: v[j][i] <= blake2b_pkg::IV[i%8];
-      12: v[j][i] <= blake2b_pkg::IV[i%8] ^ (last_block ? (MSG_LEN % 128) : j*128);
+      12: v[j][i] <= blake2b_pkg::IV[i%8] ^ (last_block ? byte_len : j*128);
       13: v[j][i] <= blake2b_pkg::IV[i%8] ^ j*128 >> 64;
       14: v[j][i] <= blake2b_pkg::IV[i%8] ^ {64{last_block}};
       15: v[j][i] <= blake2b_pkg::IV[i%8];
diff --git a/ip_cores/blake2b/src/tb/blake2b_top_tb.sv b/ip_cores/blake2b/src/tb/blake2b_top_tb.sv
index c768b60..21be188 100644
--- a/ip_cores/blake2b/src/tb/blake2b_top_tb.sv
+++ b/ip_cores/blake2b/src/tb/blake2b_top_tb.sv
@@ -20,7 +20,8 @@
 module blake2b_top_tb();
 
 parameter USE_BLAKE2B_PIPE = 1; // This instantiates the pipelined version instead
-parameter USE_BLAKE2B_PIPE_MSG_LEN = 3;
+parameter USE_BLAKE2B_PIPE_MSG_LEN = 140;
+parameter MSG_VAR_BYTS = USE_BLAKE2B_PIPE_MSG_LEN;
 
 import blake2b_pkg::*;
 import common_pkg::*;
@@ -55,7 +56,9 @@ generate if ( USE_BLAKE2B_PIPE == 0 ) begin: DUT_GEN
   );
 end else begin
   blake2b_pipe_top #(
-    .MSG_LEN ( 3 ),
+    .MSG_LEN ( USE_BLAKE2B_PIPE_MSG_LEN ),
+    .MSG_VAR_BYTS ( MSG_VAR_BYTS ),
+    .FULLY_UNROLL ( 1 ),
     .CTL_BITS ( 8 )
   )
   DUT (
@@ -114,6 +117,21 @@ begin
 end
 endtask
 
+// This is a test for hashing random string of 127 bytes
+task test_127_bytes();
+begin
+  integer signed get_len;
+  logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat;
+  $display("Running test_127_bytes...");
+  expected = 'h14aee933634b9fa905fcf52aa64de25a8d9216e3bbb740f09d7b6d4dac498661c50e0cd1eb7e968bfe57f7107cd038e47777c2404229a6413067a008b36cc8da;
+  i_byte_len = 127;
+  i_block.put_stream("34h1im4zJ7w4rLLGGARc4FM3UT5JKPNkiLS4ojxRroYjvdzIApWsdVtEP2kzHMc7CKqbWRxOdkLxAb8XnWGHgwU5kmyDQqMvYOFrXf7rVaEXCU3IlZITlJ03sjjI0Jc", i_byte_len);
+  out_hash.get_stream(get_dat, get_len);
+  common_pkg::compare_and_print(get_dat, expected);
+  $display("test_127_bytes PASSED");
+end
+endtask
+
 // Main testbench calls
 initial begin
   i_block.reset_source();
@@ -122,9 +140,10 @@ initial begin
   parameters = {32'd0, 8'd1, 8'd1, 8'd0, 8'd64};
 
   #200ns;
-  rfc_test();
-  // test_128_bytes();
-  // test_140_bytes();
+  //rfc_test();
+  //test_127_bytes();
+  //test_128_bytes();
+  test_140_bytes();
 
   #10us $finish();
 
diff --git a/ip_cores/fifo/src/rtl/axi_stream_fifo.sv b/ip_cores/fifo/src/rtl/axi_stream_fifo.sv
index efeccdd..915fa0c 100644
--- a/ip_cores/fifo/src/rtl/axi_stream_fifo.sv
+++ b/ip_cores/fifo/src/rtl/axi_stream_fifo.sv
@@ -20,7 +20,7 @@
  */
 
 module axi_stream_fifo #(
-  parameter A_BITS,
+  parameter DEPTH,
   parameter DAT_BITS,
   parameter CTL_BITS
 ) (
@@ -31,10 +31,10 @@ module axi_stream_fifo #(
 
 localparam MOD_BITS = $clog2(DAT_BITS/8);
 
-logic [$clog2(A_BITS):0] rd_ptr, wr_ptr;
+logic [$clog2(DEPTH):0] rd_ptr, wr_ptr;
 logic empty, full;
 
-logic [A_BITS-1:0][DAT_BITS + CTL_BITS + MOD_BITS + 3 -1:0] ram;
+logic [DEPTH-1:0][DAT_BITS + CTL_BITS + MOD_BITS + 3 -1:0] ram;
 
 // Control for full and empty, and assigning outputs from the ram
 always_comb begin
@@ -46,13 +46,14 @@ always_comb begin
   o_axi.mod = ram[rd_ptr][CTL_BITS+DAT_BITS +: MOD_BITS];
   o_axi.sop = ram[rd_ptr][CTL_BITS+DAT_BITS+MOD_BITS +: 1];
   o_axi.eop = ram[rd_ptr][CTL_BITS+DAT_BITS+MOD_BITS+1 +: 1];
+  o_axi.err = ram[rd_ptr][CTL_BITS+DAT_BITS+MOD_BITS+2 +: 1];
   o_axi.val = ~empty;
 end
 
 // Logic for writing and reading from ram without reset
 always_ff @ (posedge i_clk) begin
   if (i_axi.val && i_axi.rdy) begin
-    ram [wr_ptr] <= {i_axi.eop, i_axi.sop, i_axi.mod, i_axi.ctl, i_axi.dat};
+    ram [wr_ptr] <= {i_axi.err, i_axi.eop, i_axi.sop, i_axi.mod, i_axi.ctl, i_axi.dat};
   end
 end
 