Update to blake2 core to work correctly with messages larger than 128 bytes;
added testbench for checking this.
This commit is contained in:
bsdevlin 2019-02-13 15:23:22 -05:00
parent d391cbc992
commit 1992538306
3 changed files with 58 additions and 49 deletions

View File

@ -14,37 +14,37 @@ package blake2_pkg;
// Sigma permutations used for G function blocks and input messages
parameter [16*10-1:0][31:0] SIGMA = {
0, 13, 12, 3, 14, 9, 11, 15, 5, 1, 6, 7, 4, 8, 2, 10,
5, 10, 4, 1, 7, 13, 2, 12, 8, 0, 3, 11, 9, 14, 15, 6,
10, 2, 6, 8, 4, 15, 0, 5, 9, 3, 1, 12, 14, 7, 11, 13,
11, 8, 2, 9, 3, 6, 7, 0, 10, 4, 13, 14, 15, 1, 5, 12,
9, 1, 14, 15, 5, 7, 13, 4, 3, 8, 11, 0, 10, 6, 12, 2,
13, 3, 8, 6, 12, 11, 1, 14, 15, 10, 4, 2, 7, 5, 0, 9,
8, 15, 0, 4, 10, 5, 6, 2, 14, 11, 12, 13, 1, 3, 9, 7,
4, 9, 1, 7, 6, 3, 14, 10, 13, 15, 2, 5, 0, 12, 8, 11,
3, 5, 7, 11, 2, 0, 12, 1, 6, 13, 15, 9, 8, 4, 10, 14,
15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
32'd0, 32'd13, 32'd12, 32'd3, 32'd14, 32'd9, 32'd11, 32'd15, 32'd5, 32'd1, 32'd6, 32'd7, 32'd4, 32'd8, 32'd2, 32'd10,
32'd5, 32'd10, 32'd4, 32'd1, 32'd7, 32'd13, 32'd2, 32'd12, 32'd8, 32'd0, 32'd3, 32'd11, 32'd9, 32'd14, 32'd15, 32'd6,
32'd10, 32'd2, 32'd6, 32'd8, 32'd4, 32'd15, 32'd0, 32'd5, 32'd9, 32'd3, 32'd1, 32'd12, 32'd14, 32'd7, 32'd11, 32'd13,
32'd11, 32'd8, 32'd2, 32'd9, 32'd3, 32'd6, 32'd7, 32'd0, 32'd10, 32'd4, 32'd13, 32'd14, 32'd15, 32'd1, 32'd5, 32'd12,
32'd9, 32'd1, 32'd14, 32'd15, 32'd5, 32'd7, 32'd13, 32'd4, 32'd3, 32'd8, 32'd11, 32'd0, 32'd10, 32'd6, 32'd12, 32'd2,
32'd13, 32'd3, 32'd8, 32'd6, 32'd12, 32'd11, 32'd1, 32'd14, 32'd15, 32'd10, 32'd4, 32'd2, 32'd7, 32'd5, 32'd0, 32'd9,
32'd8, 32'd15, 32'd0, 32'd4, 32'd10, 32'd5, 32'd6, 32'd2, 32'd14, 32'd11, 32'd12, 32'd13, 32'd1, 32'd3, 32'd9, 32'd7,
32'd4, 32'd9, 32'd1, 32'd7, 32'd6, 32'd3, 32'd14, 32'd10, 32'd13, 32'd15, 32'd2, 32'd5, 32'd0, 32'd12, 32'd8, 32'd11,
32'd3, 32'd5, 32'd7, 32'd11, 32'd2, 32'd0, 32'd12, 32'd1, 32'd6, 32'd13, 32'd15, 32'd9, 32'd8, 32'd4, 32'd10, 32'd14,
32'd15, 32'd14, 32'd13, 32'd12, 32'd11, 32'd10, 32'd9, 32'd8, 32'd7, 32'd6, 32'd5, 32'd4, 32'd3, 32'd2, 32'd1, 32'd0
};
// Mapping for each G function block to the state vector v
parameter [4*8-1:0][31:0] G_MAPPING = {
14, 9, 4, 3,
13, 8, 7, 2,
12, 11, 6, 1,
15, 10, 5, 0,
15, 11, 7, 3,
14, 10, 6, 2,
13, 9, 5, 1,
12, 8, 4, 0
32'd14, 32'd9, 32'd4, 32'd3,
32'd13, 32'd8, 32'd7, 32'd2,
32'd12, 32'd11, 32'd6, 32'd1,
32'd15, 32'd10, 32'd5, 32'd0,
32'd15, 32'd11, 32'd7, 32'd3,
32'd14, 32'd10, 32'd6, 32'd2,
32'd13, 32'd9, 32'd5, 32'd1,
32'd12, 32'd8, 32'd4, 32'd0
};
// This is so we can get the correct mapping back from the diagonal
// operation
parameter [4*4-1:0][31:0] G_MAPPING_DIAG = {
3, 15, 11,7,
6, 2, 14, 10,
9, 5, 1, 13,
12, 8 , 4, 0
32'd3, 32'd15, 32'd11, 32'd7,
32'd6, 32'd2, 32'd14, 32'd10,
32'd9, 32'd5, 32'd1, 32'd13,
32'd12, 32'd8 , 32'd4, 32'd0
};
endpackage

View File

@ -1,6 +1,6 @@
/* Implemented from RFC-7693, The BLAKE2 Cryptographic Hash and Message Authentication Code (MAC)
* Personalization string in the input parameter should be "ZcashPoW" followed by n and k in
* little endian order.
* Parameters are passed in as an input. Inputs and outputs are AXI stream and respect flow control.
* Only one hash is computed at a time, and it takes 26 clocks * number of 128 Byte message blocks.
*/
module blake2_top
@ -24,32 +24,33 @@ localparam ROUNDS = 12;
logic [7:0][63:0] h, h_tmp; // The state vector
logic [15:0][63:0] v, v_tmp; // The local work vector and its intermediate value
logic [31:0][63:0] g_out;//, g_out_r; // Outputs of the G mixing function - use 8 here to save on timing
logic [127:0] t; // Counter - TODO make this smaller - related to param
logic [31:0][63:0] g_out; // Outputs of the G mixing function - use 8 here to save on timing
logic [127:0] t; // Counter
logic [$clog2(ROUNDS)-1:0] round_cntr, round_cntr_msg, round_cntr_fin;
logic g_col;
logic [15:0][63:0] block, block_r; // The message block registered and converted to a 2d array
logic block_eop_l; // Use to latch if this is the final block
logic h_xor_done;
logic [7:0] byte_len_l;
// Pipelining logic that has no reset
always_ff @(posedge i_clk) begin
//g_out_r <= g_out;
if (blake2_state == STATE_IDLE && ~i_block.rdy)
block_r <= 0;
if (blake2_state == STATE_IDLE) begin
block_r <= 0;
if (i_block.val && i_block.rdy) begin
block_r <= i_block.dat;
end
if (i_block.val && i_block.rdy) begin
block_r <= i_block.dat;
end
for (int i = 0; i < 16; i++)
if (g_col == 0/* && blake2_state == STATE_ROUNDS*/) // TODO why do I need this qualifier
if (g_col == 0)
v_tmp[i] <= g_out[blake2_pkg::G_MAPPING[i]];
for (int i = 0; i < 8; i++)
if (blake2_state == STATE_ROUNDS)
h_tmp[i] <= g_out[16 + blake2_pkg::G_MAPPING_DIAG[i]] ^ g_out[16 + blake2_pkg::G_MAPPING_DIAG[i+8]]; //TODO fix
h_tmp[i] <= g_out[16 + blake2_pkg::G_MAPPING_DIAG[i]] ^ g_out[16 + blake2_pkg::G_MAPPING_DIAG[i+8]];
end
@ -71,14 +72,16 @@ always_ff @(posedge i_clk) begin
o_hash.reset_source();
round_cntr_fin <= 0;
block_eop_l <= 0;
h_xor_done <= 0;
byte_len_l <= 0;
end else begin
if (blake2_state != STATE_NEXT_BLOCK) g_col <= ~g_col;
g_col <= ~g_col;
case (blake2_state)
STATE_IDLE: begin
h <= i_parameters ^ blake2_pkg::IV;
t <= 128;
t <= 2;
i_block.rdy <= 1;
v <= 0;
o_hash.val <= 0;
@ -87,11 +90,12 @@ always_ff @(posedge i_clk) begin
round_cntr_msg <= 0;
round_cntr_fin <= 0;
if (i_block.rdy && i_block.val && i_block.sop) begin
init_local_work_vector(i_byte_len, i_block.eop);
init_local_work_vector(i_block.eop ? i_byte_len : 128, i_block.eop);
blake2_state <= STATE_ROUNDS;
g_col <= 0;
i_block.rdy <= 0;
block_eop_l <= i_block.eop;
byte_len_l <= i_byte_len;
end
end
// Here we do the compression over 12 rounds; each round can be done in two clock cycles
@ -116,7 +120,6 @@ always_ff @(posedge i_clk) begin
blake2_state <= STATE_FINAL_BLOCK;
else begin
blake2_state <= STATE_NEXT_BLOCK;
i_block.rdy <= 1;
end
end
end
@ -124,16 +127,22 @@ always_ff @(posedge i_clk) begin
round_cntr <= 0;
round_cntr_msg <= 0;
round_cntr_fin <= 0;
h_xor_done <= 1;
i_block.rdy <= 1;
if (~h_xor_done)
for (int i = 0; i < 8; i++)
h[i] <= h[i] ^ h_tmp[i];
if (i_block.rdy && i_block.val) begin
init_local_work_vector(t, i_block.eop); //TODO this wont work with h_tmp
init_local_work_vector(i_block.eop ? byte_len_l : t*128, i_block.eop);
block_eop_l <= i_block.eop;
t <= t + 128;
h <= h ^ h_tmp;
t <= t + 1;
blake2_state <= STATE_ROUNDS;
h_xor_done <= 0;
i_block.rdy <= 0;
g_col <= 0;
end
end
STATE_FINAL_BLOCK: begin
t <= 128;
round_cntr <= 0;
round_cntr_fin <= 0;
round_cntr_msg <= 0;
@ -162,7 +171,7 @@ generate begin
// For each G function we want to pipeline the input message to help timing
logic [63:0] m0, m1;
always_ff @ (posedge i_clk) begin
if(blake2_state == STATE_IDLE) begin
if(blake2_state == STATE_IDLE || blake2_state == STATE_NEXT_BLOCK) begin
m0 <= block[blake2_pkg::SIGMA[gv_g*2]];
m1 <= block[blake2_pkg::SIGMA[gv_g*2 + 1]];
end else begin

View File

@ -37,9 +37,9 @@ task rfc_test();
begin
integer signed get_len;
logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat;
expected = 'h560c602c9cda1e198190f58e6341131f127367051c64f7df7d343e1b4c32a8bbc0eac1bcae463807dca442ae77d5150df700f6a640949a52cd4341dfc1e1044b;
expected = 'h239900d4ed8623b95a92f1dba88ad31895cc3345ded552c22d79ab2a39c5877dd1a2ffdb6fbb124bb7c45a68142f214ce9f6129fb697276a0d4d1c983fa580ba;
i_byte_len = 3;
i_block.put_stream("hSV", i_byte_len);
i_block.put_stream("cba", i_byte_len);
out_hash.get_stream(get_dat, get_len);
common_pkg::compare_and_print(get_dat, expected);
$display("rfc_test PASSED");
@ -65,9 +65,9 @@ task test_140_bytes();
begin
integer signed get_len;
logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat;
expected = 'h429b65332e3b6701a29664f98c247204858479f55a8c18cc9b0ffa321cda4288fd420a5d47d134949f3b858bff7a696a00d91a07c92055cdd597971cf573281c;
expected = 'h2012a869a3b89a69ffc954f6855c7f61a61190553dc487171ec3fe944d04c83cd4c842fff5a8258d5e14b05b7b6f30e8ddcb754d719137ec42fb5cdb562f8c89;
i_byte_len = 140;
i_block.put_stream("6RehRZqUdYD2SB3N35QlQhreiU2XEaSgIGUsreLqV49l8Z5r93FbP567Juqc1IUaVyJKv8qFmtQwXYvZdnrMacAs5H9hBhs5JxAfyDibIM3TjKyiVzXC8lfCqiN1j6fW8FSJY131mVpw", i_byte_len);
i_block.put_stream("YbEAEzgJ1tgC3t6vDaJFqlWp1PaL482f7iZZzRj3xXpY2PPupwdTKAaBzB6KuN6j0alaoaFQfNboDbkNv5KDs5d7zN9JssrtOjGJdrVLfvb7uAdnVYoIgIv2zbXUQIPpwWdzEzj1CzX5", i_byte_len);
out_hash.get_stream(get_dat, get_len);
common_pkg::compare_and_print(get_dat, expected);
$display("test_140_bytes PASSED");
@ -83,8 +83,8 @@ initial begin
#200ns;
rfc_test();
//test_128_bytes();
//test_140_bytes();
test_128_bytes();
test_140_bytes();
#10us $finish();