Update the blake2 core to work correctly with messages larger than 128
bytes, and add testbench cases to check this.
This commit is contained in:
parent
d391cbc992
commit
1992538306
|
@ -14,37 +14,37 @@ package blake2_pkg;
|
|||
|
||||
// Sigma permutations used for G function blocks and input messages
|
||||
parameter [16*10-1:0][31:0] SIGMA = {
|
||||
0, 13, 12, 3, 14, 9, 11, 15, 5, 1, 6, 7, 4, 8, 2, 10,
|
||||
5, 10, 4, 1, 7, 13, 2, 12, 8, 0, 3, 11, 9, 14, 15, 6,
|
||||
10, 2, 6, 8, 4, 15, 0, 5, 9, 3, 1, 12, 14, 7, 11, 13,
|
||||
11, 8, 2, 9, 3, 6, 7, 0, 10, 4, 13, 14, 15, 1, 5, 12,
|
||||
9, 1, 14, 15, 5, 7, 13, 4, 3, 8, 11, 0, 10, 6, 12, 2,
|
||||
13, 3, 8, 6, 12, 11, 1, 14, 15, 10, 4, 2, 7, 5, 0, 9,
|
||||
8, 15, 0, 4, 10, 5, 6, 2, 14, 11, 12, 13, 1, 3, 9, 7,
|
||||
4, 9, 1, 7, 6, 3, 14, 10, 13, 15, 2, 5, 0, 12, 8, 11,
|
||||
3, 5, 7, 11, 2, 0, 12, 1, 6, 13, 15, 9, 8, 4, 10, 14,
|
||||
15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
|
||||
32'd0, 32'd13, 32'd12, 32'd3, 32'd14, 32'd9, 32'd11, 32'd15, 32'd5, 32'd1, 32'd6, 32'd7, 32'd4, 32'd8, 32'd2, 32'd10,
|
||||
32'd5, 32'd10, 32'd4, 32'd1, 32'd7, 32'd13, 32'd2, 32'd12, 32'd8, 32'd0, 32'd3, 32'd11, 32'd9, 32'd14, 32'd15, 32'd6,
|
||||
32'd10, 32'd2, 32'd6, 32'd8, 32'd4, 32'd15, 32'd0, 32'd5, 32'd9, 32'd3, 32'd1, 32'd12, 32'd14, 32'd7, 32'd11, 32'd13,
|
||||
32'd11, 32'd8, 32'd2, 32'd9, 32'd3, 32'd6, 32'd7, 32'd0, 32'd10, 32'd4, 32'd13, 32'd14, 32'd15, 32'd1, 32'd5, 32'd12,
|
||||
32'd9, 32'd1, 32'd14, 32'd15, 32'd5, 32'd7, 32'd13, 32'd4, 32'd3, 32'd8, 32'd11, 32'd0, 32'd10, 32'd6, 32'd12, 32'd2,
|
||||
32'd13, 32'd3, 32'd8, 32'd6, 32'd12, 32'd11, 32'd1, 32'd14, 32'd15, 32'd10, 32'd4, 32'd2, 32'd7, 32'd5, 32'd0, 32'd9,
|
||||
32'd8, 32'd15, 32'd0, 32'd4, 32'd10, 32'd5, 32'd6, 32'd2, 32'd14, 32'd11, 32'd12, 32'd13, 32'd1, 32'd3, 32'd9, 32'd7,
|
||||
32'd4, 32'd9, 32'd1, 32'd7, 32'd6, 32'd3, 32'd14, 32'd10, 32'd13, 32'd15, 32'd2, 32'd5, 32'd0, 32'd12, 32'd8, 32'd11,
|
||||
32'd3, 32'd5, 32'd7, 32'd11, 32'd2, 32'd0, 32'd12, 32'd1, 32'd6, 32'd13, 32'd15, 32'd9, 32'd8, 32'd4, 32'd10, 32'd14,
|
||||
32'd15, 32'd14, 32'd13, 32'd12, 32'd11, 32'd10, 32'd9, 32'd8, 32'd7, 32'd6, 32'd5, 32'd4, 32'd3, 32'd2, 32'd1, 32'd0
|
||||
};
|
||||
|
||||
// Mapping for each G function block to the state vector v
|
||||
parameter [4*8-1:0][31:0] G_MAPPING = {
|
||||
14, 9, 4, 3,
|
||||
13, 8, 7, 2,
|
||||
12, 11, 6, 1,
|
||||
15, 10, 5, 0,
|
||||
15, 11, 7, 3,
|
||||
14, 10, 6, 2,
|
||||
13, 9, 5, 1,
|
||||
12, 8, 4, 0
|
||||
32'd14, 32'd9, 32'd4, 32'd3,
|
||||
32'd13, 32'd8, 32'd7, 32'd2,
|
||||
32'd12, 32'd11, 32'd6, 32'd1,
|
||||
32'd15, 32'd10, 32'd5, 32'd0,
|
||||
32'd15, 32'd11, 32'd7, 32'd3,
|
||||
32'd14, 32'd10, 32'd6, 32'd2,
|
||||
32'd13, 32'd9, 32'd5, 32'd1,
|
||||
32'd12, 32'd8, 32'd4, 32'd0
|
||||
};
|
||||
|
||||
// This is so we can get the correct mapping back from the diagonal
|
||||
// operation
|
||||
parameter [4*4-1:0][31:0] G_MAPPING_DIAG = {
|
||||
3, 15, 11,7,
|
||||
6, 2, 14, 10,
|
||||
9, 5, 1, 13,
|
||||
12, 8 , 4, 0
|
||||
32'd3, 32'd15, 32'd11, 32'd7,
|
||||
32'd6, 32'd2, 32'd14, 32'd10,
|
||||
32'd9, 32'd5, 32'd1, 32'd13,
|
||||
32'd12, 32'd8 , 32'd4, 32'd0
|
||||
};
|
||||
|
||||
endpackage
|
|
@ -1,6 +1,6 @@
|
|||
/* Implemented from RFC-7693, The BLAKE2 Cryptographic Hash and Message Authentication Code (MAC)
|
||||
* Personalization string in the input parameter should be "ZcashPoW" followed by n and k in
|
||||
* little endian order.
|
||||
* Parameters are passed in as an input. Inputs and outputs are AXI stream and respect flow control.
|
||||
* Only one hash is computed at a time, and takes 26 clocks * number of 128 Byte message blocks.
|
||||
*/
|
||||
|
||||
module blake2_top
|
||||
|
@ -24,32 +24,33 @@ localparam ROUNDS = 12;
|
|||
|
||||
logic [7:0][63:0] h, h_tmp; // The state vector
|
||||
logic [15:0][63:0] v, v_tmp; // The local work vector and its intermediate value
|
||||
logic [31:0][63:0] g_out;//, g_out_r; // Outputs of the G mixing function - use 8 here to save on timing
|
||||
logic [127:0] t; // Counter - TODO make this smaller - related to param
|
||||
logic [31:0][63:0] g_out; // Outputs of the G mixing function - use 8 here to save on timing
|
||||
logic [127:0] t; // Counter
|
||||
logic [$clog2(ROUNDS)-1:0] round_cntr, round_cntr_msg, round_cntr_fin;
|
||||
logic g_col;
|
||||
logic [15:0][63:0] block, block_r; // The message block registered and converted to a 2d array
|
||||
logic block_eop_l; // Use to latch if this is the final block
|
||||
logic h_xor_done;
|
||||
logic [7:0] byte_len_l;
|
||||
|
||||
// Pipelining logic that has no reset
|
||||
always_ff @(posedge i_clk) begin
|
||||
|
||||
//g_out_r <= g_out;
|
||||
if (blake2_state == STATE_IDLE && ~i_block.rdy)
|
||||
block_r <= 0;
|
||||
|
||||
if (blake2_state == STATE_IDLE) begin
|
||||
block_r <= 0;
|
||||
if (i_block.val && i_block.rdy) begin
|
||||
block_r <= i_block.dat;
|
||||
end
|
||||
if (i_block.val && i_block.rdy) begin
|
||||
block_r <= i_block.dat;
|
||||
end
|
||||
|
||||
|
||||
for (int i = 0; i < 16; i++)
|
||||
if (g_col == 0/* && blake2_state == STATE_ROUNDS*/) // TODO why do I need this qualifier
|
||||
if (g_col == 0)
|
||||
v_tmp[i] <= g_out[blake2_pkg::G_MAPPING[i]];
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
if (blake2_state == STATE_ROUNDS)
|
||||
h_tmp[i] <= g_out[16 + blake2_pkg::G_MAPPING_DIAG[i]] ^ g_out[16 + blake2_pkg::G_MAPPING_DIAG[i+8]]; //TODO fix
|
||||
h_tmp[i] <= g_out[16 + blake2_pkg::G_MAPPING_DIAG[i]] ^ g_out[16 + blake2_pkg::G_MAPPING_DIAG[i+8]];
|
||||
|
||||
end
|
||||
|
||||
|
@ -71,14 +72,16 @@ always_ff @(posedge i_clk) begin
|
|||
o_hash.reset_source();
|
||||
round_cntr_fin <= 0;
|
||||
block_eop_l <= 0;
|
||||
h_xor_done <= 0;
|
||||
byte_len_l <= 0;
|
||||
end else begin
|
||||
|
||||
if (blake2_state != STATE_NEXT_BLOCK) g_col <= ~g_col;
|
||||
g_col <= ~g_col;
|
||||
|
||||
case (blake2_state)
|
||||
STATE_IDLE: begin
|
||||
h <= i_parameters ^ blake2_pkg::IV;
|
||||
t <= 128;
|
||||
t <= 2;
|
||||
i_block.rdy <= 1;
|
||||
v <= 0;
|
||||
o_hash.val <= 0;
|
||||
|
@ -87,11 +90,12 @@ always_ff @(posedge i_clk) begin
|
|||
round_cntr_msg <= 0;
|
||||
round_cntr_fin <= 0;
|
||||
if (i_block.rdy && i_block.val && i_block.sop) begin
|
||||
init_local_work_vector(i_byte_len, i_block.eop);
|
||||
init_local_work_vector(i_block.eop ? i_byte_len : 128, i_block.eop);
|
||||
blake2_state <= STATE_ROUNDS;
|
||||
g_col <= 0;
|
||||
i_block.rdy <= 0;
|
||||
block_eop_l <= i_block.eop;
|
||||
byte_len_l <= i_byte_len;
|
||||
end
|
||||
end
|
||||
// Here we do the compression over 12 rounds; each round can be done in two clock cycles
|
||||
|
@ -116,7 +120,6 @@ always_ff @(posedge i_clk) begin
|
|||
blake2_state <= STATE_FINAL_BLOCK;
|
||||
else begin
|
||||
blake2_state <= STATE_NEXT_BLOCK;
|
||||
i_block.rdy <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -124,16 +127,22 @@ always_ff @(posedge i_clk) begin
|
|||
round_cntr <= 0;
|
||||
round_cntr_msg <= 0;
|
||||
round_cntr_fin <= 0;
|
||||
h_xor_done <= 1;
|
||||
i_block.rdy <= 1;
|
||||
if (~h_xor_done)
|
||||
for (int i = 0; i < 8; i++)
|
||||
h[i] <= h[i] ^ h_tmp[i];
|
||||
if (i_block.rdy && i_block.val) begin
|
||||
init_local_work_vector(t, i_block.eop); //TODO this wont work with h_tmp
|
||||
init_local_work_vector(i_block.eop ? byte_len_l : t*128, i_block.eop);
|
||||
block_eop_l <= i_block.eop;
|
||||
t <= t + 128;
|
||||
h <= h ^ h_tmp;
|
||||
t <= t + 1;
|
||||
blake2_state <= STATE_ROUNDS;
|
||||
h_xor_done <= 0;
|
||||
i_block.rdy <= 0;
|
||||
g_col <= 0;
|
||||
end
|
||||
end
|
||||
STATE_FINAL_BLOCK: begin
|
||||
t <= 128;
|
||||
round_cntr <= 0;
|
||||
round_cntr_fin <= 0;
|
||||
round_cntr_msg <= 0;
|
||||
|
@ -162,7 +171,7 @@ generate begin
|
|||
// For each G function we want to pipeline the input message to help timing
|
||||
logic [63:0] m0, m1;
|
||||
always_ff @ (posedge i_clk) begin
|
||||
if(blake2_state == STATE_IDLE) begin
|
||||
if(blake2_state == STATE_IDLE || blake2_state == STATE_NEXT_BLOCK) begin
|
||||
m0 <= block[blake2_pkg::SIGMA[gv_g*2]];
|
||||
m1 <= block[blake2_pkg::SIGMA[gv_g*2 + 1]];
|
||||
end else begin
|
||||
|
|
|
@ -37,9 +37,9 @@ task rfc_test();
|
|||
begin
|
||||
integer signed get_len;
|
||||
logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat;
|
||||
expected = 'h560c602c9cda1e198190f58e6341131f127367051c64f7df7d343e1b4c32a8bbc0eac1bcae463807dca442ae77d5150df700f6a640949a52cd4341dfc1e1044b;
|
||||
expected = 'h239900d4ed8623b95a92f1dba88ad31895cc3345ded552c22d79ab2a39c5877dd1a2ffdb6fbb124bb7c45a68142f214ce9f6129fb697276a0d4d1c983fa580ba;
|
||||
i_byte_len = 3;
|
||||
i_block.put_stream("hSV", i_byte_len);
|
||||
i_block.put_stream("cba", i_byte_len);
|
||||
out_hash.get_stream(get_dat, get_len);
|
||||
common_pkg::compare_and_print(get_dat, expected);
|
||||
$display("rfc_test PASSED");
|
||||
|
@ -65,9 +65,9 @@ task test_140_bytes();
|
|||
begin
|
||||
integer signed get_len;
|
||||
logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat;
|
||||
expected = 'h429b65332e3b6701a29664f98c247204858479f55a8c18cc9b0ffa321cda4288fd420a5d47d134949f3b858bff7a696a00d91a07c92055cdd597971cf573281c;
|
||||
expected = 'h2012a869a3b89a69ffc954f6855c7f61a61190553dc487171ec3fe944d04c83cd4c842fff5a8258d5e14b05b7b6f30e8ddcb754d719137ec42fb5cdb562f8c89;
|
||||
i_byte_len = 140;
|
||||
i_block.put_stream("6RehRZqUdYD2SB3N35QlQhreiU2XEaSgIGUsreLqV49l8Z5r93FbP567Juqc1IUaVyJKv8qFmtQwXYvZdnrMacAs5H9hBhs5JxAfyDibIM3TjKyiVzXC8lfCqiN1j6fW8FSJY131mVpw", i_byte_len);
|
||||
i_block.put_stream("YbEAEzgJ1tgC3t6vDaJFqlWp1PaL482f7iZZzRj3xXpY2PPupwdTKAaBzB6KuN6j0alaoaFQfNboDbkNv5KDs5d7zN9JssrtOjGJdrVLfvb7uAdnVYoIgIv2zbXUQIPpwWdzEzj1CzX5", i_byte_len);
|
||||
out_hash.get_stream(get_dat, get_len);
|
||||
common_pkg::compare_and_print(get_dat, expected);
|
||||
$display("test_140_bytes PASSED");
|
||||
|
@ -83,8 +83,8 @@ initial begin
|
|||
#200ns;
|
||||
|
||||
rfc_test();
|
||||
//test_128_bytes();
|
||||
//test_140_bytes();
|
||||
test_128_bytes();
|
||||
test_140_bytes();
|
||||
|
||||
#10us $finish();
|
||||
|
||||
|
|
Loading…
Reference in New Issue