diff --git a/ip_cores/blake2b/src/rtl/blake2_pkg.sv b/ip_cores/blake2b/src/rtl/blake2_pkg.sv index 9654b3c..af48313 100644 --- a/ip_cores/blake2b/src/rtl/blake2_pkg.sv +++ b/ip_cores/blake2b/src/rtl/blake2_pkg.sv @@ -14,37 +14,37 @@ package blake2_pkg; // Sigma permutations used for G function blocks and input messages parameter [16*10-1:0][31:0] SIGMA = { - 0, 13, 12, 3, 14, 9, 11, 15, 5, 1, 6, 7, 4, 8, 2, 10, - 5, 10, 4, 1, 7, 13, 2, 12, 8, 0, 3, 11, 9, 14, 15, 6, - 10, 2, 6, 8, 4, 15, 0, 5, 9, 3, 1, 12, 14, 7, 11, 13, - 11, 8, 2, 9, 3, 6, 7, 0, 10, 4, 13, 14, 15, 1, 5, 12, - 9, 1, 14, 15, 5, 7, 13, 4, 3, 8, 11, 0, 10, 6, 12, 2, - 13, 3, 8, 6, 12, 11, 1, 14, 15, 10, 4, 2, 7, 5, 0, 9, - 8, 15, 0, 4, 10, 5, 6, 2, 14, 11, 12, 13, 1, 3, 9, 7, - 4, 9, 1, 7, 6, 3, 14, 10, 13, 15, 2, 5, 0, 12, 8, 11, - 3, 5, 7, 11, 2, 0, 12, 1, 6, 13, 15, 9, 8, 4, 10, 14, - 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + 32'd0, 32'd13, 32'd12, 32'd3, 32'd14, 32'd9, 32'd11, 32'd15, 32'd5, 32'd1, 32'd6, 32'd7, 32'd4, 32'd8, 32'd2, 32'd10, + 32'd5, 32'd10, 32'd4, 32'd1, 32'd7, 32'd13, 32'd2, 32'd12, 32'd8, 32'd0, 32'd3, 32'd11, 32'd9, 32'd14, 32'd15, 32'd6, + 32'd10, 32'd2, 32'd6, 32'd8, 32'd4, 32'd15, 32'd0, 32'd5, 32'd9, 32'd3, 32'd1, 32'd12, 32'd14, 32'd7, 32'd11, 32'd13, + 32'd11, 32'd8, 32'd2, 32'd9, 32'd3, 32'd6, 32'd7, 32'd0, 32'd10, 32'd4, 32'd13, 32'd14, 32'd15, 32'd1, 32'd5, 32'd12, + 32'd9, 32'd1, 32'd14, 32'd15, 32'd5, 32'd7, 32'd13, 32'd4, 32'd3, 32'd8, 32'd11, 32'd0, 32'd10, 32'd6, 32'd12, 32'd2, + 32'd13, 32'd3, 32'd8, 32'd6, 32'd12, 32'd11, 32'd1, 32'd14, 32'd15, 32'd10, 32'd4, 32'd2, 32'd7, 32'd5, 32'd0, 32'd9, + 32'd8, 32'd15, 32'd0, 32'd4, 32'd10, 32'd5, 32'd6, 32'd2, 32'd14, 32'd11, 32'd12, 32'd13, 32'd1, 32'd3, 32'd9, 32'd7, + 32'd4, 32'd9, 32'd1, 32'd7, 32'd6, 32'd3, 32'd14, 32'd10, 32'd13, 32'd15, 32'd2, 32'd5, 32'd0, 32'd12, 32'd8, 32'd11, + 32'd3, 32'd5, 32'd7, 32'd11, 32'd2, 32'd0, 32'd12, 32'd1, 32'd6, 32'd13, 32'd15, 32'd9, 32'd8, 32'd4, 32'd10, 32'd14, + 
32'd15, 32'd14, 32'd13, 32'd12, 32'd11, 32'd10, 32'd9, 32'd8, 32'd7, 32'd6, 32'd5, 32'd4, 32'd3, 32'd2, 32'd1, 32'd0 }; // Mapping for each G function block to the state vector v parameter [4*8-1:0][31:0] G_MAPPING = { - 14, 9, 4, 3, - 13, 8, 7, 2, - 12, 11, 6, 1, - 15, 10, 5, 0, - 15, 11, 7, 3, - 14, 10, 6, 2, - 13, 9, 5, 1, - 12, 8, 4, 0 + 32'd14, 32'd9, 32'd4, 32'd3, + 32'd13, 32'd8, 32'd7, 32'd2, + 32'd12, 32'd11, 32'd6, 32'd1, + 32'd15, 32'd10, 32'd5, 32'd0, + 32'd15, 32'd11, 32'd7, 32'd3, + 32'd14, 32'd10, 32'd6, 32'd2, + 32'd13, 32'd9, 32'd5, 32'd1, + 32'd12, 32'd8, 32'd4, 32'd0 }; // This is so we can get the correct mapping back from the diagonal // operation parameter [4*4-1:0][31:0] G_MAPPING_DIAG = { - 3, 15, 11,7, - 6, 2, 14, 10, - 9, 5, 1, 13, - 12, 8 , 4, 0 + 32'd3, 32'd15, 32'd11, 32'd7, + 32'd6, 32'd2, 32'd14, 32'd10, + 32'd9, 32'd5, 32'd1, 32'd13, + 32'd12, 32'd8 , 32'd4, 32'd0 }; endpackage \ No newline at end of file diff --git a/ip_cores/blake2b/src/rtl/blake2_top.sv b/ip_cores/blake2b/src/rtl/blake2_top.sv index 2422a40..995120b 100644 --- a/ip_cores/blake2b/src/rtl/blake2_top.sv +++ b/ip_cores/blake2b/src/rtl/blake2_top.sv @@ -1,6 +1,6 @@ /* Implemented from RFC-7693, The BLAKE2 Cryptographic Hash and Message Authentication Code (MAC) - * Personalization string in the input parameter should be "ZcashPoW" followed by n and k in - * little endian order. + * Parameters are passed in as an input. Inputs and outputs are AXI stream and respect flow control. + * Only one hash is computed at a time, and takes 26 clocks * number of 128 Byte message blocks. 
*/ module blake2_top @@ -24,32 +24,33 @@ localparam ROUNDS = 12; logic [7:0][63:0] h, h_tmp; // The state vector logic [15:0][63:0] v, v_tmp; // The local work vector and its intermediate value -logic [31:0][63:0] g_out;//, g_out_r; // Outputs of the G mixing function - use 8 here to save on timing -logic [127:0] t; // Counter - TODO make this smaller - related to param +logic [31:0][63:0] g_out; // Outputs of the G mixing function - use 8 here to save on timing +logic [127:0] t; // Counter logic [$clog2(ROUNDS)-1:0] round_cntr, round_cntr_msg, round_cntr_fin; logic g_col; logic [15:0][63:0] block, block_r; // The message block registered and converted to a 2d array logic block_eop_l; // Use to latch if this is the final block +logic h_xor_done; +logic [7:0] byte_len_l; // Pipelining logic that has no reset always_ff @(posedge i_clk) begin - //g_out_r <= g_out; + if (blake2_state == STATE_IDLE && ~i_block.rdy) + block_r <= 0; - if (blake2_state == STATE_IDLE) begin - block_r <= 0; - if (i_block.val && i_block.rdy) begin - block_r <= i_block.dat; - end + if (i_block.val && i_block.rdy) begin + block_r <= i_block.dat; end + for (int i = 0; i < 16; i++) - if (g_col == 0/* && blake2_state == STATE_ROUNDS*/) // TODO why do I need this qualifier + if (g_col == 0) v_tmp[i] <= g_out[blake2_pkg::G_MAPPING[i]]; for (int i = 0; i < 8; i++) if (blake2_state == STATE_ROUNDS) - h_tmp[i] <= g_out[16 + blake2_pkg::G_MAPPING_DIAG[i]] ^ g_out[16 + blake2_pkg::G_MAPPING_DIAG[i+8]]; //TODO fix + h_tmp[i] <= g_out[16 + blake2_pkg::G_MAPPING_DIAG[i]] ^ g_out[16 + blake2_pkg::G_MAPPING_DIAG[i+8]]; end @@ -71,14 +72,16 @@ always_ff @(posedge i_clk) begin o_hash.reset_source(); round_cntr_fin <= 0; block_eop_l <= 0; + h_xor_done <= 0; + byte_len_l <= 0; end else begin - if (blake2_state != STATE_NEXT_BLOCK) g_col <= ~g_col; + g_col <= ~g_col; case (blake2_state) STATE_IDLE: begin h <= i_parameters ^ blake2_pkg::IV; - t <= 128; + t <= 2; i_block.rdy <= 1; v <= 0; o_hash.val <= 0; @@ -87,11 
+90,12 @@ always_ff @(posedge i_clk) begin round_cntr_msg <= 0; round_cntr_fin <= 0; if (i_block.rdy && i_block.val && i_block.sop) begin - init_local_work_vector(i_byte_len, i_block.eop); + init_local_work_vector(i_block.eop ? i_byte_len : 128, i_block.eop); blake2_state <= STATE_ROUNDS; g_col <= 0; i_block.rdy <= 0; block_eop_l <= i_block.eop; + byte_len_l <= i_byte_len; end end // Here we do the compression over 12 rounds, each round can be done in two clock cycles @@ -116,7 +120,6 @@ always_ff @(posedge i_clk) begin blake2_state <= STATE_FINAL_BLOCK; else begin blake2_state <= STATE_NEXT_BLOCK; - i_block.rdy <= 1; end end end @@ -124,16 +127,22 @@ always_ff @(posedge i_clk) begin round_cntr <= 0; round_cntr_msg <= 0; round_cntr_fin <= 0; + h_xor_done <= 1; + i_block.rdy <= 1; + if (~h_xor_done) + for (int i = 0; i < 8; i++) + h[i] <= h[i] ^ h_tmp[i]; if (i_block.rdy && i_block.val) begin - init_local_work_vector(t, i_block.eop); //TODO this wont work with h_tmp + init_local_work_vector(i_block.eop ? 
byte_len_l : t*128, i_block.eop); block_eop_l <= i_block.eop; - t <= t + 128; - h <= h ^ h_tmp; + t <= t + 1; blake2_state <= STATE_ROUNDS; + h_xor_done <= 0; + i_block.rdy <= 0; + g_col <= 0; end end STATE_FINAL_BLOCK: begin - t <= 128; round_cntr <= 0; round_cntr_fin <= 0; round_cntr_msg <= 0; @@ -162,7 +171,7 @@ generate begin // For each G function we want to pipeline the input message to help timing logic [63:0] m0, m1; always_ff @ (posedge i_clk) begin - if(blake2_state == STATE_IDLE) begin + if(blake2_state == STATE_IDLE || blake2_state == STATE_NEXT_BLOCK) begin m0 <= block[blake2_pkg::SIGMA[gv_g*2]]; m1 <= block[blake2_pkg::SIGMA[gv_g*2 + 1]]; end else begin diff --git a/ip_cores/blake2b/src/tb/blake2_top_tb.sv b/ip_cores/blake2b/src/tb/blake2_top_tb.sv index 578d31e..2c6048c 100644 --- a/ip_cores/blake2b/src/tb/blake2_top_tb.sv +++ b/ip_cores/blake2b/src/tb/blake2_top_tb.sv @@ -37,9 +37,9 @@ task rfc_test(); begin integer signed get_len; logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat; - expected = 'h560c602c9cda1e198190f58e6341131f127367051c64f7df7d343e1b4c32a8bbc0eac1bcae463807dca442ae77d5150df700f6a640949a52cd4341dfc1e1044b; + expected = 'h239900d4ed8623b95a92f1dba88ad31895cc3345ded552c22d79ab2a39c5877dd1a2ffdb6fbb124bb7c45a68142f214ce9f6129fb697276a0d4d1c983fa580ba; i_byte_len = 3; - i_block.put_stream("hSV", i_byte_len); + i_block.put_stream("cba", i_byte_len); out_hash.get_stream(get_dat, get_len); common_pkg::compare_and_print(get_dat, expected); $display("rfc_test PASSED"); @@ -65,9 +65,9 @@ task test_140_bytes(); begin integer signed get_len; logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat; - expected = 'h429b65332e3b6701a29664f98c247204858479f55a8c18cc9b0ffa321cda4288fd420a5d47d134949f3b858bff7a696a00d91a07c92055cdd597971cf573281c; + expected = 'h2012a869a3b89a69ffc954f6855c7f61a61190553dc487171ec3fe944d04c83cd4c842fff5a8258d5e14b05b7b6f30e8ddcb754d719137ec42fb5cdb562f8c89; i_byte_len = 140; - 
i_block.put_stream("6RehRZqUdYD2SB3N35QlQhreiU2XEaSgIGUsreLqV49l8Z5r93FbP567Juqc1IUaVyJKv8qFmtQwXYvZdnrMacAs5H9hBhs5JxAfyDibIM3TjKyiVzXC8lfCqiN1j6fW8FSJY131mVpw", i_byte_len); + i_block.put_stream("YbEAEzgJ1tgC3t6vDaJFqlWp1PaL482f7iZZzRj3xXpY2PPupwdTKAaBzB6KuN6j0alaoaFQfNboDbkNv5KDs5d7zN9JssrtOjGJdrVLfvb7uAdnVYoIgIv2zbXUQIPpwWdzEzj1CzX5", i_byte_len); out_hash.get_stream(get_dat, get_len); common_pkg::compare_and_print(get_dat, expected); $display("test_140_bytes PASSED"); @@ -83,8 +83,8 @@ initial begin #200ns; rfc_test(); - //test_128_bytes(); - //test_140_bytes(); + test_128_bytes(); + test_140_bytes(); #10us $finish();