diff --git a/ip_cores/blake2b/src/rtl/blake2b_pipe_top.sv b/ip_cores/blake2b/src/rtl/blake2b_pipe_top.sv
index 437f78a..095af6c 100644
--- a/ip_cores/blake2b/src/rtl/blake2b_pipe_top.sv
+++ b/ip_cores/blake2b/src/rtl/blake2b_pipe_top.sv
@@ -72,6 +72,7 @@ generate
     o_hash.ctl = ctl[NUM_PIPE-1];
     o_hash.sop = 1;
     o_hash.eop = 1;
+    o_hash.err = 0;
     o_hash.dat = h[NUM_PIPE-1];
   end

@@ -150,16 +151,18 @@ generate
         end
         // Second stage
         if (o_hash.rdy) begin
-          h[PIPE_G0+1] <= h[PIPE_G0];
-          init_local_work_vector_pipe(PIPE_G0+1, LAST_BLOCK ? byte_len : 128 , LAST_BLOCK);

           // Shift message down either from previous pipeline or from fixed portion
-          msg[PIPE_G0+1] <= 0;
-          for (int i = 0; i < 128; i++) begin
-            if ((g0+1)*128 + i < MSG_VAR_BYTS)
-              msg[PIPE_G0+1][i*8 +: 8] <= msg[PIPE_G0][((g0+1)*128 + i)*8 +: 8];
+          if (g0 < (NUM_PASSES - 1)) begin
+            h[PIPE_G0+1] <= h[PIPE_G0];
+            init_local_work_vector_pipe(PIPE_G0+1, LAST_BLOCK ? byte_len : 128 , LAST_BLOCK);
+            msg[PIPE_G0+1] <= 0;
+            for (int i = 0; i < 128; i++) begin
+              if ((g0+1)*128 + i < MSG_VAR_BYTS)
+                msg[PIPE_G0+1][i*8 +: 8] <= msg[PIPE_G0][((g0+1)*128 + i)*8 +: 8];
+            end
+            ctl[PIPE_G0+1] <= ctl[PIPE_G0];
           end
-          ctl[PIPE_G0+1] <= ctl[PIPE_G0];
         end
       end

diff --git a/ip_cores/common/src/rtl/common_if.sv b/ip_cores/common/src/rtl/common_if.sv
index fa5d194..a344276 100644
--- a/ip_cores/common/src/rtl/common_if.sv
+++ b/ip_cores/common/src/rtl/common_if.sv
@@ -1,5 +1,7 @@
 /*
-  Interface for a AXI stream
+  Commonly used interfaces:
+    - AXI stream
+    - RAM

   Copyright (C) 2019 Benjamin Devlin and Zcash Foundation

@@ -21,9 +23,8 @@ interface if_axi_stream # (
   parameter DAT_BYTS = 8,
   parameter CTL_BYTS = 1
 )(
-  input clk
+  input i_clk
 );
-  import common_pkg::*;

   localparam DAT_BITS = DAT_BYTS*8;
   localparam CTL_BITS = CTL_BYTS*8;
@@ -36,8 +37,8 @@ interface if_axi_stream # (
   logic [DAT_BITS-1:0] dat;
   logic [$clog2(DAT_BYTS)-1:0] mod;

-  modport sink (input val, err, sop, eop, ctl, dat, mod, clk, output rdy);
-  modport source (output val, err, sop, eop, ctl, dat, mod, input rdy, clk, import task reset_source());
+  modport sink (input val, err, sop, eop, ctl, dat, mod, i_clk, output rdy);
+  modport source (output val, err, sop, eop, ctl, dat, mod, input rdy, i_clk, import task reset_source());

   // Task to reset a source interface signals to all 0
   task reset_source();
@@ -55,7 +56,7 @@ interface if_axi_stream # (
     logic sop_l=0;

     reset_source();
-    @(posedge clk);
+    @(posedge i_clk);

     while (len > 0) begin
       sop = ~sop_l;
@@ -66,8 +67,8 @@ interface if_axi_stream # (
       data = data >> DAT_BITS;
       sop_l = 1;
       len = len - DAT_BYTS;
-      @(posedge clk); // Go to next clock edge
-      while (!rdy) @(posedge clk); // If not rdy then wait here
+      @(posedge i_clk); // Go to next clock edge
+      while (!rdy) @(posedge i_clk); // If not rdy then wait here
     end
     reset_source();
   endtask
@@ -78,7 +79,7 @@ interface if_axi_stream # (
     rdy = 1;
     len = 0;
     data = 0;
-    @(posedge clk);
+    @(posedge i_clk);

     while (1) begin
       if (val && rdy) begin
@@ -88,11 +89,37 @@ interface if_axi_stream # (
         len = len + (eop ? (mod == 0 ? DAT_BYTS : mod) : DAT_BYTS);
         if (eop) break;
       end
-      @(posedge clk);
+      @(posedge i_clk);
     end
   endtask


+endinterface
+
+interface if_ram # (
+  parameter RAM_WIDTH = 32,
+  parameter RAM_DEPTH = 128
+)(
+  input i_clk, i_rst
+);
+
+  logic [$clog2(RAM_DEPTH)-1:0] a;
+  logic en;
+  logic we;
+  logic re;
+  logic [RAM_WIDTH-1:0 ] d, q;
+
+  modport sink (input a, en, re, we, d, i_clk, i_rst, output q);
+  modport source (output a, en, re, we, d, input q, i_clk, i_rst, import task reset_source());
+
+  // Task to reset a source interface signals to all 0
+  task reset_source();
+    a <= 0;
+    en <= 0;
+    we <= 0;
+    re <= 0;
+    d <= 0;
+  endtask


 endinterface
\ No newline at end of file
diff --git a/ip_cores/memory/rtl/src/bram.sv b/ip_cores/memory/rtl/src/bram.sv
index 7796a8e..2b969f5 100644
--- a/ip_cores/memory/rtl/src/bram.sv
+++ b/ip_cores/memory/rtl/src/bram.sv
@@ -1,12 +1,49 @@
-// Xilinx True Dual Port RAM, No Change, Dual Clock
+// Xilinx True Dual Port RAM, No Change, Dual Clock.
+// Added wrapper to use interfaces.
 // This code implements a parameterizable true dual port memory (both ports can read and write).
 // This is a no change RAM which retains the last read value on the output during writes
 // which is the most power efficient mode.
 // If a reset or enable is not necessary, it may be tied off or removed from the code.


-//module xilinx_true_dual_port_no_change_2_clock_ram #(
-module bram #(
+module bram #(
+  parameter RAM_WIDTH = 18,
+  parameter RAM_DEPTH = 1024,
+  parameter RAM_PERFORMANCE = "HIGH_PERFORMANCE",
+  parameter INIT_FILE = ""
+) (
+  if_ram.sink a,
+  if_ram.sink b
+);
+
+  xilinx_true_dual_port_no_change_2_clock_ram #(
+    .RAM_WIDTH(RAM_WIDTH), // Specify RAM data width
+    .RAM_DEPTH(RAM_DEPTH), // Specify RAM depth (number of entries)
+    .RAM_PERFORMANCE(RAM_PERFORMANCE), // Select "HIGH_PERFORMANCE" or "LOW_LATENCY"
+    .INIT_FILE(INIT_FILE) // Specify name/location of RAM initialization file if using one (leave blank if not)
+  ) your_instance_name (
+    .addra(a.a), // Port A address bus, width determined from RAM_DEPTH
+    .addrb(b.a), // Port B address bus, width determined from RAM_DEPTH
+    .dina(a.d), // Port A RAM input data, width determined from RAM_WIDTH
+    .dinb(b.d), // Port B RAM input data, width determined from RAM_WIDTH
+    .clka(a.i_clk), // Port A clock
+    .clkb(b.i_clk), // Port B clock
+    .wea(a.we), // Port A write enable
+    .web(b.we), // Port B write enable
+    .ena(a.en), // Port A RAM Enable, for additional power savings, disable port when not in use
+    .enb(b.en), // Port B RAM Enable, for additional power savings, disable port when not in use
+    .rsta(a.i_rst), // Port A output reset (does not affect memory contents)
+    .rstb(b.i_rst), // Port B output reset (does not affect memory contents)
+    .regcea(a.re), // Port A output register enable
+    .regceb(b.re), // Port B output register enable
+    .douta(a.q), // Port A RAM output data, width determined from RAM_WIDTH
+    .doutb(b.q) // Port B RAM output data, width determined from RAM_WIDTH
+  );
+
+endmodule
+
+
+module xilinx_true_dual_port_no_change_2_clock_ram #(
   parameter RAM_WIDTH = 18, // Specify RAM data width
   parameter RAM_DEPTH = 1024, // Specify RAM depth (number of entries)
   parameter RAM_PERFORMANCE = "HIGH_PERFORMANCE", // Select "HIGH_PERFORMANCE" or "LOW_LATENCY"
@@ -103,31 +140,3 @@ module bram #(
 endmodule

-// The following is an instantiation template for xilinx_true_dual_port_no_change_2_clock_ram
-/*
-  // Xilinx True Dual Port RAM, No Change, Dual Clock
-  xilinx_true_dual_port_no_change_2_clock_ram #(
-    .RAM_WIDTH(18), // Specify RAM data width
-    .RAM_DEPTH(1024), // Specify RAM depth (number of entries)
-    .RAM_PERFORMANCE("HIGH_PERFORMANCE"), // Select "HIGH_PERFORMANCE" or "LOW_LATENCY"
-    .INIT_FILE("") // Specify name/location of RAM initialization file if using one (leave blank if not)
-  ) your_instance_name (
-    .addra(addra), // Port A address bus, width determined from RAM_DEPTH
-    .addrb(addrb), // Port B address bus, width determined from RAM_DEPTH
-    .dina(dina), // Port A RAM input data, width determined from RAM_WIDTH
-    .dinb(dinb), // Port B RAM input data, width determined from RAM_WIDTH
-    .clka(clka), // Port A clock
-    .clkb(clkb), // Port B clock
-    .wea(wea), // Port A write enable
-    .web(web), // Port B write enable
-    .ena(ena), // Port A RAM Enable, for additional power savings, disable port when not in use
-    .enb(enb), // Port B RAM Enable, for additional power savings, disable port when not in use
-    .rsta(rsta), // Port A output reset (does not affect memory contents)
-    .rstb(rstb), // Port B output reset (does not affect memory contents)
-    .regcea(regcea), // Port A output register enable
-    .regceb(regceb), // Port B output register enable
-    .douta(douta), // Port A RAM output data, width determined from RAM_WIDTH
-    .doutb(doutb) // Port B RAM output data, width determined from RAM_WIDTH
-  );
-*/
-

diff --git a/ip_cores/parsing/src/rtl/file_to_axi.sv b/ip_cores/parsing/src/rtl/file_to_axi.sv
index 94787e3..e969c30 100644
--- a/ip_cores/parsing/src/rtl/file_to_axi.sv
+++ b/ip_cores/parsing/src/rtl/file_to_axi.sv
@@ -44,7 +44,7 @@ initial begin
   o_done = 0;
   o_axi.reset_source();
   sop_l = 0;
-  while (!i_start) @(posedge o_axi.clk);
+  while (!i_start) @(posedge o_axi.i_clk);
   fp = $fopen(i_file, BINARY ? "rb" : "r");
   if (fp==0) $fatal(1, "%m %t ERROR: file_to_axi could not open file %s", $time, i_file);

@@ -60,8 +60,8 @@ initial begin
     o_axi.eop = $feof(fp);
     o_axi.mod = $feof(fp) ? r : 0;

-    @(posedge o_axi.clk);
-    while (!(o_axi.val && o_axi.rdy)) @(posedge o_axi.clk);
+    @(posedge o_axi.i_clk);
+    while (!(o_axi.val && o_axi.rdy)) @(posedge o_axi.i_clk);
   end

 end
diff --git a/zcash_verif/src/rtl/zcash_verif_equihash.sv b/zcash_verif/src/rtl/zcash_verif_equihash.sv
index 444cef7..4442e88 100644
--- a/zcash_verif/src/rtl/zcash_verif_equihash.sv
+++ b/zcash_verif/src/rtl/zcash_verif_equihash.sv
@@ -24,73 +24,237 @@

 module zcash_verif_equihash
   import zcash_verif_pkg::*;
-(
+#(
+  parameter DAT_BYTS = 8
+)(
   input i_clk, i_rst,

   if_axi_stream.sink i_axi,
   output equihash_bm_t o_mask,
   output logic o_mask_val
 );
-
-cblockheader_t cblockheader;
-logic [COLLISION_BIT_LEN-1:0] sol_hash_xor;
+
+localparam [7:0] EQUIHASH_GEN_BYTS = $bits(equihash_gen_in_t)/8;
+localparam DAT_BITS = DAT_BYTS*8;
+
+cblockheader_t cblockheader;
+logic cblockheader_val;
+logic [$clog2($bits(cblockheader_t)/8)-1:0] cblockheader_byts;
+
+equihash_gen_in_t equihash_gen_in;
+logic [N-1:0] sol_hash_xor;
+logic [$clog2(SOL_LIST_LEN)-1:0] sol_cnt_out, sol_cnt_in; // This tracks how many solutions we have XORed
+logic [$clog2(DAT_BITS)-1:0] sol_pos; // This tracks the pos in our DAT_BITS RAM output
+logic [SOL_BITS-1:0] ram_out;
+
 logic [64*8-1:0] parameters;
-logic [7:0] byte_len;
+logic [7:0] byte_len;
+logic all_checks_done;

-if_axi_stream #(.DAT_BYTS(INDICIES_PER_HASH * N), .CTL_BYTS(1)) blake2b_out_hash(clk);
-if_axi_stream #(.DAT_BYTS(EQUIHASH_BLAKE2B_PIPE == 0 ? 128 : $bits(equihash_gen_in_t)/8 )) blake2b_in_hash(clk);

+
+if_axi_stream #(.DAT_BYTS(BLAKE2B_DIGEST_BYTS), .CTL_BYTS(1)) blake2b_out_hash(i_clk);
+if_axi_stream #(.DAT_BYTS(EQUIHASH_BLAKE2B_PIPE == 0 ? 128 : EQUIHASH_GEN_BYTS )) blake2b_in_hash(i_clk);
+
+// We write the block into a port as it comes in and then read from the b port
+if_ram #(.RAM_WIDTH(DAT_BITS), .RAM_DEPTH(SOL_LIST_BYTS/DAT_BYTS)) equihash_sol_bram_if_a (i_clk, i_rst);
+if_ram #(.RAM_WIDTH(DAT_BITS), .RAM_DEPTH(SOL_LIST_BYTS/DAT_BYTS)) equihash_sol_bram_if_b (i_clk, i_rst);
+logic [DAT_BITS-1:0] equihash_sol_bram_if_b_l;
+
+enum {STATE_IDLE = 0,
+      STATE_DATA_WRITE = 1,
+      STATE_FINISH_WAIT = 2} ram_state;
+
+// State machine for controlling writing equihash solution into the RAM and registering the header
 always_ff @ (posedge i_clk) begin
   if (i_rst) begin
     i_axi.rdy <= 0;
+    equihash_sol_bram_if_a.reset_source();
+    cblockheader <= 0;
+    cblockheader_byts <= 0;
+    cblockheader_val <= 0;
+    ram_state <= STATE_IDLE;
+  end else begin
+    // Defaults
+    equihash_sol_bram_if_a.we <= 1;
+    equihash_sol_bram_if_a.en <= 1;
+    equihash_sol_bram_if_a.d <= i_axi.dat;
+
+    if (i_axi.val && i_axi.rdy && ~cblockheader_val) begin
+      cblockheader <= {cblockheader, i_axi.dat};
+      cblockheader_val <= (cblockheader_byts + DAT_BYTS) >= $bits(cblockheader_t)/8;
+      cblockheader_byts <= cblockheader_byts + DAT_BYTS;
+    end
+
+    case (ram_state)
+      // This state we are waiting for an input block
+      STATE_IDLE: begin
+        i_axi.rdy <= 1;
+        if (i_axi.val && i_axi.rdy) begin
+          ram_state <= STATE_DATA_WRITE;
+          equihash_sol_bram_if_a.a <= equihash_sol_bram_if_a.a + 1;
+        end
+      end
+      // Here we are checking header values as well as populating the RAM
+      STATE_DATA_WRITE: begin
+        if (i_axi.val && i_axi.rdy) begin
+          equihash_sol_bram_if_a.a <= equihash_sol_bram_if_a.a + 1;
+          if (i_axi.eop) begin
+            i_axi.rdy <= 0;
+            ram_state <= STATE_FINISH_WAIT;
+          end
+        end
+      end
+      // Here we have finished populating the RAM and are waiting for all checks to finish
+      STATE_FINISH_WAIT: begin
+        equihash_sol_bram_if_a.we <= 0;
+        equihash_sol_bram_if_a.a <= equihash_sol_bram_if_a.a;
+        if (all_checks_done) begin
+          ram_state <= STATE_IDLE;
+          i_axi.rdy <= 1;
+          cblockheader_val <= 0;
+          equihash_sol_bram_if_a.a <= 0;
+        end
+      end
+    endcase
+  end
+end
+
+// State machine for controlling the hash calculation
+// and checking the header values
+always_ff @ (posedge i_clk) begin
+  if (i_rst) begin
     o_mask_val <= 0;
     o_mask <= 0;
     sol_hash_xor <= 0;
     blake2b_in_hash.reset_source();
     blake2b_out_hash.rdy <= 0;
+    equihash_sol_bram_if_b.reset_source();
+    all_checks_done <= 0;
+    sol_cnt_in <= 0;
+    sol_cnt_out <= 0;
+    sol_pos <= 0;
+    equihash_sol_bram_if_b_l <= 0;
   end else begin
+    // Defaults
+    equihash_sol_bram_if_b.re <= 1;
+    equihash_sol_bram_if_b.en <= 1;
     blake2b_out_hash.rdy <= 1;
-    i_axi.rdy <= 1;
+    blake2b_in_hash.sop <= 1;
+    blake2b_in_hash.eop <= 1;
+    blake2b_in_hash.val <= 0;
+    if (ram_state == STATE_IDLE) begin
+      equihash_sol_bram_if_b.a <= $bits(cblockheader_t)/DAT_BITS;
+      sol_pos <= $bits(cblockheader_t) % DAT_BITS;
+      sol_cnt_out <= 0;
+      sol_cnt_in <= 0;
+      blake2b_in_hash.val <= 0;
+      o_mask_val <= 0;
+      o_mask <= 0;
+    end
+    if (cblockheader_val) begin
+      equihash_gen_in.bits <= cblockheader.bits;
+      equihash_gen_in.my_time <= cblockheader.my_time;
+      equihash_gen_in.hash_reserved <= 0;
+      equihash_gen_in.hash_merkle_root <= cblockheader.hash_merkle_root;
+      equihash_gen_in.hash_prev_block <= cblockheader.hash_prev_block;
+      equihash_gen_in.version <= cblockheader.version;
+      equihash_gen_in.nonce <= cblockheader.nonce;
+      for (int i = 0; i < SOL_BITS; i++)
+        if (i + sol_pos >= DAT_BITS)
+          equihash_gen_in.index[i] <= equihash_sol_bram_if_b_l[i + sol_pos - DAT_BITS];
+        else
+          equihash_gen_in.index[i] <= equihash_sol_bram_if_b.q[i+sol_pos];
+    end
+    // We can start loading the hash block
+    if((sol_cnt_in < SOL_LIST_LEN - 1) &&
+        blake2b_in_hash.rdy &&
+        (equihash_sol_bram_if_a.a >= $bits(cblockheader_t)/8 + DAT_BYTS)) begin
+      blake2b_in_hash.val <= 1; // TODO control if we take more than one hash per clock
+      sol_cnt_in <= sol_cnt_in + 1;
+      sol_pos <= sol_pos + SOL_BITS;
+      // Calculate if we should increase our read pointer
+      if (sol_pos + 2*SOL_BITS >= DAT_BITS) begin
+        equihash_sol_bram_if_b_l <= equihash_sol_bram_if_b.q; // Latch current output as we might need some bits
+        equihash_sol_bram_if_b.a <= equihash_sol_bram_if_b.a + 1;
+      end
+
+      //TODO here we also need to check the ordering, and duplicates?
+
+    end
+
+    // When we start getting the hash results, start XORing them
+    if (blake2b_out_hash.val) begin
+      sol_hash_xor <= hash_solution(sol_hash_xor, blake2b_out_hash.dat);
+      sol_cnt_out <= sol_cnt_out + 1;
+    end
+
+    if (sol_cnt_out == SOL_LIST_LEN - 1) begin
+      o_mask.XOR_FAIL <= |sol_hash_xor;
+      o_mask_val <= 1;
+      sol_cnt_out <= sol_cnt_out;
+      equihash_sol_bram_if_b.a <= 0;
+    end


   end
 end


-// Constants that do not change
+// Constants: Blake2b parameter block (byte 0 digest length, byte 1 key length, byte 2 fanout, byte 3 depth, bytes 48..63 personalization)
 always_comb begin
-  byte_len = $bits(equihash_gen_in_t)/8;
-  parameters = {'0, 8'd1, 8'd1, 8'd0, byte_len};
-  parameters[48*8-1 +: 16*8] = POW_TAG;
+  parameters = {480'd0, 8'd1, 8'd1, 8'd0, 8'(BLAKE2B_DIGEST_BYTS)}; // Cast to 8 bits: the untyped package parameter is 32 bits wide in a concatenation
+  parameters[48*8 +: 16*8] = POW_TAG; // Personalization starts at byte 48, i.e. bit 384
+  blake2b_in_hash.dat = equihash_gen_in;
 end


+// Function to XOR each of the INDICIES_PER_HASH N-bit words of a hash output into curr; returns the N-bit result
+function [N-1:0] hash_solution(input [N-1:0] curr, input [N*INDICIES_PER_HASH-1:0] in);
+  for (int i = 0; i < INDICIES_PER_HASH; i++)
+    curr = curr ^ in[i*N +: N];
+  return curr;
+endfunction
+
+// Instantiate the Blake2b block
 generate
   if ( EQUIHASH_BLAKE2B_PIPE == 0 ) begin: BLAKE2B_GEN
     blake2b_top DUT (
       .i_clk ( i_clk ),
       .i_rst ( i_rst ),
       .i_parameters ( parameters ),
-      .i_byte_len ( byte_len ),
+      .i_byte_len ( EQUIHASH_GEN_BYTS ),
       .i_block ( blake2b_in_hash ),
       .o_hash ( blake2b_out_hash )
     );
   end else begin
     blake2b_pipe_top #(
-      .MSG_LEN ( $bits(equihash_gen_in_t)/8 ),
-      .CTL_BITS ( 8 )
+      .MSG_LEN ( EQUIHASH_GEN_BYTS ),
+      .MSG_VAR_BYTS ( 4 ), // Only lower 4 bytes of input to hash change
+      .CTL_BITS ( 8 )
     ) DUT (
       .i_clk ( i_clk ),
       .i_rst ( i_rst ),
-      .i_parameters ( parameters ),
-      .i_byte_len ( byte_len ),
+      .i_parameters ( parameters ),
+      .i_byte_len ( EQUIHASH_GEN_BYTS ),
       .i_block ( blake2b_in_hash ),
       .o_hash ( blake2b_out_hash )
     );
   end
 endgenerate

-// Some checks to make sure our data structures are correct:
+// Memory to store the equihash solution as it comes in. We use dual port,
+// one port for writing and one port for reading
+bram #(
+  .RAM_WIDTH ( DAT_BITS ),
+  .RAM_DEPTH ( SOL_LIST_BYTS/DAT_BYTS ),
+  .RAM_PERFORMANCE ( "LOW_LATENCY" ) // Select "HIGH_PERFORMANCE" or "LOW_LATENCY"
+) equihash_sol_bram (
+  .a ( equihash_sol_bram_if_a ),
+  .b ( equihash_sol_bram_if_b )
+);
+
+// Some checks to make sure our data structures are correct:
 initial begin
   assert ($bits(equihash_gen_in_t)/8 == 144) else $fatal(1, "%m %t ERROR: equihash_gen_in_t is not 144 bytes in size", $time);
 end
diff --git a/zcash_verif/src/rtl/zcash_verif_pkg.sv b/zcash_verif/src/rtl/zcash_verif_pkg.sv
index 8dfcea7..fee3173 100644
--- a/zcash_verif/src/rtl/zcash_verif_pkg.sv
+++ b/zcash_verif/src/rtl/zcash_verif_pkg.sv
@@ -22,9 +22,10 @@ package zcash_verif_pkg;
   // Variables used in the equihash PoW
   parameter [31:0] N = 200;
   parameter [31:0] K = 9;
-  parameter EQUIHASH_BLAKE2B_PIPE = 0; // Do we use the pipelined (high performance but large area) Blake2b core
+  parameter EQUIHASH_BLAKE2B_PIPE = 1; // Do we use the pipelined (high performance but large area) Blake2b core
   parameter INDICIES_PER_HASH = (512/N);
   parameter COLLISION_BIT_LEN = N/(K+1);
+  parameter BLAKE2B_DIGEST_BYTS = (N*INDICIES_PER_HASH)/8;
   parameter SOL_BITS = COLLISION_BIT_LEN+1;
   parameter SOL_LIST_LEN = 1 << K;
   parameter SOL_LIST_BYTS = SOL_LIST_LEN*SOL_BITS/8;
@@ -48,10 +49,13 @@ package zcash_verif_pkg;
     logic [31:0] version;
   } equihash_gen_in_t;

-
+  typedef struct packed {
+    logic [SOL_LIST_LEN-1:0][SOL_BITS-1:0] sol;
+    logic [3*8-1:0] size; // Contains size of solution array - should be 1347 for (200,9)
+  } equihash_sol_t;
+
   // Header format for block header (CBlockheader)
   typedef struct packed {
-    equihash_sol_t equihash_sol_t;
     logic [255:0] nonce;
     logic [31:0] bits;
     logic [31:0] my_time;
@@ -60,11 +64,12 @@ package zcash_verif_pkg;
     logic [255:0] hash_prev_block;
     logic [31:0] version;
   } cblockheader_t;
-
-  typedef struct packed {
-    logic [SOL_LIST_LEN-1:0][SOL_BITS-1:0] sol;
-    logic [3*8-1:0] size; // Contains size of solution array - should be 1347 for (200,9)
-  } equihash_sol_t;
+  // Header format for block header (CBlockheader) inc. solution
+  typedef struct packed {
+    equihash_sol_t equihash_sol;
+    cblockheader_t cblockheader;
+  } cblockheader_sol_t;
+


 endpackage
\ No newline at end of file