diff --git a/aws/cl_zcash/verif/tests/test_zcash.sv b/aws/cl_zcash/verif/tests/test_zcash.sv
index 90db591..46f692b 100644
--- a/aws/cl_zcash/verif/tests/test_zcash.sv
+++ b/aws/cl_zcash/verif/tests/test_zcash.sv
@@ -234,9 +234,13 @@ endtask;
 task test_bls12_381();
   // Try writing and reading a slot
   logic [1024*8-1:0] dat = 0;
+  logic failed = 0;
   logic [31:0] rdata;
   bls12_381_pkg::data_t slot_data;
   bls12_381_pkg::inst_t inst;
+  bls12_381_interrupt_rpl_t interrupt_rpl;
+  fp2_jb_point_t out_p, exp_p; // NOTE(review): exp_p is never assigned in the hunks of this patch, so the out_p == exp_p check below compares against an unset value - assign the expected point before the fork
+  logic [380:0] in_k = 381'h33333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333;
 
   // Make sure we aren't in reset
   while(!tb.card.fpga.CL.zcash_fpga_top.bls12_381_top.inst_uram_reset.reset_done ||
@@ -274,6 +278,55 @@ task test_bls12_381();
   $display("INFO: Wrote: 0x%x", inst);
   assert(dat[2*8-1:0] == inst) else $fatal(1, "ERROR: Writing to slot and reading gave wrong results!");
 
+  slot_data = '{dat:in_k, pt:SCALAR};
+  for(int i = 0; i < 48; i = i + 4)
+    write_ocl_reg(.addr(`ZCASH_OFFSET + bls12_381_pkg::DATA_AXIL_START + 3*64 + i), .data(slot_data[i*8 +: 32]));
+
+
+  inst = '{code:SEND_INTERRUPT, a:16'd0, b:16'habcd, c:16'd0};
+  for(int i = 0; i < 8; i = i + 4)
+    write_ocl_reg(.addr(`ZCASH_OFFSET + bls12_381_pkg::INST_AXIL_START + 1*8 + i), .data(inst[i*8 +: 32]));
+
+
+  // Write to current slot to start
+  inst = '{code:FP2_FPOINT_MULT, a:16'd3, b:16'd0, c:16'd0};
+  for(int i = 0; i < 8; i = i + 4)
+    write_ocl_reg(.addr(`ZCASH_OFFSET + bls12_381_pkg::INST_AXIL_START + 0*8 + i), .data(inst[i*8 +: 32]));
+
+  fork
+    begin
+      while(stream_len == 0) read_stream(.data(stream_data), .len(stream_len));
+      interrupt_rpl = stream_data;
+
+      assert(interrupt_rpl.hdr.cmd == BLS12_381_INTERRUPT_RPL) else $fatal(1, "ERROR: Received non-interrupt message");
+      assert(interrupt_rpl.index == 16'habcd) else $fatal(1, "ERROR: Received wrong index value in message");
+      assert(interrupt_rpl.data_type == FP2_JB) else $fatal(1, "ERROR: Received wrong data type value in message");
+
+      stream_data = stream_data >> $bits(bls12_381_interrupt_rpl_t);
+
+      for (int i = 0; i < 6; i++)
+        out_p[i*381 +: 381] = stream_data[i*(48*8) +: 381];
+
+      if (out_p == exp_p) begin
+        $display("INFO: Output point matched expected:");
+        print_fp2_jb_point(out_p);
+      end else begin
+        $display("ERROR: Output point did NOT match expected:");
+        print_fp2_jb_point(out_p);
+        $display("Expected:");
+        print_fp2_jb_point(exp_p);
+        failed = 1;
+      end
+    end
+    begin
+      repeat(10000) @(posedge tb.card.fpga.clk_main_a0);
+      $fatal(1, "ERROR: No reply received from test_bls12_381");
+    end
+  join_any
+  disable fork;
+
+  if(failed) $fatal(1, "ERROR: Test FAILED test_bls12_381");
+
   $display("test_bls12_381 PASSED");
 endtask;
 
diff --git a/zcash_fpga/src/rtl/bls12_381/bls12_381_axi_bridge.sv b/zcash_fpga/src/rtl/bls12_381/bls12_381_axi_bridge.sv
index 8518b98..2dd7cb5 100644
--- a/zcash_fpga/src/rtl/bls12_381/bls12_381_axi_bridge.sv
+++ b/zcash_fpga/src/rtl/bls12_381/bls12_381_axi_bridge.sv
@@ -95,11 +95,11 @@ always_ff @ (posedge i_clk) begin
 
     // Read requests
     if (inst_ram_read[READ_CYCLE]) begin
-      axi_lite_if.rdata <= inst_ram_if.q;
+      axi_lite_if.rdata <= inst_ram_if.q >> ((axi_lite_if.araddr - INST_AXIL_START) % INST_RAM_ALIGN_BYTE)*8; // shift right by 8x the byte offset of araddr within an INST_RAM_ALIGN_BYTE entry
       axi_lite_if.rvalid <= 1;
     end
     if (data_ram_read[READ_CYCLE]) begin
-      axi_lite_if.rdata <= data_ram_if.q;
+      axi_lite_if.rdata <= data_ram_if.q >> ((axi_lite_if.araddr - DATA_AXIL_START) % DATA_RAM_ALIGN_BYTE)*8; // shift right by 8x the byte offset of araddr within a DATA_RAM_ALIGN_BYTE entry
       axi_lite_if.rvalid <= 1;
     end
 
diff --git a/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv b/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv
index ed19509..73de080 100644
--- a/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv
+++ b/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv
@@ -90,6 +90,8 @@ package bls12_381_pkg;
     COPY_REG = 8'h1,
     SEND_INTERRUPT = 8'h6,
 
+    SUB_ELEMENT = 8'h10,
+    ADD_ELEMENT = 8'h11,
     MUL_ELEMENT = 8'h12,
     INV_ELEMENT = 8'h13,
 
diff --git a/zcash_fpga/src/rtl/bls12_381/bls12_381_top.sv b/zcash_fpga/src/rtl/bls12_381/bls12_381_top.sv
index 82a8785..b270604 100644
--- a/zcash_fpga/src/rtl/bls12_381/bls12_381_top.sv
+++ b/zcash_fpga/src/rtl/bls12_381/bls12_381_top.sv
@@ -185,6 +185,14 @@ always_ff @ (posedge i_clk) begin
           if (cnt == 0) last_inst_cnt <= 0;
           task_mul_element();
         end
+        SUB_ELEMENT: begin
+          if (cnt == 0) last_inst_cnt <= 0;
+          task_sub_element();
+        end
+        ADD_ELEMENT: begin
+          if (cnt == 0) last_inst_cnt <= 0;
+          task_add_element();
+        end
         SEND_INTERRUPT: begin
           last_inst_cnt <= last_inst_cnt;
           task_send_interrupt();
@@ -428,6 +436,138 @@ task get_next_inst();
   end
 endtask
 
+task task_sub_element(); // Loads data slots a and b into sub_in_if[2], writes sub_out_if[2] to slot c; for FE2 repeats with slots a+1, b+1, c+1
+  case(cnt)
+    0: begin
+      sub_out_if[2].rdy <= 1;
+      data_ram_sys_if.a <= curr_inst.a;
+      data_ram_read[0] <= 1;
+      cnt <= 1;
+    end
+    1: begin
+      if (data_ram_read[READ_CYCLE]) begin
+        sub_in_if[2].dat[0 +: $bits(fe_t)] <= curr_data.dat;
+        pt_l <= curr_data.pt;
+        data_ram_sys_if.a <= curr_inst.b;
+        data_ram_read[0] <= 1;
+        cnt <= 2;
+      end
+    end
+    2: begin
+      if (data_ram_read[READ_CYCLE]) begin
+        sub_in_if[2].dat[$bits(fe_t) +: $bits(fe_t)] <= curr_data.dat;
+        sub_in_if[2].val <= 1;
+      end
+      if (sub_out_if[2].val && sub_out_if[2].rdy) begin
+        data_ram_sys_if.a <= curr_inst.c;
+        new_data.dat <= sub_out_if[2].dat;
+        new_data.pt <= pt_l;
+        data_ram_sys_if.we <= 1;
+        cnt <= 5;
+        if (pt_l == FE2) begin
+          // FE2 requires extra logic
+          cnt <= 3;
+        end
+      end
+    end
+    3: begin
+      if (!(|data_ram_read)) begin
+        data_ram_sys_if.a <= curr_inst.a + 1;
+        data_ram_read[0] <= 1;
+      end
+      if (data_ram_read[READ_CYCLE]) begin
+        sub_in_if[2].dat[0 +: $bits(fe_t)] <= curr_data.dat;
+        pt_l <= curr_data.pt;
+        data_ram_sys_if.a <= curr_inst.b + 1;
+        data_ram_read[0] <= 1;
+        cnt <= 4;
+      end
+    end
+    4: begin
+      if (data_ram_read[READ_CYCLE]) begin
+        sub_in_if[2].dat[$bits(fe_t) +: $bits(fe_t)] <= curr_data.dat;
+        sub_in_if[2].val <= 1;
+      end
+      if (sub_out_if[2].val && sub_out_if[2].rdy) begin
+        data_ram_sys_if.a <= curr_inst.c + 1;
+        new_data.dat <= sub_out_if[2].dat;
+        new_data.pt <= pt_l;
+        data_ram_sys_if.we <= 1;
+        cnt <= 5;
+      end
+    end
+    5: begin
+      get_next_inst();
+    end
+  endcase
+endtask;
+
+task task_add_element(); // Loads data slots a and b into add_in_if[2], writes add_out_if[2] to slot c; for FE2 repeats with slots a+1, b+1, c+1
+  case(cnt)
+    0: begin
+      add_out_if[2].rdy <= 1;
+      data_ram_sys_if.a <= curr_inst.a;
+      data_ram_read[0] <= 1;
+      cnt <= 1;
+    end
+    1: begin
+      if (data_ram_read[READ_CYCLE]) begin
+        add_in_if[2].dat[0 +: $bits(fe_t)] <= curr_data.dat;
+        pt_l <= curr_data.pt;
+        data_ram_sys_if.a <= curr_inst.b;
+        data_ram_read[0] <= 1;
+        cnt <= 2;
+      end
+    end
+    2: begin
+      if (data_ram_read[READ_CYCLE]) begin
+        add_in_if[2].dat[$bits(fe_t) +: $bits(fe_t)] <= curr_data.dat;
+        add_in_if[2].val <= 1;
+      end
+      if (add_out_if[2].val && add_out_if[2].rdy) begin
+        data_ram_sys_if.a <= curr_inst.c;
+        new_data.dat <= add_out_if[2].dat;
+        new_data.pt <= pt_l;
+        data_ram_sys_if.we <= 1;
+        cnt <= 5;
+        if (pt_l == FE2) begin
+          // FE2 requires extra logic
+          cnt <= 3;
+        end
+      end
+    end
+    3: begin
+      if (!(|data_ram_read)) begin
+        data_ram_sys_if.a <= curr_inst.a + 1;
+        data_ram_read[0] <= 1;
+      end
+      if (data_ram_read[READ_CYCLE]) begin
+        add_in_if[2].dat[0 +: $bits(fe_t)] <= curr_data.dat;
+        pt_l <= curr_data.pt;
+        data_ram_sys_if.a <= curr_inst.b + 1;
+        data_ram_read[0] <= 1;
+        cnt <= 4;
+      end
+    end
+    4: begin
+      if (data_ram_read[READ_CYCLE]) begin
+        add_in_if[2].dat[$bits(fe_t) +: $bits(fe_t)] <= curr_data.dat;
+        add_in_if[2].val <= 1;
+      end
+      if (add_out_if[2].val && add_out_if[2].rdy) begin
+        data_ram_sys_if.a <= curr_inst.c + 1;
+        new_data.dat <= add_out_if[2].dat;
+        new_data.pt <= pt_l;
+        data_ram_sys_if.we <= 1;
+        cnt <= 5;
+      end
+    end
+    5: begin
+      get_next_inst();
+    end
+  endcase
+endtask;
+
 task task_mul_element();
   case(cnt)
     0: begin
diff --git a/zcash_fpga/src/tb/bls12_381_top_tb.sv b/zcash_fpga/src/tb/bls12_381_top_tb.sv
index 3183711..968b3f2 100644
--- a/zcash_fpga/src/tb/bls12_381_top_tb.sv
+++ b/zcash_fpga/src/tb/bls12_381_top_tb.sv
@@ -246,7 +246,7 @@ task test_inv_element();
 
   inst = '{code:SEND_INTERRUPT, a:16'd8, b:16'h1234, c:16'd0};
   axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+2)*8), .len(8));
-  
+
   // Make sure instructions after are NOOP
   inst = '{code:NOOP_WAIT, a:16'd0, b:16'h0, c:16'd0};
   axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START+ (rdata+3)*8), .len(8));
@@ -291,15 +291,15 @@ task test_inv_element();
 
   if(failed)
     $fatal(1, "ERROR: test_inv_element on FE element FAILED");
-  
-  
+
+
   // Try a FE2 elelemnt
   in2[0] = random_vector(384/8) % P;
   in2[1] = random_vector(384/8) % P;
-  
+
   exp2 = fe2_inv(in2);
   $display("Trying FE2 element ...");
-  
+
   // See what current instruction pointer is
   axi_lite_if.peek(.addr(32'h10), .data(rdata));
 
@@ -307,11 +307,11 @@ task test_inv_element();
   axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 5*64), .len(48));
   data = '{dat:in2[1], pt:FE2};
   axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 6*64), .len(48));
-  
+
   inst = '{code:SEND_INTERRUPT, a:16'd9, b:16'h5678, c:16'd0};
   axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+1)*8), .len(8));
 
-  
+
   // Write to current slot to start
   inst = '{code:INV_ELEMENT, a:16'd5, b:16'd9, c:16'd0};
   axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata)*8), .len(8));
@@ -349,15 +349,15 @@ task test_inv_element();
 
   axi_lite_if.peek(.addr(32'h14), .data(rdata));
   $display("INFO: Last cycle count was %d", rdata);
-  
+
   if(failed)
     $fatal(1, "ERROR: test_inv_element on FE2 element FAILED");
-  
+
   $display("INFO: test_inv_element PASSED both FE and FE2 elements!");
 endtask;
 
 
-task test_mul_element();
+task test_mul_add_sub_element();
   integer signed get_len;
   logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat;
   inst_t inst;
@@ -371,29 +371,35 @@ task test_mul_element();
   failed = 0;
   in_a = random_vector(384/8) % P;
   in_b = random_vector(384/8) % P;
-  exp = fe_mul(in_a, in_b);
-  $display("Running test_mul_element...");
+  exp = fe_sub(fe_add(fe_mul(in_a, in_b), fe_mul(in_a, in_b)), fe_mul(in_a, in_b));
+  $display("Running test_mul_add_sub_element...");
   $display("First trying FE element ...");
 
   //Reset the RAM
   axi_lite_if.poke(.addr(32'h0), .data(2'b11));
-  
+
   while(!bls12_381_top.inst_uram_reset.reset_done ||
         !bls12_381_top.data_uram_reset.reset_done) @(posedge clk);
-  
+
   axi_lite_if.poke(.addr(32'h10), .data(0));
 
   data = '{dat:in_a, pt:FE};
   axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 0*64), .len(48)); // Scalar to multiply by goes in data slot 1
   data = '{dat:in_b, pt:FE};
   axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 11*64), .len(48)); // Scalar to multiply by goes in data slot 1
-  
-  inst = '{code:SEND_INTERRUPT, a:16'd2, b:16'h1111, c:16'd0};
+
+  inst = '{code:SEND_INTERRUPT, a:16'd6, b:16'h1111, c:16'd0};
+  axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + 3*8), .len(8));
+
+  inst = '{code:ADD_ELEMENT, a:16'd2, b:16'd2, c:16'd4};
   axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + 1*8), .len(8));
 
-  // Write to current slot to start
+  inst = '{code:SUB_ELEMENT, a:16'd4, b:16'd2, c:16'd6};
+  axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + 2*8), .len(8));
+
   inst = '{code:MUL_ELEMENT, a:16'd0, b:16'd11, c:16'd2};
   axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + 0*8), .len(8));
+
   fork
     begin
       out_if.get_stream(get_dat, get_len, 0);
@@ -429,17 +435,17 @@ task test_mul_element();
 
   if(failed)
-    $fatal(1, "ERROR: test_mul_element on FE element FAILED");
+    $fatal(1, "ERROR: test_mul_add_sub_element on FE element FAILED");
-  
-  
+
+
   // Try a FE2 elelemnt
   in2_a[0] = random_vector(384/8) % P;
   in2_a[1] = random_vector(384/8) % P;
   in2_b[0] = random_vector(384/8) % P;
   in2_b[1] = random_vector(384/8) % P;
-  
-  exp2 = fe2_mul(in2_a, in2_b);
+
+  exp2 = fe2_sub(fe2_add(fe2_mul(in2_a, in2_b), fe2_mul(in2_a, in2_b)), fe2_mul(in2_a, in2_b));
   $display("Trying FE2 element ...");
-  
+
   // See what current instruction pointer is
   axi_lite_if.peek(.addr(32'h10), .data(rdata));
 
@@ -447,15 +453,15 @@ task test_mul_element();
   axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 0*64), .len(48));
   data = '{dat:in2_a[1], pt:FE2};
   axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 1*64), .len(48));
-  
+
   data = '{dat:in2_b[0], pt:FE2};
   axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 11*64), .len(48));
   data = '{dat:in2_b[1], pt:FE2};
   axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 12*64), .len(48));
-  
+
   // Set instruction pointer back to 0 to start
-  axi_lite_if.poke(.addr(32'h10), .data(2'b0));
-  
+  axi_lite_if.poke(.addr(32'h10), .data(0));
+
   fork
     begin
       out_if.get_stream(get_dat, get_len, 0);
@@ -489,11 +495,11 @@ task test_mul_element();
 
   axi_lite_if.peek(.addr(32'h14), .data(rdata));
   $display("INFO: Last cycle count was %d", rdata);
-  
+
   if(failed)
-    $fatal(1, "ERROR: test_mul_element on FE2 element FAILED");
-  
-  $display("INFO: test_mul_element PASSED both FE and FE2 elements!");
+    $fatal(1, "ERROR: test_mul_add_sub_element on FE2 element FAILED");
+
+  $display("INFO: test_mul_add_sub_element PASSED both FE and FE2 elements!");
 endtask;
 
 
@@ -509,7 +515,7 @@ initial begin
   test_fp_fpoint_mult();
   test_fp2_fpoint_mult();
   test_inv_element();
-  test_mul_element();
+  test_mul_add_sub_element();
 
   #1us $finish();
 