diff --git a/aws/cl_zcash/software/runtime/zcash_fpga.cpp b/aws/cl_zcash/software/runtime/zcash_fpga.cpp
index 3fa98ac..220a3f9 100644
--- a/aws/cl_zcash/software/runtime/zcash_fpga.cpp
+++ b/aws/cl_zcash/software/runtime/zcash_fpga.cpp
@@ -310,7 +310,7 @@ int zcash_fpga::bls12_381_read_data_slot(unsigned int id, bls12_381_slot_t& slot
     fail_on(rc, out, "Unable to read from FPGA!\n");
   }
 
-  slot_data.point_type = (point_type_t)(*((uint8_t*)&slot_data + 47) >> 5);
+  slot_data.point_type = (point_type_t)(*((unsigned char*)&slot_data + 47) >> 5); // Read byte 47 as unsigned: plain char may be signed, and shifting a negative value would turn type codes 4..7 into negative numbers
   // Clear top 3 bits
   *((char*)&slot_data + 47) &= 0x1F;
 
diff --git a/aws/cl_zcash/verif/scripts/Makefile b/aws/cl_zcash/verif/scripts/Makefile
index fbf7565..6008e58 100644
--- a/aws/cl_zcash/verif/scripts/Makefile
+++ b/aws/cl_zcash/verif/scripts/Makefile
@@ -20,7 +20,7 @@ $(info VIVADO_TOOL_VERSION = $(VIVADO_TOOL_VERSION))
 endif
 
 export TEST ?= test_zcash
-export C_TEST ?= test_zcash
+export C_TEST ?= test_null
 
 export CL_ROOT = $(PWD)/../..
 export SDK_DIR = $(AWS_FPGA_REPO_DIR)/sdk
diff --git a/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv b/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv
index 055aa0b..ed19509 100644
--- a/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv
+++ b/zcash_fpga/src/rtl/bls12_381/bls12_381_pkg.sv
@@ -37,7 +37,7 @@ package bls12_381_pkg;
     FP2_AF = 6,
     FP2_JB = 7
   } point_type_t;
-
+
   function integer unsigned get_point_type_size(point_type_t pt);
     case(pt)
       SCALAR: get_point_type_size = 1;
@@ -58,7 +58,7 @@ package bls12_381_pkg;
     fe_t y;
     fe_t x;
   } jb_point_t;
-
+
   typedef fe_t [1:0] fe2_t;
   typedef fe2_t [2:0] fe6_t;
   typedef fe6_t [1:0] fe12_t;
@@ -81,7 +81,7 @@ package bls12_381_pkg;
   } fp2_jb_point_t;
 
   fp2_jb_point_t g2_point = '{x:G2x, y:G2y, z:FE2_one};
-
+
   fp2_jb_point_t g_point_fp2 = '{x:{381'd0, Gx}, y:{381'd0, Gy}, z:FE2_one};  // Fp Generator point used in dual mode point multiplication
 
   // Instruction codes
@@ -90,7 +90,8 @@ package bls12_381_pkg;
     COPY_REG = 8'h1,
     SEND_INTERRUPT = 8'h6,
 
-    SCALAR_INV = 8'h13,
+
MUL_ELEMENT = 8'h12, // New opcode; dispatched to task_mul_element() in bls12_381_top.sv
+    INV_ELEMENT = 8'h13, // Replaces the removed SCALAR_INV name; opcode value is unchanged
 
     POINT_MULT = 8'h24,
     FP_FPOINT_MULT = 8'h25,
diff --git a/zcash_fpga/src/rtl/bls12_381/bls12_381_top.sv b/zcash_fpga/src/rtl/bls12_381/bls12_381_top.sv
index 8f189e5..98532f4 100644
--- a/zcash_fpga/src/rtl/bls12_381/bls12_381_top.sv
+++ b/zcash_fpga/src/rtl/bls12_381/bls12_381_top.sv
@@ -181,6 +181,10 @@ always_ff @ (posedge i_clk) begin
           if (cnt == 0) last_inst_cnt <= 0;
           task_inv_element();
         end
+        MUL_ELEMENT: begin // Dispatch arm for the new multiply opcode
+          if (cnt == 0) last_inst_cnt <= 0; // Zero last_inst_cnt on the instruction's first step, mirroring the task_inv_element arm above
+          task_mul_element();
+        end
         SEND_INTERRUPT: begin
           last_inst_cnt <= last_inst_cnt;
           task_send_interrupt();
@@ -424,6 +428,115 @@ task get_next_inst();
   end
 endtask
 
+task task_mul_element(); // Steps (selected by cnt) that multiply the data-RAM entries at curr_inst.a and curr_inst.b and write the result at curr_inst.c; FE2 entries take the extra steps 3-6 and also write curr_inst.c + 1
+  case(cnt)
+    0: begin // Step 0: accept multiplier results and start reading entry a
+      mul_out_if[2].rdy <= 1;
+      data_ram_sys_if.a <= curr_inst.a;
+      data_ram_read[0] <= 1; // Start a RAM read; later steps wait on data_ram_read[READ_CYCLE] (presumably the read-latency delay - confirm)
+      cnt <= cnt + 1;
+    end
+    1: begin // Step 1: latch entry a into the low half of the multiplier input, remember its point type, read entry b
+      if (data_ram_read[READ_CYCLE]) begin
+        mul_in_if[2].dat[0 +: $bits(fe_t)] <= curr_data.dat;
+        pt_l <= curr_data.pt;
+        data_ram_sys_if.a <= curr_inst.b;
+        data_ram_read[0] <= 1;
+        cnt <= 2;
+      end
+    end
+    2: begin // Step 2: latch entry b into the high half and issue the multiply tagged ctl 0
+      if (data_ram_read[READ_CYCLE]) begin
+        mul_in_if[2].dat[$bits(fe_t) +: $bits(fe_t)] <= curr_data.dat;
+        mul_in_if[2].val <= 1;
+        mul_in_if[2].ctl <= 0;
+        if (pt_l == FE2) begin // FE2: start reading entry a+1 and continue with step 3
+          data_ram_sys_if.a <= curr_inst.a + 1;
+          data_ram_read[0] <= 1;
+          mul_in_if[2].rdy <= 0; // NOTE(review): this drives rdy on the multiplier *input* interface, while step 5 re-asserts mul_out_if[2].rdy - was mul_out_if[2].rdy intended here? confirm
+          // FE2 requires extra logic
+          cnt <= 3;
+        end
+      end
+      if (mul_out_if[2].val && mul_out_if[2].rdy) begin // A multiplier result accepted while still in step 2 is written to curr_inst.c, then step 8 finishes
+        data_ram_sys_if.a <= curr_inst.c;
+        new_data.dat <= mul_out_if[2].dat;
+        new_data.pt <= pt_l;
+        data_ram_sys_if.we <= 1;
+        cnt <= 8;
+      end
+    end
+    3: begin // Step 3 (FE2): low half <- entry a+1, issue multiply tagged ctl 3, read entry b+1
+      if (data_ram_read[READ_CYCLE]) begin
+        mul_in_if[2].dat[0 +: $bits(fe_t)] <= curr_data.dat;
+        mul_in_if[2].val <= 1;
+        mul_in_if[2].ctl <= 3;
+        data_ram_sys_if.a <= curr_inst.b + 1;
+        data_ram_read[0] <= 1;
+        cnt <= 4;
+      end
+    end
+    4: begin // Step 4 (FE2): high half <- entry b+1, issue multiply tagged ctl 1, re-read entry a
+      if (data_ram_read[READ_CYCLE]) begin
+        mul_in_if[2].dat[$bits(fe_t) +: $bits(fe_t)] <= curr_data.dat;
+        mul_in_if[2].val <= 1;
+        mul_in_if[2].ctl <= 1;
+        data_ram_sys_if.a <= curr_inst.a;
+        data_ram_read[0] <= 1;
+        cnt <= 5;
+      end
+    end
+    5: begin // Step 5 (FE2): low half <- entry a, issue multiply tagged ctl 2, then collect results in step 6
+      if (data_ram_read[READ_CYCLE]) begin
+        mul_in_if[2].dat[0 +: $bits(fe_t)] <= curr_data.dat;
+        mul_in_if[2].val <= 1;
+        mul_in_if[2].ctl <= 2;
+        mul_out_if[2].rdy <= 1;
+        cnt <= 6;
+      end
+    end
+    6: begin // Step 6 (FE2): route each product by its ctl tag into the sub/add inputs, then store both results
+      sub_out_if[2].rdy <= 1;
+      if (mul_out_if[2].val && mul_out_if[2].rdy) begin
+        case(mul_out_if[2].ctl)
+          0: begin // ctl 0 -> low half of the subtractor input
+            sub_in_if[2].dat[0 +: $bits(fe_t)] <= mul_out_if[2].dat;
+          end
+          1: begin // ctl 1 -> high half of the subtractor input; start the subtraction
+            sub_in_if[2].dat[$bits(fe_t) +: $bits(fe_t)] <= mul_out_if[2].dat;
+            sub_in_if[2].val <= 1;
+          end
+          2: begin // ctl 2 -> low half of the adder input; start the addition
+            add_in_if[2].dat[0 +: $bits(fe_t)] <= mul_out_if[2].dat;
+            add_in_if[2].val <= 1;
+          end
+          3: begin // ctl 3 -> high half of the adder input
+            add_in_if[2].dat[$bits(fe_t) +: $bits(fe_t)] <= mul_out_if[2].dat;
+          end
+        endcase
+      end
+
+      if (sub_out_if[2].val && sub_out_if[2].rdy) begin // Subtractor result is written to curr_inst.c
+        new_data.dat <= sub_out_if[2].dat;
+        new_data.pt <= pt_l;
+        data_ram_sys_if.we <= 1;
+        data_ram_sys_if.a <= curr_inst.c;
+        add_out_if[2].rdy <= 1; // Adder result is accepted from here on
+      end
+      if (add_out_if[2].val && add_out_if[2].rdy) begin // Adder result is written to curr_inst.c + 1, then step 8 finishes
+        new_data.dat <= add_out_if[2].dat;
+        new_data.pt <= pt_l;
+        data_ram_sys_if.we <= 1;
+        data_ram_sys_if.a <= curr_inst.c + 1;
+        cnt <= 8;
+      end
+    end
+    8: begin // Step 8: done - fetch the next instruction (no step 7 is defined)
+      get_next_inst();
+    end
+  endcase
+endtask;
+
 task task_copy_reg();
   case(cnt)
     0: begin
diff --git a/zcash_fpga/src/rtl/top/zcash_fpga_pkg.sv b/zcash_fpga/src/rtl/top/zcash_fpga_pkg.sv
index d93cfa7..857724a 100644
--- a/zcash_fpga/src/rtl/top/zcash_fpga_pkg.sv
+++ b/zcash_fpga/src/rtl/top/zcash_fpga_pkg.sv
@@ -27,7 +27,7 @@ package zcash_fpga_pkg;
 
   import bls12_381_pkg::point_type_t;
 
-  parameter FPGA_VERSION = 32'h01_01_00; //v1.1.0
+  parameter FPGA_VERSION = 32'h01_01_02; //v1.1.2
 
   // What features are enabled in this build
   parameter bit ENB_VERIFY_SECP256K1_SIG = 1;
diff --git a/zcash_fpga/src/tb/bls12_381_top_tb.sv b/zcash_fpga/src/tb/bls12_381_top_tb.sv
index fd52e17..3183711 100644
--- a/zcash_fpga/src/tb/bls12_381_top_tb.sv
+++ b/zcash_fpga/src/tb/bls12_381_top_tb.sv
@@ -236,6 +236,7 @@
task test_inv_element();
   $display("First trying FE element ...");
   // See what current instruction pointer is
   axi_lite_if.peek(.addr(32'h10), .data(rdata));
+  $display("Current pointer at %d", rdata);
   data = '{dat:in, pt:FE};
   axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 5*64), .len(48)); // Scalar to multiply by goes in data slot 1
 
@@ -245,6 +246,11 @@ task test_inv_element();
 
   inst = '{code:SEND_INTERRUPT, a:16'd8, b:16'h1234, c:16'd0};
   axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+2)*8), .len(8));
+
+  // Make sure instructions after are NOOP
+  inst = '{code:NOOP_WAIT, a:16'd0, b:16'h0, c:16'd0};
+  axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START+ (rdata+3)*8), .len(8));
+  axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START+ (rdata+4)*8), .len(8));
 
   // Write to current slot to start
   inst = '{code:INV_ELEMENT, a:16'd5, b:16'd6, c:16'd0};
@@ -305,7 +311,7 @@ task test_inv_element();
 
   inst = '{code:SEND_INTERRUPT, a:16'd9, b:16'h5678, c:16'd0};
   axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+1)*8), .len(8));
-
+
   // Write to current slot to start
   inst = '{code:INV_ELEMENT, a:16'd5, b:16'd9, c:16'd0};
   axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata)*8), .len(8));
@@ -345,12 +351,152 @@ task test_inv_element();
   $display("INFO: Last cycle count was %d", rdata);
 
   if(failed)
-    $fatal(1, "ERROR: test_inv_element on FE element FAILED");
+    $fatal(1, "ERROR: test_inv_element on FE2 element FAILED");
 
   $display("INFO: test_inv_element PASSED both FE and FE2 elements!");
 
 endtask;
 
+task test_mul_element(); // Runs MUL_ELEMENT on a random FE pair, then a random FE2 pair, checking the interrupt reply against fe_mul / fe2_mul
+  integer signed get_len;
+  logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat;
+  inst_t inst;
+  logic failed;
+  data_t data;
+  logic [31:0] rdata;
+  fe_t in_a, in_b, exp, out;
+  fe2_t in2_a, in2_b, exp2, out2;
+  bls12_381_interrupt_rpl_t interrupt_rpl;
+
+  failed = 0;
+  in_a = random_vector(384/8) % P;
+  in_b = random_vector(384/8) % P;
+  exp = fe_mul(in_a, in_b);
+  $display("Running test_mul_element...");
+  $display("First trying FE element ...");
+  //Reset the RAM
+  axi_lite_if.poke(.addr(32'h0), .data(2'b11));
+
+  while(!bls12_381_top.inst_uram_reset.reset_done ||
+        !bls12_381_top.data_uram_reset.reset_done) @(posedge clk);
+
+  axi_lite_if.poke(.addr(32'h10), .data(0));
+
+  data = '{dat:in_a, pt:FE};
+  axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 0*64), .len(48)); // Operand A goes in data slot 0
+  data = '{dat:in_b, pt:FE};
+  axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 11*64), .len(48)); // Operand B goes in data slot 11
+
+  inst = '{code:SEND_INTERRUPT, a:16'd2, b:16'h1111, c:16'd0}; // Instruction 1: report data slot 2 (the product) with index 0x1111
+  axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + 1*8), .len(8));
+
+  // Write to current slot to start
+  inst = '{code:MUL_ELEMENT, a:16'd0, b:16'd11, c:16'd2}; // Instruction 0: slot 2 <- slot 0 * slot 11
+  axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + 0*8), .len(8));
+
+  fork
+    begin
+      out_if.get_stream(get_dat, get_len, 0);
+      interrupt_rpl = get_dat;
+
+      assert(interrupt_rpl.hdr.cmd == BLS12_381_INTERRUPT_RPL) else $fatal(1, "ERROR: Received non-interrupt message");
+      assert(interrupt_rpl.index == 16'h1111) else $fatal(1, "ERROR: Received wrong index value in message");
+      assert(interrupt_rpl.data_type == FE) else $fatal(1, "ERROR: Received wrong data type value in message");
+
+      get_dat = get_dat >> $bits(bls12_381_interrupt_rpl_t); // Drop the reply header so the payload starts at bit 0
+      out = get_dat;
+
+      if (out == exp) begin
+        $display("INFO: Output element matched expected:");
+        $display("0x%x", out);
+      end else begin
+        $display("ERROR: Output element did NOT match expected:");
+        $display("0x%x", out);
+        $display("Expected:");
+        $display("0x%x", exp);
+        failed = 1;
+      end
+    end
+    begin
+      repeat(100000) @(posedge out_if.i_clk);
+      $fatal(1, "ERROR: Timeout while waiting for result"); // finish_number must come first, as in the other $fatal calls
+    end
+  join_any
+  disable fork;
+
+  axi_lite_if.peek(.addr(32'h14), .data(rdata));
+  $display("INFO: Last cycle count was %d", rdata);
+
+  if(failed)
+    $fatal(1, "ERROR: test_mul_element on FE element FAILED");
+
+
+  // Try a FE2 element
+  in2_a[0] = random_vector(384/8) % P;
+  in2_a[1] = random_vector(384/8) % P;
+  in2_b[0] = random_vector(384/8) % P;
+  in2_b[1] = random_vector(384/8) % P;
+
+  exp2 = fe2_mul(in2_a, in2_b);
+  $display("Trying FE2 element ...");
+
+  // See what current instruction pointer is
+  axi_lite_if.peek(.addr(32'h10), .data(rdata));
+
+  data = '{dat:in2_a[0], pt:FE2};
+  axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 0*64), .len(48));
+  data = '{dat:in2_a[1], pt:FE2};
+  axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 1*64), .len(48));
+
+  data = '{dat:in2_b[0], pt:FE2};
+  axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 11*64), .len(48));
+  data = '{dat:in2_b[1], pt:FE2};
+  axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 12*64), .len(48));
+
+  // Set instruction pointer back to 0 to start
+  axi_lite_if.poke(.addr(32'h10), .data(2'b0)); // Re-runs the two instructions written for the FE case
+
+  fork
+    begin
+      out_if.get_stream(get_dat, get_len, 0);
+      interrupt_rpl = get_dat;
+
+      assert(interrupt_rpl.hdr.cmd == BLS12_381_INTERRUPT_RPL) else $fatal(1, "ERROR: Received non-interrupt message");
+      assert(interrupt_rpl.index == 16'h1111) else $fatal(1, "ERROR: Received wrong index value in message");
+      assert(interrupt_rpl.data_type == FE2) else $fatal(1, "ERROR: Received wrong data type value in message");
+
+      get_dat = get_dat >> $bits(bls12_381_interrupt_rpl_t);
+      for (int i = 0; i < 2; i++)
+        out2[i] = get_dat[i*(48*8) +: 381]; // Each coefficient sits in its own 48-byte field; keep the low 381 bits
+
+      if (out2 == exp2) begin
+        $display("INFO: Output element matched expected:");
+        $display("0x%x", out2);
+      end else begin
+        $display("ERROR: Output element did NOT match expected:");
+        $display("0x%x 0x%x", out2[1], out2[0]);
+        $display("Expected:");
+        $display("0x%x 0x%x", exp2[1], exp2[0]);
+        failed = 1;
+      end
+    end
+    begin
+      repeat(100000) @(posedge out_if.i_clk);
+      $fatal(1, "ERROR: Timeout while waiting for result"); // finish_number must come first, as in the other $fatal calls
+    end
+  join_any
+  disable fork;
+
+  axi_lite_if.peek(.addr(32'h14), .data(rdata));
+  $display("INFO: Last cycle count was %d", rdata);
+
+  if(failed)
+    $fatal(1, "ERROR: test_mul_element on FE2 element FAILED");
+
+  $display("INFO: test_mul_element PASSED both FE and FE2 elements!");
+
+endtask;
+
 initial begin
   axi_lite_if.reset_source();
   out_if.rdy = 0;
@@ -360,9 +506,10 @@ initial begin
         !bls12_381_top.data_uram_reset.reset_done) @(posedge clk);
 
-  //test_fp_fpoint_mult();
-  //test_fp2_fpoint_mult();
+  test_fp_fpoint_mult();
+  test_fp2_fpoint_mult();
   test_inv_element();
+  test_mul_element();
 
   #1us $finish();