Updated to add ADD and SUB functions, memory map fixes, and test bench updates
This commit is contained in:
bsdevlin 2019-06-30 22:25:49 +08:00
parent ac887e3b49
commit cb18f7446e
5 changed files with 234 additions and 33 deletions

View File

@ -234,9 +234,13 @@ endtask;
task test_bls12_381();
// Try writing and reading a slot
logic [1024*8-1:0] dat = 0;
logic failed = 0;
logic [31:0] rdata;
bls12_381_pkg::data_t slot_data;
bls12_381_pkg::inst_t inst;
bls12_381_interrupt_rpl_t interrupt_rpl;
fp2_jb_point_t out_p, exp_p;
logic [380:0] in_k = 381'h33333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333333
// Make sure we aren't in reset
while(!tb.card.fpga.CL.zcash_fpga_top.bls12_381_top.inst_uram_reset.reset_done ||
@ -274,6 +278,55 @@ task test_bls12_381();
$display("INFO: Wrote: 0x%x", inst);
assert(dat[2*8-1:0] == inst) else $fatal(1, "ERROR: Writing to slot and reading gave wrong results!");
slot_data = '{dat:in_k, pt:SCALAR};
for(int i = 0; i < 48; i = i + 4)
write_ocl_reg(.addr(`ZCASH_OFFSET + bls12_381_pkg::DATA_AXIL_START + 3*64 + i), .data(slot_data[i*8 +: 32]));
inst = '{code:SEND_INTERRUPT, a:16'd0, b:16'habcd, c:16'd0};
for(int i = 0; i < 8; i = i + 4)
write_ocl_reg(.addr(`ZCASH_OFFSET + bls12_381_pkg::INST_AXIL_START + 1*8 + i), .data(inst[i*8 +: 32]));
// Write to current slot to start
inst = '{code:FP2_FPOINT_MULT, a:16'd3, b:16'd0, c:16'd0};
for(int i = 0; i < 8; i = i + 4)
write_ocl_reg(.addr(`ZCASH_OFFSET + bls12_381_pkg::INST_AXIL_START + 0*8 + i), .data(inst[i*8 +: 32]));
fork
begin
while(stream_len == 0) read_stream(.data(stream_data), .len(stream_len));
interrupt_rpl = stream_data;
assert(interrupt_rpl.hdr.cmd == BLS12_381_INTERRUPT_RPL) else $fatal(1, "ERROR: Received non-interrupt message");
assert(interrupt_rpl.index == 16'habcd) else $fatal(1, "ERROR: Received wrong index value in message");
assert(interrupt_rpl.data_type == FP2_JB) else $fatal(1, "ERROR: Received wrong data type value in message");
stream_data = stream_data >> $bits(bls12_381_interrupt_rpl_t);
for (int i = 0; i < 6; i++)
out_p[i*381 +: 381] = stream_data[i*(48*8) +: 381];
if (out_p == exp_p) begin
$display("INFO: Output point matched expected:");
print_fp2_jb_point(out_p);
end else begin
$display("ERROR: Output point did NOT match expected:");
print_fp2_jb_point(out_p);
$display("Expected:");
print_fp2_jb_point(exp_p);
failed = 1;
end
end
begin
repeat(10000) @(posedge tb.card.fpga.clk_main_a0);
$fatal(1, "ERROR: No reply received from test_bls12_381");
end
join_any
disable fork;
if(failed) $fatal(1, "ERROR: Test FAILED test_bls12_381");
$display("test_bls12_381 PASSED");
endtask;

View File

@ -95,11 +95,11 @@ always_ff @ (posedge i_clk) begin
// Read requests
if (inst_ram_read[READ_CYCLE]) begin
axi_lite_if.rdata <= inst_ram_if.q;
axi_lite_if.rdata <= inst_ram_if.q >> ((axi_lite_if.araddr - INST_AXIL_START) % INST_RAM_ALIGN_BYTE)*8;
axi_lite_if.rvalid <= 1;
end
if (data_ram_read[READ_CYCLE]) begin
axi_lite_if.rdata <= data_ram_if.q;
axi_lite_if.rdata <= data_ram_if.q >> ((axi_lite_if.araddr - DATA_AXIL_START) % DATA_RAM_ALIGN_BYTE)*8;
axi_lite_if.rvalid <= 1;
end

View File

@ -90,6 +90,8 @@ package bls12_381_pkg;
COPY_REG = 8'h1,
SEND_INTERRUPT = 8'h6,
SUB_ELEMENT = 8'h10,
ADD_ELEMENT = 8'h11,
MUL_ELEMENT = 8'h12,
INV_ELEMENT = 8'h13,

View File

@ -185,6 +185,14 @@ always_ff @ (posedge i_clk) begin
if (cnt == 0) last_inst_cnt <= 0;
task_mul_element();
end
SUB_ELEMENT: begin
if (cnt == 0) last_inst_cnt <= 0;
task_sub_element();
end
ADD_ELEMENT: begin
if (cnt == 0) last_inst_cnt <= 0;
task_add_element();
end
SEND_INTERRUPT: begin
last_inst_cnt <= last_inst_cnt;
task_send_interrupt();
@ -428,6 +436,138 @@ task get_next_inst();
end
endtask
// Handler for the SUB_ELEMENT instruction, sequenced by cnt.
//
// The two source operands are fetched from the data RAM slots named by
// curr_inst.a and curr_inst.b, packed into the lower and upper fe_t-wide
// slices of sub_in_if[2].dat, and the value returned on sub_out_if[2] is
// written to the slot named by curr_inst.c with the point type of the first
// operand. When that type is FE2 the same sequence is repeated for slots
// a+1, b+1 and c+1 before the next instruction is fetched.
//
// NOTE(review): de-assertion of sub_in_if[2].val and data_ram_sys_if.we, and
// the advance of data_ram_read from index 0 to READ_CYCLE, are not done in
// this task - presumably handled by the enclosing always_ff; confirm there.
task task_sub_element();
  case (cnt)
    // Open the result channel and request the operand stored at slot a.
    0: begin
      data_ram_sys_if.a <= curr_inst.a;
      data_ram_read[0] <= 1;
      sub_out_if[2].rdy <= 1;
      cnt <= 1;
    end
    // Slot a has arrived: keep it (and its point type), then request slot b.
    1: begin
      if (data_ram_read[READ_CYCLE]) begin
        pt_l <= curr_data.pt;
        sub_in_if[2].dat[0 +: $bits(fe_t)] <= curr_data.dat;
        data_ram_read[0] <= 1;
        data_ram_sys_if.a <= curr_inst.b;
        cnt <= 2;
      end
    end
    // Slot b has arrived: present both operands, then store the result in slot c.
    2: begin
      if (data_ram_read[READ_CYCLE]) begin
        sub_in_if[2].val <= 1;
        sub_in_if[2].dat[$bits(fe_t) +: $bits(fe_t)] <= curr_data.dat;
      end
      if (sub_out_if[2].rdy && sub_out_if[2].val) begin
        data_ram_sys_if.we <= 1;
        data_ram_sys_if.a <= curr_inst.c;
        new_data.pt <= pt_l;
        new_data.dat <= sub_out_if[2].dat;
        // An FE2 operand needs a second pass over the following slots.
        if (pt_l == FE2)
          cnt <= 3;
        else
          cnt <= 5;
      end
    end
    // Second pass (FE2 only): once no read is in flight request slot a+1,
    // and when it arrives request slot b+1.
    3: begin
      if (~|data_ram_read) begin
        data_ram_read[0] <= 1;
        data_ram_sys_if.a <= curr_inst.a + 1;
      end
      if (data_ram_read[READ_CYCLE]) begin
        pt_l <= curr_data.pt;
        sub_in_if[2].dat[0 +: $bits(fe_t)] <= curr_data.dat;
        data_ram_read[0] <= 1;
        data_ram_sys_if.a <= curr_inst.b + 1;
        cnt <= 4;
      end
    end
    // Second pass (FE2 only): present the operands and store the result in slot c+1.
    4: begin
      if (data_ram_read[READ_CYCLE]) begin
        sub_in_if[2].val <= 1;
        sub_in_if[2].dat[$bits(fe_t) +: $bits(fe_t)] <= curr_data.dat;
      end
      if (sub_out_if[2].rdy && sub_out_if[2].val) begin
        data_ram_sys_if.we <= 1;
        data_ram_sys_if.a <= curr_inst.c + 1;
        new_data.pt <= pt_l;
        new_data.dat <= sub_out_if[2].dat;
        cnt <= 5;
      end
    end
    // Finished: move on to the next instruction.
    5: begin
      get_next_inst();
    end
  endcase
endtask;
// Handler for the ADD_ELEMENT instruction, sequenced by cnt.
//
// The two source operands are fetched from the data RAM slots named by
// curr_inst.a and curr_inst.b, packed into the lower and upper fe_t-wide
// slices of add_in_if[2].dat, and the value returned on add_out_if[2] is
// written to the slot named by curr_inst.c with the point type of the first
// operand. When that type is FE2 the same sequence is repeated for slots
// a+1, b+1 and c+1 before the next instruction is fetched.
//
// NOTE(review): de-assertion of add_in_if[2].val and data_ram_sys_if.we, and
// the advance of data_ram_read from index 0 to READ_CYCLE, are not done in
// this task - presumably handled by the enclosing always_ff; confirm there.
task task_add_element();
  case (cnt)
    // Open the result channel and request the operand stored at slot a.
    0: begin
      data_ram_sys_if.a <= curr_inst.a;
      data_ram_read[0] <= 1;
      add_out_if[2].rdy <= 1;
      // cnt is 0 in this branch, so the next state is 1.
      cnt <= 1;
    end
    // Slot a has arrived: keep it (and its point type), then request slot b.
    1: begin
      if (data_ram_read[READ_CYCLE]) begin
        pt_l <= curr_data.pt;
        add_in_if[2].dat[0 +: $bits(fe_t)] <= curr_data.dat;
        data_ram_read[0] <= 1;
        data_ram_sys_if.a <= curr_inst.b;
        cnt <= 2;
      end
    end
    // Slot b has arrived: present both operands, then store the result in slot c.
    2: begin
      if (data_ram_read[READ_CYCLE]) begin
        add_in_if[2].val <= 1;
        add_in_if[2].dat[$bits(fe_t) +: $bits(fe_t)] <= curr_data.dat;
      end
      if (add_out_if[2].rdy && add_out_if[2].val) begin
        data_ram_sys_if.we <= 1;
        data_ram_sys_if.a <= curr_inst.c;
        new_data.pt <= pt_l;
        new_data.dat <= add_out_if[2].dat;
        // An FE2 operand needs a second pass over the following slots.
        if (pt_l == FE2)
          cnt <= 3;
        else
          cnt <= 5;
      end
    end
    // Second pass (FE2 only): once no read is in flight request slot a+1,
    // and when it arrives request slot b+1.
    3: begin
      if (~|data_ram_read) begin
        data_ram_read[0] <= 1;
        data_ram_sys_if.a <= curr_inst.a + 1;
      end
      if (data_ram_read[READ_CYCLE]) begin
        pt_l <= curr_data.pt;
        add_in_if[2].dat[0 +: $bits(fe_t)] <= curr_data.dat;
        data_ram_read[0] <= 1;
        data_ram_sys_if.a <= curr_inst.b + 1;
        cnt <= 4;
      end
    end
    // Second pass (FE2 only): present the operands and store the result in slot c+1.
    4: begin
      if (data_ram_read[READ_CYCLE]) begin
        add_in_if[2].val <= 1;
        add_in_if[2].dat[$bits(fe_t) +: $bits(fe_t)] <= curr_data.dat;
      end
      if (add_out_if[2].rdy && add_out_if[2].val) begin
        data_ram_sys_if.we <= 1;
        data_ram_sys_if.a <= curr_inst.c + 1;
        new_data.pt <= pt_l;
        new_data.dat <= add_out_if[2].dat;
        cnt <= 5;
      end
    end
    // Finished: move on to the next instruction.
    5: begin
      get_next_inst();
    end
  endcase
endtask;
task task_mul_element();
case(cnt)
0: begin

View File

@ -246,7 +246,7 @@ task test_inv_element();
inst = '{code:SEND_INTERRUPT, a:16'd8, b:16'h1234, c:16'd0};
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+2)*8), .len(8));
// Make sure instructions after are NOOP
inst = '{code:NOOP_WAIT, a:16'd0, b:16'h0, c:16'd0};
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START+ (rdata+3)*8), .len(8));
@ -291,15 +291,15 @@ task test_inv_element();
if(failed)
$fatal(1, "ERROR: test_inv_element on FE element FAILED");
// Try an FE2 element
in2[0] = random_vector(384/8) % P;
in2[1] = random_vector(384/8) % P;
exp2 = fe2_inv(in2);
$display("Trying FE2 element ...");
// See what current instruction pointer is
axi_lite_if.peek(.addr(32'h10), .data(rdata));
@ -307,11 +307,11 @@ task test_inv_element();
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 5*64), .len(48));
data = '{dat:in2[1], pt:FE2};
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 6*64), .len(48));
inst = '{code:SEND_INTERRUPT, a:16'd9, b:16'h5678, c:16'd0};
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata+1)*8), .len(8));
// Write to current slot to start
inst = '{code:INV_ELEMENT, a:16'd5, b:16'd9, c:16'd0};
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + (rdata)*8), .len(8));
@ -349,15 +349,15 @@ task test_inv_element();
axi_lite_if.peek(.addr(32'h14), .data(rdata));
$display("INFO: Last cycle count was %d", rdata);
if(failed)
$fatal(1, "ERROR: test_inv_element on FE2 element FAILED");
$display("INFO: test_inv_element PASSED both FE and FE2 elements!");
endtask;
task test_mul_element();
task test_mul_add_sub_element();
integer signed get_len;
logic [common_pkg::MAX_SIM_BYTS*8-1:0] get_dat;
inst_t inst;
@ -371,29 +371,35 @@ task test_mul_element();
failed = 0;
in_a = random_vector(384/8) % P;
in_b = random_vector(384/8) % P;
exp = fe_mul(in_a, in_b);
$display("Running test_mul_element...");
exp = fe_sub(fe_add(fe_mul(in_a, in_b), fe_mul(in_a, in_b)), fe_mul(in_a, in_b));
$display("Running test_mul_add_sub_element...");
$display("First trying FE element ...");
//Reset the RAM
axi_lite_if.poke(.addr(32'h0), .data(2'b11));
while(!bls12_381_top.inst_uram_reset.reset_done ||
!bls12_381_top.data_uram_reset.reset_done) @(posedge clk);
axi_lite_if.poke(.addr(32'h10), .data(0));
data = '{dat:in_a, pt:FE};
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 0*64), .len(48)); // Scalar to multiply by goes in data slot 1
data = '{dat:in_b, pt:FE};
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 11*64), .len(48)); // Scalar to multiply by goes in data slot 1
inst = '{code:SEND_INTERRUPT, a:16'd2, b:16'h1111, c:16'd0};
inst = '{code:SEND_INTERRUPT, a:16'd6, b:16'h1111, c:16'd0};
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + 3*8), .len(8));
inst = '{code:ADD_ELEMENT, a:16'd2, b:16'd2, c:16'd4};
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + 1*8), .len(8));
// Write to current slot to start
inst = '{code:SUB_ELEMENT, a:16'd4, b:16'd2, c:16'd6};
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + 2*8), .len(8));
inst = '{code:MUL_ELEMENT, a:16'd0, b:16'd11, c:16'd2};
axi_lite_if.put_data_multiple(.data(inst), .addr(INST_AXIL_START + 0*8), .len(8));
fork
begin
out_if.get_stream(get_dat, get_len, 0);
@ -429,17 +435,17 @@ task test_mul_element();
if(failed)
$fatal(1, "ERROR: test_mul_element on FE element FAILED");
// Try an FE2 element
in2_a[0] = random_vector(384/8) % P;
in2_a[1] = random_vector(384/8) % P;
in2_b[0] = random_vector(384/8) % P;
in2_b[1] = random_vector(384/8) % P;
exp2 = fe2_mul(in2_a, in2_b);
exp2 = fe2_sub(fe2_add(fe2_mul(in2_a, in2_b), fe2_mul(in2_a, in2_b)), fe2_mul(in2_a, in2_b));
$display("Trying FE2 element ...");
// See what current instruction pointer is
axi_lite_if.peek(.addr(32'h10), .data(rdata));
@ -447,15 +453,15 @@ task test_mul_element();
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 0*64), .len(48));
data = '{dat:in2_a[1], pt:FE2};
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 1*64), .len(48));
data = '{dat:in2_b[0], pt:FE2};
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 11*64), .len(48));
data = '{dat:in2_b[1], pt:FE2};
axi_lite_if.put_data_multiple(.data(data), .addr(DATA_AXIL_START + 12*64), .len(48));
// Set instruction pointer back to 0 to start
axi_lite_if.poke(.addr(32'h10), .data(2'b0));
axi_lite_if.poke(.addr(32'h10), .data(0));
fork
begin
out_if.get_stream(get_dat, get_len, 0);
@ -489,11 +495,11 @@ task test_mul_element();
axi_lite_if.peek(.addr(32'h14), .data(rdata));
$display("INFO: Last cycle count was %d", rdata);
if(failed)
$fatal(1, "ERROR: test_mul_element on FE2 element FAILED");
$display("INFO: test_mul_element PASSED both FE and FE2 elements!");
$fatal(1, "ERROR: test_mul_add_sub_element on FE2 element FAILED");
$display("INFO: test_mul_add_sub_element PASSED both FE and FE2 elements!");
endtask;
@ -509,7 +515,7 @@ initial begin
test_fp_fpoint_mult();
test_fp2_fpoint_mult();
test_inv_element();
test_mul_element();
test_mul_add_sub_element();
#1us $finish();