Add the pairing engine to bls12_381 top level

This commit is contained in:
bsdevlin 2019-07-26 19:14:51 +08:00
parent 789a33351d
commit 5136bc8fe9
3 changed files with 129 additions and 290 deletions

View File

@ -121,7 +121,9 @@ package bls12_381_pkg;
POINT_MULT = 8'h24,
FP_FPOINT_MULT = 8'h25,
FP2_FPOINT_MULT = 8'h26
FP2_FPOINT_MULT = 8'h26,
ATE_PAIRING = 8'h28
} code_t;
// Instruction format

View File

@ -69,12 +69,13 @@ if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fp2_jb_point_t))) add_o_if(i_clk)
if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fp2_jb_point_t))) dbl_i_if(i_clk);
if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fp2_jb_point_t))) dbl_o_if(i_clk);
localparam CTL_BITS = 32;
localparam CTL_BITS = 70;
// Access to shared 381bit multiplier / adder / subtractor
// Fp logic uses control bits 7:0
// Fp2 15:8
// Fp6 23:16
// Top level muxes 31:24
// 67:32 Pairing engine - TODO consolidate the logic used here with the point multiplication
if_axi_stream #(.DAT_BITS(2*$bits(bls12_381_pkg::fe_t)), .CTL_BITS(CTL_BITS)) mul_in_if [4:0] (i_clk) ;
if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fe_t)), .CTL_BITS(CTL_BITS)) mul_out_if [4:0](i_clk);
if_axi_stream #(.DAT_BITS(2*$bits(bls12_381_pkg::fe_t)), .CTL_BITS(CTL_BITS)) add_in_if [4:0] (i_clk);
@ -85,6 +86,13 @@ if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fe_t)), .CTL_BITS(CTL_BITS)) su
if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fe_t))) binv_i_if(i_clk);
if_axi_stream #(.DAT_BITS($bits(bls12_381_pkg::fe_t))) binv_o_if(i_clk);
logic pair_i_val, pair_o_rdy;
logic pair_o_val, pair_i_rdy;
bls12_381_pkg::fe12_t pair_o_res;
bls12_381_pkg::af_point_t pair_i_g1;
bls12_381_pkg::fp2_af_point_t pair_i_g2;
logic [31:0] new_inst_pt;
logic new_inst_pt_val, new_inst_pt_val_l;
logic reset_done_inst, reset_done_data;
@ -129,18 +137,17 @@ always_ff @ (posedge i_clk) begin
new_inst_pt_val_l <= 0;
mul_in_if[2].reset_source();
mul_in_if[3].reset_source();
add_in_if[2].reset_source();
add_in_if[3].reset_source();
sub_in_if[2].reset_source();
sub_in_if[3].reset_source();
mul_out_if[2].rdy <= 0;
mul_out_if[3].rdy <= 0;
add_out_if[2].rdy <= 0;
add_out_if[3].rdy <= 0;
sub_out_if[2].rdy <= 0;
sub_out_if[3].rdy <= 0;
pair_i_val <= 0;
pair_i_rdy <= 0;
pair_i_g1 <= 0;
pair_i_g2 <= 0;
end else begin
@ -167,6 +174,7 @@ always_ff @ (posedge i_clk) begin
if (add_in_if[2].val && add_in_if[2].rdy) add_in_if[2].val <= 0;
if (sub_in_if[2].val && sub_in_if[2].rdy) sub_in_if[2].val <= 0;
if (mul_in_if[2].val && mul_in_if[2].rdy) mul_in_if[2].val <= 0;
if (pair_i_val && pair_o_rdy) pair_i_val <= 0;
fp2_pt_mul_out_if.rdy <= 1;
@ -218,6 +226,10 @@ always_ff @ (posedge i_clk) begin
if (cnt == 0) last_inst_cnt <= 0;
task_fp2_fpoint_mult();
end
ATE_PAIRING: begin
if (cnt == 0) last_inst_cnt <= 0;
task_pairing();
end
default: get_next_inst();
endcase
@ -332,6 +344,28 @@ ec_fp2_point_dbl (
.i_sub_if ( sub_out_if[1] )
);
// Pairing engine instance driven by the ATE_PAIRING instruction (task_pairing).
// Inputs are handed over with the pair_i_val / pair_o_rdy handshake, and the
// fe12 result is returned with the pair_o_val / pair_i_rdy handshake.
// The engine issues its Fp multiply / add / subtract requests on index 3 of the
// shared *_in_if arrays and receives the results on index 3 of the *_out_if arrays.
bls12_381_pairing_wrapper #(
.CTL_BITS ( CTL_BITS ),
// NOTE(review): 32 matches the "67:32 Pairing engine" control-bit allocation
// comment next to CTL_BITS; how the wrapper uses OVR_WRT_BIT internally is not
// visible here - confirm against the wrapper source.
.OVR_WRT_BIT ( 32 )
)
bls12_381_pairing_wrapper (
.i_clk ( i_clk ),
.i_rst ( i_rst ),
// Input side: G1 / G2 affine points qualified by pair_i_val, accepted on pair_o_rdy
.i_val ( pair_i_val ),
.o_rdy ( pair_o_rdy ),
.i_g1_af ( pair_i_g1 ),
.i_g2_af ( pair_i_g2 ),
// Output side: fe12 result qualified by pair_o_val, released by pair_i_rdy
.o_val ( pair_o_val ),
.i_rdy ( pair_i_rdy ),
.o_fe12 ( pair_o_res ),
// Shared arithmetic resources, port 3 of each arbiter
.o_mul_fe_if ( mul_in_if[3] ),
.i_mul_fe_if ( mul_out_if[3] ),
.o_add_fe_if ( add_in_if[3] ),
.i_add_fe_if ( add_out_if[3] ),
.o_sub_fe_if ( sub_in_if[3] ),
.i_sub_fe_if ( sub_out_if[3] )
);
resource_share # (
.NUM_IN ( 4 ),
.DAT_BITS ( 2*$bits(bls12_381_pkg::fe_t) ),
@ -985,6 +1019,68 @@ task task_fp2_fpoint_mult();
endcase
endtask
// Sequencer for the ATE_PAIRING instruction; the current step is selected by cnt.
//   0      : start reading the G1 point from data RAM at curr_inst.a
//   1..2   : capture G1 x, y; then switch the read address to curr_inst.b
//   3..6   : capture G2 x[0], x[1], y[0], y[1]; then point the RAM address at
//            curr_inst.c and present the operands to the pairing engine
//   7..18  : while pair_o_val is high, write the result back one DAT_BITS-wide
//            slice per step (12 slices), starting at curr_inst.c
//   19     : drop pair_i_rdy and fetch the next instruction
// NOTE(review): data_ram_read[READ_CYCLE] is presumably the delayed copy of the
// read strobe marking that curr_data is valid - confirm against the RAM logic.
// NOTE(review): data_ram_sys_if.we is only ever set here; it is presumably
// cleared by default elsewhere in the calling always_ff - confirm.
task task_pairing();
  case (cnt) inside
    // Kick off the first read and make sure both handshakes are idle
    0: begin
      data_ram_sys_if.a <= curr_inst.a;
      data_ram_read[0] <= 1;
      pair_i_val <= 0;
      pair_i_rdy <= 0;
      cnt <= cnt + 1;
    end
    // G1.x arrives; request the next word
    1: begin
      if (data_ram_read[READ_CYCLE]) begin
        pair_i_g1.x <= curr_data.dat;
        data_ram_sys_if.a <= data_ram_sys_if.a + 1;
        data_ram_read[0] <= 1;
        cnt <= cnt + 1;
      end
    end
    // G1.y arrives; next read starts at the G2 operand address
    2: begin
      if (data_ram_read[READ_CYCLE]) begin
        pair_i_g1.y <= curr_data.dat;
        data_ram_sys_if.a <= curr_inst.b;
        data_ram_read[0] <= 1;
        cnt <= cnt + 1;
      end
    end
    // G2 coordinates arrive one word per step
    [3:6]: begin
      if (data_ram_read[READ_CYCLE]) begin
        case (cnt)
          3: pair_i_g2.x[0] <= curr_data.dat;
          4: pair_i_g2.x[1] <= curr_data.dat;
          5: pair_i_g2.y[0] <= curr_data.dat;
          6: pair_i_g2.y[1] <= curr_data.dat;
        endcase
        data_ram_read[0] <= 1;
        if (cnt == 6) begin
          // Last operand word: set up the result address and start the engine
          data_ram_sys_if.a <= curr_inst.c;
          pair_i_val <= 1;
        end else begin
          data_ram_sys_if.a <= data_ram_sys_if.a + 1;
        end
        cnt <= cnt + 1;
      end
    end
    // Result write-back, one slice of pair_o_res per step
    [7:18]: begin
      if (pair_o_val) begin
        new_data.pt <= FE12;
        new_data.dat <= pair_o_res >> ((cnt-7)*DAT_BITS);
        data_ram_sys_if.we <= 1;
        // The first slice goes to curr_inst.c itself; later slices advance the address
        if (cnt > 7) begin
          data_ram_sys_if.a <= data_ram_sys_if.a + 1;
        end
        // Release the engine's output once the final slice has been taken
        if (cnt == 18) begin
          pair_i_rdy <= 1;
        end
        cnt <= cnt + 1;
      end
    end
    // Done: drop the ready strobe and move on
    19: begin
      pair_i_rdy <= 0;
      get_next_inst();
    end
  endcase
endtask
task task_send_interrupt();
case(cnt) inside
// Load the data

View File

@ -30,7 +30,7 @@ parameter P = bls12_381_pkg::P;
af_point_t G1 = {Gy, Gx};
fp2_af_point_t G2 = {G2y, G2x};
localparam CTL_BITS = 64;
localparam CTL_BITS = 36;
localparam CLK_PERIOD = 100;
@ -49,144 +49,12 @@ end
if_axi_stream #(.DAT_BYTS(($bits(af_point_t) + $bits(fp2_af_point_t)+7)/8), .CTL_BITS(CTL_BITS)) in_if(clk);
if_axi_stream #(.DAT_BYTS(($bits(FE12_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) out_if(clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_in_if[2:0](clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_out_if[2:0](clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_in_if[2:0] (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_out_if[2:0] (clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_in_if[2:0] (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_out_if[2:0] (clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE2_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_o_if[2:0](clk);
if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe2_i_if[2:0](clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE2_TYPE)), .CTL_BITS(CTL_BITS)) add_fe2_o_if[2:0](clk);
if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(CTL_BITS)) add_fe2_i_if[2:0](clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE2_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe2_o_if[2:0](clk);
if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe2_i_if[2:0](clk);
if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(CTL_BITS)) mnr_fe2_o_if[2:0](clk);
if_axi_stream #(.DAT_BITS($bits(FE2_TYPE)), .CTL_BITS(CTL_BITS)) mnr_fe2_i_if[2:0](clk);
if_axi_stream #(.DAT_BYTS((2*$bits(FE6_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) mul_fe6_o_if(clk);
if_axi_stream #(.DAT_BYTS(($bits(FE6_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) mul_fe6_i_if(clk);
if_axi_stream #(.DAT_BYTS((2*$bits(FE6_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) add_fe6_o_if(clk);
if_axi_stream #(.DAT_BYTS(($bits(FE6_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) add_fe6_i_if(clk);
if_axi_stream #(.DAT_BYTS((2*$bits(FE6_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) sub_fe6_o_if(clk);
if_axi_stream #(.DAT_BYTS(($bits(FE6_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) sub_fe6_i_if(clk);
if_axi_stream #(.DAT_BITS($bits(FE6_TYPE)), .CTL_BITS(CTL_BITS)) mnr_fe6_o_if(clk);
if_axi_stream #(.DAT_BITS($bits(FE6_TYPE)), .CTL_BITS(CTL_BITS)) mnr_fe6_i_if(clk);
if_axi_stream #(.DAT_BYTS((2*$bits(FE12_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) mul_fe12_o_if(clk);
if_axi_stream #(.DAT_BYTS(($bits(FE12_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) mul_fe12_i_if(clk);
if_axi_stream #(.DAT_BYTS((2*$bits(FE12_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) add_fe12_o_if(clk);
if_axi_stream #(.DAT_BYTS(($bits(FE12_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) add_fe12_i_if(clk);
if_axi_stream #(.DAT_BYTS((2*$bits(FE12_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) sub_fe12_o_if(clk);
if_axi_stream #(.DAT_BYTS(($bits(FE12_TYPE)+7)/8), .CTL_BITS(CTL_BITS)) sub_fe12_i_if(clk);
always_comb begin
add_fe12_o_if.reset_source();
add_fe12_i_if.rdy <= 0;
sub_fe12_o_if.reset_source();
sub_fe12_i_if.rdy <= 0;
end
ec_fe2_arithmetic #(
.FE_TYPE ( FE_TYPE ),
.FE2_TYPE ( FE2_TYPE ),
.CTL_BITS ( CTL_BITS ),
.OVR_WRT_BIT ( 0 )
)
ec_fe2_arithmetic (
.i_clk ( clk ),
.i_rst ( rst ),
.i_fp_mode ( 1'd0 ),
.o_mul_fe_if ( mul_fe_in_if[0] ),
.i_mul_fe_if ( mul_fe_out_if[0] ),
.o_add_fe_if ( add_fe_in_if[0] ),
.i_add_fe_if ( add_fe_out_if[0] ),
.o_sub_fe_if ( sub_fe_in_if[0] ),
.i_sub_fe_if ( sub_fe_out_if[0] ),
.o_mul_fe2_if ( mul_fe2_i_if[2] ),
.i_mul_fe2_if ( mul_fe2_o_if[2] ),
.o_add_fe2_if ( add_fe2_i_if[2] ),
.i_add_fe2_if ( add_fe2_o_if[2] ),
.o_sub_fe2_if ( sub_fe2_i_if[2] ),
.i_sub_fe2_if ( sub_fe2_o_if[2] )
);
ec_fe6_arithmetic #(
.FE2_TYPE ( FE2_TYPE ),
.FE6_TYPE ( FE6_TYPE ),
.OVR_WRT_BIT ( 8 ),
.CTL_BITS ( CTL_BITS )
)
ec_fe6_arithmetic (
.i_clk ( clk ),
.i_rst ( rst ),
.o_mul_fe2_if ( mul_fe2_o_if[0] ),
.i_mul_fe2_if ( mul_fe2_i_if[0] ),
.o_add_fe2_if ( add_fe2_o_if[0] ),
.i_add_fe2_if ( add_fe2_i_if[0] ),
.o_sub_fe2_if ( sub_fe2_o_if[0] ),
.i_sub_fe2_if ( sub_fe2_i_if[0] ),
.o_mnr_fe2_if ( mnr_fe2_i_if[0] ),
.i_mnr_fe2_if ( mnr_fe2_o_if[0] ),
.o_mul_fe6_if ( mul_fe6_i_if ),
.i_mul_fe6_if ( mul_fe6_o_if ),
.o_add_fe6_if ( add_fe6_i_if ),
.i_add_fe6_if ( add_fe6_o_if ),
.o_sub_fe6_if ( sub_fe6_i_if ),
.i_sub_fe6_if ( sub_fe6_o_if )
);
ec_fe12_arithmetic #(
.FE6_TYPE ( FE6_TYPE ),
.FE12_TYPE ( FE12_TYPE ),
.OVR_WRT_BIT ( 16 ),
.CTL_BITS ( CTL_BITS )
)
ec_fe12_arithmetic (
.i_clk ( clk ),
.i_rst ( rst ),
.o_mul_fe6_if ( mul_fe6_o_if ),
.i_mul_fe6_if ( mul_fe6_i_if ),
.o_add_fe6_if ( add_fe6_o_if ),
.i_add_fe6_if ( add_fe6_i_if ),
.o_sub_fe6_if ( sub_fe6_o_if ),
.i_sub_fe6_if ( sub_fe6_i_if ),
.o_mnr_fe6_if ( mnr_fe6_o_if ),
.i_mnr_fe6_if ( mnr_fe6_i_if ),
.o_mul_fe12_if ( mul_fe12_i_if ),
.i_mul_fe12_if ( mul_fe12_o_if ),
.o_add_fe12_if ( add_fe12_i_if ),
.i_add_fe12_if ( add_fe12_o_if ),
.o_sub_fe12_if ( sub_fe12_i_if ),
.i_sub_fe12_if ( sub_fe12_o_if )
);
fe2_mul_by_nonresidue #(
.FE_TYPE ( FE_TYPE )
)
fe2_mul_by_nonresidue (
.i_clk ( clk ),
.i_rst ( rst ),
.o_mnr_fe2_if ( mnr_fe2_o_if[2] ),
.i_mnr_fe2_if ( mnr_fe2_i_if[2] ),
.o_add_fe_if ( add_fe_in_if[1] ),
.i_add_fe_if ( add_fe_out_if[1] ),
.o_sub_fe_if ( sub_fe_in_if[1] ),
.i_sub_fe_if ( sub_fe_out_if[1] )
);
fe6_mul_by_nonresidue #(
.FE2_TYPE ( FE2_TYPE )
)
fe6_mul_by_nonresidue (
.i_clk ( clk ),
.i_rst ( rst ),
.o_mnr_fe6_if ( mnr_fe6_i_if ),
.i_mnr_fe6_if ( mnr_fe6_o_if ),
.o_mnr_fe2_if ( mnr_fe2_i_if[1] ),
.i_mnr_fe2_if ( mnr_fe2_o_if[1] )
);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_o_if(clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) mul_fe_i_if(clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_o_if (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) add_fe_i_if (clk);
if_axi_stream #(.DAT_BITS(2*$bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_o_if (clk);
if_axi_stream #(.DAT_BITS($bits(FE_TYPE)), .CTL_BITS(CTL_BITS)) sub_fe_i_if (clk);
ec_fp_mult_mod #(
.P ( P ),
@ -196,8 +64,8 @@ ec_fp_mult_mod #(
ec_fp_mult_mod (
.i_clk( clk ),
.i_rst( rst ),
.i_mul ( mul_fe_in_if[2] ),
.o_mul ( mul_fe_out_if[2] )
.i_mul ( mul_fe_o_if ),
.o_mul ( mul_fe_i_if )
);
adder_pipe # (
@ -209,8 +77,8 @@ adder_pipe # (
adder_pipe (
.i_clk ( clk ),
.i_rst ( rst ),
.i_add ( add_fe_in_if[2] ),
.o_add ( add_fe_out_if[2] )
.i_add ( add_fe_o_if ),
.o_add ( add_fe_i_if )
);
subtractor_pipe # (
@ -222,138 +90,15 @@ subtractor_pipe # (
subtractor_pipe (
.i_clk ( clk ),
.i_rst ( rst ),
.i_sub ( sub_fe_in_if[2] ),
.o_sub ( sub_fe_out_if[2] )
.i_sub ( sub_fe_o_if ),
.o_sub ( sub_fe_i_if )
);
resource_share # (
.NUM_IN ( 2 ),
.DAT_BITS ( 2*$bits(FE_TYPE) ),
.CTL_BITS ( CTL_BITS ),
.OVR_WRT_BIT ( 44 ),
.PIPELINE_IN ( 1 ),
.PIPELINE_OUT ( 1 )
bls12_381_pairing_wrapper #(
.CTL_BITS ( CTL_BITS ),
.OVR_WRT_BIT ( 0 )
)
resource_share_sub (
.i_clk ( clk ),
.i_rst ( rst ),
.i_axi ( sub_fe_in_if[1:0] ),
.o_res ( sub_fe_in_if[2] ),
.i_res ( sub_fe_out_if[2] ),
.o_axi ( sub_fe_out_if[1:0] )
);
resource_share # (
.NUM_IN ( 2 ),
.DAT_BITS ( 2*$bits(FE_TYPE) ),
.CTL_BITS ( CTL_BITS ),
.OVR_WRT_BIT ( 44 ),
.PIPELINE_IN ( 1 ),
.PIPELINE_OUT ( 1 )
)
resource_share_add (
.i_clk ( clk ),
.i_rst ( rst ),
.i_axi ( add_fe_in_if[1:0] ),
.o_res ( add_fe_in_if[2] ),
.i_res ( add_fe_out_if[2] ),
.o_axi ( add_fe_out_if[1:0] )
);
resource_share # (
.NUM_IN ( 2 ),
.DAT_BITS ( 2*$bits(FE_TYPE) ),
.CTL_BITS ( CTL_BITS ),
.OVR_WRT_BIT ( 44 ),
.PIPELINE_IN ( 1 ),
.PIPELINE_OUT ( 1 )
)
resource_share_mul (
.i_clk ( clk ),
.i_rst ( rst ),
.i_axi ( mul_fe_in_if[1:0] ),
.o_res ( mul_fe_in_if[2] ),
.i_res ( mul_fe_out_if[2] ),
.o_axi ( mul_fe_out_if[1:0] )
);
resource_share # (
.NUM_IN ( 2 ),
.DAT_BITS ( $bits(FE2_TYPE) ),
.CTL_BITS ( CTL_BITS ),
.OVR_WRT_BIT ( 42 ),
.PIPELINE_IN ( 1 ),
.PIPELINE_OUT ( 1 )
)
resource_share_mnr (
.i_clk ( clk ),
.i_rst ( rst ),
.i_axi ( mnr_fe2_i_if[1:0] ),
.o_res ( mnr_fe2_i_if[2] ),
.i_res ( mnr_fe2_o_if[2] ),
.o_axi ( mnr_fe2_o_if[1:0] )
);
resource_share # (
.NUM_IN ( 2 ),
.DAT_BITS ( 2*$bits(FE2_TYPE) ),
.CTL_BITS ( CTL_BITS ),
.OVR_WRT_BIT ( 40 ),
.PIPELINE_IN ( 1 ),
.PIPELINE_OUT ( 1 )
)
resource_share_fe2_add (
.i_clk ( clk ),
.i_rst ( rst ),
.i_axi ( add_fe2_o_if[1:0] ),
.o_res ( add_fe2_o_if[2] ),
.i_res ( add_fe2_i_if[2] ),
.o_axi ( add_fe2_i_if[1:0] )
);
resource_share # (
.NUM_IN ( 2 ),
.DAT_BITS ( 2*$bits(FE2_TYPE) ),
.CTL_BITS ( CTL_BITS ),
.OVR_WRT_BIT ( 40 ),
.PIPELINE_IN ( 1 ),
.PIPELINE_OUT ( 1 )
)
resource_share_fe2_sub (
.i_clk ( clk ),
.i_rst ( rst ),
.i_axi ( sub_fe2_o_if[1:0] ),
.o_res ( sub_fe2_o_if[2] ),
.i_res ( sub_fe2_i_if[2] ),
.o_axi ( sub_fe2_i_if[1:0] )
);
resource_share # (
.NUM_IN ( 2 ),
.DAT_BITS ( 2*$bits(FE2_TYPE) ),
.CTL_BITS ( CTL_BITS ),
.OVR_WRT_BIT ( 40 ),
.PIPELINE_IN ( 1 ),
.PIPELINE_OUT ( 1 )
)
resource_share_fe2_mul (
.i_clk ( clk ),
.i_rst ( rst ),
.i_axi ( mul_fe2_o_if[1:0] ),
.o_res ( mul_fe2_o_if[2] ),
.i_res ( mul_fe2_i_if[2] ),
.o_axi ( mul_fe2_i_if[1:0] )
);
bls12_381_pairing #(
.FE_TYPE ( FE_TYPE ),
.FE2_TYPE ( FE2_TYPE ),
.FE12_TYPE ( FE12_TYPE ),
.CTL_BITS ( CTL_BITS ),
.OVR_WRT_BIT ( 24 )
)
bls12_381_pairing (
bls12_381_pairing_wrapper (
.i_clk ( clk ),
.i_rst ( rst ),
.i_val ( in_if.val ),
@ -363,16 +108,12 @@ bls12_381_pairing (
.o_val ( out_if.val ),
.i_rdy ( out_if.rdy ),
.o_fe12 ( out_if.dat ),
.o_mul_fe2_if ( mul_fe2_o_if[1] ),
.i_mul_fe2_if ( mul_fe2_i_if[1] ),
.o_add_fe2_if ( add_fe2_o_if[1] ),
.i_add_fe2_if ( add_fe2_i_if[1] ),
.o_sub_fe2_if ( sub_fe2_o_if[1] ),
.i_sub_fe2_if ( sub_fe2_i_if[1] ),
.o_mul_fe12_if ( mul_fe12_o_if ),
.i_mul_fe12_if ( mul_fe12_i_if ),
.o_mul_fe_if ( mul_fe_in_if[1] ),
.i_mul_fe_if ( mul_fe_out_if[1] )
.o_mul_fe_if ( mul_fe_o_if ),
.i_mul_fe_if ( mul_fe_i_if ),
.o_add_fe_if ( add_fe_o_if ),
.i_add_fe_if ( add_fe_i_if ),
.o_sub_fe_if ( sub_fe_o_if ),
.i_sub_fe_if ( sub_fe_i_if )
);
always_comb begin