Add FP16ALT support to THMULTI DivSqrt (#12)

* Add FP16ALT support to THMULTI DivSqrt
pulp-platform · Jun 26, 2024 · de4f932 · de4f932
1 parent 7c7d9b6
commit de4f932
Show file tree

Hide file tree

Showing 15 changed files with 1,811 additions and 91 deletions.
diff --git a/docs/CHANGELOG-PULP.md b/docs/CHANGELOG-PULP.md
@@ -7,6 +7,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
 In this sense, we interpret the "Public API" of a hardware module as its port/parameter list.
 Versions of the IP in the same major release are "pin-compatible" with each other. Minor releases are permitted to add new parameters as long as their default bindings ensure backwards compatibility.
 
+## [pulp-v0.2.2] - 2024-06-24
+
+### Added
+- Add FP16ALT support to THMULTI DivSqrt
+
 ## [pulp-v0.2.1] - 2024-06-07
 
 ### Fix

diff --git a/docs/README.md b/docs/README.md
@@ -366,7 +366,7 @@ It is of type `divsqrt_unit_t`, which is defined as:
 typedef enum logic[1:0] {
   PULP,    // "PULP" instantiates the PULP DivSqrt unit supports FP64, FP32, FP16, FP16ALT, FP8 and SIMD operations
   TH32,    // "TH32" instantiates the E906 DivSqrt unit supports only FP32 (no SIMD support)
-  THMULTI  // "THMULTI" instantiates the C910 DivSqrt unit supports FP64, FP32, FP16 and SIMD operations
+  THMULTI  // "THMULTI" instantiates the C910 DivSqrt unit supports FP64, FP32, FP16, FP16ALT and SIMD operations
 } divsqrt_unit_t;
 ```
 

diff --git a/src/fpnew_divsqrt_th_64_multi.sv b/src/fpnew_divsqrt_th_64_multi.sv
@@ -144,31 +144,34 @@ module fpnew_divsqrt_th_64_multi #(
   // -----------------
   // Input processing
   // -----------------
-  logic [1:0] divsqrt_fmt;
+  logic [3:0] divsqrt_fmt;
 
   // Translate fpnew formats into divsqrt formats
   if(WIDTH == 64) begin : translate_fmt_64_bits
     always_comb begin : translate_fmt
       unique case (dst_fmt_q)
-        fpnew_pkg::FP64:    divsqrt_fmt = 2'b10;
-        fpnew_pkg::FP32:    divsqrt_fmt = 2'b01;
-        fpnew_pkg::FP16:    divsqrt_fmt = 2'b00;
-        default:            divsqrt_fmt = 2'b10; // 64 bit max width
+        fpnew_pkg::FP64:    divsqrt_fmt = 4'b1000;
+        fpnew_pkg::FP32:    divsqrt_fmt = 4'b0100;
+        fpnew_pkg::FP16:    divsqrt_fmt = 4'b0010;
+        fpnew_pkg::FP16ALT: divsqrt_fmt = 4'b0001;
+        default:            divsqrt_fmt = 4'b1000; // 64 bit max width
       endcase
     end
   end else if(WIDTH == 32) begin : translate_fmt_32_bits
     always_comb begin : translate_fmt
       unique case (dst_fmt_q)
-        fpnew_pkg::FP32:    divsqrt_fmt = 2'b01;
-        fpnew_pkg::FP16:    divsqrt_fmt = 2'b00;
-        default:            divsqrt_fmt = 2'b01; // 32 bit max width
+        fpnew_pkg::FP32:    divsqrt_fmt = 4'b0100;
+        fpnew_pkg::FP16:    divsqrt_fmt = 4'b0010;
+        fpnew_pkg::FP16ALT: divsqrt_fmt = 4'b0001;
+        default:            divsqrt_fmt = 4'b0100; // 32 bit max width
       endcase
     end
   end else if(WIDTH == 16) begin : translate_fmt_16_bits
     always_comb begin : translate_fmt
       unique case (dst_fmt_q)
-        fpnew_pkg::FP16:    divsqrt_fmt = 2'b00;
-        default:            divsqrt_fmt = 2'b00; // 16 bit max width
+        fpnew_pkg::FP16:    divsqrt_fmt = 4'b0010;
+        fpnew_pkg::FP16ALT: divsqrt_fmt = 4'b0001;
+        default:            divsqrt_fmt = 4'b0010; // 16 bit max width
       endcase
     end
   end else begin
@@ -298,7 +301,7 @@ module fpnew_divsqrt_th_64_multi #(
 
   // Regs to save current instruction
   fpnew_pkg::roundmode_e rm_q;
-  logic[1:0] divsqrt_fmt_q;
+  logic[3:0] divsqrt_fmt_q;
   fpnew_pkg::operation_e divsqrt_op_q;
   logic div_op, sqrt_op;
   logic [WIDTH-1:0] srcf0_q, srcf1_q;
@@ -314,15 +317,15 @@ module fpnew_divsqrt_th_64_multi #(
   // NaN-box inputs with max WIDTH
   if(WIDTH == 64) begin : gen_fmt_64_bits
     always_comb begin : NaN_box_inputs
-      if(divsqrt_fmt_q == 2'b10) begin // 64-bit
+      if(divsqrt_fmt_q == 4'b1000) begin // 64-bit
         srcf0[63:0] = srcf0_q[63:0];
         srcf1[63:0] = srcf1_q[63:0];
-      end else if(divsqrt_fmt_q == 2'b01) begin // 32-bit
+      end else if(divsqrt_fmt_q == 4'b0100) begin // 32-bit
         srcf0[63:32] = '1;
         srcf1[63:32] = '1;
         srcf0[31:0] = srcf0_q[31:0];
         srcf1[31:0] = srcf1_q[31:0];
-      end else if(divsqrt_fmt_q == 2'b00) begin //16-bit
+      end else if((divsqrt_fmt_q == 4'b0010) || (divsqrt_fmt_q == 4'b0001)) begin //16-bit
         srcf0[63:16] = '1;
         srcf1[63:16] = '1;
         srcf0[15:0] = srcf0_q[15:0];
@@ -334,12 +337,12 @@ module fpnew_divsqrt_th_64_multi #(
     end
   end else if (WIDTH == 32) begin : gen_fmt_32_bits
     always_comb begin : NaN_box_inputs
-      if(divsqrt_fmt_q == 2'b01) begin // 32-bit
+      if(divsqrt_fmt_q == 4'b0100) begin // 32-bit
         srcf0[63:32] = '1;
         srcf1[63:32] = '1;
         srcf0[31:0] = srcf0_q[31:0];
         srcf1[31:0] = srcf1_q[31:0];
-      end else if(divsqrt_fmt_q == 2'b00) begin // 16-bit
+      end else if((divsqrt_fmt_q == 4'b0010) || (divsqrt_fmt_q == 4'b0001)) begin // 16-bit
         srcf0[63:16] = '1;
         srcf1[63:16] = '1;
         srcf0[15:0] = srcf0_q[15:0];
@@ -351,7 +354,7 @@ module fpnew_divsqrt_th_64_multi #(
     end
   end else if (WIDTH == 16) begin : gen_fmt_16_bits
     always_comb begin : NaN_box_inputs
-      if(divsqrt_fmt_q == 2'b00) begin // 16-bit
+      if((divsqrt_fmt_q == 4'b0010) || (divsqrt_fmt_q == 4'b0001)) begin // 16-bit
         srcf0[63:16] = '1;
         srcf1[63:16] = '1;
         srcf0[15:0] = srcf0_q[15:0];
@@ -390,7 +393,7 @@ module fpnew_divsqrt_th_64_multi #(
     .dp_vfdsu_fdiv_gateclk_issue    ( 1'b1                      ), // Local clock enable (same as above)
     .dp_vfdsu_idu_fdiv_issue        ( op_starting               ), // 1. Issue fdiv (FSM in ctrl)
     .forever_cpuclk                 ( clk_i                     ), // Clock input
-    .idu_vfpu_rf_pipex_func         ( {3'b0, divsqrt_fmt_q, 13'b0 ,sqrt_op, div_op} ), // Defines format (bits 16,15) and operation (bits 1,0)
+    .idu_vfpu_rf_pipex_func         ( {3'b0, divsqrt_fmt_q, 11'b0 ,sqrt_op, div_op} ), // Defines format (bits 16:13) and operation (bits 1,0)
     .idu_vfpu_rf_pipex_gateclk_sel  ( func_sel                  ), // 2. Select func
     .pad_yy_icg_scan_en             ( 1'b0                      ), // SE signal for the redundant clock gating module
     .rtu_yy_xx_flush                ( flush_i                   ), // Flush

diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv
@@ -68,9 +68,9 @@ module fpnew_opgroup_multifmt_slice #(
     if ((DivSqrtSel == fpnew_pkg::TH32) && !((FpFmtConfig[0] == 1) && (FpFmtConfig[1:NUM_FORMATS-1] == '0))) begin
       $fatal(1, "T-Head-based DivSqrt unit supported only in FP32-only configurations. \
 Set DivSqrtSel = THMULTI or DivSqrtSel = PULP to use a multi-format divider");
-    end else if ((DivSqrtSel == fpnew_pkg::THMULTI) && (FpFmtConfig[3] == 1'b1 || FpFmtConfig[4] == 1'b1 || FpFmtConfig[5] == 1'b1)) begin
+    end else if ((DivSqrtSel == fpnew_pkg::THMULTI) && (FpFmtConfig[3] == 1'b1 || FpFmtConfig[5] == 1'b1)) begin
       $warning("The DivSqrt unit of C910 (instantiated by DivSqrtSel = THMULTI) does not support \
-FP16alt, FP8, FP8alt. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP16alt, FP8, FP8alt.");
+FP8, FP8alt. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8, FP8alt.");
     end
   end
 

diff --git a/src/fpnew_pkg.sv b/src/fpnew_pkg.sv
@@ -136,7 +136,7 @@ package fpnew_pkg;
   typedef enum logic[1:0] {
     PULP,    // "PULP" instantiates the PULP DivSqrt unit supports FP64, FP32, FP16, FP16ALT, FP8 and SIMD operations
     TH32,    // "TH32" instantiates the E906 DivSqrt unit supports only FP32 (no SIMD support)
-    THMULTI  // "THMULTI" instantiates the C910 DivSqrt unit supports FP64, FP32, FP16 and SIMD operations
+    THMULTI  // "THMULTI" instantiates the C910 DivSqrt unit supports FP64, FP32, FP16, FP16ALT and SIMD operations
   } divsqrt_unit_t;
 
   // -------------------
@@ -454,7 +454,7 @@ package fpnew_pkg;
     // Returns the maximum number of lanes in the FPU according to width, format config and vectors
   function automatic int unsigned num_divsqrt_lanes(int unsigned width, fmt_logic_t cfg, logic vec, divsqrt_unit_t DivSqrtSel);
     automatic fmt_logic_t cfg_tmp;
-    cfg_tmp = (DivSqrtSel == THMULTI) ? cfg & 6'b111000 : cfg;
+    cfg_tmp = (DivSqrtSel == THMULTI) ? cfg & 6'b111010 : cfg;
     return vec ? width / min_fp_width(cfg_tmp) : 1; // if no vectors, only one lane
   endfunction
 

diff --git a/vendor/openc910.vendor.hjson b/vendor/openc910.vendor.hjson
@@ -10,6 +10,8 @@
     rev: "e0c4ad8ec7f8c70f649d826ebd6c949086453272"
   }
 
+  patch_dir: "patches/openc910"
+
   exclude_from_upstream: [
     "doc",
     "smart_run",

diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ctrl.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ctrl.v
@@ -26,6 +26,8 @@ module ct_vfdsu_ctrl(
   ex1_double,
   ex1_pipedown,
   ex1_single,
+  ex1_half,
+  ex1_bfloat,
   ex2_data_clk,
   ex2_pipedown,
   ex2_srt_first_round,
@@ -43,6 +45,8 @@ module ct_vfdsu_ctrl(
   vfdsu_dp_inst_wb_req,
   vfdsu_ex2_double,
   vfdsu_ex2_single,
+  vfdsu_ex2_half,
+  vfdsu_ex2_bfloat,
   vfdsu_ifu_debug_ex2_wait,
   vfdsu_ifu_debug_idle,
   vfdsu_ifu_debug_pipe_busy
@@ -57,13 +61,17 @@ input          dp_vfdsu_fdiv_gateclk_issue;
 input          dp_vfdsu_idu_fdiv_issue;    
 input          ex1_double;                 
 input          ex1_single;                 
+input          ex1_half;
+input          ex1_bfloat;
 input          forever_cpuclk;             
 input          pad_yy_icg_scan_en;         
 input          rtu_yy_xx_flush;            
 input          srt_ctrl_rem_zero;          
 input          srt_ctrl_skip_srt;          
 input          vfdsu_ex2_double;           
 input          vfdsu_ex2_single;           
+input          vfdsu_ex2_half;
+input          vfdsu_ex2_bfloat;
 output         ex1_data_clk;               
 output         ex1_pipedown;               
 output         ex2_data_clk;               
@@ -106,6 +114,8 @@ wire           ex1_data_clk_en;
 wire           ex1_double;                 
 wire           ex1_pipedown;               
 wire           ex1_single;                 
+wire           ex1_half;
+wire           ex1_bfloat;
 wire           ex2_data_clk;               
 wire           ex2_data_clk_en;            
 wire           ex2_pipe_clk;               
@@ -137,6 +147,8 @@ wire           vfdsu_dp_fdiv_busy;
 wire           vfdsu_dp_inst_wb_req;       
 wire           vfdsu_ex2_double;           
 wire           vfdsu_ex2_single;           
+wire           vfdsu_ex2_half;
+wire           vfdsu_ex2_bfloat;
 wire           vfdsu_ex2_vld;              
 wire           vfdsu_ifu_debug_ex2_wait;   
 wire           vfdsu_ifu_debug_idle;       
@@ -244,8 +256,9 @@ end
 //For Double, initial is 5'b11100('d28), calculate 29 round
 //For Single, initial is 5'b01110('d14), calculate 15 round
 assign srt_cnt_ini[4:0] = (ex1_double) ? 5'b01101 :
-                           ex1_single  ? 5'b00110
-                                       : 5'b00011;
+                          (ex1_single) ? 5'b00110 :
+                          (ex1_half)   ? 5'b00011
+                                       : 5'b00010;
 
 //vfdsu ex2 pipedown signal
 assign ex2_pipedown = srt_last_round && div_st_ex2;
@@ -277,7 +290,9 @@ assign srt_secd_round  = ex2_srt_secd_round;
 
 assign ex2_srt_secd_round_pre  = srt_sm_on && srt_secd_round_pre;
 assign srt_secd_round_pre      = vfdsu_ex2_double ? srt_cnt[4:0]==5'b01101 : 
-                                 vfdsu_ex2_single ? srt_cnt[4:0]==5'b00110 : srt_cnt[4:0] == 5'b00011;
+                                 vfdsu_ex2_single ? srt_cnt[4:0]==5'b00110 :
+                                 vfdsu_ex2_half   ? srt_cnt[4:0]==5'b00011
+                                                  : srt_cnt[4:0]==5'b00010;
 
 //==========================================================
 //              EX3 Stage Control Signal

diff --git a/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_double.v b/vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_double.v
@@ -24,6 +24,8 @@ module ct_vfdsu_double(
   ex1_pipedown,
   ex1_scalar,
   ex1_single,
+  ex1_half,
+  ex1_bfloat,
   ex1_sqrt,
   ex1_src0,
   ex1_src1,
@@ -52,6 +54,8 @@ input           ex1_double;
 input           ex1_pipedown;                         
 input           ex1_scalar;                           
 input           ex1_single;                           
+input           ex1_half;
+input           ex1_bfloat;
 input           ex1_sqrt;                             
 input   [63:0]  ex1_src0;                             
 input   [63:0]  ex1_src1;                             
@@ -83,6 +87,8 @@ wire            ex1_pipedown;
 wire    [59:0]  ex1_remainder;                        
 wire            ex1_scalar;                           
 wire            ex1_single;                           
+wire            ex1_half;
+wire            ex1_bfloat;
 wire            ex1_sqrt;                             
 wire    [63:0]  ex1_src0;                             
 wire    [63:0]  ex1_src1;                             
@@ -116,12 +122,15 @@ wire            vfdsu_ex2_result_sign;
 wire            vfdsu_ex2_result_zero;                
 wire    [2 :0]  vfdsu_ex2_rm;                         
 wire            vfdsu_ex2_single;                     
+wire            vfdsu_ex2_half;
+wire            vfdsu_ex2_bfloat;
 wire            vfdsu_ex2_sqrt;                       
 wire            vfdsu_ex2_srt_skip;                   
 wire    [12:0]  vfdsu_ex3_doub_expnt_rst;             
 wire            vfdsu_ex3_double;                     
 wire            vfdsu_ex3_dz;                         
 wire    [12:0]  vfdsu_ex3_half_expnt_rst;             
+wire    [12:0]  vfdsu_ex3_bfloat_expnt_rst;
 wire            vfdsu_ex3_id_srt_skip;                
 wire            vfdsu_ex3_nv;                         
 wire            vfdsu_ex3_of;                         
@@ -141,6 +150,8 @@ wire    [2 :0]  vfdsu_ex3_rm;
 wire            vfdsu_ex3_rslt_denorm;                
 wire    [8 :0]  vfdsu_ex3_sing_expnt_rst;             
 wire            vfdsu_ex3_single;                     
+wire            vfdsu_ex3_half;
+wire            vfdsu_ex3_bfloat;
 wire            vfdsu_ex3_uf;                         
 wire            vfdsu_ex4_denorm_to_tiny_frac;        
 wire            vfdsu_ex4_double;                     
@@ -164,6 +175,8 @@ wire            vfdsu_ex4_result_sign;
 wire            vfdsu_ex4_result_zero;                
 wire            vfdsu_ex4_rslt_denorm;                
 wire            vfdsu_ex4_single;                     
+wire            vfdsu_ex4_half;
+wire            vfdsu_ex4_bfloat;
 wire            vfdsu_ex4_uf;                         
 wire            vfpu_yy_xx_dqnan;                     
 wire    [2 :0]  vfpu_yy_xx_rm;                        
@@ -181,6 +194,8 @@ ct_vfdsu_prepare  x_ct_vfdsu_prepare (
   .ex1_remainder         (ex1_remainder        ),
   .ex1_scalar            (ex1_scalar           ),
   .ex1_single            (ex1_single           ),
+  .ex1_half              (ex1_half             ),
+  .ex1_bfloat            (ex1_bfloat           ),
   .ex1_sqrt              (ex1_sqrt             ),
   .ex1_src0              (ex1_src0             ),
   .ex1_src1              (ex1_src1             ),
@@ -204,6 +219,8 @@ ct_vfdsu_prepare  x_ct_vfdsu_prepare (
   .vfdsu_ex2_result_zero (vfdsu_ex2_result_zero),
   .vfdsu_ex2_rm          (vfdsu_ex2_rm         ),
   .vfdsu_ex2_single      (vfdsu_ex2_single     ),
+  .vfdsu_ex2_half        (vfdsu_ex2_half       ),
+  .vfdsu_ex2_bfloat      (vfdsu_ex2_bfloat     ),
   .vfdsu_ex2_sqrt        (vfdsu_ex2_sqrt       ),
   .vfdsu_ex2_srt_skip    (vfdsu_ex2_srt_skip   ),
   .vfpu_yy_xx_dqnan      (vfpu_yy_xx_dqnan     ),
@@ -246,12 +263,15 @@ ct_vfdsu_srt  x_ct_vfdsu_srt (
   .vfdsu_ex2_result_zero                 (vfdsu_ex2_result_zero                ),
   .vfdsu_ex2_rm                          (vfdsu_ex2_rm                         ),
   .vfdsu_ex2_single                      (vfdsu_ex2_single                     ),
+  .vfdsu_ex2_half                        (vfdsu_ex2_half                       ),
+  .vfdsu_ex2_bfloat                      (vfdsu_ex2_bfloat                     ),
   .vfdsu_ex2_sqrt                        (vfdsu_ex2_sqrt                       ),
   .vfdsu_ex2_srt_skip                    (vfdsu_ex2_srt_skip                   ),
   .vfdsu_ex3_doub_expnt_rst              (vfdsu_ex3_doub_expnt_rst             ),
   .vfdsu_ex3_double                      (vfdsu_ex3_double                     ),
   .vfdsu_ex3_dz                          (vfdsu_ex3_dz                         ),
   .vfdsu_ex3_half_expnt_rst              (vfdsu_ex3_half_expnt_rst             ),
+  .vfdsu_ex3_bfloat_expnt_rst            (vfdsu_ex3_bfloat_expnt_rst           ),
   .vfdsu_ex3_id_srt_skip                 (vfdsu_ex3_id_srt_skip                ),
   .vfdsu_ex3_nv                          (vfdsu_ex3_nv                         ),
   .vfdsu_ex3_of                          (vfdsu_ex3_of                         ),
@@ -271,6 +291,8 @@ ct_vfdsu_srt  x_ct_vfdsu_srt (
   .vfdsu_ex3_rslt_denorm                 (vfdsu_ex3_rslt_denorm                ),
   .vfdsu_ex3_sing_expnt_rst              (vfdsu_ex3_sing_expnt_rst             ),
   .vfdsu_ex3_single                      (vfdsu_ex3_single                     ),
+  .vfdsu_ex3_half                        (vfdsu_ex3_half                       ),
+  .vfdsu_ex3_bfloat                      (vfdsu_ex3_bfloat                     ),
   .vfdsu_ex3_uf                          (vfdsu_ex3_uf                         )
 );
 
@@ -288,6 +310,7 @@ ct_vfdsu_round  x_ct_vfdsu_round (
   .vfdsu_ex3_double                      (vfdsu_ex3_double                     ),
   .vfdsu_ex3_dz                          (vfdsu_ex3_dz                         ),
   .vfdsu_ex3_half_expnt_rst              (vfdsu_ex3_half_expnt_rst             ),
+  .vfdsu_ex3_bfloat_expnt_rst            (vfdsu_ex3_bfloat_expnt_rst           ),
   .vfdsu_ex3_id_srt_skip                 (vfdsu_ex3_id_srt_skip                ),
   .vfdsu_ex3_nv                          (vfdsu_ex3_nv                         ),
   .vfdsu_ex3_of                          (vfdsu_ex3_of                         ),
@@ -307,6 +330,8 @@ ct_vfdsu_round  x_ct_vfdsu_round (
   .vfdsu_ex3_rslt_denorm                 (vfdsu_ex3_rslt_denorm                ),
   .vfdsu_ex3_sing_expnt_rst              (vfdsu_ex3_sing_expnt_rst             ),
   .vfdsu_ex3_single                      (vfdsu_ex3_single                     ),
+  .vfdsu_ex3_half                        (vfdsu_ex3_half                       ),
+  .vfdsu_ex3_bfloat                      (vfdsu_ex3_bfloat                     ),
   .vfdsu_ex3_uf                          (vfdsu_ex3_uf                         ),
   .vfdsu_ex4_denorm_to_tiny_frac         (vfdsu_ex4_denorm_to_tiny_frac        ),
   .vfdsu_ex4_double                      (vfdsu_ex4_double                     ),
@@ -330,6 +355,8 @@ ct_vfdsu_round  x_ct_vfdsu_round (
   .vfdsu_ex4_result_zero                 (vfdsu_ex4_result_zero                ),
   .vfdsu_ex4_rslt_denorm                 (vfdsu_ex4_rslt_denorm                ),
   .vfdsu_ex4_single                      (vfdsu_ex4_single                     ),
+  .vfdsu_ex4_half                        (vfdsu_ex4_half                       ),
+  .vfdsu_ex4_bfloat                      (vfdsu_ex4_bfloat                     ),
   .vfdsu_ex4_uf                          (vfdsu_ex4_uf                         )
 );
 
@@ -359,6 +386,8 @@ ct_vfdsu_pack  x_ct_vfdsu_pack (
   .vfdsu_ex4_result_zero         (vfdsu_ex4_result_zero        ),
   .vfdsu_ex4_rslt_denorm         (vfdsu_ex4_rslt_denorm        ),
   .vfdsu_ex4_single              (vfdsu_ex4_single             ),
+  .vfdsu_ex4_half                (vfdsu_ex4_half               ),
+  .vfdsu_ex4_bfloat              (vfdsu_ex4_bfloat             ),
   .vfdsu_ex4_uf                  (vfdsu_ex4_uf                 )
 );