Skip to content

Commit

Permalink
Add FP16ALT support to THMULTI DivSqrt (#12)
Browse files Browse the repository at this point in the history
* Add FP16ALT support to THMULTI DivSqrt
  • Loading branch information
lucabertaccini authored Jun 26, 2024
1 parent 7c7d9b6 commit de4f932
Show file tree
Hide file tree
Showing 15 changed files with 1,811 additions and 91 deletions.
5 changes: 5 additions & 0 deletions docs/CHANGELOG-PULP.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a
In this sense, we interpret the "Public API" of a hardware module as its port/parameter list.
Versions of the IP in the same major release are "pin-compatible" with each other. Minor releases are permitted to add new parameters as long as their default bindings ensure backwards compatibility.

## [pulp-v0.2.2] - 2024-06-24

### Added
- Add FP16ALT support to THMULTI DivSqrt

## [pulp-v0.2.1] - 2024-06-07

### Fix
Expand Down
2 changes: 1 addition & 1 deletion docs/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ It is of type `divsqrt_unit_t`, which is defined as:
typedef enum logic[1:0] {
PULP, // "PULP" instantiates the PULP DivSqrt unit supports FP64, FP32, FP16, FP16ALT, FP8 and SIMD operations
TH32, // "TH32" instantiates the E906 DivSqrt unit supports only FP32 (no SIMD support)
THMULTI // "THMULTI" instantiates the C910 DivSqrt unit supports FP64, FP32, FP16 and SIMD operations
THMULTI // "THMULTI" instantiates the C910 DivSqrt unit supports FP64, FP32, FP16, FP16ALT and SIMD operations
} divsqrt_unit_t;
```

Expand Down
39 changes: 21 additions & 18 deletions src/fpnew_divsqrt_th_64_multi.sv
Original file line number Diff line number Diff line change
Expand Up @@ -144,31 +144,34 @@ module fpnew_divsqrt_th_64_multi #(
// -----------------
// Input processing
// -----------------
logic [1:0] divsqrt_fmt;
logic [3:0] divsqrt_fmt;

// Translate fpnew formats into divsqrt formats
if(WIDTH == 64) begin : translate_fmt_64_bits
always_comb begin : translate_fmt
unique case (dst_fmt_q)
fpnew_pkg::FP64: divsqrt_fmt = 2'b10;
fpnew_pkg::FP32: divsqrt_fmt = 2'b01;
fpnew_pkg::FP16: divsqrt_fmt = 2'b00;
default: divsqrt_fmt = 2'b10; // 64 bit max width
fpnew_pkg::FP64: divsqrt_fmt = 4'b1000;
fpnew_pkg::FP32: divsqrt_fmt = 4'b0100;
fpnew_pkg::FP16: divsqrt_fmt = 4'b0010;
fpnew_pkg::FP16ALT: divsqrt_fmt = 4'b0001;
default: divsqrt_fmt = 4'b1000; // 64 bit max width
endcase
end
end else if(WIDTH == 32) begin : translate_fmt_32_bits
always_comb begin : translate_fmt
unique case (dst_fmt_q)
fpnew_pkg::FP32: divsqrt_fmt = 2'b01;
fpnew_pkg::FP16: divsqrt_fmt = 2'b00;
default: divsqrt_fmt = 2'b01; // 32 bit max width
fpnew_pkg::FP32: divsqrt_fmt = 4'b0100;
fpnew_pkg::FP16: divsqrt_fmt = 4'b0010;
fpnew_pkg::FP16ALT: divsqrt_fmt = 4'b0001;
default: divsqrt_fmt = 4'b0100; // 32 bit max width
endcase
end
end else if(WIDTH == 16) begin : translate_fmt_16_bits
always_comb begin : translate_fmt
unique case (dst_fmt_q)
fpnew_pkg::FP16: divsqrt_fmt = 2'b00;
default: divsqrt_fmt = 2'b00; // 16 bit max width
fpnew_pkg::FP16: divsqrt_fmt = 4'b0010;
fpnew_pkg::FP16ALT: divsqrt_fmt = 4'b0001;
default: divsqrt_fmt = 4'b0010; // 16 bit max width
endcase
end
end else begin
Expand Down Expand Up @@ -298,7 +301,7 @@ module fpnew_divsqrt_th_64_multi #(

// Regs to save current instruction
fpnew_pkg::roundmode_e rm_q;
logic[1:0] divsqrt_fmt_q;
logic[3:0] divsqrt_fmt_q;
fpnew_pkg::operation_e divsqrt_op_q;
logic div_op, sqrt_op;
logic [WIDTH-1:0] srcf0_q, srcf1_q;
Expand All @@ -314,15 +317,15 @@ module fpnew_divsqrt_th_64_multi #(
// NaN-box inputs with max WIDTH
if(WIDTH == 64) begin : gen_fmt_64_bits
always_comb begin : NaN_box_inputs
if(divsqrt_fmt_q == 2'b10) begin // 64-bit
if(divsqrt_fmt_q == 4'b1000) begin // 64-bit
srcf0[63:0] = srcf0_q[63:0];
srcf1[63:0] = srcf1_q[63:0];
end else if(divsqrt_fmt_q == 2'b01) begin // 32-bit
end else if(divsqrt_fmt_q == 4'b0100) begin // 32-bit
srcf0[63:32] = '1;
srcf1[63:32] = '1;
srcf0[31:0] = srcf0_q[31:0];
srcf1[31:0] = srcf1_q[31:0];
end else if(divsqrt_fmt_q == 2'b00) begin //16-bit
end else if((divsqrt_fmt_q == 4'b0010) || (divsqrt_fmt_q == 4'b0001)) begin //16-bit
srcf0[63:16] = '1;
srcf1[63:16] = '1;
srcf0[15:0] = srcf0_q[15:0];
Expand All @@ -334,12 +337,12 @@ module fpnew_divsqrt_th_64_multi #(
end
end else if (WIDTH == 32) begin : gen_fmt_32_bits
always_comb begin : NaN_box_inputs
if(divsqrt_fmt_q == 2'b01) begin // 32-bit
if(divsqrt_fmt_q == 4'b0100) begin // 32-bit
srcf0[63:32] = '1;
srcf1[63:32] = '1;
srcf0[31:0] = srcf0_q[31:0];
srcf1[31:0] = srcf1_q[31:0];
end else if(divsqrt_fmt_q == 2'b00) begin // 16-bit
end else if((divsqrt_fmt_q == 4'b0010) || (divsqrt_fmt_q == 4'b0001)) begin // 16-bit
srcf0[63:16] = '1;
srcf1[63:16] = '1;
srcf0[15:0] = srcf0_q[15:0];
Expand All @@ -351,7 +354,7 @@ module fpnew_divsqrt_th_64_multi #(
end
end else if (WIDTH == 16) begin : gen_fmt_16_bits
always_comb begin : NaN_box_inputs
if(divsqrt_fmt_q == 2'b00) begin // 16-bit
if((divsqrt_fmt_q == 4'b0010) || (divsqrt_fmt_q == 4'b0001)) begin // 16-bit
srcf0[63:16] = '1;
srcf1[63:16] = '1;
srcf0[15:0] = srcf0_q[15:0];
Expand Down Expand Up @@ -390,7 +393,7 @@ module fpnew_divsqrt_th_64_multi #(
.dp_vfdsu_fdiv_gateclk_issue ( 1'b1 ), // Local clock enable (same as above)
.dp_vfdsu_idu_fdiv_issue ( op_starting ), // 1. Issue fdiv (FSM in ctrl)
.forever_cpuclk ( clk_i ), // Clock input
.idu_vfpu_rf_pipex_func ( {3'b0, divsqrt_fmt_q, 13'b0 ,sqrt_op, div_op} ), // Defines format (bits 16,15) and operation (bits 1,0)
.idu_vfpu_rf_pipex_func ( {3'b0, divsqrt_fmt_q, 11'b0 ,sqrt_op, div_op} ), // Defines format (bits 16:13) and operation (bits 1,0)
.idu_vfpu_rf_pipex_gateclk_sel ( func_sel ), // 2. Select func
.pad_yy_icg_scan_en ( 1'b0 ), // SE signal for the redundant clock gating module
.rtu_yy_xx_flush ( flush_i ), // Flush
Expand Down
4 changes: 2 additions & 2 deletions src/fpnew_opgroup_multifmt_slice.sv
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,9 @@ module fpnew_opgroup_multifmt_slice #(
if ((DivSqrtSel == fpnew_pkg::TH32) && !((FpFmtConfig[0] == 1) && (FpFmtConfig[1:NUM_FORMATS-1] == '0))) begin
$fatal(1, "T-Head-based DivSqrt unit supported only in FP32-only configurations. \
Set DivSqrtSel = THMULTI or DivSqrtSel = PULP to use a multi-format divider");
end else if ((DivSqrtSel == fpnew_pkg::THMULTI) && (FpFmtConfig[3] == 1'b1 || FpFmtConfig[4] == 1'b1 || FpFmtConfig[5] == 1'b1)) begin
end else if ((DivSqrtSel == fpnew_pkg::THMULTI) && (FpFmtConfig[3] == 1'b1 || FpFmtConfig[5] == 1'b1)) begin
$warning("The DivSqrt unit of C910 (instantiated by DivSqrtSel = THMULTI) does not support \
FP16alt, FP8, FP8alt. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP16alt, FP8, FP8alt.");
FP8, FP8alt. Please use the PULP DivSqrt unit when in need of div/sqrt operations on FP8, FP8alt.");
end
end

Expand Down
4 changes: 2 additions & 2 deletions src/fpnew_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ package fpnew_pkg;
typedef enum logic[1:0] {
PULP, // "PULP" instantiates the PULP DivSqrt unit supports FP64, FP32, FP16, FP16ALT, FP8 and SIMD operations
TH32, // "TH32" instantiates the E906 DivSqrt unit supports only FP32 (no SIMD support)
THMULTI // "THMULTI" instantiates the C910 DivSqrt unit supports FP64, FP32, FP16 and SIMD operations
THMULTI // "THMULTI" instantiates the C910 DivSqrt unit supports FP64, FP32, FP16, FP16ALT and SIMD operations
} divsqrt_unit_t;

// -------------------
Expand Down Expand Up @@ -454,7 +454,7 @@ package fpnew_pkg;
// Returns the maximum number of lanes in the FPU according to width, format config and vectors
function automatic int unsigned num_divsqrt_lanes(int unsigned width, fmt_logic_t cfg, logic vec, divsqrt_unit_t DivSqrtSel);
automatic fmt_logic_t cfg_tmp;
cfg_tmp = (DivSqrtSel == THMULTI) ? cfg & 6'b111000 : cfg;
cfg_tmp = (DivSqrtSel == THMULTI) ? cfg & 6'b111010 : cfg;
return vec ? width / min_fp_width(cfg_tmp) : 1; // if no vectors, only one lane
endfunction

Expand Down
2 changes: 2 additions & 0 deletions vendor/openc910.vendor.hjson
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
rev: "e0c4ad8ec7f8c70f649d826ebd6c949086453272"
}

patch_dir: "patches/openc910"

exclude_from_upstream: [
"doc",
"smart_run",
Expand Down
21 changes: 18 additions & 3 deletions vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_ctrl.v
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ module ct_vfdsu_ctrl(
ex1_double,
ex1_pipedown,
ex1_single,
ex1_half,
ex1_bfloat,
ex2_data_clk,
ex2_pipedown,
ex2_srt_first_round,
Expand All @@ -43,6 +45,8 @@ module ct_vfdsu_ctrl(
vfdsu_dp_inst_wb_req,
vfdsu_ex2_double,
vfdsu_ex2_single,
vfdsu_ex2_half,
vfdsu_ex2_bfloat,
vfdsu_ifu_debug_ex2_wait,
vfdsu_ifu_debug_idle,
vfdsu_ifu_debug_pipe_busy
Expand All @@ -57,13 +61,17 @@ input dp_vfdsu_fdiv_gateclk_issue;
input dp_vfdsu_idu_fdiv_issue;
input ex1_double;
input ex1_single;
input ex1_half;
input ex1_bfloat;
input forever_cpuclk;
input pad_yy_icg_scan_en;
input rtu_yy_xx_flush;
input srt_ctrl_rem_zero;
input srt_ctrl_skip_srt;
input vfdsu_ex2_double;
input vfdsu_ex2_single;
input vfdsu_ex2_half;
input vfdsu_ex2_bfloat;
output ex1_data_clk;
output ex1_pipedown;
output ex2_data_clk;
Expand Down Expand Up @@ -106,6 +114,8 @@ wire ex1_data_clk_en;
wire ex1_double;
wire ex1_pipedown;
wire ex1_single;
wire ex1_half;
wire ex1_bfloat;
wire ex2_data_clk;
wire ex2_data_clk_en;
wire ex2_pipe_clk;
Expand Down Expand Up @@ -137,6 +147,8 @@ wire vfdsu_dp_fdiv_busy;
wire vfdsu_dp_inst_wb_req;
wire vfdsu_ex2_double;
wire vfdsu_ex2_single;
wire vfdsu_ex2_half;
wire vfdsu_ex2_bfloat;
wire vfdsu_ex2_vld;
wire vfdsu_ifu_debug_ex2_wait;
wire vfdsu_ifu_debug_idle;
Expand Down Expand Up @@ -244,8 +256,9 @@ end
//For Double, initial is 5'b11100('d28), calculate 29 round
//For Single, initial is 5'b01110('d14), calculate 15 round
assign srt_cnt_ini[4:0] = (ex1_double) ? 5'b01101 :
ex1_single ? 5'b00110
: 5'b00011;
(ex1_single) ? 5'b00110 :
(ex1_half) ? 5'b00011
: 5'b00010;

//vfdsu ex2 pipedown signal
assign ex2_pipedown = srt_last_round && div_st_ex2;
Expand Down Expand Up @@ -277,7 +290,9 @@ assign srt_secd_round = ex2_srt_secd_round;

assign ex2_srt_secd_round_pre = srt_sm_on && srt_secd_round_pre;
assign srt_secd_round_pre = vfdsu_ex2_double ? srt_cnt[4:0]==5'b01101 :
vfdsu_ex2_single ? srt_cnt[4:0]==5'b00110 : srt_cnt[4:0] == 5'b00011;
vfdsu_ex2_single ? srt_cnt[4:0]==5'b00110 :
vfdsu_ex2_half ? srt_cnt[4:0]==5'b00011
: srt_cnt[4:0]==5'b00010;

//==========================================================
// EX3 Stage Control Signal
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ module ct_vfdsu_double(
ex1_pipedown,
ex1_scalar,
ex1_single,
ex1_half,
ex1_bfloat,
ex1_sqrt,
ex1_src0,
ex1_src1,
Expand Down Expand Up @@ -52,6 +54,8 @@ input ex1_double;
input ex1_pipedown;
input ex1_scalar;
input ex1_single;
input ex1_half;
input ex1_bfloat;
input ex1_sqrt;
input [63:0] ex1_src0;
input [63:0] ex1_src1;
Expand Down Expand Up @@ -83,6 +87,8 @@ wire ex1_pipedown;
wire [59:0] ex1_remainder;
wire ex1_scalar;
wire ex1_single;
wire ex1_half;
wire ex1_bfloat;
wire ex1_sqrt;
wire [63:0] ex1_src0;
wire [63:0] ex1_src1;
Expand Down Expand Up @@ -116,12 +122,15 @@ wire vfdsu_ex2_result_sign;
wire vfdsu_ex2_result_zero;
wire [2 :0] vfdsu_ex2_rm;
wire vfdsu_ex2_single;
wire vfdsu_ex2_half;
wire vfdsu_ex2_bfloat;
wire vfdsu_ex2_sqrt;
wire vfdsu_ex2_srt_skip;
wire [12:0] vfdsu_ex3_doub_expnt_rst;
wire vfdsu_ex3_double;
wire vfdsu_ex3_dz;
wire [12:0] vfdsu_ex3_half_expnt_rst;
wire [12:0] vfdsu_ex3_bfloat_expnt_rst;
wire vfdsu_ex3_id_srt_skip;
wire vfdsu_ex3_nv;
wire vfdsu_ex3_of;
Expand All @@ -141,6 +150,8 @@ wire [2 :0] vfdsu_ex3_rm;
wire vfdsu_ex3_rslt_denorm;
wire [8 :0] vfdsu_ex3_sing_expnt_rst;
wire vfdsu_ex3_single;
wire vfdsu_ex3_half;
wire vfdsu_ex3_bfloat;
wire vfdsu_ex3_uf;
wire vfdsu_ex4_denorm_to_tiny_frac;
wire vfdsu_ex4_double;
Expand All @@ -164,6 +175,8 @@ wire vfdsu_ex4_result_sign;
wire vfdsu_ex4_result_zero;
wire vfdsu_ex4_rslt_denorm;
wire vfdsu_ex4_single;
wire vfdsu_ex4_half;
wire vfdsu_ex4_bfloat;
wire vfdsu_ex4_uf;
wire vfpu_yy_xx_dqnan;
wire [2 :0] vfpu_yy_xx_rm;
Expand All @@ -181,6 +194,8 @@ ct_vfdsu_prepare x_ct_vfdsu_prepare (
.ex1_remainder (ex1_remainder ),
.ex1_scalar (ex1_scalar ),
.ex1_single (ex1_single ),
.ex1_half (ex1_half ),
.ex1_bfloat (ex1_bfloat ),
.ex1_sqrt (ex1_sqrt ),
.ex1_src0 (ex1_src0 ),
.ex1_src1 (ex1_src1 ),
Expand All @@ -204,6 +219,8 @@ ct_vfdsu_prepare x_ct_vfdsu_prepare (
.vfdsu_ex2_result_zero (vfdsu_ex2_result_zero),
.vfdsu_ex2_rm (vfdsu_ex2_rm ),
.vfdsu_ex2_single (vfdsu_ex2_single ),
.vfdsu_ex2_half (vfdsu_ex2_half ),
.vfdsu_ex2_bfloat (vfdsu_ex2_bfloat ),
.vfdsu_ex2_sqrt (vfdsu_ex2_sqrt ),
.vfdsu_ex2_srt_skip (vfdsu_ex2_srt_skip ),
.vfpu_yy_xx_dqnan (vfpu_yy_xx_dqnan ),
Expand Down Expand Up @@ -246,12 +263,15 @@ ct_vfdsu_srt x_ct_vfdsu_srt (
.vfdsu_ex2_result_zero (vfdsu_ex2_result_zero ),
.vfdsu_ex2_rm (vfdsu_ex2_rm ),
.vfdsu_ex2_single (vfdsu_ex2_single ),
.vfdsu_ex2_half (vfdsu_ex2_half ),
.vfdsu_ex2_bfloat (vfdsu_ex2_bfloat ),
.vfdsu_ex2_sqrt (vfdsu_ex2_sqrt ),
.vfdsu_ex2_srt_skip (vfdsu_ex2_srt_skip ),
.vfdsu_ex3_doub_expnt_rst (vfdsu_ex3_doub_expnt_rst ),
.vfdsu_ex3_double (vfdsu_ex3_double ),
.vfdsu_ex3_dz (vfdsu_ex3_dz ),
.vfdsu_ex3_half_expnt_rst (vfdsu_ex3_half_expnt_rst ),
.vfdsu_ex3_bfloat_expnt_rst (vfdsu_ex3_bfloat_expnt_rst ),
.vfdsu_ex3_id_srt_skip (vfdsu_ex3_id_srt_skip ),
.vfdsu_ex3_nv (vfdsu_ex3_nv ),
.vfdsu_ex3_of (vfdsu_ex3_of ),
Expand All @@ -271,6 +291,8 @@ ct_vfdsu_srt x_ct_vfdsu_srt (
.vfdsu_ex3_rslt_denorm (vfdsu_ex3_rslt_denorm ),
.vfdsu_ex3_sing_expnt_rst (vfdsu_ex3_sing_expnt_rst ),
.vfdsu_ex3_single (vfdsu_ex3_single ),
.vfdsu_ex3_half (vfdsu_ex3_half ),
.vfdsu_ex3_bfloat (vfdsu_ex3_bfloat ),
.vfdsu_ex3_uf (vfdsu_ex3_uf )
);

Expand All @@ -288,6 +310,7 @@ ct_vfdsu_round x_ct_vfdsu_round (
.vfdsu_ex3_double (vfdsu_ex3_double ),
.vfdsu_ex3_dz (vfdsu_ex3_dz ),
.vfdsu_ex3_half_expnt_rst (vfdsu_ex3_half_expnt_rst ),
.vfdsu_ex3_bfloat_expnt_rst (vfdsu_ex3_bfloat_expnt_rst ),
.vfdsu_ex3_id_srt_skip (vfdsu_ex3_id_srt_skip ),
.vfdsu_ex3_nv (vfdsu_ex3_nv ),
.vfdsu_ex3_of (vfdsu_ex3_of ),
Expand All @@ -307,6 +330,8 @@ ct_vfdsu_round x_ct_vfdsu_round (
.vfdsu_ex3_rslt_denorm (vfdsu_ex3_rslt_denorm ),
.vfdsu_ex3_sing_expnt_rst (vfdsu_ex3_sing_expnt_rst ),
.vfdsu_ex3_single (vfdsu_ex3_single ),
.vfdsu_ex3_half (vfdsu_ex3_half ),
.vfdsu_ex3_bfloat (vfdsu_ex3_bfloat ),
.vfdsu_ex3_uf (vfdsu_ex3_uf ),
.vfdsu_ex4_denorm_to_tiny_frac (vfdsu_ex4_denorm_to_tiny_frac ),
.vfdsu_ex4_double (vfdsu_ex4_double ),
Expand All @@ -330,6 +355,8 @@ ct_vfdsu_round x_ct_vfdsu_round (
.vfdsu_ex4_result_zero (vfdsu_ex4_result_zero ),
.vfdsu_ex4_rslt_denorm (vfdsu_ex4_rslt_denorm ),
.vfdsu_ex4_single (vfdsu_ex4_single ),
.vfdsu_ex4_half (vfdsu_ex4_half ),
.vfdsu_ex4_bfloat (vfdsu_ex4_bfloat ),
.vfdsu_ex4_uf (vfdsu_ex4_uf )
);

Expand Down Expand Up @@ -359,6 +386,8 @@ ct_vfdsu_pack x_ct_vfdsu_pack (
.vfdsu_ex4_result_zero (vfdsu_ex4_result_zero ),
.vfdsu_ex4_rslt_denorm (vfdsu_ex4_rslt_denorm ),
.vfdsu_ex4_single (vfdsu_ex4_single ),
.vfdsu_ex4_half (vfdsu_ex4_half ),
.vfdsu_ex4_bfloat (vfdsu_ex4_bfloat ),
.vfdsu_ex4_uf (vfdsu_ex4_uf )
);

Expand Down
Loading

0 comments on commit de4f932

Please sign in to comment.