@@ -970,11 +970,16 @@ class VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<VOPProfile P> : VOP3_Profil
970
970
let HasOMod = 0;
971
971
}
972
972
973
+ class VOP3_CVT_SCALE_FP4_F32_TiedInput_Profile<VOPProfile P> : VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<P> {
974
+ let HasFP8DstByteSel = 1;
975
+ }
976
+
973
977
class VOP3_CVT_SCALE_SR_F8BF8_F16BF16F32_TiedInput_Profile<VOPProfile P> : VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile<P> {
974
978
let InsVOP3OpSel = (ins FP32InputMods:$src0_modifiers, Src0RC64:$src0,
975
979
Int32InputMods:$src1_modifiers, Src1RC64:$src1,
976
980
FP32InputMods:$src2_modifiers, Src2RC64:$src2,
977
981
VGPR_32:$vdst_in, op_sel0:$op_sel);
982
+ let HasFP8DstByteSel = 1;
978
983
}
979
984
980
985
@@ -992,6 +997,7 @@ class VOP3_CVT_SCALE_FP4_F16BF16_TiedInput_Profile<VOPProfile P> : VOP3_Profile<
992
997
HasSrc0FloatMods, HasSrc1FloatMods,
993
998
HasSrc2FloatMods>.ret);
994
999
let HasExtVOP3DPP = 0;
1000
+ let HasFP8DstByteSel = 1;
995
1001
}
996
1002
997
1003
class VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<ValueType Src0Ty> :
@@ -1004,6 +1010,7 @@ class VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<ValueType Src0Ty> :
1004
1010
let HasExtVOP3DPP = 0;
1005
1011
let HasOpSel = 1;
1006
1012
let HasOMod = 0;
1013
+ let HasFP8DstByteSel = 1;
1007
1014
}
1008
1015
1009
1016
def VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile : VOP3_Profile<VOPProfile<[i32, v2f32, i32, f32]>, VOP3_OPSEL> {
@@ -1015,6 +1022,7 @@ def VOP3_CVT_SCALE_SR_PK_F4_F32_TiedInput_Profile : VOP3_Profile<VOPProfile<[i32
1015
1022
let HasExtVOP3DPP = 0;
1016
1023
let HasOpSel = 1;
1017
1024
let HasOMod = 0;
1025
+ let HasFP8DstByteSel = 1;
1018
1026
}
1019
1027
1020
1028
class VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<ValueType DstTy> : VOP3_Profile<VOPProfile<[DstTy, i32, f32, untyped]>,
@@ -1090,7 +1098,7 @@ let SubtargetPredicate = HasBF8ConversionScaleInsts, mayRaiseFPException = 0 in
1090
1098
let SubtargetPredicate = HasFP4ConversionScaleInsts, mayRaiseFPException = 0 in {
1091
1099
defm V_CVT_SCALEF32_PK_F32_FP4 : VOP3Inst<"v_cvt_scalef32_pk_f32_fp4", VOP3_CVT_SCALE_PK_F16BF16F32_FP4FP8BF8_Profile<v2f32>>;
1092
1100
let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
1093
- defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp4_f32", VOP3_CVT_SCALE_FP4FP8BF8_F32_TiedInput_Profile <VOP_I32_F32_F32_F32>>;
1101
+ defm V_CVT_SCALEF32_PK_FP4_F32 : VOP3Inst<"v_cvt_scalef32_pk_fp4_f32", VOP3_CVT_SCALE_FP4_F32_TiedInput_Profile <VOP_I32_F32_F32_F32>>;
1094
1102
let Constraints = "@earlyclobber $vdst" in {
1095
1103
defm V_CVT_SCALEF32_SR_PK_FP4_F16: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_f16", VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<v2f16>>;
1096
1104
defm V_CVT_SCALEF32_SR_PK_FP4_BF16: VOP3Inst<"v_cvt_scalef32_sr_pk_fp4_bf16", VOP3_CVT_SCALE_SR_PK_F4_F16BF16_TiedInput_Profile<v2bf16>>;
@@ -2047,6 +2055,7 @@ multiclass VOP3_Real_BITOP3_gfx9<bits<10> op, string AsmName, bit isSingle = 0>
2047
2055
}
2048
2056
}
2049
2057
}
2058
+
2050
2059
} // End AssemblerPredicate = isGFX9Only, DecoderNamespace = "GFX9"
2051
2060
2052
2061
defm V_MAD_U64_U32 : VOP3be_Real_vi <0x1E8>;
0 commit comments