-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Add s_setprio_inc_wg gfx1250 instruction #145152
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/rampitec/06-20-_amdgpu_rename_call_instructions_from_b64_to_i64
Are you sure you want to change the base?
Conversation
Warning This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-mc @llvm/pr-subscribers-backend-amdgpu Author: Stanislav Mekhanoshin (rampitec) Changes: Full diff: https://github.com/llvm/llvm-project/pull/145152.diff 12 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 802b4be42419d..edb3a17ac07c6 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -636,5 +636,11 @@ TARGET_BUILTIN(__builtin_amdgcn_bitop3_b16, "ssssIUi", "nc", "bitop3-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf16_f32, "V2yV2yfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts")
+//===----------------------------------------------------------------------===//
+// GFX1250+ only builtins.
+//===----------------------------------------------------------------------===//
+
+TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst")
+
#undef BUILTIN
#undef TARGET_BUILTIN
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-err.cl
new file mode 100644
index 0000000000000..a4afd4a02f394
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250-err.cl
@@ -0,0 +1,7 @@
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -cl-std=CL2.0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -verify -emit-llvm -o - %s
+
+void test_setprio_inc_wg(short a) {
+ __builtin_amdgcn_s_setprio_inc_wg(a); // expected-error {{'__builtin_amdgcn_s_setprio_inc_wg' must be a constant integer}}
+}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
new file mode 100644
index 0000000000000..3709b1ff52f35
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl
@@ -0,0 +1,12 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s
+// REQUIRES: amdgpu-registered-target
+
+// CHECK-LABEL: @test_setprio_inc_wg(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: call void @llvm.amdgcn.s.setprio.inc.wg(i16 10)
+// CHECK-NEXT: ret void
+//
+void test_setprio_inc_wg() {
+ __builtin_amdgcn_s_setprio_inc_wg(10);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 412993755dac8..51dfe53aa00ec 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2054,6 +2054,11 @@ def int_amdgcn_s_setprio :
DefaultAttrsIntrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
IntrHasSideEffects]>;
+def int_amdgcn_s_setprio_inc_wg :
+ ClangBuiltin<"__builtin_amdgcn_s_setprio_inc_wg">,
+ DefaultAttrsIntrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
+ IntrHasSideEffects]>;
+
def int_amdgcn_s_ttracedata :
ClangBuiltin<"__builtin_amdgcn_s_ttracedata">,
DefaultAttrsIntrinsic<[], [llvm_i32_ty],
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index ab83cf9e7395a..9c27fa0c5d151 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1118,6 +1118,12 @@ def FeatureWaitXcnt : SubtargetFeature<"wait-xcnt",
"Has s_wait_xcnt instruction"
>;
+def FeatureSetPrioIncWgInst : SubtargetFeature<"setprio-inc-wg-inst",
+ "HasSetPrioIncWgInst",
+ "true",
+ "Has s_setprio_inc_wg instruction."
+>;
+
//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
@@ -1940,6 +1946,7 @@ def FeatureISAVersion12_50 : FeatureSet<
FeatureMemoryAtomicFAddF32DenormalSupport,
FeatureKernargPreload,
FeatureLshlAddU64Inst,
+ FeatureSetPrioIncWgInst,
]>;
def FeatureISAVersion12_Generic: FeatureSet<
@@ -2662,6 +2669,9 @@ def HasAshrPkInsts : Predicate<"Subtarget->hasAshrPkInsts()">,
def HasLshlAddU64Inst : Predicate<"Subtarget->hasLshlAddU64Inst()">,
AssemblerPredicate<(all_of FeatureLshlAddU64Inst)>;
+def HasSetPrioIncWgInst : Predicate<"Subtarget->hasSetPrioIncWgInst()">,
+ AssemblerPredicate<(all_of FeatureSetPrioIncWgInst)>;
+
// Include AMDGPU TD files
include "SISchedule.td"
include "GCNProcessors.td"
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 4ec60dc2752e4..fce46a6f72247 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -262,6 +262,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasMinimum3Maximum3PKF16 = false;
bool HasLshlAddU64Inst = false;
bool HasPointSampleAccel = false;
+ bool HasSetPrioIncWgInst = false;
bool RequiresCOV6 = false;
bool UseBlockVGPROpsForCSR = false;
@@ -1465,6 +1466,11 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// values.
bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; }
+ bool hasGFX1250Insts() const { return GFX1250Insts; }
+
+ // \returns true if target has S_SETPRIO_INC_WG instruction.
+ bool hasSetPrioIncWgInst() const { return HasSetPrioIncWgInst; }
+
// \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead
// of sign-extending.
bool hasGetPCZeroExtension() const { return GFX12Insts; }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index a538ec9df6f03..333e91bf37df5 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -9575,7 +9575,8 @@ static unsigned subtargetEncodingFamily(const GCNSubtarget &ST) {
case AMDGPUSubtarget::GFX11:
return SIEncodingFamily::GFX11;
case AMDGPUSubtarget::GFX12:
- return SIEncodingFamily::GFX12;
+ return ST.hasGFX1250Insts() ? SIEncodingFamily::GFX1250
+ : SIEncodingFamily::GFX12;
}
llvm_unreachable("Unknown subtarget generation!");
}
@@ -9669,6 +9670,9 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
+ if (MCOp == (uint16_t)-1 && ST.hasGFX1250Insts())
+ MCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX12);
+
// -1 means that Opcode is already a native instruction.
if (MCOp == -1)
return Opcode;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 53c0635f02bf2..6d6c2af7ce490 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -3069,7 +3069,8 @@ def getMCOpcodeGen : InstrMapping {
[!cast<string>(SIEncodingFamily.GFX90A)],
[!cast<string>(SIEncodingFamily.GFX940)],
[!cast<string>(SIEncodingFamily.GFX11)],
- [!cast<string>(SIEncodingFamily.GFX12)]];
+ [!cast<string>(SIEncodingFamily.GFX12)],
+ [!cast<string>(SIEncodingFamily.GFX1250)]];
}
// Get equivalent SOPK instruction.
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 8c739c2760b17..376c6eb135b1e 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1632,6 +1632,11 @@ def S_SETPRIO : SOPP_Pseudo <"s_setprio", (ins i16imm:$simm16), "$simm16",
[(int_amdgcn_s_setprio timm:$simm16)]> {
}
+def S_SETPRIO_INC_WG : SOPP_Pseudo <"s_setprio_inc_wg", (ins i16imm:$simm16), "$simm16",
+ [(int_amdgcn_s_setprio_inc_wg timm:$simm16)]> {
+ let SubtargetPredicate = HasSetPrioIncWgInst;
+}
+
let Uses = [EXEC, M0] in {
def S_SENDMSG : SOPP_Pseudo <"s_sendmsg" , (ins SendMsg:$simm16), "$simm16",
[(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]> {
@@ -2594,6 +2599,7 @@ defm S_WAIT_STORECNT_DSCNT : SOPP_Real_32_gfx12<0x049>;
//===----------------------------------------------------------------------===//
// SOPP - GFX1250 only.
//===----------------------------------------------------------------------===//
+defm S_SETPRIO_INC_WG : SOPP_Real_32_gfx12<0x03e>;
defm S_WAIT_XCNT : SOPP_Real_32_gfx12<0x045>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setprio.inc.wg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setprio.inc.wg.ll
new file mode 100644
index 0000000000000..caaedae8f2b07
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setprio.inc.wg.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1250 -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=GFX1250 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1250 -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=GFX1250 %s
+
+declare void @llvm.amdgcn.s.setprio.inc.wg(i16) #0
+
+define void @test_llvm.amdgcn.s.setprio.inc.wg() #0 {
+; GFX1250-LABEL: test_llvm.amdgcn.s.setprio.inc.wg:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
+; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
+; GFX1250-NEXT: s_setprio_inc_wg 0 ; encoding: [0x00,0x00,0xbe,0xbf]
+; GFX1250-NEXT: s_setprio_inc_wg 1 ; encoding: [0x01,0x00,0xbe,0xbf]
+; GFX1250-NEXT: s_setprio_inc_wg 2 ; encoding: [0x02,0x00,0xbe,0xbf]
+; GFX1250-NEXT: s_setprio_inc_wg 3 ; encoding: [0x03,0x00,0xbe,0xbf]
+; GFX1250-NEXT: s_setprio_inc_wg 10 ; encoding: [0x0a,0x00,0xbe,0xbf]
+; GFX1250-NEXT: s_setprio_inc_wg -1 ; encoding: [0xff,0xff,0xbe,0xbf]
+; GFX1250-NEXT: s_setprio_inc_wg 0 ; encoding: [0x00,0x00,0xbe,0xbf]
+; GFX1250-NEXT: s_setprio_inc_wg 1 ; encoding: [0x01,0x00,0xbe,0xbf]
+; GFX1250-NEXT: s_setprio_inc_wg -1 ; encoding: [0xff,0xff,0xbe,0xbf]
+; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
+ call void @llvm.amdgcn.s.setprio.inc.wg(i16 0)
+ call void @llvm.amdgcn.s.setprio.inc.wg(i16 1)
+ call void @llvm.amdgcn.s.setprio.inc.wg(i16 2)
+ call void @llvm.amdgcn.s.setprio.inc.wg(i16 3)
+ call void @llvm.amdgcn.s.setprio.inc.wg(i16 10)
+ call void @llvm.amdgcn.s.setprio.inc.wg(i16 65535)
+ call void @llvm.amdgcn.s.setprio.inc.wg(i16 65536)
+ call void @llvm.amdgcn.s.setprio.inc.wg(i16 65537)
+ call void @llvm.amdgcn.s.setprio.inc.wg(i16 -1)
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s b/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s
index 1aca88771c1f9..48ec44b410c2c 100644
--- a/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s
+++ b/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s
@@ -12,3 +12,7 @@ s_wait_xcnt 0x7
s_wait_xcnt 0xf
// GFX1250: [0x0f,0x00,0xc5,0xbf]
// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
+
+s_setprio_inc_wg 100
+// GFX1250: [0x64,0x00,0xbe,0xbf]
+// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt
index e785fe9cc6d58..55f74d3a31bf7 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt
@@ -8,3 +8,6 @@
# GFX1250: s_wait_xcnt 0xf ; encoding: [0x0f,0x00,0xc5,0xbf]
0x0f,0x00,0xc5,0xbf
+
+# GFX1250: s_setprio_inc_wg 0x64 ; encoding: [0x64,0x00,0xbe,0xbf]
+0x64,0x00,0xbe,0xbf
|
This is actually the first codegen test for the subtarget. It mainly checks that the function prologue and epilogue are as expected by the tests. |
@@ -0,0 +1,34 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | |||
; RUN: llc -march=amdgcn -mcpu=gfx1250 -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=GFX1250 %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it is recommended to use `-mtriple=amdgcn-amd-amdhsa` instead of `-march`. There was a bulk update upstream in the past.
@@ -9669,6 +9670,9 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const { | |||
|
|||
int MCOp = AMDGPU::getMCOpcode(Opcode, Gen); | |||
|
|||
if (MCOp == (uint16_t)-1 && ST.hasGFX1250Insts()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So we want a `0xffff` instead of a `0xffffffff` here? Why is that?
No description provided.