diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index 802b4be42419d..edb3a17ac07c6 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -636,5 +636,11 @@ TARGET_BUILTIN(__builtin_amdgcn_bitop3_b16, "ssssIUi", "nc", "bitop3-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf16_f32, "V2yV2yfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts") TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts") +//===----------------------------------------------------------------------===// +// GFX1250+ only builtins. +//===----------------------------------------------------------------------===// + +TARGET_BUILTIN(__builtin_amdgcn_s_setprio_inc_wg, "vIs", "n", "setprio-inc-wg-inst") + #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl new file mode 100644 index 0000000000000..3709b1ff52f35 --- /dev/null +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl @@ -0,0 +1,12 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx1250 -emit-llvm -o - %s | FileCheck %s +// REQUIRES: amdgpu-registered-target + +// CHECK-LABEL: @test_setprio_inc_wg( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @llvm.amdgcn.s.setprio.inc.wg(i16 10) +// CHECK-NEXT: ret void +// +void test_setprio_inc_wg() { + __builtin_amdgcn_s_setprio_inc_wg(10); +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl new file mode 100644 index 0000000000000..b69fcb5f445bc --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250-param.cl @@ -0,0 +1,6 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1250 -verify -S 
-o - %s + +void test_setprio_inc_wg(short a) { + __builtin_amdgcn_s_setprio_inc_wg(a); // expected-error {{'__builtin_amdgcn_s_setprio_inc_wg' must be a constant integer}} +} diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250.cl new file mode 100644 index 0000000000000..c5440ed1a75ae --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-gfx1250.cl @@ -0,0 +1,6 @@ +// REQUIRES: amdgpu-registered-target +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1200 -verify -S -o - %s + +void test() { + __builtin_amdgcn_s_setprio_inc_wg(1); // expected-error {{'__builtin_amdgcn_s_setprio_inc_wg' needs target feature setprio-inc-wg-inst}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 412993755dac8..51dfe53aa00ec 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2054,6 +2054,11 @@ def int_amdgcn_s_setprio : DefaultAttrsIntrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>; +def int_amdgcn_s_setprio_inc_wg : + ClangBuiltin<"__builtin_amdgcn_s_setprio_inc_wg">, + DefaultAttrsIntrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, + IntrHasSideEffects]>; + def int_amdgcn_s_ttracedata : ClangBuiltin<"__builtin_amdgcn_s_ttracedata">, DefaultAttrsIntrinsic<[], [llvm_i32_ty], diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index ab83cf9e7395a..9c27fa0c5d151 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -1118,6 +1118,12 @@ def FeatureWaitXcnt : SubtargetFeature<"wait-xcnt", "Has s_wait_xcnt instruction" >; +def FeatureSetPrioIncWgInst : SubtargetFeature<"setprio-inc-wg-inst", + "HasSetPrioIncWgInst", + "true", + "Has s_setprio_inc_wg instruction." 
+>; + //===------------------------------------------------------------===// // Subtarget Features (options and debugging) //===------------------------------------------------------------===// @@ -1940,6 +1946,7 @@ def FeatureISAVersion12_50 : FeatureSet< FeatureMemoryAtomicFAddF32DenormalSupport, FeatureKernargPreload, FeatureLshlAddU64Inst, + FeatureSetPrioIncWgInst, ]>; def FeatureISAVersion12_Generic: FeatureSet< @@ -2662,6 +2669,9 @@ def HasAshrPkInsts : Predicate<"Subtarget->hasAshrPkInsts()">, def HasLshlAddU64Inst : Predicate<"Subtarget->hasLshlAddU64Inst()">, AssemblerPredicate<(all_of FeatureLshlAddU64Inst)>; +def HasSetPrioIncWgInst : Predicate<"Subtarget->hasSetPrioIncWgInst()">, + AssemblerPredicate<(all_of FeatureSetPrioIncWgInst)>; + // Include AMDGPU TD files include "SISchedule.td" include "GCNProcessors.td" diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 4ec60dc2752e4..fce46a6f72247 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -262,6 +262,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, bool HasMinimum3Maximum3PKF16 = false; bool HasLshlAddU64Inst = false; bool HasPointSampleAccel = false; + bool HasSetPrioIncWgInst = false; bool RequiresCOV6 = false; bool UseBlockVGPROpsForCSR = false; @@ -1465,6 +1466,11 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, /// values. bool hasSignedScratchOffsets() const { return getGeneration() >= GFX12; } + bool hasGFX1250Insts() const { return GFX1250Insts; } + + // \returns true if target has S_SETPRIO_INC_WG instruction. + bool hasSetPrioIncWgInst() const { return HasSetPrioIncWgInst; } + // \returns true if S_GETPC_B64 zero-extends the result from 48 bits instead // of sign-extending. 
bool hasGetPCZeroExtension() const { return GFX12Insts; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index a538ec9df6f03..333e91bf37df5 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -9575,7 +9575,8 @@ static unsigned subtargetEncodingFamily(const GCNSubtarget &ST) { case AMDGPUSubtarget::GFX11: return SIEncodingFamily::GFX11; case AMDGPUSubtarget::GFX12: - return SIEncodingFamily::GFX12; + return ST.hasGFX1250Insts() ? SIEncodingFamily::GFX1250 + : SIEncodingFamily::GFX12; } llvm_unreachable("Unknown subtarget generation!"); } @@ -9669,6 +9670,9 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const { int MCOp = AMDGPU::getMCOpcode(Opcode, Gen); + if (MCOp == (uint16_t)-1 && ST.hasGFX1250Insts()) + MCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX12); + // -1 means that Opcode is already a native instruction. if (MCOp == -1) return Opcode; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 53c0635f02bf2..6d6c2af7ce490 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -3069,7 +3069,8 @@ def getMCOpcodeGen : InstrMapping { [!cast<string>(SIEncodingFamily.GFX90A)], [!cast<string>(SIEncodingFamily.GFX940)], [!cast<string>(SIEncodingFamily.GFX11)], - [!cast<string>(SIEncodingFamily.GFX12)]]; + [!cast<string>(SIEncodingFamily.GFX12)], + [!cast<string>(SIEncodingFamily.GFX1250)]]; } // Get equivalent SOPK instruction. 
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 8c739c2760b17..376c6eb135b1e 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -1632,6 +1632,11 @@ def S_SETPRIO : SOPP_Pseudo <"s_setprio", (ins i16imm:$simm16), "$simm16", [(int_amdgcn_s_setprio timm:$simm16)]> { } +def S_SETPRIO_INC_WG : SOPP_Pseudo <"s_setprio_inc_wg", (ins i16imm:$simm16), "$simm16", + [(int_amdgcn_s_setprio_inc_wg timm:$simm16)]> { + let SubtargetPredicate = HasSetPrioIncWgInst; +} + let Uses = [EXEC, M0] in { def S_SENDMSG : SOPP_Pseudo <"s_sendmsg" , (ins SendMsg:$simm16), "$simm16", [(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]> { @@ -2594,6 +2599,7 @@ defm S_WAIT_STORECNT_DSCNT : SOPP_Real_32_gfx12<0x049>; //===----------------------------------------------------------------------===// // SOPP - GFX1250 only. //===----------------------------------------------------------------------===// +defm S_SETPRIO_INC_WG : SOPP_Real_32_gfx12<0x03e>; defm S_WAIT_XCNT : SOPP_Real_32_gfx12<0x045>; //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setprio.inc.wg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setprio.inc.wg.ll new file mode 100644 index 0000000000000..54d996677d31b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setprio.inc.wg.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -show-mc-encoding < %s | FileCheck -check-prefix=GFX1250 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -show-mc-encoding < %s | FileCheck -check-prefix=GFX1250 %s + +declare void @llvm.amdgcn.s.setprio.inc.wg(i16) #0 + +define void @test_llvm.amdgcn.s.setprio.inc.wg() #0 { +; GFX1250-LABEL: test_llvm.amdgcn.s.setprio.inc.wg: +; GFX1250: ; %bb.0: +; GFX1250-NEXT: 
s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] +; GFX1250-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] +; GFX1250-NEXT: s_setprio_inc_wg 0 ; encoding: [0x00,0x00,0xbe,0xbf] +; GFX1250-NEXT: s_setprio_inc_wg 1 ; encoding: [0x01,0x00,0xbe,0xbf] +; GFX1250-NEXT: s_setprio_inc_wg 2 ; encoding: [0x02,0x00,0xbe,0xbf] +; GFX1250-NEXT: s_setprio_inc_wg 3 ; encoding: [0x03,0x00,0xbe,0xbf] +; GFX1250-NEXT: s_setprio_inc_wg 10 ; encoding: [0x0a,0x00,0xbe,0xbf] +; GFX1250-NEXT: s_setprio_inc_wg -1 ; encoding: [0xff,0xff,0xbe,0xbf] +; GFX1250-NEXT: s_setprio_inc_wg 0 ; encoding: [0x00,0x00,0xbe,0xbf] +; GFX1250-NEXT: s_setprio_inc_wg 1 ; encoding: [0x01,0x00,0xbe,0xbf] +; GFX1250-NEXT: s_setprio_inc_wg -1 ; encoding: [0xff,0xff,0xbe,0xbf] +; GFX1250-NEXT: s_set_pc_i64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] + call void @llvm.amdgcn.s.setprio.inc.wg(i16 0) + call void @llvm.amdgcn.s.setprio.inc.wg(i16 1) + call void @llvm.amdgcn.s.setprio.inc.wg(i16 2) + call void @llvm.amdgcn.s.setprio.inc.wg(i16 3) + call void @llvm.amdgcn.s.setprio.inc.wg(i16 10) + call void @llvm.amdgcn.s.setprio.inc.wg(i16 65535) + call void @llvm.amdgcn.s.setprio.inc.wg(i16 65536) + call void @llvm.amdgcn.s.setprio.inc.wg(i16 65537) + call void @llvm.amdgcn.s.setprio.inc.wg(i16 -1) + ret void +} + +attributes #0 = { nounwind } diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s b/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s index 1aca88771c1f9..48ec44b410c2c 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_sopp.s @@ -12,3 +12,7 @@ s_wait_xcnt 0x7 s_wait_xcnt 0xf // GFX1250: [0x0f,0x00,0xc5,0xbf] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +s_setprio_inc_wg 100 +// GFX1250: [0x64,0x00,0xbe,0xbf] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt 
index e785fe9cc6d58..55f74d3a31bf7 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_sopp.txt @@ -8,3 +8,6 @@ # GFX1250: s_wait_xcnt 0xf ; encoding: [0x0f,0x00,0xc5,0xbf] 0x0f,0x00,0xc5,0xbf + +# GFX1250: s_setprio_inc_wg 0x64 ; encoding: [0x64,0x00,0xbe,0xbf] +0x64,0x00,0xbe,0xbf